Remove all obvious near-duplicate tests from the compatibility suite (#496)

- whitespace diffs - trivial differences (mostly bugfixes merged to master) - use of the kernel helpers to create programs Signed-off-by: Kévin Petit <kpet@free.fr>
2026-03-19 06:09:01 +00:00 · 2019-11-24 12:47:26 +00:00
parent 2dca46bc00
commit a018d6a3be
51 changed files with 1 additions and 13558 deletions
--- a/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/api/CMakeLists.txt
@@ -2,16 +2,9 @@ set(MODULE_NAME COMPATIBILITY_API)

 set(${MODULE_NAME}_SOURCES
         main.c
-         test_retain.cpp
-         test_retain_program.c
         test_queries.cpp
-         test_create_kernels.c
-         test_kernels.c
         test_api_min_max.c
-         test_binary.cpp
-         test_create_context_from_type.cpp
         test_mem_object_info.cpp
-         test_null_buffer_arg.c
         test_kernel_arg_info.c
         test_queue_properties.cpp
 )
--- a/test_conformance/compatibility/test_conformance/api/main.c
+++ b/test_conformance/compatibility/test_conformance/api/main.c
@@ -36,27 +36,9 @@ test_definition test_list[] = {
    ADD_TEST( get_command_queue_info ),
    ADD_TEST( get_context_info ),
    ADD_TEST( get_device_info ),
-    ADD_TEST( enqueue_task ),
-    ADD_TEST( binary_get ),
-    ADD_TEST( binary_create ),
    ADD_TEST( kernel_required_group_size ),

-    ADD_TEST( release_kernel_order ),
-    ADD_TEST( release_during_execute ),
-
-    ADD_TEST( load_single_kernel ),
-    ADD_TEST( load_two_kernels ),
-    ADD_TEST( load_two_kernels_in_one ),
-    ADD_TEST( load_two_kernels_manually ),
-    ADD_TEST( get_program_info_kernel_names ),
    ADD_TEST( get_kernel_arg_info ),
-    ADD_TEST( create_kernels_in_program ),
-    ADD_TEST( get_kernel_info ),
-    ADD_TEST( execute_kernel_local_sizes ),
-    ADD_TEST( set_kernel_arg_by_index ),
-    ADD_TEST( set_kernel_arg_constant ),
-    ADD_TEST( set_kernel_arg_struct_array ),
-    ADD_TEST( kernel_global_constant ),

    ADD_TEST( min_max_thread_dimensions ),
    ADD_TEST( min_max_work_items_sizes ),
@@ -86,16 +68,6 @@ test_definition test_list[] = {
    ADD_TEST( min_max_device_version ),
    ADD_TEST( min_max_language_version ),

-    ADD_TEST( create_context_from_type ),
-
-    ADD_TEST( repeated_setup_cleanup ),
-
-    ADD_TEST( retain_queue_single ),
-    ADD_TEST( retain_queue_multiple ),
-    ADD_TEST( retain_mem_object_single ),
-    ADD_TEST( retain_mem_object_multiple ),
-
-    ADD_TEST( null_buffer_arg ),
    ADD_TEST( get_buffer_info ),
    ADD_TEST( get_image2d_info ),
    ADD_TEST( get_image3d_info ),
--- a/test_conformance/compatibility/test_conformance/api/test_binary.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_binary.cpp
@@ -1,226 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-static const char *sample_binary_kernel_source[] = {
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (int)src[tid] + 1;\n"
-"\n"
-"}\n" };
-
-
-int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    size_t            binarySize;
-
-
-    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
-    test_error( error, "Unable to create program from source" );
-
-    // Build so we have a binary to get
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build test program" );
-
-    // Get the size of the resulting binary (only one device)
-    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
-    test_error( error, "Unable to get binary size" );
-
-    // Sanity check
-    if( binarySize == 0 )
-    {
-        log_error( "ERROR: Binary size of program is zero\n" );
-        return -1;
-    }
-
-    // Create a buffer and get the actual binary
-    unsigned char *binary;
-  binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
-    unsigned char *buffers[ 1 ] = { binary };
-
-    // Do another sanity check here first
-    size_t size;
-    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
-    test_error( error, "Unable to get expected size of binaries array" );
-    if( size != sizeof( buffers ) )
-    {
-        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
-        free(binary);
-    return -1;
-    }
-
-    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
-    test_error( error, "Unable to get program binary" );
-
-    // No way to verify the binary is correct, so just be good with that
-  free(binary);
-    return 0;
-}
-
-
-int test_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    /* To test this in a self-contained fashion, we have to create a program with
-   source, then get the binary, then use that binary to reload the program, and then verify */
-
-    int error;
-    clProgramWrapper program, program_from_binary;
-    size_t            binarySize;
-
-
-    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
-    test_error( error, "Unable to create program from source" );
-
-    // Build so we have a binary to get
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build test program" );
-
-    // Get the size of the resulting binary (only one device)
-    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
-    test_error( error, "Unable to get binary size" );
-
-    // Sanity check
-    if( binarySize == 0 )
-    {
-        log_error( "ERROR: Binary size of program is zero\n" );
-        return -1;
-    }
-
-    // Create a buffer and get the actual binary
-    unsigned char *binary = (unsigned char*)malloc(binarySize);
-    const unsigned char *buffers[ 1 ] = { binary };
-
-    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
-    test_error( error, "Unable to get program binary" );
-
-    cl_int loadErrors[ 1 ];
-    program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
-    test_error( error, "Unable to load valid program binary" );
-    test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );
-
-  error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
-  test_error( error, "Unable to build binary program" );
-
-    // Get the size of the binary built from the first binary
-    size_t binary2Size;
-    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL );
-    test_error( error, "Unable to get size for the binary program" );
-
-    // Now get the binary one more time and verify it loaded the right binary
-    unsigned char *binary2 = (unsigned char*)malloc(binary2Size);
-    buffers[ 0 ] = binary2;
-    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
-    test_error( error, "Unable to get program binary second time" );
-
-    // Try again, this time without passing the status ptr in, to make sure we still
-    // get a valid binary
-    clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error );
-    test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );
-
-    error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build binary program created without binary_status" );
-
-    // Get the size of the binary created without passing binary_status
-    size_t binary3Size;
-    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL );
-    test_error( error, "Unable to get size for the binary program created without binary_status" );
-
-    // Now get the binary one more time
-    unsigned char *binary3 = (unsigned char*)malloc(binary3Size);
-    buffers[ 0 ] = binary3;
-    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
-    test_error( error, "Unable to get program binary from the program created without binary_status" );
-
-    // We no longer need these intermediate binaries
-    free(binary);
-    free(binary2);
-    free(binary3);
-
-  // Now execute them both to see that they both do the same thing.
-  clMemWrapper in, out, out_binary;
-  clKernelWrapper kernel, kernel_binary;
-  cl_int *out_data, *out_data_binary;
-  cl_float *in_data;
-  size_t size_to_run = 1000;
-
-  // Allocate some data
-  in_data = (cl_float*)malloc(sizeof(cl_float)*size_to_run);
-  out_data = (cl_int*)malloc(sizeof(cl_int)*size_to_run);
-  out_data_binary = (cl_int*)malloc(sizeof(cl_int)*size_to_run);
-  memset(out_data, 0, sizeof(cl_int)*size_to_run);
-  memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
-  for (size_t i=0; i<size_to_run; i++)
-    in_data[i] = (cl_float)i;
-
-  // Create the buffers
-  in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
-  test_error( error, "clCreateBuffer failed");
-  out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
-  test_error( error, "clCreateBuffer failed");
-  out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
-  test_error( error, "clCreateBuffer failed");
-
-  // Create the kernels
-  kernel = clCreateKernel(program, "sample_test", &error);
-  test_error( error, "clCreateKernel failed");
-  kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
-  test_error( error, "clCreateKernel from binary failed");
-
-  // Set the arguments
-  error = clSetKernelArg(kernel, 0, sizeof(in), &in);
-  test_error( error, "clSetKernelArg failed");
-  error = clSetKernelArg(kernel, 1, sizeof(out), &out);
-  test_error( error, "clSetKernelArg failed");
-  error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
-  test_error( error, "clSetKernelArg failed");
-  error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
-  test_error( error, "clSetKernelArg failed");
-
-  // Execute the kernels
-  error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
-  test_error( error, "clEnqueueNDRangeKernel failed");
-  error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
-  test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");
-
-  // Finish up
-  error = clFinish(queue);
-  test_error( error, "clFinish failed");
-
-  // Get the results back
-  error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
-  test_error( error, "clEnqueueReadBuffer failed");
-  error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
-  test_error( error, "clEnqueueReadBuffer failed");
-
-  // Compare the results
-    if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
-    {
-        log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
-        return -1;
-    }
-
-    // All done!
-  free(in_data);
-  free(out_data);
-  free(out_data_binary);
-    return 0;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/api/test_create_context_from_type.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_create_context_from_type.cpp
@@ -1,130 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include "harness/testHarness.h"
-#include "harness/conversions.h"
-
-extern cl_uint gRandomSeed;
-
-int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper    streams[2];
-    clContextWrapper context_to_test;
-    clCommandQueueWrapper queue_to_test;
-    size_t    threads[1], localThreads[1];
-    cl_float inputData[10];
-    cl_int outputData[10];
-    int i;
-    RandomSeed seed( gRandomSeed );
-
-    const char *sample_single_test_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n" };
-
-    cl_device_type type;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");
-
-    cl_platform_id platform;
-    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");
-
-    cl_context_properties properties[3] = {
-      (cl_context_properties)CL_CONTEXT_PLATFORM,
-      (cl_context_properties)platform,
-      NULL
-    };
-
-    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
-    test_error(error, "clCreateContextFromType failed");
-    if (context_to_test == NULL) {
-        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.");
-        return -1;
-    }
-
-    queue_to_test = clCreateCommandQueue(context_to_test, deviceID, NULL, &error);
-    test_error(error, "clCreateCommandQueue failed");
-    if (queue_to_test == NULL) {
-        log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS.");
-        return -1;
-    }
-
-    /* Create a kernel to test with */
-    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Write some test data */
-    memset( outputData, 0, sizeof( outputData ) );
-
-    for (i=0; i<10; i++)
-        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
-
-    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
-    test_error( error, "Unable to set testing kernel data" );
-
-    /* Test setting the arguments by index manually */
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Test running the kernel and verifying it */
-    threads[0] = (size_t)10;
-
-    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<10; i++)
-    {
-        if (outputData[i] != (int)inputData[i])
-        {
-            log_error( "ERROR: Data did not verify on first pass!\n" );
-            return -1;
-        }
-    }
-
-  return 0;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/api/test_create_kernels.c
+++ b/test_conformance/compatibility/test_conformance/api/test_create_kernels.c
@@ -1,643 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/testHarness.h"
-
-
-const char *sample_single_kernel[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n" };
-
-size_t sample_single_kernel_lengths[1];
-
-const char *sample_two_kernels[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n",
-    "__kernel void sample_test2(__global int *src, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (float)src[tid];\n"
-    "\n"
-    "}\n" };
-
-size_t sample_two_kernel_lengths[2];
-
-const char *sample_two_kernels_in_1[] = {
-    "__kernel void sample_test(__global float *src, __global int *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (int)src[tid];\n"
-    "\n"
-    "}\n"
-    "__kernel void sample_test2(__global int *src, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = (float)src[tid];\n"
-    "\n"
-    "}\n" };
-
-size_t sample_two_kernels_in_1_lengths[1];
-
-
-const char *repeate_test_kernel =
-"__kernel void test_kernel(__global int *src, __global int *dst)\n"
-"{\n"
-" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
-"}\n";
-
-
-
-int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    cl_program testProgram;
-    clKernelWrapper kernel;
-    cl_context testContext;
-    unsigned int numKernels;
-    cl_char testName[512];
-    cl_uint testArgCount;
-    size_t realSize;
-
-
-    /* Preprocess: calc the length of each source file line */
-    sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] );
-
-    /* Create a program */
-    program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error );
-    if( program == NULL || error != CL_SUCCESS )
-    {
-        print_error( error, "Unable to create single kernel program" );
-        return -1;
-    }
-
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build single kernel program" );
-    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
-    test_error( error, "Unable to create single kernel program" );
-
-    /* Check program and context pointers */
-    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize );
-    test_error( error, "Unable to get kernel's program" );
-    if( (cl_program)testProgram != (cl_program)program )
-    {
-        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program );
-        return -1;
-    }
-    if( realSize != sizeof( cl_program ) )
-    {
-        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize );
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize );
-    test_error( error, "Unable to get kernel's context" );
-    if( (cl_context)testContext != (cl_context)context )
-    {
-        log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context );
-        return -1;
-    }
-    if( realSize != sizeof( cl_context ) )
-    {
-        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize );
-        return -1;
-    }
-
-    /* Test arg count */
-    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize );
-    test_error( error, "Unable to get size of arg count info from kernel" );
-
-    if( realSize != sizeof( testArgCount ) )
-    {
-        log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize );
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
-    test_error( error, "Unable to get arg count from kernel" );
-
-    if( testArgCount != 2 )
-    {
-        log_error( "ERROR: Kernel arg count does not match!\n" );
-        return -1;
-    }
-
-
-    /* Test function name */
-    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize );
-    test_error( error, "Unable to get name from kernel" );
-
-    if( strcmp( (char *)testName, "sample_test" ) != 0 )
-    {
-        log_error( "ERROR: Kernel names do not match!\n" );
-        return -1;
-    }
-    if( realSize != strlen( (char *)testName ) + 1 )
-    {
-        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize );
-        return -1;
-    }
-
-    /* All done */
-
-    return 0;
-}
-
-int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel[2];
-    unsigned int numKernels;
-    cl_char testName[ 512 ];
-    cl_uint testArgCount;
-
-
-    /* Preprocess: calc the length of each source file line */
-    sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] );
-    sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] );
-
-    /* Now create a test program */
-    program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error );
-    if( program == NULL || error != CL_SUCCESS )
-    {
-        print_error( error, "Unable to create dual kernel program!" );
-        return -1;
-    }
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build dual kernel program" );
-    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
-    test_error( error, "Unable to create dual kernel program" );
-
-    if( numKernels != 2 )
-    {
-        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
-        return -1;
-    }
-
-    /* Check first kernel */
-    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
-    test_error( error, "Unable to get function name from kernel" );
-
-    int found_kernel1 = 0, found_kernel2 = 0;
-
-    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
-        found_kernel1 = 1;
-    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
-        found_kernel2 = 1;
-    } else {
-        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
-    test_error( error, "Unable to get function name from second kernel" );
-
-    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
-        if (found_kernel1) {
-            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
-            return -1;
-        }
-        found_kernel1 = 1;
-    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
-        if (found_kernel2) {
-            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
-            return -1;
-        }
-        found_kernel2 = 1;
-    } else {
-        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
-        return -1;
-    }
-
-    if( !found_kernel1 || !found_kernel2 )
-    {
-        log_error( "ERROR: Kernel names do not match.\n" );
-        if (!found_kernel1)
-            log_error("Kernel \"%s\" not returned.\n", "sample_test");
-        if (!found_kernel2)
-            log_error("Kernel \"%s\" not returned.\n", "sample_test");
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
-    test_error( error, "Unable to get arg count from kernel" );
-
-    if( testArgCount != 2 )
-    {
-        log_error( "ERROR: wrong # of args for kernel\n" );
-        return -1;
-    }
-
-    /* All done */
-    return 0;
-}
-
-int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel[2];
-    unsigned int numKernels;
-    cl_char testName[512];
-    cl_uint testArgCount;
-
-
-    /* Preprocess: calc the length of each source file line */
-    sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] );
-
-    /* Now create a test program */
-    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error );
-    if( program == NULL || error != CL_SUCCESS )
-    {
-        print_error( error, "Unable to create dual kernel program" );
-        return -1;
-    }
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build dual kernel program" );
-    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
-    test_error( error, "Unable to create dual kernel program" );
-
-    if( numKernels != 2 )
-    {
-        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
-        return -1;
-    }
-
-    /* Check first kernel */
-    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
-    test_error( error, "Unable to get function name from kernel" );
-
-    int found_kernel1 = 0, found_kernel2 = 0;
-
-    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
-        found_kernel1 = 1;
-    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
-        found_kernel2 = 1;
-    } else {
-        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
-    test_error( error, "Unable to get arg count from kernel" );
-
-    if( testArgCount != 2 )
-    {
-        log_error( "ERROR: wrong # of args for kernel\n" );
-        return -1;
-    }
-
-    /* Check second kernel */
-    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
-    test_error( error, "Unable to get function name from kernel" );
-
-    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
-        if (found_kernel1) {
-            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
-            return -1;
-        }
-        found_kernel1 = 1;
-    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
-        if (found_kernel2) {
-            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
-            return -1;
-        }
-        found_kernel2 = 1;
-    } else {
-        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
-        return -1;
-    }
-
-    if( !found_kernel1 || !found_kernel2 )
-    {
-        log_error( "ERROR: Kernel names do not match.\n" );
-        if (!found_kernel1)
-            log_error("Kernel \"%s\" not returned.\n", "sample_test");
-        if (!found_kernel2)
-            log_error("Kernel \"%s\" not returned.\n", "sample_test");
-        return -1;
-    }
-
-    /* All done */
-    return 0;
-}
-
-int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel1, kernel2;
-    int error;
-
-
-    /* Now create a test program */
-    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
-    if( program == NULL || error != CL_SUCCESS )
-    {
-        print_error( error, "Unable to create dual kernel program" );
-        return -1;
-    }
-
-    /* Compile the program */
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build kernel program" );
-
-    /* Try manually creating kernels (backwards just in case) */
-    kernel1 = clCreateKernel( program, "sample_test2", &error );
-
-    if( kernel1 == NULL || error != CL_SUCCESS )
-    {
-        print_error( error, "Could not get kernel 1" );
-        return -1;
-    }
-
-    kernel2 = clCreateKernel( program, "sample_test", &error );
-
-    if( kernel2 == NULL )
-    {
-        print_error( error, "Could not get kernel 2" );
-        return -1;
-    }
-
-    return 0;
-}
-
-int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel1, kernel2;
-    int error;
-    size_t i;
-
-    /* Now create a test program */
-    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
-    if( program == NULL || error != CL_SUCCESS )
-    {
-        print_error( error, "Unable to create dual kernel program" );
-        return -1;
-    }
-
-    /* Compile the program */
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build kernel program" );
-
-    /* Lookup the number of kernels in the program. */
-    size_t total_kernels = 0;
-    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL);
-    test_error( error, "Unable to get program info num kernels");
-
-    if (total_kernels != 2)
-    {
-        print_error( error, "Program did not contain two kernels" );
-        return -1;
-    }
-
-    /* Lookup the kernel names. */
-    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
-
-    size_t kernel_names_len = 0;
-    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len);
-    test_error( error, "Unable to get length of kernel names list." );
-
-    if (kernel_names_len != (strlen(actual_names[0])+1))
-    {
-        print_error( error, "Kernel names length did not match");
-        return -1;
-    }
-
-    const size_t len = (kernel_names_len+1)*sizeof(char);
-    char* kernel_names = (char*)malloc(len);
-    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len);
-    test_error( error, "Unable to get kernel names list." );
-
-    /* Check to see if the kernel name array is null terminated. */
-    if (kernel_names[kernel_names_len-1] != '\0')
-    {
-        free(kernel_names);
-        print_error( error, "Kernel name list was not null terminated");
-        return -1;
-    }
-
-    /* Check to see if the correct kernel name string was returned. */
-    for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ )
-        if( 0 == strcmp(actual_names[i],kernel_names) )
-            break;
-
-    if (i == sizeof( actual_names ) / sizeof( actual_names[0] ) )
-    {
-        free(kernel_names);
-        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
-        for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ )
-            log_error( "\t\t\"%s\"\n", actual_names[0] );
-        return -1;
-    }
-    free(kernel_names);
-
-    /* Try manually creating kernels (backwards just in case) */
-    kernel1 = clCreateKernel( program, "sample_test", &error );
-    if( kernel1 == NULL || error != CL_SUCCESS )
-    {
-        print_error( error, "Could not get kernel 1" );
-        return -1;
-    }
-
-    kernel2 = clCreateKernel( program, "sample_test2", &error );
-    if( kernel2 == NULL )
-    {
-        print_error( error, "Could not get kernel 2" );
-        return -1;
-    }
-
-    return 0;
-}
-
-static const char *single_task_kernel[] = {
-    "__kernel void sample_test(__global int *dst, int count)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    for( int i = 0; i < count; i++ )\n"
-    "        dst[i] = tid + i;\n"
-    "\n"
-    "}\n" };
-
-int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper output;
-    cl_int count;
-
-
-    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
-        return -1;
-
-    // Create args
-    count = 100;
-    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
-    test_error( error, "Unable to create output buffer" );
-
-    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
-    test_error( error, "Unable to set kernel argument" );
-    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
-    test_error( error, "Unable to set kernel argument" );
-
-    // Run task
-    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
-    test_error( error, "Unable to run task" );
-
-    // Read results
-    cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count);
-    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
-    test_error( error, "Unable to read results" );
-
-    // Validate
-    for( cl_int i = 0; i < count; i++ )
-    {
-        if( results[ i ] != i )
-        {
-            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
-            free(results);
-            return -1;
-        }
-    }
-
-    /* All done */
-    free(results);
-    return 0;
-}
-
-
-
-#define TEST_SIZE 1000
-int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-
-    cl_context local_context;
-    cl_command_queue local_queue;
-    cl_program local_program;
-    cl_kernel local_kernel;
-    cl_mem local_mem_in, local_mem_out;
-    cl_event local_event;
-    size_t global_dim[3];
-    int i, j, error;
-    global_dim[0] = TEST_SIZE;
-    global_dim[1] = 1; global_dim[2] = 1;
-    cl_int *inData, *outData;
-    cl_int status;
-
-    inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
-    outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
-    for (i=0; i<TEST_SIZE; i++) {
-        inData[i] = i;
-    }
-
-
-    for (i=0; i<100; i++) {
-        memset(outData, 0, sizeof(cl_int)*TEST_SIZE);
-
-        local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
-        test_error( error, "clCreateContext failed");
-
-        local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
-        test_error( error, "clCreateCommandQueue failed");
-
-        local_program = clCreateProgramWithSource(local_context, 1, &repeate_test_kernel, NULL, &error);
-        test_error( error, "clCreateProgramWithSource failed");
-
-        error = clBuildProgram(local_program, 0, NULL, NULL, NULL, NULL);
-        test_error( error, "clBuildProgram failed");
-
-        local_kernel = clCreateKernel(local_program, "test_kernel", &error);
-        test_error( error, "clCreateKernel failed");
-
-        local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
-        test_error( error, "clCreateBuffer failed");
-
-        local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
-        test_error( error, "clCreateBuffer failed");
-
-        error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
-        test_error( error, "clEnqueueWriteBuffer failed");
-
-        error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
-        test_error( error, "clEnqueueWriteBuffer failed");
-
-        error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
-        test_error( error, "clSetKernelArg failed");
-
-        error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
-        test_error( error, "clSetKernelArg failed");
-
-        error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
-        test_error( error, "clEnqueueNDRangeKernel failed");
-
-        error = clWaitForEvents(1, &local_event);
-        test_error( error, "clWaitForEvents failed");
-
-        error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
-        test_error( error, "clGetEventInfo failed");
-
-        if (status != CL_COMPLETE) {
-            log_error( "Kernel execution not complete: status %d.\n", status);
-            free(inData);
-            free(outData);
-            return -1;
-        }
-
-        error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
-        test_error( error, "clEnqueueReadBuffer failed");
-
-        clReleaseEvent(local_event);
-        clReleaseMemObject(local_mem_in);
-        clReleaseMemObject(local_mem_out);
-        clReleaseKernel(local_kernel);
-        clReleaseProgram(local_program);
-        clReleaseCommandQueue(local_queue);
-        clReleaseContext(local_context);
-
-        for (j=0; j<TEST_SIZE; j++) {
-            if (outData[j] != inData[j] + 1) {
-                log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
-                free(inData);
-                free(outData);
-                return -1;
-            }
-        }
-    }
-
-    free(inData);
-    free(outData);
-
-    return 0;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/api/test_kernels.c
+++ b/test_conformance/compatibility/test_conformance/api/test_kernels.c
@@ -1,704 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/typeWrappers.h"
-#include "harness/conversions.h"
-
-extern cl_uint gRandomSeed;
-
-const char *sample_single_test_kernel[] = {
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (int)src[tid];\n"
-"\n"
-"}\n" };
-
-const char *sample_struct_test_kernel[] = {
-"typedef struct {\n"
-"__global int *A;\n"
-"__global int *B;\n"
-"} input_pair_t;\n"
-"\n"
-"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = src->A[tid] + src->B[tid];\n"
-"\n"
-"}\n" };
-
-const char *sample_struct_array_test_kernel[] = {
-"typedef struct {\n"
-"int A;\n"
-"int B;\n"
-"} input_pair_t;\n"
-"\n"
-"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = src[tid].A + src[tid].B;\n"
-"\n"
-"}\n" };
-
-const char *sample_const_test_kernel[] = {
-"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = src1[tid] + src2[tid];\n"
-"\n"
-"}\n" };
-
-const char *sample_const_global_test_kernel[] = {
-"__constant int addFactor = 1024;\n"
-"__kernel void sample_test(__global int *src1, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = src1[tid] + addFactor;\n"
-"\n"
-"}\n" };
-
-const char *sample_two_kernel_program[] = {
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (int)src[tid];\n"
-"\n"
-"}\n",
-"__kernel void sample_test2(__global int *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (float)src[tid];\n"
-"\n"
-"}\n" };
-
-
-
-
-int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    cl_program program, testProgram;
-    cl_context testContext;
-    cl_kernel kernel;
-    cl_char name[ 512 ];
-    cl_uint numArgs, numInstances;
-    size_t paramSize;
-
-
-    /* Create reference */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, NULL, 0, &paramSize );
-    test_error( error, "Unable to get kernel function name param size" );
-    if( paramSize != strlen( "sample_test" ) + 1 )
-    {
-        log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
-    test_error( error, "Unable to get kernel function name" );
-    if( strcmp( (char *)name, "sample_test" ) != 0 )
-    {
-        log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
-        return -1;
-    }
-
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
-    test_error( error, "Unable to get kernel arg count param size" );
-    if( paramSize != sizeof( numArgs ) )
-    {
-        log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
-    test_error( error, "Unable to get kernel arg count" );
-    if( numArgs != 2 )
-    {
-        log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
-        return -1;
-    }
-
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
-    test_error( error, "Unable to get kernel reference count param size" );
-    if( paramSize != sizeof( numInstances ) )
-    {
-        log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
-    test_error( error, "Unable to get kernel reference count" );
-
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, NULL, 0, &paramSize );
-    test_error( error, "Unable to get kernel program param size" );
-    if( paramSize != sizeof( testProgram ) )
-    {
-        log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
-    test_error( error, "Unable to get kernel program" );
-    if( testProgram != program )
-    {
-        log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram );
-        return -1;
-    }
-
-    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
-    test_error( error, "Unable to get kernel context" );
-    if( testContext != context )
-    {
-        log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext );
-        return -1;
-    }
-
-    /* Release memory */
-    clReleaseKernel( kernel );
-    clReleaseProgram( program );
-    return 0;
-}
-
-int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper            streams[2];
-    size_t    threads[1], localThreads[1];
-    cl_float inputData[100];
-    cl_int outputData[100];
-    RandomSeed seed( gRandomSeed );
-    int i;
-
-    /* Create a kernel to test with */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 100, NULL, &error);
-    test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 100, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Write some test data */
-    memset( outputData, 0, sizeof( outputData ) );
-
-    for (i=0; i<100; i++)
-        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
-
-    error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
-    test_error( error, "Unable to set testing kernel data" );
-
-    /* Set the arguments */
-    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set kernel arguments" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set kernel arguments" );
-
-    /* Test running the kernel and verifying it */
-    threads[0] = (size_t)100;
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<100; i++)
-    {
-        if (outputData[i] != (int)inputData[i])
-        {
-            log_error( "ERROR: Data did not verify on first pass!\n" );
-            return -1;
-        }
-    }
-
-    /* Try again */
-    if( localThreads[0] > 1 )
-        localThreads[0] /= 2;
-    while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
-        localThreads[0]--;
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<100; i++)
-    {
-        if (outputData[i] != (int)inputData[i])
-        {
-            log_error( "ERROR: Data did not verify on first pass!\n" );
-            return -1;
-        }
-    }
-
-    /* And again */
-    if( localThreads[0] > 1 )
-        localThreads[0] /= 2;
-    while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
-        localThreads[0]--;
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<100; i++)
-    {
-        if (outputData[i] != (int)inputData[i])
-        {
-            log_error( "ERROR: Data did not verify on first pass!\n" );
-            return -1;
-        }
-    }
-
-    /* One more time */
-    localThreads[0] = (unsigned int)1;
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<100; i++)
-    {
-        if (outputData[i] != (int)inputData[i])
-        {
-            log_error( "ERROR: Data did not verify on first pass!\n" );
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper    streams[2];
-    size_t    threads[1], localThreads[1];
-    cl_float inputData[10];
-    cl_int outputData[10];
-    RandomSeed seed( gRandomSeed );
-    int i;
-
-    /* Create a kernel to test with */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    /* Create some I/O streams */
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Write some test data */
-    memset( outputData, 0, sizeof( outputData ) );
-
-    for (i=0; i<10; i++)
-        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
-
-    error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
-    test_error( error, "Unable to set testing kernel data" );
-
-    /* Test setting the arguments by index manually */
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Test running the kernel and verifying it */
-    threads[0] = (size_t)10;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<10; i++)
-    {
-        if (outputData[i] != (int)inputData[i])
-        {
-            log_error( "ERROR: Data did not verify on first pass!\n" );
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    cl_program program;
-    cl_kernel kernel;
-    void            *args[2];
-    cl_mem            outStream;
-    size_t    threads[1], localThreads[1];
-    cl_int outputData[10];
-    int i;
-    cl_int randomTestDataA[10], randomTestDataB[10];
-    MTdata  d;
-
-    struct img_pair_t
-    {
-        cl_mem streamA;
-        cl_mem streamB;
-    } image_pair;
-
-
-    /* Create a kernel to test with */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    /* Create some I/O streams */
-    d = init_genrand( gRandomSeed );
-    for( i = 0; i < 10; i++ )
-    {
-        randomTestDataA[i] = (cl_int)genrand_int32(d);
-        randomTestDataB[i] = (cl_int)genrand_int32(d);
-    }
-    free_mtdata(d); d = NULL;
-
-    image_pair.streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
-    test_error( error, "Creating test array failed" );
-    image_pair.streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
-    test_error( error, "Creating test array failed" );
-    outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Set the arguments */
-    args[0] = &image_pair;
-    args[1] = outStream;
-
-    error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &args[1]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-    /* Test running the kernel and verifying it */
-    threads[0] = (size_t)10;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<10; i++)
-    {
-        if (outputData[i] != randomTestDataA[i] + randomTestDataB[i])
-        {
-            log_error( "ERROR: Data did not verify!\n" );
-            return -1;
-        }
-    }
-
-
-    clReleaseMemObject( image_pair.streamA );
-    clReleaseMemObject( image_pair.streamB );
-    clReleaseMemObject( outStream );
-    clReleaseKernel( kernel );
-    clReleaseProgram( program );
-
-    return 0;
-}
-
-int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper            streams[3];
-    size_t    threads[1], localThreads[1];
-    cl_int outputData[10];
-    int i;
-    cl_int randomTestDataA[10], randomTestDataB[10];
-    cl_ulong maxSize;
-    MTdata d;
-
-    /* Verify our test buffer won't be bigger than allowed */
-    error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
-    test_error( error, "Unable to get max constant buffer size" );
-    if( maxSize < sizeof( cl_int ) * 10 )
-    {
-        log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxSize );
-        return -1;
-    }
-
-    /* Create a kernel to test with */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    /* Create some I/O streams */
-    d = init_genrand( gRandomSeed );
-    for( i = 0; i < 10; i++ )
-    {
-        randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff;    /* Make sure values are positive, just so we don't have to */
-        randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff;    /* deal with overflow on the verification */
-    }
-    free_mtdata(d); d = NULL;
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
-    test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
-    test_error( error, "Creating test array failed" );
-    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Set the arguments */
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Test running the kernel and verifying it */
-    threads[0] = (size_t)10;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<10; i++)
-    {
-        if (outputData[i] != randomTestDataA[i] + randomTestDataB[i])
-        {
-            log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", i, outputData[i], randomTestDataA[i], randomTestDataB[i], ( randomTestDataA[i] + randomTestDataB[i] ) );
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper            streams[2];
-    size_t    threads[1], localThreads[1];
-    cl_int outputData[10];
-    int i;
-    MTdata d;
-
-    typedef struct img_pair_type
-    {
-        int A;
-        int B;
-    } image_pair_t;
-
-    image_pair_t image_pair[ 10 ];
-
-
-    /* Create a kernel to test with */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    /* Create some I/O streams */
-    d = init_genrand( gRandomSeed );
-    for( i = 0; i < 10; i++ )
-    {
-        image_pair[i].A = (cl_int)genrand_int32(d);
-        image_pair[i].A = (cl_int)genrand_int32(d);
-    }
-    free_mtdata(d); d = NULL;
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
-    test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Set the arguments */
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-    /* Test running the kernel and verifying it */
-    threads[0] = (size_t)10;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<10; i++)
-    {
-        if (outputData[i] != image_pair[i].A + image_pair[i].B)
-        {
-            log_error( "ERROR: Data did not verify!\n" );
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    cl_program program;
-    cl_kernel  kernel[3];
-    unsigned int kernelCount;
-
-    /* Create a test program */
-    program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
-    if( program == NULL || error != CL_SUCCESS )
-    {
-        log_error( "ERROR: Unable to create test program!\n" );
-        return -1;
-    }
-
-    /* Build */
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build test program" );
-
-    /* Try getting the kernel count */
-    error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
-    test_error( error, "Unable to get kernel count for built program" );
-    if( kernelCount != 2 )
-    {
-        log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", kernelCount );
-        return -1;
-    }
-
-    /* Try actually getting the kernels */
-    error = clCreateKernelsInProgram( program, 2, kernel, NULL );
-    test_error( error, "Unable to get kernels for built program" );
-    clReleaseKernel( kernel[0] );
-    clReleaseKernel( kernel[1] );
-
-    clReleaseProgram( program );
-    return 0;
-}
-
-int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper            streams[2];
-    size_t    threads[1], localThreads[1];
-    cl_int outputData[10];
-    int i;
-    cl_int randomTestDataA[10];
-    MTdata d;
-
-
-    /* Create a kernel to test with */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) != 0 )
-    {
-        return -1;
-    }
-
-    /* Create some I/O streams */
-    d = init_genrand( gRandomSeed );
-    for( i = 0; i < 10; i++ )
-    {
-        randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffff;    /* Make sure values are positive and small, just so we don't have to */
-    }
-    free_mtdata(d); d = NULL;
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
-    test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Set the arguments */
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Test running the kernel and verifying it */
-    threads[0] = (size_t)10;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Kernel execution failed" );
-
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
-    test_error( error, "Unable to get result data" );
-
-    for (i=0; i<10; i++)
-    {
-        if (outputData[i] != randomTestDataA[i] + 1024)
-        {
-            log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", i, outputData[i], randomTestDataA[i], ( randomTestDataA[i] + 1024 ) );
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c
+++ b/test_conformance/compatibility/test_conformance/api/test_null_buffer_arg.c
@@ -1,162 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#if defined(__APPLE__)
-#include <OpenCL/opencl.h>
-#include <OpenCL/cl_platform.h>
-#else
-#include <CL/opencl.h>
-#include <CL/cl_platform.h>
-#endif
-#include "procs.h"
-
-
-enum { SUCCESS, FAILURE };
-typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type;
-
-#define NITEMS 4096
-
-/* places the casted long value of the src ptr into each element of the output
- * array, to allow testing that the kernel actually _gets_ the NULL value */
-const char *kernel_string =
-"kernel void test_kernel(global float *src, global long *dst)\n"
-"{\n"
-"    uint tid = get_global_id(0);\n"
-"    dst[tid] = (long)src;\n"
-"}\n";
-
-/*
- * The guts of the test:
- * call setKernelArgs with a regular buffer, &NULL, or NULL depending on
- * the value of 'test_type'
- */
-static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
-    cl_mem test_buf, cl_mem result_buf, test_type type)
-{
-    unsigned int test_success = 0;
-
-    unsigned int i;
-    cl_int status;
-    char *typestr;
-
-    if (type == NON_NULL_PATH) {
-        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
-        typestr = "non-NULL";
-    } else if (type == ADDROF_NULL_PATH) {
-        test_buf = NULL;
-        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
-        typestr = "&NULL";
-    } else if (type == NULL_PATH) {
-        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
-        typestr = "NULL";
-    }
-
-    log_info("Testing setKernelArgs with %s buffer.\n", typestr);
-
-    if (status != CL_SUCCESS) {
-        log_error("clSetKernelArg failed with status: %d\n", status);
-        return FAILURE; // no point in continuing *this* test
-    }
-
-    size_t global = NITEMS;
-    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
-        NULL, 0, NULL, NULL);
-    test_error(status, "NDRangeKernel failed.");
-
-    cl_long* host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
-    status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
-        sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
-    test_error(status, "ReadBuffer failed.");
-
-    // in the non-null case, we expect NONZERO values:
-    if (type == NON_NULL_PATH) {
-        for (i=0; i<NITEMS; i++) {
-            if (host_result[i] == 0) {
-                log_error("failure: item %d in the result buffer was unexpectedly NULL.\n", i);
-                test_success = FAILURE; break;
-            }
-        }
-
-    } else if (type == ADDROF_NULL_PATH || type == NULL_PATH) {
-        for (i=0; i<NITEMS; i++) {
-            if (host_result[i] != 0) {
-                log_error("failure: item %d in the result buffer was unexpectedly non-NULL.\n", i);
-                test_success = FAILURE; break;
-            }
-        }
-    }
-
-    free(host_result);
-
-    if (test_success == SUCCESS) {
-        log_info("\t%s ok.\n", typestr);
-    }
-
-    return test_success;
-}
-
-int test_null_buffer_arg(cl_device_id device, cl_context context,
-    cl_command_queue queue, int num_elements)
-{
-    unsigned int test_success = 0;
-    unsigned int i;
-    cl_int status;
-    cl_program program;
-    cl_kernel kernel;
-
-    // prep kernel:
-    program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
-    test_error(status, "CreateProgramWithSource failed.");
-
-    status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
-    test_error(status, "BuildProgram failed.");
-
-    kernel = clCreateKernel(program, "test_kernel", &status);
-    test_error(status, "CreateKernel failed.");
-
-    cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
-        NULL, NULL);
-
-    cl_mem dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long),
-        NULL, NULL);
-
-    // set the destination buffer normally:
-    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
-    test_error(status, "SetKernelArg failed.");
-
-    //
-    // we test three cases:
-    //
-    // - typical case, used everyday: non-null buffer
-    // - the case of src as &NULL (the spec-compliance test)
-    // - the case of src as NULL (the backwards-compatibility test, Apple only)
-    //
-
-    test_success  = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
-    test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
-
-#ifdef __APPLE__
-    test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
-#endif
-
-    // clean up:
-    if (dev_src) clReleaseMemObject(dev_src);
-    clReleaseMemObject(dev_dst);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-
-    return test_success;
-}
--- a/test_conformance/compatibility/test_conformance/api/test_retain.cpp
+++ b/test_conformance/compatibility/test_conformance/api/test_retain.cpp
@@ -1,234 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#if !defined(_WIN32)
-#include <unistd.h>
-#endif // !_WIN32
-
-// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
-// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
-// this define.
-//#define VERIFY_AFTER_RELEASE    1
-
-#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
-#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
-
-#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \
-log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \
-return -1;    }
-
-int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
-{
-    cl_command_queue queue;
-    cl_uint numInstances;
-    int err;
-
-
-    /* Create a test queue */
-    queue = clCreateCommandQueue( context, deviceID, 0, &err );
-    test_error( err, "Unable to create command queue to test with" );
-
-    /* Test the instance count */
-    GET_QUEUE_INSTANCE_COUNT( queue );
-    test_error( err, "Unable to get queue instance count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 1 );
-
-    /* Now release the program */
-    clReleaseCommandQueue( queue );
-#ifdef VERIFY_AFTER_RELEASE
-    /* We're not allowed to get the instance count after the object has been completely released. But that's
-     exactly how we can tell the release worked--by making sure getting the instance count fails! */
-    GET_QUEUE_INSTANCE_COUNT( queue );
-    if( err != CL_INVALID_COMMAND_QUEUE )
-    {
-        print_error( err, "Command queue was not properly released" );
-        return -1;
-    }
-#endif
-
-    return 0;
-}
-
-int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
-{
-    cl_command_queue queue;
-    unsigned int numInstances, i;
-    int err;
-
-
-    /* Create a test program */
-    queue = clCreateCommandQueue( context, deviceID, 0, &err );
-    test_error( err, "Unable to create command queue to test with" );
-
-    /* Increment 9 times, which should bring the count to 10 */
-    for( i = 0; i < 9; i++ )
-    {
-        clRetainCommandQueue( queue );
-    }
-
-    /* Test the instance count */
-    GET_QUEUE_INSTANCE_COUNT( queue );
-    test_error( err, "Unable to get queue instance count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 10 );
-
-    /* Now release 5 times, which should take us to 5 */
-    for( i = 0; i < 5; i++ )
-    {
-        clReleaseCommandQueue( queue );
-    }
-
-    GET_QUEUE_INSTANCE_COUNT( queue );
-    test_error( err, "Unable to get queue instance count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 5 );
-
-    /* Retain again three times, which should take us to 8 */
-    for( i = 0; i < 3; i++ )
-    {
-        clRetainCommandQueue( queue );
-    }
-
-    GET_QUEUE_INSTANCE_COUNT( queue );
-    test_error( err, "Unable to get queue instance count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 8 );
-
-    /* Release 7 times, which should take it to 1 */
-    for( i = 0; i < 7; i++ )
-    {
-        clReleaseCommandQueue( queue );
-    }
-
-    GET_QUEUE_INSTANCE_COUNT( queue );
-    test_error( err, "Unable to get queue instance count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 1 );
-
-    /* And one last one */
-    clReleaseCommandQueue( queue );
-
-#ifdef VERIFY_AFTER_RELEASE
-    /* We're not allowed to get the instance count after the object has been completely released. But that's
-     exactly how we can tell the release worked--by making sure getting the instance count fails! */
-    GET_QUEUE_INSTANCE_COUNT( queue );
-    if( err != CL_INVALID_COMMAND_QUEUE )
-    {
-        print_error( err, "Command queue was not properly released" );
-        return -1;
-    }
-#endif
-
-    return 0;
-}
-
-int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem object;
-    cl_uint numInstances;
-    int err;
-
-
-    /* Create a test object */
-    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
-    test_error( err, "Unable to create buffer to test with" );
-
-    /* Test the instance count */
-    GET_MEM_INSTANCE_COUNT( object );
-    test_error( err, "Unable to get mem object count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 1 );
-
-    /* Now release the program */
-    clReleaseMemObject( object );
-#ifdef VERIFY_AFTER_RELEASE
-    /* We're not allowed to get the instance count after the object has been completely released. But that's
-     exactly how we can tell the release worked--by making sure getting the instance count fails! */
-    GET_MEM_INSTANCE_COUNT( object );
-    if( err != CL_INVALID_MEM_OBJECT )
-    {
-        print_error( err, "Mem object was not properly released" );
-        return -1;
-    }
-#endif
-
-    return 0;
-}
-
-int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem object;
-    unsigned int numInstances, i;
-    int err;
-
-
-    /* Create a test object */
-    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
-    test_error( err, "Unable to create buffer to test with" );
-
-    /* Increment 9 times, which should bring the count to 10 */
-    for( i = 0; i < 9; i++ )
-    {
-        clRetainMemObject( object );
-    }
-
-    /* Test the instance count */
-    GET_MEM_INSTANCE_COUNT( object );
-    test_error( err, "Unable to get mem object count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 10 );
-
-    /* Now release 5 times, which should take us to 5 */
-    for( i = 0; i < 5; i++ )
-    {
-        clReleaseMemObject( object );
-    }
-
-    GET_MEM_INSTANCE_COUNT( object );
-    test_error( err, "Unable to get mem object count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 5 );
-
-    /* Retain again three times, which should take us to 8 */
-    for( i = 0; i < 3; i++ )
-    {
-        clRetainMemObject( object );
-    }
-
-    GET_MEM_INSTANCE_COUNT( object );
-    test_error( err, "Unable to get mem object count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 8 );
-
-    /* Release 7 times, which should take it to 1 */
-    for( i = 0; i < 7; i++ )
-    {
-        clReleaseMemObject( object );
-    }
-
-    GET_MEM_INSTANCE_COUNT( object );
-    test_error( err, "Unable to get mem object count" );
-    VERIFY_INSTANCE_COUNT( numInstances, 1 );
-
-    /* And one last one */
-    clReleaseMemObject( object );
-
-#ifdef VERIFY_AFTER_RELEASE
-    /* We're not allowed to get the instance count after the object has been completely released. But that's
-     exactly how we can tell the release worked--by making sure getting the instance count fails! */
-    GET_MEM_INSTANCE_COUNT( object );
-    if( err != CL_INVALID_MEM_OBJECT )
-    {
-        print_error( err, "Mem object was not properly released" );
-        return -1;
-    }
-#endif
-
-    return 0;
-}
-
--- a/test_conformance/compatibility/test_conformance/api/test_retain_program.c
+++ b/test_conformance/compatibility/test_conformance/api/test_retain_program.c
@@ -1,109 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-
-#if !defined(_WIN32)
-#include <unistd.h>
-#endif
-
-#include "harness/compat.h"
-
-int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_program program;
-    cl_kernel kernel;
-    int error;
-    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };
-
-    /* Create a test program */
-    program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
-    test_error( error, "Unable to create program to test with" );
-
-    /* Compile the program */
-    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
-    test_error( error, "Unable to build sample program to test with" );
-
-    /* And create a kernel from it */
-    kernel = clCreateKernel( program, "sample_test", &error );
-    test_error( error, "Unable to create kernel" );
-
-    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
-    clReleaseProgram( program );
-    clReleaseKernel( kernel );
-
-    /* If we got here fine, we succeeded. If not, well, we won't be able to return an error :) */
-    return 0;
-}
-
-const char *sample_delay_kernel[] = {
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    for( int i = 0; i < 1000000; i++ ); \n"
-"    dst[tid] = (int)src[tid];\n"
-"\n"
-"}\n" };
-
-int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    cl_program program;
-    cl_kernel kernel;
-    cl_mem streams[2];
-    size_t threads[1] = { 10 }, localThreadSize;
-
-
-    /* We now need an event to test. So we'll execute a kernel to get one */
-    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
-    {
-        return -1;
-    }
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * 10, NULL, &error);
-    test_error( error, "Creating test array failed" );
-
-    /* Set the arguments */
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
-    test_error( error, "Unable to calc local thread size" );
-
-
-    /* Execute the kernel */
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
-
-    /* The kernel should still be executing, but we should still be able to release it. It's not terribly
-       useful, but we should be able to do it, if the internal refcounting is indeed correct. */
-
-    clReleaseMemObject( streams[ 1 ] );
-    clReleaseMemObject( streams[ 0 ] );
-    clReleaseKernel( kernel );
-    clReleaseProgram( program );
-
-  /* Now make sure we're really finished before we go on. */
-  error = clFinish(queue);
-  test_error( error, "Unable to finish context.");
-
-    return 0;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/compatibility/test_conformance/basic/CMakeLists.txt
@@ -2,47 +2,20 @@ set(MODULE_NAME COMPATIBILITY_BASIC)

 set(${MODULE_NAME}_SOURCES
    main.c
-    test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
-    test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
-    test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
-    test_hiloeo.c test_local.c test_pointercast.c
-    test_if.c test_loop.c
    test_readimage.c test_readimage_int16.c test_readimage_fp32.c
    test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
-    test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
+    test_writeimage.c
    test_multireadimageonefmt.c test_multireadimagemultifmt.c
    test_imagedim.c
-    test_vloadstore.c
-    test_int2float.c test_float2int.c
-    test_createkernelsinprogram.c
-    test_hostptr.c
-    test_explicit_s2v.cpp
-    test_constant.c
    test_image_multipass.c
-    test_imagereadwrite.c test_imagereadwrite3d.c
    test_image_param.c
    test_imagenpot.c
    test_image_r8.c
-    test_barrier.c
-    test_basic_parameter_types.c
-    test_arrayreadwrite.c
    test_imagearraycopy3d.c
-    test_imagecopy.c
-    test_imagerandomcopy.c
-    test_arrayimagecopy.c
-    test_arrayimagecopy3d.c
-    test_imagecopy3d.c
-    test_enqueue_map.cpp
-    test_work_item_functions.cpp
-    test_astype.cpp
    test_async_copy.cpp
    test_sizeof.c
-    test_vec_type_hint.c
-    test_constant_source.cpp
    test_bufferreadwriterect.c
    test_async_strided_copy.cpp
-    test_kernel_memory_alignment.cpp
-    test_local_kernel_scope.cpp
 )

 set(${MODULE_NAME}_LIBS harness-compat)
--- a/test_conformance/compatibility/test_conformance/basic/main.c
+++ b/test_conformance/compatibility/test_conformance/basic/main.c
@@ -31,99 +31,33 @@ cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
 bool gTestRounding = false;

 test_definition test_list[] = {
-    ADD_TEST( hostptr ),
-    ADD_TEST( fpmath_float ),
-    ADD_TEST( fpmath_float2 ),
-    ADD_TEST( fpmath_float4 ),
-    ADD_TEST( intmath_int ),
-    ADD_TEST( intmath_int2 ),
-    ADD_TEST( intmath_int4 ),
-    ADD_TEST( intmath_long ),
-    ADD_TEST( intmath_long2 ),
-    ADD_TEST( intmath_long4 ),
-    ADD_TEST( hiloeo ),
-    ADD_TEST( if ),
    ADD_TEST( sizeof ),
-    ADD_TEST( loop ),
-    ADD_TEST( pointer_cast ),
-    ADD_TEST( local_arg_def ),
-    ADD_TEST( local_kernel_def ),
-    ADD_TEST( local_kernel_scope ),
-    ADD_TEST( constant ),
-    ADD_TEST( constant_source ),
    ADD_TEST( readimage ),
    ADD_TEST( readimage_int16 ),
    ADD_TEST( readimage_fp32 ),
    ADD_TEST( writeimage ),
-    ADD_TEST( writeimage_int16 ),
-    ADD_TEST( writeimage_fp32 ),
    ADD_TEST( mri_one ),

    ADD_TEST( mri_multiple ),
    ADD_TEST( image_r8 ),
-    ADD_TEST( barrier ),
-    ADD_TEST( int2float ),
-    ADD_TEST( float2int ),
-    ADD_TEST( imagereadwrite ),
-    ADD_TEST( imagereadwrite3d ),
    ADD_TEST( readimage3d ),
    ADD_TEST( readimage3d_int16 ),
    ADD_TEST( readimage3d_fp32 ),
    ADD_TEST( bufferreadwriterect ),
-    ADD_TEST( arrayreadwrite ),
    ADD_TEST( imagearraycopy3d ),
-    ADD_TEST( imagecopy ),
-    ADD_TEST( imagecopy3d ),
-    ADD_TEST( imagerandomcopy ),
-    ADD_TEST( arrayimagecopy ),
-    ADD_TEST( arrayimagecopy3d ),
    ADD_TEST( imagenpot ),

-    ADD_TEST( vload_global ),
-    ADD_TEST( vload_local ),
-    ADD_TEST( vload_constant ),
-    ADD_TEST( vload_private ),
-    ADD_TEST( vstore_global ),
-    ADD_TEST( vstore_local ),
-    ADD_TEST( vstore_private ),
-
-    ADD_TEST( createkernelsinprogram ),
    ADD_TEST( imagedim_pow2 ),
    ADD_TEST( imagedim_non_pow2 ),
    ADD_TEST( image_param ),
    ADD_TEST( image_multipass_integer_coord ),
    ADD_TEST( image_multipass_float_coord ),
-    ADD_TEST( explicit_s2v_bool ),
-    ADD_TEST( explicit_s2v_char ),
-    ADD_TEST( explicit_s2v_uchar ),
-    ADD_TEST( explicit_s2v_short ),
-    ADD_TEST( explicit_s2v_ushort ),
-    ADD_TEST( explicit_s2v_int ),
-    ADD_TEST( explicit_s2v_uint ),
-    ADD_TEST( explicit_s2v_long ),
-    ADD_TEST( explicit_s2v_ulong ),
-    ADD_TEST( explicit_s2v_float ),
-    ADD_TEST( explicit_s2v_double ),
-
-    ADD_TEST( enqueue_map_buffer ),
-    ADD_TEST( enqueue_map_image ),
-
-    ADD_TEST( work_item_functions ),
-
-    ADD_TEST( astype ),

    ADD_TEST( async_copy_global_to_local ),
    ADD_TEST( async_copy_local_to_global ),
    ADD_TEST( async_strided_copy_global_to_local ),
    ADD_TEST( async_strided_copy_local_to_global ),
    ADD_TEST( prefetch ),
-
-    ADD_TEST( parameter_types ),
-    ADD_TEST( vec_type_hint ),
-    ADD_TEST( kernel_memory_alignment_local ),
-    ADD_TEST( kernel_memory_alignment_global ),
-    ADD_TEST( kernel_memory_alignment_constant ),
-    ADD_TEST( kernel_memory_alignment_private ),
 };

 const int test_num = ARRAY_SIZE( test_list );
--- a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy.c
@@ -1,143 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
-{
-  cl_uchar    *bufptr, *imgptr;
-  clMemWrapper      buffer, image;
-  int        img_width = 512;
-  int        img_height = 512;
-  size_t    elem_size;
-  size_t    buffer_size;
-  int        i;
-  cl_int          err;
-  MTdata          d;
-  cl_event  copyevent;
-
-  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-
-  image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
-  test_error(err, "create_image_2d failed");
-
-  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
-  test_error(err, "clGetImageInfo failed");
-
-  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
-
-  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-  bufptr = (cl_uchar*)malloc(buffer_size);
-
-  d = init_genrand( gRandomSeed );
-  bufptr = (cl_uchar*)malloc(buffer_size);
-  for (i=0; i<(int)buffer_size; i++) {
-     bufptr[i] = (cl_uchar)genrand_int32(d);
-  }
-  free_mtdata(d); d = NULL;
-
-  size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1};
-  err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueWriteBuffer failed");
-
-  err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
-  test_error(err, "clEnqueueCopyImageToBuffer failed");
-
-  imgptr = (cl_uchar*)malloc(buffer_size);
-
-  err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  if (memcmp(bufptr, imgptr, buffer_size) != 0) {
-    log_error( "ERROR: Results did not validate!\n" );
-    unsigned char * inchar = (unsigned char*)bufptr;
-    unsigned char * outchar = (unsigned char*)imgptr;
-    int failuresPrinted = 0;
-    int i;
-    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
-        int failed = 0;
-        int j;
-        for (j=0; j<(int)elem_size; j++)
-            if (inchar[i+j] != outchar[i+j])
-                failed = 1;
-        char values[4096];
-        values[0] = 0;
-        if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
-            int j;
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != expected [");
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
-            sprintf(values + strlen(values), "]");
-            log_error("%s\n", values);
-            failuresPrinted++;
-        }
-        if (failuresPrinted > 5) {
-            log_error("Not printing further failures...\n");
-            break;
-        }
-    }
-    err = -1;
-  }
-
-  free(bufptr);
-  free(imgptr);
-
-  if (err)
-    log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
-              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
-
-  return err;
-}
-
-int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-  cl_int          err;
-  cl_image_format *formats;
-  cl_uint         num_formats;
-  cl_uint         i;
-
-  PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
-
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  for (i = 0; i < num_formats; i++) {
-    err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
-  }
-
-  if (err)
-    log_error("ARRAY to IMAGE copy test failed\n");
-  else
-    log_info("ARRAY to IMAGE copy test passed\n");
-
-  return err;
-}
--- a/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_arrayimagecopy3d.c
@@ -1,144 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
-{
-  cl_uchar    *bufptr, *imgptr;
-  clMemWrapper      buffer, image;
-  int        img_width = 128;
-  int        img_height = 128;
-  int        img_depth = 32;
-  size_t    elem_size;
-  size_t    buffer_size;
-  int        i;
-  cl_int          err;
-  MTdata          d;
-  cl_event  copyevent;
-
-  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-
-  image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-  test_error(err, "create_image_3d failed");
-
-  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
-  test_error(err, "clGetImageInfo failed");
-
-  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
-
-  buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_size, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-  bufptr = (cl_uchar*)malloc(buffer_size);
-
-  d = init_genrand( gRandomSeed );
-  bufptr = (cl_uchar*)malloc(buffer_size);
-  for (i=0; i<(int)buffer_size; i++) {
-     bufptr[i] = (cl_uchar)genrand_int32(d);
-  }
-  free_mtdata(d); d = NULL;
-
-  size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth};
-  err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueWriteBuffer failed");
-
-  err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
-  test_error(err, "clEnqueueCopyImageToBuffer failed");
-
-  imgptr = (cl_uchar*)malloc(buffer_size);
-
-  err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  if (memcmp(bufptr, imgptr, buffer_size) != 0) {
-    log_error( "ERROR: Results did not validate!\n" );
-    unsigned char * inchar = (unsigned char*)bufptr;
-    unsigned char * outchar = (unsigned char*)imgptr;
-    int failuresPrinted = 0;
-    int i;
-    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
-        int failed = 0;
-        int j;
-        for (j=0; j<(int)elem_size; j++)
-            if (inchar[i+j] != outchar[i+j])
-                failed = 1;
-        char values[4096];
-        values[0] = 0;
-        if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
-            int j;
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != expected [");
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
-            sprintf(values + strlen(values), "]");
-            log_error("%s\n", values);
-            failuresPrinted++;
-        }
-        if (failuresPrinted > 5) {
-            log_error("Not printing further failures...\n");
-            break;
-        }
-    }
-    err = -1;
-  }
-
-  free(bufptr);
-  free(imgptr);
-
-  if (err)
-    log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
-              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
-
-  return err;
-}
-
-int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-  cl_int          err;
-  cl_image_format *formats;
-  cl_uint         num_formats;
-  cl_uint         i;
-
-  PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
-
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  for (i = 0; i < num_formats; i++) {
-    err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
-  }
-
-  if (err)
-    log_error("ARRAY to IMAGE3D copy test failed\n");
-  else
-    log_info("ARRAY to IMAGE3D copy test passed\n");
-
-  return err;
-}
--- a/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_arrayreadwrite.c
@@ -1,94 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-
-int
-test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_uint                *inptr, *outptr;
-    cl_mem              streams[1];
-    int                 num_tries = 400;
-    num_elements = 1024 * 1024 * 4;
-    int                 i, j, err;
-    MTdata              d;
-
-    inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
-    outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
-
-    // randomize data
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_uint) * num_elements, NULL, &err);
-    test_error(err, "clCreateBuffer failed");
-
-    for (i=0; i<num_tries; i++)
-    {
-        int        offset;
-        int        cb;
-
-        do {
-            offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
-            if (offset > 0 && offset < num_elements)
-                break;
-        } while (1);
-        cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
-        if (cb > (num_elements - offset))
-            cb = num_elements - offset;
-
-        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
-        test_error(err, "clEnqueueWriteBuffer failed");
-
-        err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
-        test_error(err, "clEnqueueReadBuffer failed");
-
-        for (j=offset; j<offset+cb; j++)
-        {
-            if (inptr[j] != outptr[j])
-            {
-                log_error("ARRAY read, write test failed\n");
-                err = -1;
-                break;
-            }
-        }
-
-        if (err)
-            break;
-    }
-
-    free_mtdata(d);
-    clReleaseMemObject(streams[0]);
-    free(inptr);
-    free(outptr);
-
-    if (!err)
-        log_info("ARRAY read, write test passed\n");
-
-    return err;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_astype.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_astype.cpp
@@ -1,289 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-
-static const char *astype_kernel_pattern =
-"%s\n"
-"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
-"{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s%s tmp = as_%s%s( src[ tid ] );\n"
-"   dst[ tid ] = tmp;\n"
-"}\n";
-
-static const char *astype_kernel_pattern_V3srcV3dst =
-"%s\n"
-"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
-"{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s%s tmp = as_%s%s( vload3(tid,src) );\n"
-"   vstore3(tmp,tid,dst);\n"
-"}\n";
-// in the printf, remove the third and fifth argument, each of which
-// should be a "3", when copying from the printf for astype_kernel_pattern
-
-static const char *astype_kernel_pattern_V3dst =
-"%s\n"
-"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
-"{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s3 tmp = as_%s3( src[ tid ] );\n"
-"   vstore3(tmp,tid,dst);\n"
-"}\n";
-// in the printf, remove the fifth argument, which
-// should be a "3", when copying from the printf for astype_kernel_pattern
-
-
-static const char *astype_kernel_pattern_V3src =
-"%s\n"
-"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
-"{\n"
-"    int tid = get_global_id( 0 );\n"
-"    %s%s tmp = as_%s%s( vload3(tid,src) );\n"
-"   dst[ tid ] = tmp;\n"
-"}\n";
-// in the printf, remove the third argument, which
-// should be a "3", when copying from the printf for astype_kernel_pattern
-
-
-int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
-                    unsigned int vecSize, unsigned int outVecSize,
-                    int numElements )
-{
-    int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[ 2 ];
-
-    char programSrc[ 10240 ];
-    size_t threads[ 1 ], localThreads[ 1 ];
-    size_t typeSize = get_explicit_type_size( inVecType );
-    size_t outTypeSize = get_explicit_type_size(outVecType);
-    char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
-    MTdata d;
-
-
-
-    // Create program
-    if(outVecSize == 3 && vecSize == 3) {
-        // astype_kernel_pattern_V3srcV3dst
-        sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
-    } else if(outVecSize == 3) {
-        // astype_kernel_pattern_V3dst
-        sprintf( programSrc, astype_kernel_pattern_V3dst,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ),
-                get_explicit_type_name( outVecType ),
-                get_explicit_type_name( outVecType ));
-
-    } else if(vecSize == 3) {
-        // astype_kernel_pattern_V3src
-        sprintf( programSrc, astype_kernel_pattern_V3src,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
-    } else {
-        sprintf( programSrc, astype_kernel_pattern,
-                (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
-                get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
-    }
-
-    const char *ptr = programSrc;
-    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
-    test_error( error, "Unable to create testing kernel" );
-
-
-    // Create some input values
-    size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
-    char *inBuffer = (char*)malloc( inBufferSize );
-    size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
-    char *outBuffer = (char*)malloc( outBufferSize );
-
-    d = init_genrand( gRandomSeed );
-    generate_random_data( inVecType, numElements * vecSize,
-                         d, inBuffer );
-    free_mtdata(d); d = NULL;
-
-    // Create I/O streams and set arguments
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
-    test_error( error, "Unable to create I/O stream" );
-    streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
-    test_error( error, "Unable to create I/O stream" );
-
-    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
-    test_error( error, "Unable to set kernel argument" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
-    test_error( error, "Unable to set kernel argument" );
-
-
-    // Run the kernel
-    threads[ 0 ] = numElements;
-    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
-    test_error( error, "Unable to get group size to run with" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Unable to run kernel" );
-
-
-    // Get the results and compare
-    // The beauty is that astype is supposed to return the bit pattern as a different type, which means
-    // the output should have the exact same bit pattern as the input. No interpretation necessary!
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
-    test_error( error, "Unable to read results" );
-
-    char *expected = inBuffer;
-    char *actual = outBuffer;
-    size_t compSize = typeSize*vecSize;
-    if(outTypeSize*outVecSize < compSize) {
-        compSize = outTypeSize*outVecSize;
-    }
-
-    if(outVecSize == 4 && vecSize == 3)
-    {
-        // as_type4(vec3) should compile but produce undefined results??
-        free(inBuffer);
-        free(outBuffer);
-        return 0;
-    }
-
-    if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
-    {
-        // as_typen(vecm) should compile and run but produce
-        // implementation-defined results for m != n
-        // and n*sizeof(type) = sizeof(vecm)
-        free(inBuffer);
-        free(outBuffer);
-        return 0;
-    }
-
-    for( int i = 0; i < numElements; i++ )
-    {
-        if( memcmp( expected, actual, compSize ) != 0 )
-        {
-            char expectedString[ 1024 ], actualString[ 1024 ];
-            log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
-                      (int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize,
-                      GetDataVectorString( expected, typeSize, vecSize, expectedString ),
-                      GetDataVectorString( actual, typeSize, vecSize, actualString ) );
-            log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
-                      programSrc, (int)threads[0],(int) localThreads[0]);
-            free(inBuffer);
-            free(outBuffer);
-            return 1;
-        }
-        expected += typeSize * vecSize;
-        actual += outTypeSize * outVecSize;
-    }
-
-    free(inBuffer);
-    free(outBuffer);
-    return 0;
-}
-
-int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    // Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is
-    // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
-    // for us to verify what is "valid". So the only thing we can test are types that match in size independent
-    // of the element count (char -> uchar, etc)
-    ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx;
-    size_t inTypeSize, outTypeSize;
-    int error = 0;
-
-    for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ )
-    {
-        inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]);
-
-        if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
-            continue;
-
-        if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong )
-            continue;
-
-        for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ )
-        {
-            outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]);
-            if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) {
-                continue;
-            }
-
-            if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong )
-                continue;
-
-            // change this check
-            if( inTypeIdx == outTypeIdx ) {
-                continue;
-            }
-
-            log_info( " (%s->%s)\n", get_explicit_type_name( vecTypes[ inTypeIdx ] ), get_explicit_type_name( vecTypes[ outTypeIdx ] ) );
-            fflush( stdout );
-
-            for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
-            {
-
-                for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++)
-                {
-                    if(vecSizes[sizeIdx]*inTypeSize !=
-                       vecSizes[outSizeIdx]*outTypeSize )
-                    {
-                        continue;
-                    }
-                    error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems );
-
-
-                }
-
-            }
-            if(get_explicit_type_size(vecTypes[inTypeIdx]) ==
-               get_explicit_type_size(vecTypes[outTypeIdx])) {
-                // as_type3(vec4) allowed, as_type4(vec3) not allowed
-                error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 3, 4, n_elems );
-                error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], 4, 3, n_elems );
-            }
-
-        }
-    }
-    return error;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_barrier.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_barrier.c
@@ -1,158 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *barrier_kernel_code =
-"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
-"{\n"
-"    int  tid = get_local_id(0);\n"
-"    int  lsize = get_local_size(0);\n"
-"    int  i;\n"
-"\n"
-"    tmp_sum[tid] = 0;\n"
-"    for (i=tid; i<n; i+=lsize)\n"
-"        tmp_sum[tid] += a[i];\n"
-"     \n"
-"     // updated to work for any workgroup size \n"
-"    for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
-"    {\n"
-"        barrier(CLK_GLOBAL_MEM_FENCE);\n"
-"        if (tid + i < lsize)\n"
-"            tmp_sum[tid] += tmp_sum[tid + i];\n"
-"         lsize = i; \n"
-"    }\n"
-"\n"
-"     //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
-"    if (tid == 0)\n"
-"        *sum = tmp_sum[0];\n"
-"}\n";
-
-
-static int
-verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
-{
-  int            r = 0;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-        r += inptr[i];
-  }
-
-    if (r != outptr[0])
-    {
-        log_error("BARRIER test failed\n");
-        return -1;
-    }
-
-  log_info("BARRIER test passed\n");
-  return 0;
-}
-
-
-int
-test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem            streams[3];
-    cl_int            *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr =NULL;
-    cl_program        program;
-    cl_kernel        kernel;
-    size_t    global_threads[3];
-    size_t    local_threads[3];
-    int                err;
-    int                i;
-    size_t max_local_workgroup_size[3];
-    size_t max_threadgroup_size = 0;
-    MTdata d;
-
-    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
-    test_error(err, "Failed to build kernel/program.");
-
-    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
-                                 sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
-    test_error(err, "clGetKernelWorkgroupInfo failed.");
-
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
-    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-
-    // Pick the minimum of the device and the kernel
-    if (max_threadgroup_size > max_local_workgroup_size[0])
-        max_threadgroup_size = max_local_workgroup_size[0];
-
-    // work group size must divide evenly into the global size
-    while( num_elements % max_threadgroup_size )
-        max_threadgroup_size--;
-
-    input_ptr = (int*)malloc(sizeof(int) * num_elements);
-    output_ptr = (int*)malloc(sizeof(int));
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, &err);
-    test_error(err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int), NULL, &err);
-    test_error(err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * max_threadgroup_size, NULL, &err);
-    test_error(err, "clCreateBuffer failed.");
-
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
-    free_mtdata(d);  d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed.");
-
-    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
-    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
-    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
-    test_error(err, "clSetKernelArg failed.");
-
-    global_threads[0] = max_threadgroup_size;
-    local_threads[0] = max_threadgroup_size;
-
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
-    test_error(err, "clEnqueueNDRangeKernel failed.");
-
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
-    test_error(err, "clEnqueueReadBuffer failed.");
-
-    err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);
-
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_basic_parameter_types.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_basic_parameter_types.c
@@ -1,300 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-const char *kernel_code =
-"__kernel void test_kernel(\n"
-"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
-"__global float%s *result)\n"
-"{\n"
-"  result[0] = %s(c);\n"
-"  result[1] = %s(uc);\n"
-"  result[2] = %s(s);\n"
-"  result[3] = %s(us);\n"
-"  result[4] = %s(i);\n"
-"  result[5] = %s(ui);\n"
-"  result[6] = f;\n"
-"}\n";
-
-const char *kernel_code_long =
-"__kernel void test_kernel_long(\n"
-"long%s l, ulong%s ul,\n"
-"__global float%s *result)\n"
-"{\n"
-"  result[0] = %s(l);\n"
-"  result[1] = %s(ul);\n"
-"}\n";
-
-int test_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-     clMemWrapper results;
-  int error;
-  size_t global[3] = {1, 1, 1};
-  float results_back[2*16];
-  int count, index;
-  const char* types[] = { "long", "ulong" };
-  char kernel_string[8192];
-  int sizes[] = {1, 2, 4, 8, 16};
-  const char* size_strings[] = {"", "2", "4", "8", "16"};
-  float expected;
-  int total_errors = 0;
-  int size_to_test;
-  char *ptr;
-  char convert_string[1024];
-  size_t max_parameter_size;
-
-  // We don't really care about the contents since we're just testing that the types work.
-  cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
-  cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-
-  // Calculate how large our paramter size is to the kernel
-  size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);
-
-  // Init our strings.
-  kernel_string[0] = '\0';
-  convert_string[0] = '\0';
-
-  // Get the maximum parameter size allowed
-  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
-    test_error( error, "Unable to get max parameter size from device" );
-
-  // Create the results buffer
-  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
-  test_error(error, "clCreateBuffer failed");
-
-  // Go over all the vector sizes
-  for (size_to_test = 0; size_to_test < 5; size_to_test++) {
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
-    if (total_parameter_size > max_parameter_size) {
-      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
-               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
-      continue;
-    }
-
-    log_info("Testing vector size %d\n", sizes[size_to_test]);
-
-    // If size is > 1, then we need a explicit convert call.
-    if (sizes[size_to_test] > 1) {
-      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
-    } else {
-      sprintf(convert_string, " ");
-    }
-
-    // Build the kernel
-    sprintf(kernel_string, kernel_code_long,
-            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
-            convert_string, convert_string
-    );
-
-    ptr = kernel_string;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel_long");
-    test_error(error, "create single kernel failed");
-
-    // Set the arguments
-    for (count = 0; count < 2; count++) {
-      switch (count) {
-        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_long)*sizes[size_to_test], &l); break;
-        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_ulong)*sizes[size_to_test], &ul); break;
-        default: log_error("Test error"); break;
-      }
-      if (error)
-        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
-      test_error(error, "clSetKernelArgs failed");
-    }
-    error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
-    test_error(error, "clSetKernelArgs failed");
-
-    // Execute
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-
-    // Read back the results
-    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL);
-    test_error(error, "clEnqueueReadBuffer failed");
-
-    // Verify the results
-    for (count = 0; count < 2; count++) {
-      for (index=0; index < sizes[size_to_test]; index++) {
-        switch (count) {
-          case 0: expected = (float)l[index]; break;
-          case 1: expected = (float)ul[index]; break;
-          default: log_error("Test error"); break;
-        }
-
-        if (results_back[count*sizes[size_to_test]+index] != expected) {
-          total_errors++;
-          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
-                    index, results_back[count*sizes[size_to_test]+index], expected);
-        }
-      }
-    }
-  }
-
-  return total_errors;
-}
-
-int test_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-  clMemWrapper results;
-  int error;
-  size_t global[3] = {1, 1, 1};
-  float results_back[7*16];
-  int count, index;
-  const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
-  char kernel_string[8192];
-  int sizes[] = {1, 2, 4, 8, 16};
-  const char* size_strings[] = {"", "2", "4", "8", "16"};
-  float expected;
-  int total_errors = 0;
-  int size_to_test;
-  char *ptr;
-  char convert_string[1024];
-  size_t max_parameter_size;
-
-  // We don't really care about the contents since we're just testing that the types work.
-  cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
-  cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-  cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
-  cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-  cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
-  cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-  cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
-
-  // Calculate how large our paramter size is to the kernel
-  size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
-  sizeof(cl_short) +sizeof(cl_ushort) +
-  sizeof(cl_int) +sizeof(cl_uint) +
-  sizeof(cl_float);
-
-  // Init our strings.
-  kernel_string[0] = '\0';
-  convert_string[0] = '\0';
-
-  // Get the maximum parameter size allowed
-  error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
-    test_error( error, "Unable to get max parameter size from device" );
-
-  // Create the results buffer
-  results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
-  test_error(error, "clCreateBuffer failed");
-
-  // Go over all the vector sizes
-  for (size_to_test = 0; size_to_test < 5; size_to_test++) {
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
-    if (total_parameter_size > max_parameter_size) {
-      log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
-               (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
-      continue;
-    }
-
-    log_info("Testing vector size %d\n", sizes[size_to_test]);
-
-    // If size is > 1, then we need a explicit convert call.
-    if (sizes[size_to_test] > 1) {
-      sprintf(convert_string, "convert_float%s",  size_strings[size_to_test]);
-    } else {
-      sprintf(convert_string, " ");
-    }
-
-    // Build the kernel
-    sprintf(kernel_string, kernel_code,
-            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
-            size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
-            size_strings[size_to_test], size_strings[size_to_test],
-            convert_string, convert_string, convert_string,
-            convert_string, convert_string, convert_string
-    );
-
-    ptr = kernel_string;
-    error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel");
-    test_error(error, "create single kernel failed");
-
-    // Set the arguments
-    for (count = 0; count < 7; count++) {
-      switch (count) {
-        case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break;
-        case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break;
-        case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break;
-        case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break;
-        case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break;
-        case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break;
-        case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break;
-        default: log_error("Test error"); break;
-      }
-      if (error)
-        log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
-      test_error(error, "clSetKernelArgs failed");
-    }
-    error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
-    test_error(error, "clSetKernelArgs failed");
-
-    // Execute
-    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
-    test_error(error, "clEnqueueNDRangeKernel failed");
-
-    // Read back the results
-    error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
-    test_error(error, "clEnqueueReadBuffer failed");
-
-    // Verify the results
-    for (count = 0; count < 7; count++) {
-      for (index=0; index < sizes[size_to_test]; index++) {
-        switch (count) {
-          case 0: expected = (float)c[index]; break;
-          case 1: expected = (float)uc[index]; break;
-          case 2: expected = (float)s[index]; break;
-          case 3: expected = (float)us[index]; break;
-          case 4: expected = (float)i[index]; break;
-          case 5: expected = (float)ui[index]; break;
-          case 6: expected = (float)f[index]; break;
-          default: log_error("Test error"); break;
-        }
-
-        if (results_back[count*sizes[size_to_test]+index] != expected) {
-          total_errors++;
-          log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
-                    index, results_back[count*sizes[size_to_test]+index], expected);
-        }
-      }
-    }
-  }
-
-  if (gHasLong) {
-    log_info("Testing long types...\n");
-    total_errors += test_parameter_types_long( device, context, queue, num_elements );
-  }
-  else {
-    log_info("Longs unsupported, skipping.");
-  }
-
-  return total_errors;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_constant.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_constant.c
@@ -1,275 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *constant_kernel_code =
-"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    float ftmp = tmpF[tid]; \n"
-"    float Itmp = tmpI[tid]; \n"
-"    out[tid] = ftmp * Itmp; \n"
-"}\n";
-
-const char *loop_constant_kernel_code =
-"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
-"{\n"
-"    int tid = get_global_id(0);\n"
-"    float sum = 0;\n"
-"    for (int i = 0; i < num; i++) {\n"
-"        float  pos  = i_pos[i*3];\n"
-"        sum += pos;\n"
-"    }\n"
-"    out[tid] = sum;\n"
-"}\n";
-
-
-static int
-verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
-{
-    int         i;
-
-    for (i=0; i < n; i++)
-    {
-        float f = tmpF[i] * tmpI[i];
-        if( out[i] != f )
-        {
-            log_error("CONSTANT test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("CONSTANT test passed\n");
-    return 0;
-}
-
-
-static int
-verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
-{
-    int i;
-    cl_int j;
-    for (i=0; i < n; i++)
-    {
-        float sum = 0;
-        for (j=0; j < l; ++j)
-            sum += tmp[j*3];
-
-        if( out[i] != sum )
-        {
-            log_error("loop CONSTANT test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("loop CONSTANT test passed\n");
-    return 0;
-}
-
-int
-test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem            streams[3];
-    cl_int            *tmpI;
-    cl_float        *tmpF, *out;
-    cl_program        program;
-    cl_kernel        kernel;
-    size_t    global_threads[3];
-    int                err;
-    unsigned int                i;
-    cl_ulong maxSize, maxGlobalSize, maxAllocSize;
-    size_t num_floats, num_ints, constant_values;
-    MTdata          d;
-    RoundingMode     oldRoundMode;
-    int isRTZ = 0;
-
-  /* Verify our test buffer won't be bigger than allowed */
-    err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
-    test_error( err, "Unable to get max constant buffer size" );
-
-  log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
-  
-  // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
-  err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
-  test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
-
-  if (maxSize > maxGlobalSize / 4)
-    maxSize = maxGlobalSize / 4;
-
-  err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
-  test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
-
-  if (maxSize > maxAllocSize)
-    maxSize = maxAllocSize;
-  
-  maxSize/=4;
-  num_ints = (size_t)maxSize/sizeof(cl_int);
-  num_floats = (size_t)maxSize/sizeof(cl_float);
-  if (num_ints >= num_floats) {
-    constant_values = num_floats;
-  } else {
-    constant_values = num_ints;
-  }
-
-  log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
-           constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));
-
-    tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
-    tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
-    out  = (cl_float*)malloc(sizeof(cl_float) * constant_values);
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * constant_values, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * constant_values, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<constant_values; i++) {
-        tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
-        tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-  err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
-    if (err) {
-    log_error("Failed to create kernel and program: %d\n", err);
-    return -1;
-  }
-
-
-    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    global_threads[0] = constant_values;
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
-        return -1;
-    }
-    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
-
-    //If we only support rtz mode
-    if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
-    {
-        oldRoundMode = set_round(kRoundTowardZero, kfloat);
-        isRTZ = 1;
-    }
-
-    err = verify(tmpF, tmpI, out, (int)constant_values);
-
-    if (isRTZ)
-        (void)set_round(oldRoundMode, kfloat);
-
-    // Loop constant buffer test
-    cl_program loop_program;
-    cl_kernel  loop_kernel;
-    cl_int limit = 2;
-
-    memset(out, 0, sizeof(cl_float) * constant_values);
-    err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
-                                      &loop_constant_kernel_code, "loop_constant_kernel" );
-    if (err) {
-        log_error("Failed to create loop kernel and program: %d\n", err);
-        return -1;
-    }
-
-    err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
-    if (err != CL_SUCCESS) {
-        log_error("clSetKernelArgs for loop kernel failed\n");
-        return -1;
-    }
-
-    err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS) {
-        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
-        return -1;
-    }
-    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
-    if (err != CL_SUCCESS) {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
-
-    err = verify_loop_constant(tmpF, out, limit, (int)constant_values);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    clReleaseKernel(loop_kernel);
-    clReleaseProgram(loop_program);
-    free(tmpI);
-    free(tmpF);
-    free(out);
-
-    return err;
-}
-
-
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_constant_source.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_constant_source.cpp
@@ -1,100 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *constant_source_kernel_code[] = {
-"__constant int outVal = 42;\n"
-"__constant int outIndex = 7;\n"
-"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n"
-"\n"
-"__kernel void constant_kernel( __global int *out )\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    if( tid == 0 )\n"
-"    {\n"
-"        out[ 0 ] = outVal;\n"
-"        out[ 1 ] = outValues[ outIndex ];\n"
-"    }\n"
-"    else\n"
-"    {\n"
-"        out[ tid + 1 ] = outValues[ tid ];\n"
-"    }\n"
-"}\n" };
-
-int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-
-    clMemWrapper outStream;
-    cl_int         outValues[ 17 ];
-    cl_int         expectedValues[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };
-
-    cl_int        error;
-
-
-    // Create a kernel to test with
-    error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
-    test_error( error, "Unable to create testing kernel" );
-
-    // Create our output buffer
-    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error );
-    test_error( error, "Unable to create output buffer" );
-
-    // Set the argument
-    error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
-    test_error( error, "Unable to set kernel argument" );
-
-    // Run test kernel
-    size_t threads[ 1 ] = { 16 };
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-    test_error( error, "Unable to enqueue kernel" );
-
-    // Read results
-    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL );
-    test_error( error, "Unable to read results" );
-
-    // Verify results
-    for( int i = 0; i < 17; i++ )
-    {
-        if( expectedValues[ i ] != outValues[ i ] )
-        {
-            if( i == 0 )
-                log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
-            else if( i == 1 )
-                log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
-            else
-                log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_createkernelsinprogram.c
@@ -1,121 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *sample_single_kernel = {
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (int)src[tid];\n"
-"\n"
-"}\n"};
-
-const char *sample_double_kernel = {
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (int)src[tid];\n"
-"\n"
-"}\n"
-"__kernel void sample_test2(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (int)src[tid];\n"
-"\n"
-"}\n"};
-
-
-int
-test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_program        program;
-    cl_kernel        kernel[2];
-    unsigned int    num_kernels;
-    size_t            lengths[2];
-    int                err;
-
-    lengths[0] = strlen(sample_single_kernel);
-    program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL);
-    if (!program)
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgramExecutable failed\n");
-        return -1;
-    }
-
-    err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
-    if ( (err != CL_SUCCESS) || (num_kernels != 1) )
-    {
-        log_error("clCreateKernelsInProgram test failed for a single kernel\n");
-        return -1;
-    }
-
-    clReleaseKernel(kernel[0]);
-    clReleaseProgram(program);
-
-  lengths[0] = strlen(sample_double_kernel);
-  program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL);
-    if (!program)
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgramExecutable failed\n");
-        return -1;
-    }
-
-    err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
-    if ( (err != CL_SUCCESS) || (num_kernels != 2) )
-    {
-        log_error("clCreateKernelsInProgram test failed for two kernels\n");
-        return -1;
-    }
-
-  log_info("clCreateKernelsInProgram test passed\n");
-
-    clReleaseKernel(kernel[0]);
-    clReleaseKernel(kernel[1]);
-    clReleaseProgram(program);
-
-
-    return err;
-}
-
-
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_enqueue_map.cpp
@@ -1,253 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-const cl_mem_flags flag_set[] = {
-  CL_MEM_ALLOC_HOST_PTR,
-  CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
-  CL_MEM_USE_HOST_PTR,
-  CL_MEM_COPY_HOST_PTR,
-  0
-};
-const char* flag_set_names[] = {
-  "CL_MEM_ALLOC_HOST_PTR",
-  "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
-  "CL_MEM_USE_HOST_PTR",
-  "CL_MEM_COPY_HOST_PTR",
-  "0"
-};
-
-int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    const size_t bufferSize = 256*256;
-    int src_flag_id;
-    MTdata d = init_genrand( gRandomSeed );
-    cl_char *initialData = (cl_char*)malloc(bufferSize);
-    cl_char *finalData = (cl_char*)malloc(bufferSize);
-
-    for (src_flag_id=0; src_flag_id < 5; src_flag_id++)
-    {
-        clMemWrapper memObject;
-        log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
-
-        generate_random_data( kChar, (unsigned int)bufferSize, d, initialData );
-
-        if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-            memObject = clCreateBuffer(context, flag_set[src_flag_id],  bufferSize * sizeof( cl_char ), initialData, &error);
-        else
-            memObject = clCreateBuffer(context, flag_set[src_flag_id],  bufferSize * sizeof( cl_char ), NULL, &error);
-        test_error( error, "Unable to create testing buffer" );
-
-        if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-        {
-            error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL);
-            test_error( error, "clEnqueueWriteBuffer failed");
-        }
-
-        for( int i = 0; i < 128; i++ )
-        {
-
-          size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
-          size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
-
-          cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
-                                                                offset, length, 0, NULL, NULL, &error );
-          if( error != CL_SUCCESS )
-          {
-            print_error( error, "clEnqueueMapBuffer call failed" );
-            log_error( "\tOffset: %d  Length: %d\n", (int)offset, (int)length );
-            free( initialData );
-            free( finalData );
-            free_mtdata(d);
-            return -1;
-          }
-
-          // Write into the region
-          for( size_t j = 0; j < length; j++ )
-          {
-            cl_char spin = (cl_char)genrand_int32( d );
-
-            // Test read AND write in one swipe
-            cl_char value = mappedRegion[ j ];
-            value = spin - value;
-            mappedRegion[ j ] = value;
-
-            // Also update the initial data array
-            value = initialData[ offset + j ];
-            value = spin - value;
-            initialData[ offset + j ] = value;
-          }
-
-          // Unmap
-          error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
-          test_error( error, "Unable to unmap buffer" );
-        }
-
-        // Final validation: read actual values of buffer and compare against our reference
-        error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL );
-        test_error( error, "Unable to read results" );
-
-        for( size_t q = 0; q < bufferSize; q++ )
-        {
-            if( initialData[ q ] != finalData[ q ] )
-            {
-                log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] );
-                free( initialData );
-                free( finalData );
-                free_mtdata(d);
-                return -1;
-            }
-        }
-    } // cl_mem flags
-
-    free( initialData );
-    free( finalData );
-    free_mtdata(d);
-
-    return 0;
-}
-
-int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-    cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
-    const size_t imageSize = 256;
-    int src_flag_id;
-    cl_uint *initialData;
-    cl_uint *finalData;
-    MTdata  d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
-
-    initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
-    finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
-
-    if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
-    {
-        log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
-        free(initialData);
-        free(finalData);
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-  for (src_flag_id=0; src_flag_id < 5; src_flag_id++) {
-    clMemWrapper memObject;
-    log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
-
-    generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData );
-
-    if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
-      memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
-                                  imageSize, imageSize, 0, initialData, &error );
-    else
-      memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
-                                  imageSize, imageSize, 0, NULL, &error );
-    test_error( error, "Unable to create testing buffer" );
-
-    if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
-      size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1};
-      error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL);
-      test_error( error, "Unable to write to testing buffer" );
-    }
-
-    for( int i = 0; i < 128; i++ )
-    {
-
-      size_t offset[3], region[3];
-      size_t rowPitch;
-
-      offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
-      region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
-      offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
-      region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
-      offset[ 2 ] = 0;
-      region[ 2 ] = 1;
-      cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
-                                                           offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
-      if( error != CL_SUCCESS )
-      {
-        print_error( error, "clEnqueueMapImage call failed" );
-        log_error( "\tOffset: %d,%d  Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
-        free(initialData);
-        free(finalData);
-        free_mtdata(d);
-        return -1;
-      }
-
-      // Write into the region
-      cl_uint *mappedPtr = mappedRegion;
-      for( size_t y = 0; y < region[ 1 ]; y++ )
-      {
-        for( size_t x = 0; x < region[ 0 ] * 4; x++ )
-        {
-          cl_int spin = (cl_int)random_in_range( 16, 1024, d );
-
-          cl_int value;
-          // Test read AND write in one swipe
-          value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ];
-          value = spin - value;
-          mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value;
-
-          // Also update the initial data array
-          value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ];
-          value = spin - value;
-          initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value;
-        }
-      }
-
-      // Unmap
-      error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
-      test_error( error, "Unable to unmap buffer" );
-    }
-
-    // Final validation: read actual values of buffer and compare against our reference
-    size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
-    error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
-    test_error( error, "Unable to read results" );
-
-    for( size_t q = 0; q < imageSize * imageSize * 4; q++ )
-    {
-      if( initialData[ q ] != finalData[ q ] )
-      {
-        log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
-                                    (int)finalData[ q ], (int)initialData[ q ] );
-        free(initialData);
-        free(finalData);
-        free_mtdata(d);
-        return -1;
-      }
-    }
-  } // cl_mem_flags
-
-    free(initialData);
-    free(finalData);
-    free_mtdata(d);
-    return 0;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_explicit_s2v.cpp
@@ -1,384 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \
-"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n"        \
-"{\n"                                                                            \
-"    int  tid = get_global_id(0);\n"                                        \
-"    " srctype "  src = sourceValues[tid];\n"                                        \
-"\n"                                                                            \
-"    destValues[tid] = (" dsttype #size ")src;\n"                        \
-"\n"                                                                            \
-"}\n"
-
-#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \
-{        \
-DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \
-DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \
-DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \
-DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \
-}
-
-#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL }
-
-/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */
-
-#define DECLARE_S2V_IDENT_KERNELS_SET(srctype)    \
-{                                                    \
-DECLARE_S2V_IDENT_KERNELS(#srctype,bool),            \
-            DECLARE_S2V_IDENT_KERNELS(#srctype,char),            \
-            DECLARE_S2V_IDENT_KERNELS(#srctype,uchar),            \
-            DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char),    \
-DECLARE_S2V_IDENT_KERNELS(#srctype,short),            \
-DECLARE_S2V_IDENT_KERNELS(#srctype,ushort),            \
-DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short),    \
-DECLARE_S2V_IDENT_KERNELS(#srctype,int),                \
-DECLARE_S2V_IDENT_KERNELS(#srctype,uint),            \
-DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int),    \
-DECLARE_S2V_IDENT_KERNELS(#srctype,long),            \
-DECLARE_S2V_IDENT_KERNELS(#srctype,ulong),            \
-DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long),    \
-DECLARE_S2V_IDENT_KERNELS(#srctype,float),            \
-DECLARE_EMPTY                                        \
-}
-
-#define DECLARE_EMPTY_SET                \
-{                                                    \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY, \
-DECLARE_EMPTY    \
-}
-
-
-/* The overall array */
-const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = {
-    DECLARE_S2V_IDENT_KERNELS_SET(bool),
-    DECLARE_S2V_IDENT_KERNELS_SET(char),
-    DECLARE_S2V_IDENT_KERNELS_SET(uchar),
-    DECLARE_S2V_IDENT_KERNELS_SET(unsigned char),
-    DECLARE_S2V_IDENT_KERNELS_SET(short),
-    DECLARE_S2V_IDENT_KERNELS_SET(ushort),
-    DECLARE_S2V_IDENT_KERNELS_SET(unsigned short),
-    DECLARE_S2V_IDENT_KERNELS_SET(int),
-    DECLARE_S2V_IDENT_KERNELS_SET(uint),
-    DECLARE_S2V_IDENT_KERNELS_SET(unsigned int),
-    DECLARE_S2V_IDENT_KERNELS_SET(long),
-    DECLARE_S2V_IDENT_KERNELS_SET(ulong),
-    DECLARE_S2V_IDENT_KERNELS_SET(unsigned long),
-    DECLARE_S2V_IDENT_KERNELS_SET(float),
-    DECLARE_EMPTY_SET
-};
-
-int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
-                               ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    int error;
-    clMemWrapper streams[2];
-    void *outData;
-    unsigned char convertedData[ 8 ];    /* Max type size is 8 bytes */
-    size_t threadSize[3], groupSize[3];
-    unsigned int i, s;
-    unsigned char *inPtr, *outPtr;
-    size_t paramSize, destTypeSize;
-
-    const char* finalProgramSrc[2] = {
-        "", // optional pragma
-        programSrc
-    };
-
-    if (srcType == kDouble || destType == kDouble) {
-        finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-    }
-
-
-    if( programSrc == NULL )
-        return 0;
-
-    paramSize = get_explicit_type_size( srcType );
-    destTypeSize = get_explicit_type_size( destType );
-
-    size_t destStride = destTypeSize * vecSize;
-
-    outData = malloc( destStride * count );
-
-    if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
-    {
-        log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
-        return -1;
-    }
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
-    test_error( error, "clCreateBuffer failed");
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  destStride * count, NULL, &error);
-    test_error( error, "clCreateBuffer failed");
-
-    /* Set the arguments */
-    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-    /* Run the kernel */
-    threadSize[0] = count;
-
-    error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
-
-    /* Now verify the results. Each value should have been duplicated four times, and we should be able to just
-     do a memcpy instead of relying on the actual type of data */
-    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output values!" );
-
-    inPtr = (unsigned char *)inputData;
-    outPtr = (unsigned char *)outData;
-
-    for( i = 0; i < count; i++ )
-    {
-        /* Convert the input data element to our output data type to compare against */
-        convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
-
-        /* Now compare every element of the vector */
-        for( s = 0; s < vecSize; s++ )
-        {
-            if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
-            {
-                unsigned int *p = (unsigned int *)outPtr;
-                log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
-                log_error( "       Input:   0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
-                log_error( "       Actual:  0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
-                return -1;
-            }
-        }
-        inPtr += paramSize;
-        outPtr += destStride;
-    }
-
-    free( outData );
-
-    return 0;
-}
-
-int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
-                                   unsigned int count, void *inputData )
-{
-    unsigned int sizes[] = { 2, 4, 8, 16, 0 };
-    int i, dstType, failed = 0;
-
-
-    for( dstType = kBool; dstType < kNumExplicitTypes; dstType++ )
-    {
-        if( dstType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
-            continue;
-
-        if (( dstType == kLong || dstType == kULong ) && !gHasLong )
-            continue;
-
-        for( i = 0; sizes[i] != 0; i++ )
-        {
-            if( dstType != srcType )
-                continue;
-            if( strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL ||
-               strchr( get_explicit_type_name( (ExplicitType)dstType ), ' ' ) != NULL )
-                continue;
-
-            if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dstType ][ i ],
-                                           srcType, count, (ExplicitType)dstType, sizes[ i ], inputData ) != 0 )
-            {
-                log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
-                          get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dstType), sizes[i], get_explicit_type_name((ExplicitType)dstType) );
-                failed = -1;
-                break;
-            }
-        }
-    }
-
-    return failed;
-}
-
-int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
-    return 0;
-#if 0
-    bool    data[128];
-
-    generate_random_data( kBool, 128, data );
-
-    return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data );
-#endif
-}
-
-int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    char    data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kChar, 128, seed, data );
-
-    return test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, data );
-}
-
-int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    unsigned char    data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kUChar, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kUChar, 128, data ) != 0 )
-        return -1;
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    short            data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kShort, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    unsigned short    data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kUShort, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kUShort, 128, data ) != 0 )
-        return -1;
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int                data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kInt, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    unsigned int    data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kUInt, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, 128, data ) != 0 )
-        return -1;
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_long    data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kLong, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kLong,  128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_ulong    data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kULong, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kULong,  128, data ) != 0 )
-        return -1;
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    float            data[128];
-    RandomSeed seed(gRandomSeed);
-
-    generate_random_data( kFloat, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-
-int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    double            data[128];
-    RandomSeed seed(gRandomSeed);
-
-    if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) {
-        log_info("Extension cl_khr_fp64 not supported. Skipping test.\n");
-        return 0;
-    }
-
-    generate_random_data( kDouble, 128, seed, data );
-
-    if( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, data ) != 0 )
-        return -1;
-    return 0;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_float2int.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_float2int.c
@@ -1,160 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *float2int_kernel_code =
-"__kernel void test_float2int(__global float *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (int)src[tid];\n"
-"\n"
-"}\n";
-
-
-int
-verify_float2int(cl_float *inptr, cl_int *outptr, int n)
-{
-  int     i;
-
-  for (i=0; i<n; i++)
-  {
-    if (outptr[i] != (int)inptr[i])
-    {
-      log_error("FLOAT2INT test failed\n");
-      return -1;
-    }
-  }
-
-  log_info("FLOAT2INT test passed\n");
-  return 0;
-}
-
-
-int
-test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem            streams[2];
-    cl_float        *input_ptr;
-    cl_int          *output_ptr;
-    cl_program        program;
-    cl_kernel        kernel;
-    void            *values[2];
-    size_t            lengths[1];
-    size_t    threads[1];
-    int                err;
-    int                i;
-    MTdata          d;
-
-    input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    lengths[0] = strlen(float2int_kernel_code);
-    program = clCreateProgramWithSource(context, 1, &float2int_kernel_code, lengths, NULL);
-    if (!program)
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgramExecutable failed\n");
-        return -1;
-    }
-
-    kernel = clCreateKernel(program, "test_float2int", NULL);
-    if (!kernel)
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-  err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err = clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (size_t)num_elements;
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueNDRangeKernel failed\n");
-        return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
-
-    err = verify_float2int(input_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_fpmath_float.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_fpmath_float.c
@@ -1,270 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "harness/rounding_mode.h"
-
-#include "procs.h"
-
-const char *fpadd_kernel_code =
-"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *fpsub_kernel_code =
-"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *fpmul_kernel_code =
-"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-
-static const float    MAX_ERR = 1e-5f;
-
-int
-verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_ADD float test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_ADD float test passed\n");
-    return 0;
-}
-
-int
-verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_SUB float test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_SUB float test passed\n");
-    return 0;
-}
-
-int
-verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_MUL float test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_MUL float test passed\n");
-    return 0;
-}
-
-
-int
-test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[3];
-    cl_kernel kernel[3];
-
-    float *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-    size_t length = sizeof(cl_float) * num_elements;
-    int isRTZ = 0;
-    RoundingMode oldMode = kDefaultRoundingMode;
-
-    // check for floating point capabilities
-    cl_device_fp_config single_config = 0;
-    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
-    if (err) {
-      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
-      test_finish();
-      return -1;
-    }
-    //If we only support rtz mode
-    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
-    {
-        //Check to make sure we are an embedded device
-        char profile[32];
-        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
-        if( err )
-        {
-            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
-              test_finish();
-              return -1;
-        }
-        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
-        {
-            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
-            test_finish();
-            return -1;
-        }
-
-        isRTZ = 1;
-        oldMode = get_round();
-    }
-
-
-    input_ptr[0] = (cl_float*)malloc(length);
-    input_ptr[1] = (cl_float*)malloc(length);
-    input_ptr[2] = (cl_float*)malloc(length);
-    output_ptr   = (cl_float*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    test_error( err, "clEnqueueWriteBuffer failed.");
-
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    test_error( err, "clEnqueueWriteBuffer failed.");
-
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    test_error( err, "clEnqueueWriteBuffer failed.");
-
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
-    test_error( err, "create_single_kernel_helper failed");
-
-
-    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    threads[0] = (unsigned int)num_elements;
-    for (i=0; i<3; i++)
-    {
-        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-        test_error( err, "clEnqueueNDRangeKernel failed.");
-
-        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-        test_error( err, "clEnqueueReadBuffer failed.");
-
-        if( isRTZ )
-            set_round( kRoundTowardZero, kfloat );
-
-        switch (i)
-        {
-            case 0:
-                err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-                break;
-            case 1:
-                err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-                break;
-            case 2:
-                err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-                break;
-        }
-
-        if( isRTZ )
-            set_round( oldMode, kfloat );
-
-        if (err)
-            break;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<3; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    free_mtdata( d );
-
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_fpmath_float2.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_fpmath_float2.c
@@ -1,268 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "harness/rounding_mode.h"
-
-
-#include "procs.h"
-
-const char *fpadd2_kernel_code =
-"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *fpsub2_kernel_code =
-"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *fpmul2_kernel_code =
-"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-
-int
-verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_ADD float2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_ADD float2 test passed\n");
-    return 0;
-}
-
-int
-verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_SUB float2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_SUB float2 test passed\n");
-    return 0;
-}
-
-int
-verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_MUL float2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_MUL float2 test passed\n");
-    return 0;
-}
-
-
-int
-test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[3];
-    cl_kernel kernel[3];
-
-    cl_float *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_float) * 2 * num_elements;
-    int isRTZ = 0;
-    RoundingMode oldMode = kDefaultRoundingMode;
-
-    // check for floating point capabilities
-    cl_device_fp_config single_config = 0;
-    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
-    if (err) {
-      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
-      test_finish();
-      return -1;
-    }
-    //If we only support rtz mode
-    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
-    {
-        //Check to make sure we are an embedded device
-        char profile[32];
-        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
-        if( err )
-        {
-            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
-              test_finish();
-              return -1;
-        }
-        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
-        {
-            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
-            test_finish();
-            return -1;
-        }
-
-        isRTZ = 1;
-        oldMode = get_round();
-    }
-
-    input_ptr[0] = (cl_float*)malloc(length);
-    input_ptr[1] = (cl_float*)malloc(length);
-    input_ptr[2] = (cl_float*)malloc(length);
-    output_ptr   = (cl_float*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
-    test_error( err, "create_single_kernel_helper failed");
-
-
-    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-    free_mtdata(d);
-    d = NULL;
-
-    threads[0] = (unsigned int)num_elements;
-    for (i=0; i<3; i++)
-    {
-        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-      test_error( err, "clEnqueueNDRangeKernel failed.");
-
-        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-      test_error( err, "clEnqueueReadBuffer failed.");
-
-        if( isRTZ )
-            set_round( kRoundTowardZero, kfloat );
-
-        switch (i)
-        {
-            case 0:
-                err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
-                break;
-            case 1:
-                err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
-                break;
-            case 2:
-                err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
-                break;
-        }
-
-        if( isRTZ )
-            set_round( oldMode, kfloat );
-
-        if (err)
-            break;
-    }
-
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<3; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_fpmath_float4.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_fpmath_float4.c
@@ -1,269 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/rounding_mode.h"
-
-const char *fpadd4_kernel_code =
-"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *fpsub4_kernel_code =
-"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *fpmul4_kernel_code =
-"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-
-int
-verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_ADD float4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_ADD float4 test passed\n");
-    return 0;
-}
-
-int
-verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_SUB float4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_SUB float4 test passed\n");
-    return 0;
-}
-
-int
-verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
-{
-  float       r;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-    r = inptrA[i] * inptrB[i];
-    if (r != outptr[i])
-    {
-      log_error("FP_MUL float4 test failed\n");
-      return -1;
-    }
-  }
-
-  log_info("FP_MUL float4 test passed\n");
-  return 0;
-}
-
-
-int
-test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[3];
-    cl_kernel kernel[3];
-
-    cl_float *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_float) * 4 * num_elements;
-    int isRTZ = 0;
-    RoundingMode oldMode = kDefaultRoundingMode;
-
-    // check for floating point capabilities
-    cl_device_fp_config single_config = 0;
-    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
-    if (err) {
-      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
-      test_finish();
-      return -1;
-    }
-    //If we only support rtz mode
-    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
-    {
-        //Check to make sure we are an embedded device
-        char profile[32];
-        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
-        if( err )
-        {
-            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
-              test_finish();
-              return -1;
-        }
-        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
-        {
-            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
-            test_finish();
-            return -1;
-        }
-
-        isRTZ = 1;
-        oldMode = get_round();
-    }
-
-    input_ptr[0] = (cl_float*)malloc(length);
-    input_ptr[1] = (cl_float*)malloc(length);
-    input_ptr[2] = (cl_float*)malloc(length);
-    output_ptr   = (cl_float*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-
-    free_mtdata(d);
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
-    test_error( err, "create_single_kernel_helper failed");
-
-
-    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<3; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    test_error( err, "clEnqueueNDRangeKernel failed.");
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    test_error( err, "clEnqueueReadBuffer failed.");
-
-    if( isRTZ )
-        set_round( kRoundTowardZero, kfloat );
-
-    switch (i)
-    {
-      case 0:
-        err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
-        break;
-      case 1:
-        err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
-        break;
-      case 2:
-        err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
-        break;
-    }
-
-    if( isRTZ )
-        set_round( oldMode, kfloat );
-
-    if (err)
-      break;
-    }
-
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<3; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_hiloeo.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_hiloeo.c
@@ -1,421 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-int hi_offset( int index, int vectorSize) { return index + vectorSize / 2; }
-int lo_offset( int index, int vectorSize) { return index; }
-int even_offset( int index, int vectorSize ) { return index * 2; }
-int odd_offset( int index, int vectorSize ) { return index * 2 + 1; }
-
-typedef int (*OffsetFunc)( int index, int vectorSize );
-static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
-typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
-static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
-static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
-
-static const unsigned int vector_sizes[] =     { 1, 2, 3, 4, 8, 16};
-static const unsigned int vector_aligns[] =    { 1, 2, 4, 4, 8, 16};
-static const unsigned int out_vector_idx[] =   { 0, 0, 1, 1, 3, 4};
-// if input is size vector_sizes[i], output is size
-// vector_sizes[out_vector_idx[i]]
-// input type name is strcat(gentype, vector_size_names[i]);
-// and output type name is
-// strcat(gentype, vector_size_names[out_vector_idx[i]]);
-static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
-    -1,-1,-1,-1,-1,-1,-1,5};
-static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
-
-static const size_t  kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
-static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
-
-int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_int *input_ptr, *output_ptr, *p;
-    int err;
-    cl_uint i;
-    int hasDouble = is_extension_available( device, "cl_khr_fp64" );
-    cl_uint vectorSize, operatorToUse;
-    cl_uint type;
-    MTdata d;
-
-    int expressionMode;
-    int numExpressionModes = 2;
-
-    size_t length = sizeof(cl_int) * 4 * n_elems;
-
-    input_ptr   = (cl_int*)malloc(length);
-    output_ptr  = (cl_int*)malloc(length);
-
-    p = input_ptr;
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<4 * (cl_uint) n_elems; i++)
-        p[i] = genrand_int32(d);
-    free_mtdata(d); d = NULL;
-
-    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
-    {
-        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
-        size_t elementCount = length / kSizes[type];
-        cl_mem streams[2];
-
-        // skip double if unavailable
-        if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
-            continue;
-
-        if( !gHasLong &&
-            ( 0 == strcmp( test_str_names[type], "long" )) &&
-            ( 0 == strcmp( test_str_names[type], "ulong" )))
-            continue;
-
-        log_info( "%s", test_str_names[type] );
-        fflush( stdout );
-
-        // Set up data streams for the type
-        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-        if (!streams[0])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-        if (!streams[1])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-
-        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueWriteBuffer failed\n");
-            return -1;
-        }
-
-        for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
-        {
-            log_info( " %s", operatorToUse_names[ operatorToUse ] );
-            fflush( stdout );
-            for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
-                for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {
-
-                    cl_program program = NULL;
-                    cl_kernel kernel = NULL;
-                    cl_uint outVectorSize = out_vector_idx[vectorSize];
-                    char expression[1024];
-
-                    const char *source[] = {
-                        "", // optional pragma string
-                        "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
-                        "(__global ", test_str_names[type], vector_size_names[vectorSize],
-                        " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
-                        " *dst)\n"
-                        "{\n"
-                        "    int  tid = get_global_id(0);\n"
-                        "\n"
-                        "    ", test_str_names[type],
-                        vector_size_names[out_vector_idx[vectorSize]],
-                        " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
-                        "    dst[tid] = tmp;\n"
-                        "}\n"
-                    };
-
-                    if(expressionMode == 0) {
-                        sprintf(expression, "srcA[tid]");
-                    } else if(expressionMode == 1) {
-                        switch(vector_sizes[vectorSize]) {
-                            case 16:
-                                sprintf(expression,
-                                        "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 8:
-                                sprintf(expression,
-                                        "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 4:
-                                sprintf(expression,
-                                        "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 3:
-                                sprintf(expression,
-                                        "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            case 2:
-                                sprintf(expression,
-                                        "((%s2)(srcA[tid].s0, srcA[tid].s1))",
-                                        test_str_names[type]
-                                        );
-                                break;
-                            default :
-                                sprintf(expression, "srcA[tid]");
-                                log_info("Default\n");
-                        }
-                    } else {
-                        sprintf(expression, "srcA[tid]");
-                    }
-
-                    if (0 == strcmp( test_str_names[type], "double" ))
-                        source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-
-                    char kernelName[128];
-                    snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
-                    err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
-                    if (err)
-                        return -1;
-
-                    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-                    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clSetKernelArgs failed\n");
-                        return -1;
-                    }
-
-                    //Wipe the output buffer clean
-                    uint32_t pattern = 0xdeadbeef;
-                    memset_pattern4( output_ptr, &pattern, length );
-                    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueWriteBuffer failed\n");
-                        return -1;
-                    }
-
-                    size_t size = elementCount / (vector_aligns[vectorSize]);
-                    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueNDRangeKernel failed\n");
-                        return -1;
-                    }
-
-                    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-                    if (err != CL_SUCCESS)
-                    {
-                        log_error("clEnqueueReadBuffer failed\n");
-                        return -1;
-                    }
-
-                    char *inP = (char *)input_ptr;
-                    char *outP = (char *)output_ptr;
-                    outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
-                                            ( vector_sizes[ out_vector_idx[vectorSize] ] ) );
-                    // was                outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
-                    for( size_t e = 0; e < size; e++ )
-                    {
-                        if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
-
-                            log_info("e is %d\n", (int)e);
-                            fflush(stdout);
-                            // break;
-                            return -1;
-                        }
-                        inP += kSizes[type] * ( vector_aligns[vectorSize] );
-                        outP += kSizes[type] * ( vector_aligns[outVectorSize] );
-                    }
-
-                    clReleaseKernel( kernel );
-                    clReleaseProgram( program );
-                    log_info( "." );
-                    fflush( stdout );
-                }
-            }
-        }
-
-        clReleaseMemObject( streams[0] );
-        clReleaseMemObject( streams[1] );
-        log_info( "done\n" );
-    }
-
-    log_info("HiLoEO test passed\n");
-
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
-{
-    cl_ulong  array[8];
-    void *p = array;
-    size_t halfVectorSize  = vector_sizes[out_vector_idx[vectorSize]];
-    size_t cmpVectorSize =  vector_sizes[out_vector_idx[vectorSize]];
-    // was 1 << (vectorSize-1);
-    OffsetFunc f = offsetFuncs[ operatorToUse ];
-    size_t elementSize =  kSizes[type];
-
-    if(vector_size_names[vectorSize][0] == '3') {
-        if(operatorToUse_names[operatorToUse][0] == 'h' ||
-           operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
-        {
-            cmpVectorSize = 1; // special case for vec3 ignored values
-        }
-    }
-
-    switch( elementSize )
-    {
-        case 1:
-        {
-            char *i = (char*)in;
-            char *o = (char*)out;
-            size_t j;
-            cl_uint k;
-            OffsetFunc f = offsetFuncs[ operatorToUse ];
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                char *o2 = (char*)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
-                {
-                    log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-                    for( j = 1; j < halfVectorSize * 2; j++ )
-                        log_info( ", %d", i[j] );
-                    log_info( " } --> { %d", o[0] );
-                    for( j = 1; j < halfVectorSize; j++ )
-                        log_info( ", %d", o[j] );
-                    log_info( " }\n" );
-                    return -1;
-                }
-                i += 2 * halfVectorSize;
-                o += halfVectorSize;
-            }
-        }
-            break;
-
-        case 2:
-        {
-            short *i = (short*)in;
-            short *o = (short*)out;
-            size_t j;
-            cl_uint k;
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                short *o2 = (short*)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
-                {
-                    log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-                    for( j = 1; j < halfVectorSize * 2; j++ )
-                        log_info( ", %d", i[j] );
-                    log_info( " } --> { %d", o[0] );
-                    for( j = 1; j < halfVectorSize; j++ )
-                        log_info( ", %d", o[j] );
-                    log_info( " }\n" );
-                    return -1;
-                }
-                i += 2 * halfVectorSize;
-                o += halfVectorSize;
-            }
-        }
-            break;
-
-        case 4:
-        {
-            int *i = (int*)in;
-            int *o = (int*)out;
-            size_t j;
-            cl_uint k;
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                int *o2 = (int *)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                for( j = 0; j < cmpVectorSize; j++ )
-        {
-            /* Allow float nans to be binary different */
-            if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
-            {
-                log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-            for( j = 1; j < halfVectorSize * 2; j++ )
-                log_info( ", 0x%8.8x", i[j] );
-            log_info( " } --> { 0x%8.8x", o[0] );
-            for( j = 1; j < halfVectorSize; j++ )
-                log_info( ", 0x%8.8x", o[j] );
-            log_info( " }\n" );
-            return -1;
-            }
-        }
-        i += 2 * halfVectorSize;
-        o += halfVectorSize;
-            }
-        }
-            break;
-
-        case 8:
-        {
-            cl_ulong *i = (cl_ulong*)in;
-            cl_ulong *o = (cl_ulong*)out;
-            size_t j;
-            cl_uint k;
-
-            for( k = 0; k  < elementCount; k++ )
-            {
-                cl_ulong *o2 = (cl_ulong*)p;
-                for( j = 0; j < halfVectorSize; j++ )
-                    o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
-                if( memcmp( o, o2, elementSize * cmpVectorSize ) )
-                {
-                    log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
-                    for( j = 1; j < halfVectorSize * 2; j++ )
-                        log_info( ", 0x%16.16llx", i[j] );
-                    log_info( " } --> { 0x%16.16llx", o[0] );
-                    for( j = 1; j < halfVectorSize; j++ )
-                        log_info( ", 0x%16.16llx", o[j] );
-                    log_info( " }\n" );
-                    return -1;
-                }
-                i += 2 * halfVectorSize;
-                o += halfVectorSize;
-            }
-        }
-            break;
-
-        default:
-            log_info( "Internal error. Unknown data type\n" );
-            return -2;
-    }
-
-    return 0;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_hostptr.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_hostptr.c
@@ -1,276 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *hostptr_kernel_code =
-"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-static const float    MAX_ERR = 1e-5f;
-
-static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
-{
-    cl_float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            return -1;
-        }
-    }
-    return 0;
-}
-
-static void make_random_data(unsigned count, float *ptr, MTdata d)
-{
-    cl_uint     i;
-    for (i=0; i<count; i++)
-        ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), d);
-}
-
-static unsigned char *
-generate_rgba8_image(int w, int h, MTdata d)
-{
-    unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned char)genrand_int32(d);
-
-    return ptr;
-}
-
-static unsigned char *
-randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
-{
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned char)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-int
-test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_float            *input_ptr[2], *output_ptr;
-    cl_program            program;
-    cl_kernel           kernel;
-    size_t              threads[3]={0,0,0};
-    cl_image_format     img_format;
-    cl_uchar            *rgba8_inptr, *rgba8_outptr;
-    void                *lock_buffer;
-    int                 img_width = 512;
-    int                 img_height = 512;
-    cl_int              err;
-    MTdata              d;
-    RoundingMode        oldRoundMode;
-    int                    isRTZ = 0;
-
-    // Block to mark deletion of streams before deletion of host_ptr
-    {
-        clMemWrapper        streams[7];
-
-        PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-        // Alloc buffers
-        input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-        input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-        output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-
-        d = init_genrand( gRandomSeed );
-        rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
-        rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);
-
-        // Random data
-        make_random_data(num_elements, input_ptr[0], d);
-        make_random_data(num_elements, input_ptr[1], d);
-
-        // Create host-side input
-        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
-        test_error(err, "clCreateBuffer 0 failed");
-
-        // Create a copied input
-        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
-        test_error(err, "clCreateBuffer 1 failed");
-
-        // Create a host-side output
-        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
-        test_error(err, "clCreateBuffer 2 failed");
-
-        // Create a host-side input
-        img_format.image_channel_order = CL_RGBA;
-        img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
-        test_error(err, "create_image_2d 3 failed");
-
-        // Create a copied input
-        img_format.image_channel_order = CL_RGBA;
-        img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
-        test_error(err, "create_image_2d 4 failed");
-
-        // Create a host-side output
-        img_format.image_channel_order = CL_RGBA;
-        img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
-        test_error(err, "create_image_2d 5 failed");
-
-        // Create a copied output
-        img_format.image_channel_data_type = CL_RGBA;
-        img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
-        test_error(err, "create_image_2d 6 failed");
-
-        err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
-        test_error(err, "create_single_kernel_helper failed");
-
-        // Execute kernel
-        err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-        err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-        err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
-        test_error(err, "clSetKernelArg failed");
-
-        threads[0] = (size_t)num_elements;
-        err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-        test_error(err, "clEnqueueNDRangeKernel failed");
-
-        cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
-        test_error( err, "clEnqueueMapBuffer failed" );
-
-        //If we only support rtz mode
-        if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
-        {
-            oldRoundMode = set_round(kRoundTowardZero, kfloat);
-            isRTZ = 1;
-        }
-
-        if (isRTZ)
-            oldRoundMode = set_round(kRoundTowardZero, kfloat);
-
-        // Verify that we got the expected results back on the host side
-        err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
-        if (err)
-        {
-            log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
-                      "and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
-        } else {
-            log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
-                     "and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
-        }
-
-        if (isRTZ)
-            set_round(oldRoundMode, kfloat);
-
-        err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
-        test_error( err, "clEnqueueUnmapMemObject failed" );
-
-        size_t origin[3]={0,0,0}, region[3]={img_width, img_height, 1};
-        randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
-        free_mtdata(d); d = NULL;
-
-        // Copy from host-side to host-side
-        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
-        err = clEnqueueCopyImage(queue, streams[3], streams[5],
-                                 origin, origin, region,  0, NULL, NULL);
-        test_error(err, "clEnqueueCopyImage failed");
-        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");
-
-        // test the lock buffer interface
-        log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
-        size_t row_pitch;
-        lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
-                                        CL_MAP_READ, origin, region,
-                                        &row_pitch, NULL,
-                                        0, NULL, NULL, &err);
-        test_error(err, "clEnqueueMapImage failed");
-
-        err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
-        if (err != CL_SUCCESS)
-        {
-            log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
-            return -1;
-        }
-        log_info("verify_rgba8_image passed after clEnqueueMapImage\n");
-
-        err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
-        test_error(err, "clEnqueueUnmapMemObject failed");
-
-        // Copy host-side to device-side and read back
-        log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
-        err = clEnqueueCopyImage(queue, streams[3], streams[5],
-                                 origin, origin, region,
-                                 0, NULL, NULL);
-        test_error(err, "clEnqueueCopyImage failed");
-
-        err = clEnqueueReadImage(queue, streams[5], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
-        test_error(err, "clEnqueueReadImage failed");
-
-        err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
-        if (err != CL_SUCCESS)
-        {
-            log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
-            return -1;
-        }
-        log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
-    }
-    // cleanup
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    free(rgba8_inptr);
-    free(rgba8_outptr);
-
-    return err;
-}
-
-
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_if.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_if.c
@@ -1,165 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *conditional_kernel_code =
-"__kernel void test_if(__global int *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    if (src[tid] == 0)\n"
-"        dst[tid] = 0x12345678;\n"
-"    else if (src[tid] == 1)\n"
-"        dst[tid] = 0x23456781;\n"
-"    else if (src[tid] == 2)\n"
-"        dst[tid] = 0x34567812;\n"
-"    else if (src[tid] == 3)\n"
-"        dst[tid] = 0x45678123;\n"
-"    else if (src[tid] == 4)\n"
-"        dst[tid] = 0x56781234;\n"
-"    else if (src[tid] == 5)\n"
-"        dst[tid] = 0x67812345;\n"
-"    else if (src[tid] == 6)\n"
-"        dst[tid] = 0x78123456;\n"
-"    else if (src[tid] == 7)\n"
-"        dst[tid] = 0x81234567;\n"
-"    else\n"
-"        dst[tid] = 0x7FFFFFFF;\n"
-"\n"
-"}\n";
-
-const int results[] = {
-    0x12345678,
-    0x23456781,
-    0x34567812,
-    0x45678123,
-    0x56781234,
-    0x67812345,
-    0x78123456,
-    0x81234567,
-};
-
-int
-verify_if(int *inptr, int *outptr, int n)
-{
-    int     r, i;
-
-    for (i=0; i<n; i++)
-    {
-        if (inptr[i] <= 7)
-            r = results[inptr[i]];
-        else
-            r = 0x7FFFFFFF;
-
-        if (r != outptr[i])
-        {
-            log_error("IF test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("IF test passed\n");
-    return 0;
-}
-
-int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    cl_int *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_int) * num_elements;
-    input_ptr  = (cl_int*)malloc(length);
-    output_ptr = (cl_int*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = (int)get_random_float(0, 32, d);
-
-    free_mtdata(d); d = NULL;
-
-  err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueWriteBuffer failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &conditional_kernel_code, "test_if" );
-  if (err)
-    return -1;
-
-  err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (unsigned int)num_elements;
-  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueNDRangeKernel failed\n");
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clReadArray failed\n");
-    return -1;
-  }
-
-  err = verify_if(input_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_imagecopy.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_imagecopy.c
@@ -1,234 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static unsigned char *
-generate_rgba8_image(int w, int h, MTdata d)
-{
-    unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned char)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-
-static unsigned short *
-generate_rgba16_image(int w, int h, MTdata d)
-{
-    unsigned short    *ptr = (unsigned short *)malloc(w * h * 4 * sizeof(unsigned short));
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned short)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-
-static float *
-generate_rgbafp_image(int w, int h, MTdata d)
-{
-    float   *ptr = (float*)malloc(w * h * 4 * sizeof(float));
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
-
-    return ptr;
-}
-
-static int
-verify_rgbafp_image(float *image, float *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-
-int
-test_imagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_image_format    img_format;
-    unsigned char    *rgba8_inptr, *rgba8_outptr;
-    unsigned short    *rgba16_inptr, *rgba16_outptr;
-    float            *rgbafp_inptr, *rgbafp_outptr;
-    clMemWrapper            streams[6];
-    int                img_width = 512;
-    int                img_height = 512;
-    int                i, err;
-    MTdata          d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    d = init_genrand( gRandomSeed );
-    rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
-    rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
-    rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
-    rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
-    rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-    streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-    streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    for (i=0; i<3; i++)
-    {
-        void    *p, *outp;
-        int        x, y, delta_w = img_width/8, delta_h = img_height/16;
-
-        switch (i)
-        {
-            case 0:
-                p = (void *)rgba8_inptr;
-                outp = (void *)rgba8_outptr;
-        log_info("Testing CL_RGBA CL_UNORM_INT8\n");
-                break;
-            case 1:
-                p = (void *)rgba16_inptr;
-                outp = (void *)rgba16_outptr;
-        log_info("Testing CL_RGBA CL_UNORM_INT16\n");
-                break;
-            case 2:
-                p = (void *)rgbafp_inptr;
-                outp = (void *)rgbafp_outptr;
-        log_info("Testing CL_RGBA CL_FLOAT\n");
-                break;
-        }
-
-        size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
-        err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
-        test_error(err, "create_image_2d failed");
-
-        int copy_number = 0;
-        for (y=0; y<img_height; y+=delta_h)
-        {
-            for (x=0; x<img_width; x+=delta_w)
-            {
-        copy_number++;
-        size_t copy_origin[3] = {x,y,0}, copy_region[3]={delta_w, delta_h, 1};
-        err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1],
-                                 copy_origin, copy_origin, copy_region,
-                                 0, NULL, NULL);
-        if (err) {
-          log_error("Copy %d (origin [%d, %d], size [%d, %d], image size [%d x %d]) Failed\n", copy_number, x, y, delta_w, delta_h, img_width, img_height);
-        }
-        test_error(err, "clEnqueueCopyImage failed");
-            }
-        }
-
-        err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
-        test_error(err, "clEnqueueReadImage failed");
-
-        switch (i)
-        {
-            case 0:
-                err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
-                break;
-            case 1:
-                err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
-                break;
-            case 2:
-                err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-  free(rgba8_inptr);
-  free(rgba16_inptr);
-  free(rgbafp_inptr);
-  free(rgba8_outptr);
-  free(rgba16_outptr);
-  free(rgbafp_outptr);
-
-    if (err)
-        log_error("IMAGE copy test failed\n");
-    else
-        log_info("IMAGE copy test passed\n");
-
-    return err;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_imagecopy3d.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_imagecopy3d.c
@@ -1,237 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static unsigned char *
-generate_uint8_image(unsigned num_elements, MTdata d)
-{
-    unsigned char *ptr = (unsigned char*)malloc(num_elements);
-    unsigned i;
-
-    for (i=0; i<num_elements; i++)
-        ptr[i] = (unsigned char)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_uint8_image(unsigned char *image, unsigned char *outptr, unsigned num_elements)
-{
-    unsigned i;
-
-    for (i=0; i<num_elements; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-
-static unsigned short *
-generate_uint16_image(unsigned num_elements, MTdata d)
-{
-    unsigned short    *ptr = (unsigned short *)malloc(num_elements * sizeof(unsigned short));
-    unsigned i;
-
-    for (i=0; i<num_elements; i++)
-        ptr[i] = (unsigned short)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_uint16_image(unsigned short *image, unsigned short *outptr, unsigned num_elements)
-{
-    unsigned i;
-
-    for (i=0; i<num_elements; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-
-static float *
-generate_float_image(unsigned num_elements, MTdata d)
-{
-    float   *ptr = (float*)malloc(num_elements * sizeof(float));
-    unsigned i;
-
-    for (i=0; i<num_elements; i++)
-        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
-
-    return ptr;
-}
-
-static int
-verify_float_image(float *image, float *outptr, unsigned num_elements)
-{
-    unsigned i;
-
-    for (i=0; i<num_elements; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-
-int
-test_imagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements_ignored)
-{
-    cl_image_format    img_format;
-    unsigned char    *rgba8_inptr, *rgba8_outptr;
-    unsigned short *rgba16_inptr, *rgba16_outptr;
-    float *rgbafp_inptr, *rgbafp_outptr;
-    clMemWrapper streams[6];
-    int img_width = 128;
-    int img_height = 128;
-    int img_depth = 64;
-    int i;
-    cl_int        err;
-    unsigned    num_elements = img_width * img_height * img_depth * 4;
-    MTdata      d;
-
-    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-    d = init_genrand( gRandomSeed );
-    rgba8_inptr = (unsigned char *)generate_uint8_image(num_elements, d);
-    rgba16_inptr = (unsigned short *)generate_uint16_image(num_elements, d);
-    rgbafp_inptr = (float *)generate_float_image(num_elements, d);
-    free_mtdata(d); d = NULL;
-
-    rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * num_elements);
-    rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * num_elements);
-    rgbafp_outptr = (float*)malloc(sizeof(float) * num_elements);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-    streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-    streams[3] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[4] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-    streams[5] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-
-    for (i=0; i<3; i++)
-    {
-        void    *p, *outp;
-        int        x, y, z, delta_w = img_width/8, delta_h = img_height/16, delta_d = img_depth/4;
-
-        switch (i)
-        {
-            case 0:
-                p = (void *)rgba8_inptr;
-                outp = (void *)rgba8_outptr;
-                break;
-            case 1:
-                p = (void *)rgba16_inptr;
-                outp = (void *)rgba16_outptr;
-                break;
-            case 2:
-                p = (void *)rgbafp_inptr;
-                outp = (void *)rgbafp_outptr;
-                break;
-        }
-
-        size_t origin[3]={0,0,0}, region[3]={img_width, img_height, img_depth};
-        err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
-        test_error(err, "clEnqueueWriteImage failed");
-
-        for (z=0; z<img_depth; z+=delta_d)
-        {
-            for (y=0; y<img_height; y+=delta_h)
-            {
-                for (x=0; x<img_width; x+=delta_w)
-                {
-                  origin[0] = x; origin[1] = y; origin[2] = z;
-                  region[0] = delta_w; region[1] = delta_h; region[2] = delta_d;
-
-                  err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
-                  test_error(err, "clEnqueueCopyImage failed");
-                }
-            }
-        }
-
-        origin[0] = 0; origin[1] = 0; origin[2] = 0;
-        region[0] = img_width; region[1] = img_height; region[2] = img_depth;
-        err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
-        test_error(err, "clEnqueueReadImage failed");
-
-        switch (i)
-        {
-            case 0:
-                err = verify_uint8_image(rgba8_inptr, rgba8_outptr, num_elements);
-        if (err) log_error("Failed uint8\n");
-                break;
-            case 1:
-                err = verify_uint16_image(rgba16_inptr, rgba16_outptr, num_elements);
-        if (err) log_error("Failed uint16\n");
-                break;
-            case 2:
-                err = verify_float_image(rgbafp_inptr, rgbafp_outptr, num_elements);
-        if (err) log_error("Failed float\n");
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-  free(rgba8_inptr);
-  free(rgba16_inptr);
-  free(rgbafp_inptr);
-  free(rgba8_outptr);
-  free(rgba16_outptr);
-  free(rgbafp_outptr);
-
-    if (err)
-        log_error("IMAGE3D copy test failed\n");
-    else
-        log_info("IMAGE3D copy test passed\n");
-
-    return err;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_imagerandomcopy.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_imagerandomcopy.c
@@ -1,269 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static unsigned char *
-generate_rgba8_image(int w, int h, MTdata d)
-{
-    unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned char)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_rgba8_image(unsigned char *image, unsigned char *outptr, int x, int y, int w, int h, int img_width)
-{
-    int     i, j, indx;
-
-    for (j=y; j<(y+h); j++)
-    {
-        indx = j*img_width*4;
-        for (i=x*4; i<(x+w)*4; i++)
-        {
-            if (outptr[indx+i] != image[indx+i])
-                return -1;
-        }
-    }
-    return 0;
-}
-
-
-static unsigned short *
-generate_rgba16_image(int w, int h, MTdata d)
-{
-    unsigned short    *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned short)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_rgba16_image(unsigned short *image, unsigned short *outptr, int x, int y, int w, int h, int img_width)
-{
-    int     i, j, indx;
-
-    for (j=y; j<(y+h); j++)
-    {
-        indx = j*img_width*4;
-        for (i=x*4; i<(x+w)*4; i++)
-        {
-            if (outptr[indx+i] != image[indx+i])
-                return -1;
-        }
-    }
-    return 0;
-}
-
-
-static float *
-generate_rgbafp_image(int w, int h, MTdata d)
-{
-    float   *ptr = (float*)malloc(w * h * 4 * sizeof(float));
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
-
-    return ptr;
-}
-
-static int
-verify_rgbafp_image(float *image, float *outptr, int x, int y, int w, int h, int img_width)
-{
-    int     i, j, indx;
-
-    for (j=y; j<(y+h); j++)
-    {
-        indx = j*img_width*4;
-        for (i=x*4; i<(x+w)*4; i++)
-        {
-            if (outptr[indx+i] != image[indx+i])
-                return -1;
-        }
-    }
-    return 0;
-}
-
-
-#define NUM_COPIES    10
-static const char *test_str_names[] = { "CL_RGBA CL_UNORM_INT8", "CL_RGBA CL_UNORM_INT16", "CL_RGBA CL_FLOAT" };
-
-int
-test_imagerandomcopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_image_format    img_format;
-    unsigned char    *rgba8_inptr, *rgba8_outptr;
-    unsigned short    *rgba16_inptr, *rgba16_outptr;
-    float            *rgbafp_inptr, *rgbafp_outptr;
-    clMemWrapper            streams[6];
-    int                img_width = 512;
-    int                img_height = 512;
-    int                i, j;
-    cl_int          err;
-    MTdata          d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    log_info("Testing with image %d x %d.\n", img_width, img_height);
-
-    d = init_genrand( gRandomSeed );
-    rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
-    rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
-    rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
-
-    rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
-    rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
-    rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-    streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-    streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    for (i=0; i<3; i++)
-    {
-        void            *p, *outp;
-        unsigned int    x[2], y[2], delta_w, delta_h ;
-
-        switch (i)
-        {
-            case 0:
-                p = (void *)rgba8_inptr;
-                outp = (void *)rgba8_outptr;
-                break;
-            case 1:
-                p = (void *)rgba16_inptr;
-                outp = (void *)rgba16_outptr;
-                break;
-            case 2:
-                p = (void *)rgbafp_inptr;
-                outp = (void *)rgbafp_outptr;
-                break;
-        }
-
-        size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
-        err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
-//        err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
-        test_error(err, "clEnqueueWriteImage failed");
-
-        for (j=0; j<NUM_COPIES; j++)
-        {
-            x[0] = (int)get_random_float(0, img_width, d);
-            do
-            {
-                x[1] = (int)get_random_float(0, img_width, d);
-            } while (x[1] <= x[0]);
-
-            y[0] = (int)get_random_float(0, img_height, d);
-            do
-            {
-                y[1] = (int)get_random_float(0, img_height, d);
-            } while (y[1] <= y[0]);
-
-            delta_w = x[1] - x[0];
-            delta_h = y[1] - y[0];
-            log_info("Testing clCopyImage for %s: x = %d, y = %d, w = %d, h = %d\n", test_str_names[i], x[0], y[0], delta_w, delta_h);
-            origin[0] = x[0];
-            origin[1] = y[0];
-            origin[2] = 0;
-            region[0] = delta_w;
-            region[1] = delta_h;
-            region[2] = 1;
-            err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
-//          err = clCopyImage(context, streams[i*2], streams[i*2+1],
-//                              x[0], y[0], 0, x[0], y[0], 0, delta_w, delta_h, 0, NULL);
-            test_error(err, "clEnqueueCopyImage failed");
-
-            origin[0] = 0;
-            origin[1] = 0;
-            origin[2] = 0;
-            region[0] = img_width;
-            region[1] = img_height;
-            region[2] = 1;
-            err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
-//            err = clReadImage(context, streams[i*2+1], false, 0, 0, 0, img_width, img_height, 0, 0, 0, outp, NULL);
-            test_error(err, "clEnqueueReadImage failed");
-
-            switch (i)
-            {
-                case 0:
-                    err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, x[0], y[0], delta_w, delta_h, img_width);
-                    break;
-                case 1:
-                    err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, x[0], y[0], delta_w, delta_h, img_width);
-                    break;
-                case 2:
-                    err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, x[0], y[0], delta_w, delta_h, img_width);
-                    break;
-            }
-
-            if (err)
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-    free_mtdata(d); d = NULL;
-    free(rgba8_inptr);
-    free(rgba16_inptr);
-    free(rgbafp_inptr);
-    free(rgba8_outptr);
-    free(rgba16_outptr);
-    free(rgbafp_outptr);
-
-    if (err)
-        log_error("IMAGE random copy test failed\n");
-    else
-        log_info("IMAGE random copy test passed\n");
-
-    return err;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_imagereadwrite.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_imagereadwrite.c
@@ -1,417 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static unsigned char *
-generate_rgba8_image(int w, int h, MTdata d)
-{
-  unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
-  int             i;
-
-  for (i=0; i<w*h*4; i++)
-    ptr[i] = (unsigned char)genrand_int32(d);
-
-  return ptr;
-}
-
-static void
-update_rgba8_image(unsigned char *p, int x, int y, int w, int h, int img_width, MTdata d)
-{
-    int        i, j, indx;
-
-    for (i=y; i<y+h; i++)
-    {
-        indx = (i * img_width + x) * 4;
-        for (j=x; j<x+w; j++,indx+=4)
-        {
-            p[indx+0] = (unsigned char)genrand_int32(d);
-            p[indx+1] = (unsigned char)genrand_int32(d);
-            p[indx+2] = (unsigned char)genrand_int32(d);
-            p[indx+3] = (unsigned char)genrand_int32(d);
-        }
-    }
-}
-
-static void
-update_image_from_image(void *out, void *in, int x, int y, int w, int h, int img_width, int elem_size)
-{
-    int        i, j, k, out_indx, in_indx;
-    in_indx = 0;
-
-    for (i=y; i<y+h; i++)
-    {
-        out_indx = (i * img_width + x) * elem_size;
-        for (j=x; j<x+w; j++,out_indx+=elem_size)
-        {
-            for (k=0; k<elem_size; k++)
-            {
-                ((char*)out)[out_indx + k] = ((char*)in)[in_indx];
-                in_indx++;
-            }
-        }
-    }
-}
-
-static int
-verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
-{
-  int     i;
-
-  for (i=0; i<w*h*4; i++)
-  {
-    if (outptr[i] != image[i])
-    {
-        log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[i], image[i+1], image[i+2], image[i+3], outptr[i], outptr[i+1], outptr[i+2], outptr[i+3]);
-        return -1;
-    }
-  }
-
-  return 0;
-}
-
-
-static unsigned short *
-generate_rgba16_image(int w, int h, MTdata d)
-{
-  unsigned short    *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
-  int             i;
-
-  for (i=0; i<w*h*4; i++)
-    ptr[i] = (unsigned short)genrand_int32(d);
-
-  return ptr;
-}
-
-static void
-update_rgba16_image(unsigned short *p, int x, int y, int w, int h, int img_width, MTdata d)
-{
-    int        i, j, indx;
-
-    for (i=y; i<y+h; i++)
-    {
-        indx = (i * img_width + x) * 4;
-        for (j=x; j<x+w; j++,indx+=4)
-        {
-            p[indx+0] = (unsigned short)genrand_int32(d);
-            p[indx+1] = (unsigned short)genrand_int32(d);
-            p[indx+2] = (unsigned short)genrand_int32(d);
-            p[indx+3] = (unsigned short)genrand_int32(d);
-        }
-    }
-}
-
-static int
-verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h)
-{
-  int     i;
-
-  for (i=0; i<w*h*4; i++)
-  {
-    if (outptr[i] != image[i])
-    {
-        log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[i], image[i+1], image[i+2], image[i+3], outptr[i], outptr[i+1], outptr[i+2], outptr[i+3]);
-        return -1;
-    }
-  }
-
-  return 0;
-}
-
-
-static float *
-generate_rgbafp_image(int w, int h, MTdata d)
-{
-  float   *ptr = (float*)malloc(w * h * 4 * sizeof(float));
-  int     i;
-
-  for (i=0; i<w*h*4; i++)
-    ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
-
-  return ptr;
-}
-
-static void
-update_rgbafp_image(float *p, int x, int y, int w, int h, int img_width, MTdata d)
-{
-    int        i, j, indx;
-
-    for (i=y; i<y+h; i++)
-    {
-        indx = (i * img_width + x) * 4;
-        for (j=x; j<x+w; j++,indx+=4)
-        {
-            p[indx+0] = get_random_float(-0x40000000, 0x40000000, d);
-            p[indx+1] = get_random_float(-0x40000000, 0x40000000, d);
-            p[indx+2] = get_random_float(-0x40000000, 0x40000000, d);
-            p[indx+3] = get_random_float(-0x40000000, 0x40000000, d);
-        }
-    }
-}
-
-static int
-verify_rgbafp_image(float *image, float *outptr, int w, int h)
-{
-  int     i;
-
-  for (i=0; i<w*h*4; i++)
-  {
-    if (outptr[i] != image[i])
-    {
-        log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i, image[i], image[i+1], image[i+2], image[i+3], outptr[i], outptr[i+1], outptr[i+2], outptr[i+3]);
-        return -1;
-    }
-  }
-
-  return 0;
-}
-
-
-int
-test_imagereadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_image_format    img_format;
-    unsigned char    *rgba8_inptr, *rgba8_outptr;
-    unsigned short    *rgba16_inptr, *rgba16_outptr;
-    float            *rgbafp_inptr, *rgbafp_outptr;
-    clMemWrapper            streams[3];
-    int                img_width = 512;
-    int                img_height = 512;
-    int                num_tries = 200;
-    int                i, j, err;
-    MTdata          d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    d = init_genrand( gRandomSeed );
-    rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
-    rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
-    rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
-
-    rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
-    rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
-    rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  &img_format, img_width, img_height, 0, NULL, &err);
-    test_error(err, "create_image_2d failed");
-
-    for (i=0; i<3; i++)
-    {
-        void    *p;
-
-        if (i == 0)
-            p = (void *)rgba8_inptr;
-        else if (i == 1)
-            p = (void *)rgba16_inptr;
-        else
-            p = (void *)rgbafp_inptr;
-        size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
-        err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
-                              origin, region, 0, 0,
-                              p, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clWriteImage2D failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0,j=0; i<num_tries*3; i++,j++)
-    {
-        int        x = (int)get_random_float(0, img_width, d);
-        int        y = (int)get_random_float(0, img_height, d);
-        int        w = (int)get_random_float(1, (img_width - x), d);
-        int        h = (int)get_random_float(1, (img_height - y), d);
-        size_t    input_pitch;
-        int     set_input_pitch = (int)(genrand_int32(d) & 0x01);
-        int     packed_update = (int)(genrand_int32(d) & 0x01);
-        void    *p, *outp;
-        int        elem_size;
-
-        if (j == 3)
-            j = 0;
-
-        switch (j)
-        {
-            case 0:
-                //if ((w<=10) || (h<=10)) continue;
-                elem_size = 4;
-                if(packed_update)
-                {
-                    p = generate_rgba8_image(w, h, d);
-                    update_image_from_image(rgba8_inptr, p, x, y, w, h, img_width, elem_size);
-                }
-                else
-                {
-                    update_rgba8_image(rgba8_inptr, x, y, w, h, img_width, d);
-                    p = (void *)(rgba8_inptr + ((y * img_width + x) * 4));
-                }
-                outp = (void *)rgba8_outptr;
-                break;
-            case 1:
-                //if ((w<=8) || (h<=8)) continue;
-                elem_size = 2*4;
-                if(packed_update)
-                {
-                    p = generate_rgba16_image(w, h, d);
-                    update_image_from_image(rgba16_inptr, p, x, y, w, h, img_width, elem_size);
-                }
-                else
-                {
-                    update_rgba16_image(rgba16_inptr, x, y, w, h, img_width, d);
-                    p = (void *)(rgba16_inptr + ((y * img_width + x) * 4));
-                }
-                outp = (void *)rgba16_outptr;
-                break;
-            case 2:
-                //if ((w<=8) || (h<=8)) continue;
-                elem_size = 4*4;
-                if(packed_update)
-                {
-                    p = generate_rgbafp_image(w, h, d);
-                    update_image_from_image(rgbafp_inptr, p, x, y, w, h, img_width, elem_size);
-                }
-                else
-                {
-                    update_rgbafp_image(rgbafp_inptr, x, y, w, h, img_width, d);
-                    p = (void *)(rgbafp_inptr + ((y * img_width + x) * 4));
-                }
-                outp = (void *)rgbafp_outptr;
-                break;
-        }
-
-        const char* update_packed_pitch_name = "";
-        if(packed_update)
-        {
-            if(set_input_pitch)
-            {
-                // for packed updates the pitch does not need to be calculated here (but can be)
-                update_packed_pitch_name = "'packed with pitch'";
-                input_pitch = w*elem_size;
-            }
-            else
-            {
-                // for packed updates the pitch does not need to be calculated here
-                update_packed_pitch_name = "'packed without pitch'";
-                input_pitch = 0;
-            }
-        }
-        else
-        {
-            // for unpacked updates the pitch is required
-            update_packed_pitch_name = "'unpacked with pitch'";
-            input_pitch = img_width*elem_size;
-        }
-
-        size_t origin[3] = {x,y,0}, region[3] = {w, h, 1};
-        err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
-                              origin, region, input_pitch, 0, p,
-                              0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clWriteImage update failed for %s %s: %d\n",
-                (packed_update) ? "packed" : "unpacked",
-                (set_input_pitch) ? "set pitch" : "unset pitch", err);
-            free_mtdata(d);
-            return -1;
-        }
-
-        if(packed_update)
-        {
-            free(p);
-            p = NULL;
-        }
-
-        memset(outp, 0x7, img_width*img_height*elem_size);
-
-        origin[0]=0; origin[1]=0; origin[2]=0;
-        region[0]=img_width; region[1]=img_height; region[2]=1;
-        err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
-                             origin, region, 0,0,
-                             outp, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clReadImage failed\n");
-            free_mtdata(d);
-            return -1;
-        }
-
-        switch (j)
-        {
-            case 0:
-                err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
-                if (err)
-                {
-                    log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
-                    log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
-                }
-                break;
-            case 1:
-                err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
-                if (err)
-                {
-                    log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
-                    log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
-                }
-                break;
-            case 2:
-                err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
-                if (err)
-                {
-                    log_error("x=%d y=%d w=%d h=%d, pitch=%d, try=%d\n", x, y, w, h, (int)input_pitch, (int)i);
-                    log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
-                }
-                break;
-        }
-
-        if (err) break;
-    }
-
-    free_mtdata(d);
-    free(rgba8_inptr);
-    free(rgba16_inptr);
-    free(rgbafp_inptr);
-    free(rgba8_outptr);
-    free(rgba16_outptr);
-    free(rgbafp_outptr);
-
-    if (!err)
-        log_info("IMAGE read, write test passed\n");
-
-    return err;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_imagereadwrite3d.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_imagereadwrite3d.c
@@ -1,417 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static unsigned char *
-generate_rgba8_image(int w, int h, int d, MTdata mtData)
-{
-    unsigned char   *ptr = (unsigned char*)malloc(w * h * d *4);
-    int             i;
-
-    for (i=0; i<w*h*d*4; i++)
-            ptr[i] = (unsigned char)genrand_int32(mtData);
-
-    return ptr;
-}
-
-static void
-update_rgba8_image(unsigned char *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
-{
-    int        i, j, k, indx;
-    int        img_slice = img_width * img_height;
-
-    for (k=z; k<z+d; k++)
-        for (j=y; j<y+h; j++)
-        {
-            indx = (k * img_slice + j * img_width + x) * 4;
-            for (i=x; i<x+w; i++,indx+=4)
-            {
-                p[indx+0] = (unsigned char)genrand_int32(mtData);
-                p[indx+1] = (unsigned char)genrand_int32(mtData);
-                p[indx+2] = (unsigned char)genrand_int32(mtData);
-                p[indx+3] = (unsigned char)genrand_int32(mtData);
-            }
-        }
-}
-
-static void
-update_image_from_image(void *out, void *in, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, int elem_size)
-{
-    int        i, j, k, elem, out_indx, in_indx;
-    int        img_slice = img_width * img_height;
-    in_indx = 0;
-
-    for (k=z; k<z+d; k++)
-        for (j=y; j<y+h; j++)
-        {
-            out_indx = (k * img_slice + j * img_width + x) * elem_size;
-            for (i=x; i<x+w; i++,out_indx+=elem_size)
-            {
-                for (elem=0; elem<elem_size; elem++)
-                {
-                    ((char*)out)[out_indx + elem] = ((char*)in)[in_indx];
-                    in_indx++;
-                }
-            }
-        }
-}
-
-static int
-verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h, int d)
-{
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[i], image[i+1], image[i+2], image[i+3], outptr[i], outptr[i+1], outptr[i+2], outptr[i+3]);
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-
-static unsigned short *
-generate_rgba16_image(int w, int h, int d, MTdata mtData)
-{
-    unsigned short    *ptr = (unsigned short*)malloc(w * h * d * 4 * sizeof(unsigned short));
-    int             i;
-
-    for (i=0; i<w*h*d*4; i++)
-            ptr[i] = (unsigned short)genrand_int32(mtData);
-
-    return ptr;
-}
-
-static void
-update_rgba16_image(unsigned short *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
-{
-    int        i, j, k, indx;
-    int        img_slice = img_width * img_height;
-
-    for (k=z; k<z+d; k++)
-        for (j=y; j<y+h; j++)
-    {
-        indx = (k * img_slice + j * img_width + x) * 4;
-        for (i=x; i<x+w; i++,indx+=4)
-        {
-            p[indx+0] = (unsigned short)genrand_int32(mtData);
-            p[indx+1] = (unsigned short)genrand_int32(mtData);
-            p[indx+2] = (unsigned short)genrand_int32(mtData);
-            p[indx+3] = (unsigned short)genrand_int32(mtData);
-        }
-    }
-}
-
-static int
-verify_rgba16_image(unsigned short *image, unsigned short *outptr, int w, int h, int d)
-{
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("i = %d. Expected (%d %d %d %d), got (%d %d %d %d)\n", i, image[i], image[i+1], image[i+2], image[i+3], outptr[i], outptr[i+1], outptr[i+2], outptr[i+3]);
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-
-static float *
-generate_rgbafp_image(int w, int h, int d, MTdata mtData)
-{
-    float   *ptr = (float*)malloc(w * h * d *4 * sizeof(float));
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-            ptr[i] = get_random_float(-0x40000000, 0x40000000, mtData);
-
-    return ptr;
-}
-
-static void
-update_rgbafp_image(float *p, int x, int y, int z, int w, int h, int d, int img_width, int img_height, int img_depth, MTdata mtData)
-{
-    int        i, j, k, indx;
-    int        img_slice = img_width * img_height;
-
-    for (k=z; k<z+d; k++)
-        for (j=y; j<y+h; j++)
-        {
-            indx = (k * img_slice + j * img_width + x) * 4;
-            for (i=x; i<x+w; i++,indx+=4)
-            {
-                p[indx+0] = get_random_float(-0x40000000, 0x40000000, mtData);
-                p[indx+1] = get_random_float(-0x40000000, 0x40000000, mtData);
-                p[indx+2] = get_random_float(-0x40000000, 0x40000000, mtData);
-                p[indx+3] = get_random_float(-0x40000000, 0x40000000, mtData);
-            }
-        }
-}
-
-static int
-verify_rgbafp_image(float *image, float *outptr, int w, int h, int d)
-{
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("i = %d. Expected (%f %f %f %f), got (%f %f %f %f)\n", i, image[i], image[i+1], image[i+2], image[i+3], outptr[i], outptr[i+1], outptr[i+2], outptr[i+3]);
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-
-int
-test_imagereadwrite3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_image_format    img_format;
-    unsigned char    *rgba8_inptr, *rgba8_outptr;
-    unsigned short *rgba16_inptr, *rgba16_outptr;
-    float            *rgbafp_inptr, *rgbafp_outptr;
-    clMemWrapper    streams[3];
-    int       img_width = 64;
-    int       img_height = 64;
-    int       img_depth = 32;
-    int       img_slice = img_width * img_height;
-    int       num_tries = 30;
-    int       i, j, err;
-    MTdata      mtData;
-
-    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-    mtData = init_genrand( gRandomSeed );
-    rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, img_depth, mtData);
-    rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, img_depth, mtData);
-    rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, img_depth, mtData);
-
-    rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height * img_depth);
-    rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height * img_depth);
-    rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height * img_depth);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-
-    for (i=0; i<3; i++)
-    {
-        void    *p;
-
-        if (i == 0)
-            p = (void *)rgba8_inptr;
-        else if (i == 1)
-            p = (void *)rgba16_inptr;
-        else
-            p = (void *)rgbafp_inptr;
-
-        size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, img_depth};
-        err = clEnqueueWriteImage(queue, streams[i], CL_TRUE,
-                                  origin, region, 0, 0,
-                                  p,
-                                  0, NULL, NULL);
-        test_error(err, "clEnqueueWriteImage failed");
-    }
-
-    for (i=0,j=0; i<num_tries*3; i++,j++)
-    {
-        int        x = (int)get_random_float(0, (float)img_width - 1, mtData);
-        int        y = (int)get_random_float(0, (float)img_height - 1, mtData);
-        int        z = (int)get_random_float(0, (float)img_depth - 1, mtData);
-        int        w = (int)get_random_float(1, (float)(img_width - x), mtData);
-        int        h = (int)get_random_float(1, (float)(img_height - y), mtData);
-        int        d = (int)get_random_float(1, (float)(img_depth - z), mtData);
-        size_t    input_pitch, input_slice_pitch;
-        int     set_input_pitch = (int)(genrand_int32(mtData) & 0x01);
-        int     packed_update = (int)(genrand_int32(mtData) & 0x01);
-        void    *p, *outp;
-        int        elem_size;
-
-        if (j == 3)
-            j = 0;
-
-        // packed: the source image for the write is a whole image                                                                                                                                                                                                                                                      .
-        // unpacked: the source image for the write is a subset within a larger image
-        switch (j)
-        {
-            case 0:
-                elem_size = 4;
-                if(packed_update)
-                {
-                    p = generate_rgba8_image(w, h, d, mtData);
-                    update_image_from_image(rgba8_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
-                }
-                else
-                {
-                    update_rgba8_image(rgba8_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
-                    p = (void *)(rgba8_inptr + ((z * img_slice + y * img_width + x) * 4));
-                }
-                outp = (void *)rgba8_outptr;
-                break;
-            case 1:
-                elem_size = 2*4;
-                if(packed_update)
-                {
-                    p = generate_rgba16_image(w, h, d, mtData);
-                    update_image_from_image(rgba16_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
-                }
-                else
-                {
-                    update_rgba16_image(rgba16_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
-                    p = (void *)(rgba16_inptr + ((z * img_slice + y * img_width + x) * 4));
-                }
-                outp = (void *)rgba16_outptr;
-                break;
-            case 2:
-                elem_size = 4*4;
-                if(packed_update)
-                {
-                    p = generate_rgbafp_image(w, h, d, mtData);
-                    update_image_from_image(rgbafp_inptr, p, x, y, z, w, h, d, img_width, img_height, img_depth, elem_size);
-                }
-                else
-                {
-                    update_rgbafp_image(rgbafp_inptr, x, y, z, w, h, d, img_width, img_height, img_depth, mtData);
-                    p = (void *)(rgbafp_inptr + ((z * img_slice + y * img_width + x) * 4));
-                }
-                outp = (void *)rgbafp_outptr;
-                break;
-        }
-
-        const char* update_packed_pitch_name = "";
-        if(packed_update)
-        {
-            if(set_input_pitch)
-            {
-                // for packed updates the pitch does not need to be calculated here (but can be)
-                update_packed_pitch_name = "'packed with pitch'";
-                input_pitch = w*elem_size;
-                input_slice_pitch = w*h*elem_size;
-            }
-            else
-            {
-                // for packed updates the pitch does not need to be calculated here
-                update_packed_pitch_name = "'packed without pitch'";
-                input_pitch = 0;
-                input_slice_pitch = 0;
-            }
-        }
-        else
-        {
-            // for unpacked updates the pitch is required
-            update_packed_pitch_name = "'unpacked with pitch'";
-            input_pitch = img_width*elem_size;
-            input_slice_pitch = input_pitch*img_height;
-        }
-
-    size_t origin[3] = {x,y,z}, region[3] = {w, h, d};
-        err = clEnqueueWriteImage(queue, streams[j], CL_TRUE,
-                              origin, region, input_pitch, input_slice_pitch,
-                              p, 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteImage failed");
-
-        if(packed_update)
-        {
-            free(p);
-            p = NULL;
-        }
-
-        memset(outp, 0x7, img_width*img_height*img_depth*elem_size);
-
-    origin[0]=0; origin[1]=0; origin[2]=0; region[0]=img_width; region[1]=img_height; region[2]=img_depth;
-        err = clEnqueueReadImage(queue, streams[j], CL_TRUE,
-                             origin, region, 0, 0,
-                             outp, 0, NULL, NULL);
-    test_error(err, "clEnqueueReadImage failed");
-
-        switch (j)
-        {
-            case 0:
-                err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height, img_depth);
-                if (err)
-                {
-                    log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
-                    log_error("IMAGE RGBA8 read, write %s test failed\n", update_packed_pitch_name);
-                }
-                break;
-            case 1:
-                err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height, img_depth);
-                if (err)
-                {
-                    log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
-                    log_error("IMAGE RGBA16 read, write %s test failed\n", update_packed_pitch_name);
-                }
-                break;
-            case 2:
-                err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height, img_depth);
-                if (err)
-                {
-                    log_error("x=%d y=%d z=%d w=%d h=%d d=%d pitch=%d, slice_pitch=%d, try=%d\n", x, y, z, w, h, d, (int)input_pitch, (int)input_slice_pitch, (int)i);
-                    log_error("IMAGE RGBA FP read, write %s test failed\n", update_packed_pitch_name);
-                }
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-    free_mtdata(mtData);
-    free(rgba8_inptr);
-    free(rgba16_inptr);
-    free(rgbafp_inptr);
-    free(rgba8_outptr);
-    free(rgba16_outptr);
-    free(rgbafp_outptr);
-
-    if (!err)
-        log_info("IMAGE read, write test passed\n");
-
-    return err;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_int2float.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_int2float.c
@@ -1,159 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *int2float_kernel_code =
-"__kernel void test_int2float(__global int *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = (float)src[tid];\n"
-"\n"
-"}\n";
-
-
-int
-verify_int2float(cl_int *inptr, cl_float *outptr, int n)
-{
-    int     i;
-
-    for (i=0; i<n; i++)
-    {
-        if (outptr[i] != (float)inptr[i])
-        {
-            log_error("INT2FLOAT test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT2FLOAT test passed\n");
-    return 0;
-}
-
-int
-test_int2float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem            streams[2];
-    cl_int            *input_ptr;
-    cl_float        *output_ptr;
-    cl_program        program;
-    cl_kernel        kernel;
-    void            *values[2];
-    size_t            lengths[1];
-    size_t    threads[1];
-    int                err;
-    int                i;
-    MTdata          d;
-
-    input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_int) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = (cl_int)get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    lengths[0] = strlen(int2float_kernel_code);
-    program = clCreateProgramWithSource(context, 1, &int2float_kernel_code, lengths, NULL);
-    if (!program)
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgramExecutable failed\n");
-        return -1;
-    }
-
-    kernel = clCreateKernel(program, "test_int2float", NULL);
-    if (!kernel)
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (size_t)num_elements;
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueNDRangeKernel failed\n");
-        return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
-    }
-
-    err = verify_int2float(input_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_intmath_int.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_intmath_int.c
@@ -1,389 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *int_add_kernel_code =
-"__kernel void test_int_add(__global int *srcA, __global int *srcB, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *int_sub_kernel_code =
-"__kernel void test_int_sub(__global int *srcA, __global int *srcB, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *int_mul_kernel_code =
-"__kernel void test_int_mul(__global int *srcA, __global int *srcB, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-const char *int_mad_kernel_code =
-"__kernel void test_int_mad(__global int *srcA, __global int *srcB, __global int *srcC, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
-"}\n";
-
-static const float    MAX_ERR = 1e-5f;
-
-int
-verify_int_add(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_ADD int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_ADD int test passed\n");
-    return 0;
-}
-
-int
-verify_int_sub(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_SUB int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_SUB int test passed\n");
-    return 0;
-}
-
-int
-verify_int_mul(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_MUL int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_MUL int test passed\n");
-    return 0;
-}
-
-int
-verify_int_mad(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i] + inptrC[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_MAD int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_MAD int test passed\n");
-    return 0;
-}
-
-int
-test_intmath_int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[4];
-    cl_kernel kernel[4];
-
-    cl_int *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_int) * num_elements;
-
-    input_ptr[0] = (cl_int*)malloc(length);
-    input_ptr[1] = (cl_int*)malloc(length);
-    input_ptr[2] = (cl_int*)malloc(length);
-    output_ptr   = (cl_int*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-        p[i] = (int)genrand_int32(d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        p[i] = (int)genrand_int32(d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements; i++)
-        p[i] = (int)genrand_int32(d);
-
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    program[0] = clCreateProgramWithSource(context, 1, &int_add_kernel_code, NULL, NULL);
-    if (!program[0])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[0] = clCreateKernel(program[0], "test_int_add", NULL);
-    if (!kernel[0])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[1] = clCreateProgramWithSource(context, 1, &int_sub_kernel_code, NULL, NULL);
-    if (!program[1])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[1] = clCreateKernel(program[1], "test_int_sub", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[2] = clCreateProgramWithSource(context, 1, &int_mul_kernel_code, NULL, NULL);
-    if (!program[2])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[2] = clCreateKernel(program[2], "test_int_mul", NULL);
-    if (!kernel[2])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[3] = clCreateProgramWithSource(context, 1, &int_mad_kernel_code, NULL, NULL);
-    if (!program[3])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[3] = clCreateKernel(program[3], "test_int_mad", NULL);
-    if (!kernel[3])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<4; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        err = verify_int_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 1:
-        err = verify_int_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 2:
-        err = verify_int_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 3:
-        err = verify_int_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-        break;
-    }
-    if (err)
-      break;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<4; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_intmath_int2.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_intmath_int2.c
@@ -1,388 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *int_add2_kernel_code =
-"__kernel void test_int_add2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *int_sub2_kernel_code =
-"__kernel void test_int_sub2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *int_mul2_kernel_code =
-"__kernel void test_int_mul2(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-const char *int_mad2_kernel_code =
-"__kernel void test_int_mad2(__global int2 *srcA, __global int2 *srcB, __global int2 *srcC, __global int2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
-"}\n";
-
-int
-verify_int_add2(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_ADD int2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_ADD int2 test passed\n");
-    return 0;
-}
-
-int
-verify_int_sub2(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_SUB int2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_SUB int2 test passed\n");
-    return 0;
-}
-
-int
-verify_int_mul2(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_MUL int2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_MUL int2 test passed\n");
-    return 0;
-}
-
-int
-verify_int_mad2(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i] + inptrC[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_MAD int2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_MAD int2 test passed\n");
-    return 0;
-}
-
-int
-test_intmath_int2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[4];
-    cl_kernel kernel[4];
-
-    cl_int *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_int) * 2 * num_elements;
-
-    input_ptr[0] = (cl_int*)malloc(length);
-    input_ptr[1] = (cl_int*)malloc(length);
-    input_ptr[2] = (cl_int*)malloc(length);
-    output_ptr   = (cl_int*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = (int)genrand_int32(d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = (int)genrand_int32(d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = (int)genrand_int32(d);
-
-    free_mtdata( d );
-    d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    program[0] = clCreateProgramWithSource(context, 1, &int_add2_kernel_code, NULL, NULL);
-    if (!program[0])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[0] = clCreateKernel(program[0], "test_int_add2", NULL);
-    if (!kernel[0])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[1] = clCreateProgramWithSource(context, 1, &int_sub2_kernel_code, NULL, NULL);
-    if (!program[1])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[1] = clCreateKernel(program[1], "test_int_sub2", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[2] = clCreateProgramWithSource(context, 1, &int_mul2_kernel_code, NULL, NULL);
-    if (!program[2])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[2] = clCreateKernel(program[2], "test_int_mul2", NULL);
-    if (!kernel[2])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[3] = clCreateProgramWithSource(context, 1, &int_mad2_kernel_code, NULL, NULL);
-    if (!program[3])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[3] = clCreateKernel(program[3], "test_int_mad2", NULL);
-    if (!kernel[3])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<4; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        err = verify_int_add2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 1:
-        err = verify_int_sub2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 2:
-        err = verify_int_mul2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 3:
-        err = verify_int_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-        break;
-    }
-    if (err)
-      break;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<4; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_intmath_int4.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_intmath_int4.c
@@ -1,387 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *int_add4_kernel_code =
-"__kernel void test_int_add4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *int_sub4_kernel_code =
-"__kernel void test_int_sub4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *int_mul4_kernel_code =
-"__kernel void test_int_mul4(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-const char *int_mad4_kernel_code =
-"__kernel void test_int_mad4(__global int4 *srcA, __global int4 *srcB, __global int4 *srcC, __global int4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
-"}\n";
-
-int
-verify_int_add4(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_ADD int4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_ADD int4 test passed\n");
-    return 0;
-}
-
-int
-verify_int_sub4(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_SUB int4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_SUB int4 test passed\n");
-    return 0;
-}
-
-int
-verify_int_mul4(int *inptrA, int *inptrB, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_MUL int4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_MUL int4 test passed\n");
-    return 0;
-}
-
-int
-verify_int_mad4(int *inptrA, int *inptrB, int *inptrC, int *outptr, int n)
-{
-    int            r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i] + inptrC[i];
-        if (r != outptr[i])
-        {
-            log_error("INT_MAD int4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("INT_MAD int4 test passed\n");
-    return 0;
-}
-
-int
-test_intmath_int4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[4];
-    cl_kernel kernel[4];
-
-    cl_int *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_int) * 4 * num_elements;
-
-    input_ptr[0] = (cl_int*)malloc(length);
-    input_ptr[1] = (cl_int*)malloc(length);
-    input_ptr[2] = (cl_int*)malloc(length);
-    output_ptr   = (cl_int*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = (int)genrand_int32(d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = (int)genrand_int32(d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = (int)genrand_int32(d);
-
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    program[0] = clCreateProgramWithSource(context, 1, &int_add4_kernel_code, NULL, NULL);
-    if (!program[0])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[0] = clCreateKernel(program[0], "test_int_add4", NULL);
-    if (!kernel[0])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[1] = clCreateProgramWithSource(context, 1, &int_sub4_kernel_code, NULL, NULL);
-    if (!program[1])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[1] = clCreateKernel(program[1], "test_int_sub4", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[2] = clCreateProgramWithSource(context, 1, &int_mul4_kernel_code, NULL, NULL);
-    if (!program[2])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[2] = clCreateKernel(program[2], "test_int_mul4", NULL);
-    if (!kernel[2])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[3] = clCreateProgramWithSource(context, 1, &int_mad4_kernel_code, NULL, NULL);
-    if (!program[3])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[3] = clCreateKernel(program[3], "test_int_mad4", NULL);
-    if (!kernel[3])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<4; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        err = verify_int_add4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 1:
-        err = verify_int_sub4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 2:
-        err = verify_int_mul4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 3:
-        err = verify_int_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-        break;
-    }
-    if (err)
-      break;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<4; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_intmath_long.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_intmath_long.c
@@ -1,397 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *long_add_kernel_code =
-"__kernel void test_long_add(__global long *srcA, __global long *srcB, __global long *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *long_sub_kernel_code =
-"__kernel void test_long_sub(__global long *srcA, __global long *srcB, __global long *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *long_mul_kernel_code =
-"__kernel void test_long_mul(__global long *srcA, __global long *srcB, __global long *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-const char *long_mad_kernel_code =
-"__kernel void test_long_mad(__global long *srcA, __global long *srcB, __global long *srcC, __global long *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
-"}\n";
-
-static const float    MAX_ERR = 1e-5f;
-
-int
-verify_long_add(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int            i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_ADD int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_ADD int test passed\n");
-    return 0;
-}
-
-int
-verify_long_sub(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int            i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_SUB int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_SUB int test passed\n");
-    return 0;
-}
-
-int
-verify_long_mul(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int            i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_MUL int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_MUL int test passed\n");
-    return 0;
-}
-
-int
-verify_long_mad(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int            i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i] + inptrC[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_MAD int test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_MAD int test passed\n");
-    return 0;
-}
-
-int
-test_intmath_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[4];
-    cl_kernel kernel[4];
-
-    cl_long *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-
-    if(! gHasLong )
-    {
-        log_info("64-bit integers are not supported by this device. Skipping test.\n");
-        return CL_SUCCESS;
-    }
-
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_long) * num_elements;
-
-    input_ptr[0] = (cl_long*)malloc(length);
-    input_ptr[1] = (cl_long*)malloc(length);
-    input_ptr[2] = (cl_long*)malloc(length);
-    output_ptr   = (cl_long*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-    p = input_ptr[2];
-    for (i=0; i<num_elements; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-
-    free_mtdata(d);
-    d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    program[0] = clCreateProgramWithSource(context, 1, &long_add_kernel_code, NULL, NULL);
-    if (!program[0])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[0] = clCreateKernel(program[0], "test_long_add", NULL);
-    if (!kernel[0])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[1] = clCreateProgramWithSource(context, 1, &long_sub_kernel_code, NULL, NULL);
-    if (!program[1])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[1] = clCreateKernel(program[1], "test_long_sub", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[2] = clCreateProgramWithSource(context, 1, &long_mul_kernel_code, NULL, NULL);
-    if (!program[2])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[2] = clCreateKernel(program[2], "test_long_mul", NULL);
-    if (!kernel[2])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[3] = clCreateProgramWithSource(context, 1, &long_mad_kernel_code, NULL, NULL);
-    if (!program[3])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[3] = clCreateKernel(program[3], "test_long_mad", NULL);
-    if (!kernel[3])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<4; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        err = verify_long_add(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 1:
-        err = verify_long_sub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 2:
-        err = verify_long_mul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 3:
-        err = verify_long_mad(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-        break;
-    }
-    if (err)
-      break;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<4; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_intmath_long2.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_intmath_long2.c
@@ -1,395 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *long_add2_kernel_code =
-"__kernel void test_long_add2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *long_sub2_kernel_code =
-"__kernel void test_long_sub2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *long_mul2_kernel_code =
-"__kernel void test_long_mul2(__global long2 *srcA, __global long2 *srcB, __global long2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-const char *long_mad2_kernel_code =
-"__kernel void test_long_mad2(__global long2 *srcA, __global long2 *srcB, __global long2 *srcC, __global long2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
-"}\n";
-
-int
-verify_long_add2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_ADD long2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_ADD long2 test passed\n");
-    return 0;
-}
-
-int
-verify_long_sub2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_SUB long2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_SUB long2 test passed\n");
-    return 0;
-}
-
-int
-verify_long_mul2(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_MUL long2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_MUL long2 test passed\n");
-    return 0;
-}
-
-int
-verify_long_mad2(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i] + inptrC[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_MAD long2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_MAD long2 test passed\n");
-    return 0;
-}
-
-int
-test_intmath_long2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[4];
-    cl_kernel kernel[4];
-
-    cl_long *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-
-    if(! gHasLong)
-    {
-        log_info("64-bit integers are not supported in this device. Skipping test.\n");
-        return 0;
-    }
-
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_long) * 2* num_elements;
-
-    input_ptr[0] = (cl_long*)malloc(length);
-    input_ptr[1] = (cl_long*)malloc(length);
-    input_ptr[2] = (cl_long*)malloc(length);
-    output_ptr   = (cl_long*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements * 2; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-    p = input_ptr[1];
-    for (i=0; i<num_elements * 2; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-    p = input_ptr[2];
-    for (i=0; i<num_elements * 2; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-
-    free_mtdata(d);
-    d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    program[0] = clCreateProgramWithSource(context, 1, &long_add2_kernel_code, NULL, NULL);
-    if (!program[0])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[0] = clCreateKernel(program[0], "test_long_add2", NULL);
-    if (!kernel[0])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[1] = clCreateProgramWithSource(context, 1, &long_sub2_kernel_code, NULL, NULL);
-    if (!program[1])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[1] = clCreateKernel(program[1], "test_long_sub2", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[2] = clCreateProgramWithSource(context, 1, &long_mul2_kernel_code, NULL, NULL);
-    if (!program[2])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[2] = clCreateKernel(program[2], "test_long_mul2", NULL);
-    if (!kernel[2])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[3] = clCreateProgramWithSource(context, 1, &long_mad2_kernel_code, NULL, NULL);
-    if (!program[3])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[3] = clCreateKernel(program[3], "test_long_mad2", NULL);
-    if (!kernel[3])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<4; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        err = verify_long_add2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 1:
-        err = verify_long_sub2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 2:
-        err = verify_long_mul2(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 3:
-        err = verify_long_mad2(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-        break;
-    }
-    if (err)
-      break;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<4; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_intmath_long4.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_intmath_long4.c
@@ -1,395 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *long_add4_kernel_code =
-"__kernel void test_long_add4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *long_sub4_kernel_code =
-"__kernel void test_long_sub4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *long_mul4_kernel_code =
-"__kernel void test_long_mul4(__global long4 *srcA, __global long4 *srcB, __global long4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-const char *long_mad4_kernel_code =
-"__kernel void test_long_mad4(__global long4 *srcA, __global long4 *srcB, __global long4 *srcC, __global long4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid] + srcC[tid];\n"
-"}\n";
-
-int
-verify_long_add4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_ADD long4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_ADD long4 test passed\n");
-    return 0;
-}
-
-int
-verify_long_sub4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_SUB long4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_SUB long4 test passed\n");
-    return 0;
-}
-
-int
-verify_long_mul4(cl_long *inptrA, cl_long *inptrB, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_MUL long4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_MUL long4 test passed\n");
-    return 0;
-}
-
-int
-verify_long_mad4(cl_long *inptrA, cl_long *inptrB, cl_long *inptrC, cl_long *outptr, int n)
-{
-    cl_long    r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i] + inptrC[i];
-        if (r != outptr[i])
-        {
-            log_error("LONG_MAD long4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("LONG_MAD long4 test passed\n");
-    return 0;
-}
-
-int
-test_intmath_long4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[4];
-    cl_kernel kernel[4];
-
-    cl_long *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-
-    if(! gHasLong )
-    {
-        log_info("64-bit integers are not supported by this device. Skipping test.\n");
-        return 0;
-    }
-
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_long) * 4 * num_elements;
-
-    input_ptr[0] = (cl_long*)malloc(length);
-    input_ptr[1] = (cl_long*)malloc(length);
-    input_ptr[2] = (cl_long*)malloc(length);
-    output_ptr   = (cl_long*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements * 4; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-    p = input_ptr[1];
-    for (i=0; i<num_elements * 4; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-    p = input_ptr[2];
-    for (i=0; i<num_elements * 4; i++)
-        p[i] = (cl_long)genrand_int32(d) | ((cl_long) genrand_int32(d) << 32);
-
-    free_mtdata(d);
-    d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    program[0] = clCreateProgramWithSource(context, 1, &long_add4_kernel_code, NULL, NULL);
-    if (!program[0])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[0], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[0] = clCreateKernel(program[0], "test_long_add4", NULL);
-    if (!kernel[0])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[1] = clCreateProgramWithSource(context, 1, &long_sub4_kernel_code, NULL, NULL);
-    if (!program[1])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[1], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[1] = clCreateKernel(program[1], "test_long_sub4", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[2] = clCreateProgramWithSource(context, 1, &long_mul4_kernel_code, NULL, NULL);
-    if (!program[2])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[2], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[2] = clCreateKernel(program[2], "test_long_mul4", NULL);
-    if (!kernel[2])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  program[3] = clCreateProgramWithSource(context, 1, &long_mad4_kernel_code, NULL, NULL);
-    if (!program[3])
-    {
-        log_error("clCreateProgramWithSource failed\n");
-        return -1;
-    }
-
-    err = clBuildProgram(program[3], 0, NULL, NULL, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clBuildProgram failed\n");
-        return -1;
-    }
-
-    kernel[3] = clCreateKernel(program[3], "test_long_mad4", NULL);
-    if (!kernel[3])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[3], 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel[3], 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel[3], 2, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[3], 3, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<4; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        err = verify_long_add4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 1:
-        err = verify_long_sub4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 2:
-        err = verify_long_mul4(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-        break;
-      case 3:
-        err = verify_long_mad4(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-        break;
-    }
-    if (err)
-      break;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<4; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_kernel_memory_alignment.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_kernel_memory_alignment.cpp
@@ -1,572 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-#include "harness/errorHelpers.h"
-
-// For global, local, and constant
-const char *parameter_kernel_long =
-"%s\n" // optional pragma
-"kernel void test(global ulong *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
-"{\n"
-"   results[0] = (ulong)&mem0[0];\n"
-"   results[1] = (ulong)&mem2[0];\n"
-"   results[2] = (ulong)&mem3[0];\n"
-"   results[3] = (ulong)&mem4[0];\n"
-"   results[4] = (ulong)&mem8[0];\n"
-"   results[5] = (ulong)&mem16[0];\n"
-"}\n";
-
-// For private and local
-const char *local_kernel_long =
-"%s\n" // optional pragma
-"kernel void test(global ulong *results)\n"
-"{\n"
-"   %s %s mem0[3];\n"
-"   %s %s2 mem2[3];\n"
-"   %s %s3 mem3[3];\n"
-"   %s %s4 mem4[3];\n"
-"   %s %s8 mem8[3];\n"
-"   %s %s16 mem16[3];\n"
-"   results[0] = (ulong)&mem0[0];\n"
-"   results[1] = (ulong)&mem2[0];\n"
-"   results[2] = (ulong)&mem3[0];\n"
-"   results[3] = (ulong)&mem4[0];\n"
-"   results[4] = (ulong)&mem8[0];\n"
-"   results[5] = (ulong)&mem16[0];\n"
-"}\n";
-
-// For constant
-const char *constant_kernel_long =
-"%s\n" // optional pragma
-"  constant %s mem0[3]    = {0};\n"
-"  constant %s2 mem2[3]   = {(%s2)(0)};\n"
-"  constant %s3 mem3[3]   = {(%s3)(0)};\n"
-"  constant %s4 mem4[3]   = {(%s4)(0)};\n"
-"  constant %s8 mem8[3]   = {(%s8)(0)};\n"
-"  constant %s16 mem16[3] = {(%s16)(0)};\n"
-"\n"
-"kernel void test(global ulong *results)\n"
-"{\n"
-"   results[0] = (ulong)&mem0;\n"
-"   results[1] = (ulong)&mem2;\n"
-"   results[2] = (ulong)&mem3;\n"
-"   results[3] = (ulong)&mem4;\n"
-"   results[4] = (ulong)&mem8;\n"
-"   results[5] = (ulong)&mem16;\n"
-"}\n";
-
-
-// For global, local, and constant
-const char *parameter_kernel_no_long =
-"%s\n" // optional pragma
-"kernel void test(global uint *results, %s %s *mem0, %s %s2 *mem2, %s %s3 *mem3, %s %s4 *mem4, %s %s8 *mem8, %s %s16 *mem16)\n"
-"{\n"
-"   results[0] = (uint)&mem0[0];\n"
-"   results[1] = (uint)&mem2[0];\n"
-"   results[2] = (uint)&mem3[0];\n"
-"   results[3] = (uint)&mem4[0];\n"
-"   results[4] = (uint)&mem8[0];\n"
-"   results[5] = (uint)&mem16[0];\n"
-"}\n";
-
-// For private and local
-const char *local_kernel_no_long =
-"%s\n" // optional pragma
-"kernel void test(global uint *results)\n"
-"{\n"
-"   %s %s mem0[3];\n"
-"   %s %s2 mem2[3];\n"
-"   %s %s3 mem3[3];\n"
-"   %s %s4 mem4[3];\n"
-"   %s %s8 mem8[3];\n"
-"   %s %s16 mem16[3];\n"
-"   results[0] = (uint)&mem0[0];\n"
-"   results[1] = (uint)&mem2[0];\n"
-"   results[2] = (uint)&mem3[0];\n"
-"   results[3] = (uint)&mem4[0];\n"
-"   results[4] = (uint)&mem8[0];\n"
-"   results[5] = (uint)&mem16[0];\n"
-"}\n";
-
-// For constant
-const char *constant_kernel_no_long =
-"%s\n" // optional pragma
-"  constant %s mem0[3]    = {0};\n"
-"  constant %s2 mem2[3]   = {(%s2)(0)};\n"
-"  constant %s3 mem3[3]   = {(%s3)(0)};\n"
-"  constant %s4 mem4[3]   = {(%s4)(0)};\n"
-"  constant %s8 mem8[3]   = {(%s8)(0)};\n"
-"  constant %s16 mem16[3] = {(%s16)(0)};\n"
-"\n"
-"kernel void test(global uint *results)\n"
-"{\n"
-"   results[0] = (uint)&mem0;\n"
-"   results[1] = (uint)&mem2;\n"
-"   results[2] = (uint)&mem3;\n"
-"   results[3] = (uint)&mem4;\n"
-"   results[4] = (uint)&mem8;\n"
-"   results[5] = (uint)&mem16;\n"
-"}\n";
-
-enum AddressSpaces
-{
-    kGlobal        = 0,
-    kLocal,
-    kConstant,
-    kPrivate
-};
-
-typedef enum AddressSpaces    AddressSpaces;
-
-#define DEBUG 0
-
-const char * get_explicit_address_name( AddressSpaces address )
-{
-    /* Quick method to avoid branching: make sure the following array matches the Enum order */
-    static const char *sExplicitAddressNames[] = { "global", "local", "constant", "private"};
-
-    return sExplicitAddressNames[ address ];
-}
-
-
-int test_kernel_memory_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, AddressSpaces address )
-{
-    const char *constant_kernel;
-    const char *parameter_kernel;
-    const char *local_kernel;
-
-    if ( gHasLong )
-    {
-        constant_kernel  = constant_kernel_long;
-        parameter_kernel = parameter_kernel_long;
-        local_kernel     = local_kernel_long;
-    }
-    else
-    {
-        constant_kernel  = constant_kernel_no_long;
-        parameter_kernel = parameter_kernel_no_long;
-        local_kernel     = local_kernel_no_long;
-    }
-
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
-    char *kernel_code = (char*)malloc(4096);
-    cl_kernel kernel;
-    cl_program program;
-    int error;
-    int total_errors = 0;
-    cl_mem results;
-    cl_ulong *results_data;
-    cl_mem mem0, mem2, mem3, mem4, mem8, mem16;
-
-    results_data = (cl_ulong*)malloc(sizeof(cl_ulong)*6);
-    results = clCreateBuffer(context, 0, sizeof(cl_ulong)*6, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-
-    mem0 = clCreateBuffer(context, 0, sizeof(cl_long), NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-    mem2 = clCreateBuffer(context, 0, sizeof(cl_long)*2, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-    mem3 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-    mem4 = clCreateBuffer(context, 0, sizeof(cl_long)*4, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-    mem8 = clCreateBuffer(context, 0, sizeof(cl_long)*8, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-    mem16 = clCreateBuffer(context, 0, sizeof(cl_long)*16, NULL, &error);
-    test_error(error, "clCreateBuffer failed");
-
-
-    // For each type
-
-    // Calculate alignment mask for each size
-
-    // For global, local, constant, private
-
-    // If global, local or constant -- do parameter_kernel
-    // If private or local -- do local_kernel
-    // If constant -- do constant kernel
-
-    int numConstantArgs;
-    clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof(numConstantArgs), &numConstantArgs, NULL);
-
-    int typeIndex;
-    for (typeIndex = 0; typeIndex < 10; typeIndex++) {
-        // Skip double tests if we don't support doubles
-        if (vecType[typeIndex] == kDouble && !is_extension_available(device, "cl_khr_fp64")) {
-            log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
-            continue;
-        }
-
-        if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
-            continue;
-
-        log_info("Testing %s...\n", get_explicit_type_name(vecType[typeIndex]));
-
-        // Determine the expected alignment masks.
-        // E.g., if it is supposed to be 4 byte aligned, we should get 4-1=3 = ... 000011
-        // We can then and the returned address with that and we should have 0.
-        cl_ulong alignments[6];
-        alignments[0] = get_explicit_type_size(vecType[typeIndex])-1;
-        alignments[1] = (get_explicit_type_size(vecType[typeIndex])<<1)-1;
-        alignments[2] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
-        alignments[3] = (get_explicit_type_size(vecType[typeIndex])<<2)-1;
-        alignments[4] = (get_explicit_type_size(vecType[typeIndex])<<3)-1;
-        alignments[5] = (get_explicit_type_size(vecType[typeIndex])<<4)-1;
-
-        // Parameter kernel
-        if (address == kGlobal || address == kLocal || address == kConstant) {
-            log_info("\tTesting parameter kernel...\n");
-
-            if ( (gIsEmbedded) && (address == kConstant) && (numConstantArgs < 6)) {
-                sprintf(kernel_code, parameter_kernel,
-                    vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
-                );
-            }
-            else {
-                sprintf(kernel_code, parameter_kernel,
-                    vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
-                );
-            }
-            //printf("Kernel is: \n%s\n", kernel_code);
-
-            // Create the kernel
-            error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
-            test_error(error, "create_single_kernel_helper failed");
-
-            // Initialize the results
-            memset(results_data, 0, sizeof(cl_long)*5);
-            error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*6, results_data, 0, NULL, NULL);
-            test_error(error, "clEnqueueWriteBuffer failed");
-
-            // Set the arguments
-            error = clSetKernelArg(kernel, 0, sizeof(results), &results);
-            test_error(error, "clSetKernelArg failed");
-            if (address != kLocal) {
-                error = clSetKernelArg(kernel, 1, sizeof(mem0), &mem0);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 2, sizeof(mem2), &mem2);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 3, sizeof(mem3), &mem3);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 4, sizeof(mem4), &mem4);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 5, sizeof(mem8), &mem8);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 6, sizeof(mem16), &mem16);
-                test_error(error, "clSetKernelArg failed");
-            } else {
-                error = clSetKernelArg(kernel, 1, get_explicit_type_size(vecType[typeIndex]), NULL);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 2, get_explicit_type_size(vecType[typeIndex])*2, NULL);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 3, get_explicit_type_size(vecType[typeIndex])*4, NULL);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 4, get_explicit_type_size(vecType[typeIndex])*4, NULL);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 5, get_explicit_type_size(vecType[typeIndex])*8, NULL);
-                test_error(error, "clSetKernelArg failed");
-                error = clSetKernelArg(kernel, 6, get_explicit_type_size(vecType[typeIndex])*16, NULL);
-                test_error(error, "clSetKernelArg failed");
-            }
-
-            // Enqueue the kernel
-            size_t global_size = 1;
-            error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
-            test_error(error, "clEnqueueNDRangeKernel failed");
-
-            // Read back the results
-            error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*6, results_data, 0, NULL, NULL);
-            test_error(error, "clEnqueueReadBuffer failed");
-
-            // Verify the results
-            if (gHasLong) {
-                for (int i = 0; i < 6; i++) {
-                    if ((results_data[i] & alignments[i]) != 0) {
-                        total_errors++;
-                        log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
-                    } else {
-                        if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
-                    }
-                }
-            }
-            // Verify the results on devices that do not support longs
-            else {
-                cl_uint *results_data_no_long = (cl_uint *)results_data;
-
-                for (int i = 0; i < 6; i++) {
-                    if ((results_data_no_long[i] & alignments[i]) != 0) {
-                        total_errors++;
-                        log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
-                    } else {
-                        if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
-                    }
-                }
-            }
-
-            clReleaseKernel(kernel);
-            clReleaseProgram(program);
-        }
-
-
-
-
-        // Local kernel
-        if (address == kLocal || address == kPrivate) {
-            log_info("\tTesting local kernel...\n");
-            sprintf(kernel_code, local_kernel,
-                    vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_address_name(address), get_explicit_type_name(vecType[typeIndex])
-                    );
-            //printf("Kernel is: \n%s\n", kernel_code);
-
-            // Create the kernel
-            error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
-            test_error(error, "create_single_kernel_helper failed");
-
-            // Initialize the results
-            memset(results_data, 0, sizeof(cl_long)*5);
-            error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
-            test_error(error, "clEnqueueWriteBuffer failed");
-
-            // Set the arguments
-            error = clSetKernelArg(kernel, 0, sizeof(results), &results);
-            test_error(error, "clSetKernelArg failed");
-
-            // Enqueue the kernel
-            size_t global_size = 1;
-            error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
-            test_error(error, "clEnqueueNDRangeKernel failed");
-
-            // Read back the results
-            error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
-            test_error(error, "clEnqueueReadBuffer failed");
-
-            // Verify the results
-            if (gHasLong) {
-                for (int i = 0; i < 5; i++) {
-                    if ((results_data[i] & alignments[i]) != 0) {
-                        total_errors++;
-                        log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
-                    } else {
-                        if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
-                    }
-                }
-            }
-        // Verify the results on devices that do not support longs
-            else {
-                cl_uint *results_data_no_long = (cl_uint *)results_data;
-
-                for (int i = 0; i < 5; i++) {
-                    if ((results_data_no_long[i] & alignments[i]) != 0) {
-                        total_errors++;
-                        log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
-                    } else {
-                        if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
-                    }
-                }
-            }
-            clReleaseKernel(kernel);
-            clReleaseProgram(program);
-        }
-
-
-
-        // Constant kernel
-        if (address == kConstant) {
-            log_info("\tTesting constant kernel...\n");
-            sprintf(kernel_code, constant_kernel,
-                    vecType[typeIndex] == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex]),
-                    get_explicit_type_name(vecType[typeIndex])
-                    );
-            //printf("Kernel is: \n%s\n", kernel_code);
-
-            // Create the kernel
-            error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test");
-            test_error(error, "create_single_kernel_helper failed");
-
-            // Initialize the results
-            memset(results_data, 0, sizeof(cl_long)*5);
-            error = clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, sizeof(cl_long)*5, results_data, 0, NULL, NULL);
-            test_error(error, "clEnqueueWriteBuffer failed");
-
-            // Set the arguments
-            error = clSetKernelArg(kernel, 0, sizeof(results), &results);
-            test_error(error, "clSetKernelArg failed");
-
-            // Enqueue the kernel
-            size_t global_size = 1;
-            error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size, NULL, 0, NULL, NULL);
-            test_error(error, "clEnqueueNDRangeKernel failed");
-
-            // Read back the results
-            error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_ulong)*5, results_data, 0, NULL, NULL);
-            test_error(error, "clEnqueueReadBuffer failed");
-
-            // Verify the results
-            if (gHasLong) {
-                for (int i = 0; i < 5; i++) {
-                    if ((results_data[i] & alignments[i]) != 0) {
-                        total_errors++;
-                        log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data[i]);
-                    } else {
-                        if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data[i]);
-                    }
-                }
-            }
-        // Verify the results on devices that do not support longs
-            else {
-                cl_uint *results_data_no_long = (cl_uint *)results_data;
-
-                for (int i = 0; i < 5; i++) {
-                    if ((results_data_no_long[i] & alignments[i]) != 0) {
-                        total_errors++;
-                        log_error("\tVector size %d failed: 0x%llx is not properly aligned.\n", 1 << i, results_data_no_long[i]);
-                    } else {
-                        if (DEBUG) log_info("\tVector size %d passed: 0x%llx is properly aligned.\n", 1 << i, results_data_no_long[i]);
-                    }
-                }
-            }
-            clReleaseKernel(kernel);
-            clReleaseProgram(program);
-        }
-
-
-    }
-
-    clReleaseMemObject(results);
-    clReleaseMemObject(mem0);
-    clReleaseMemObject(mem2);
-    clReleaseMemObject(mem3);
-    clReleaseMemObject(mem4);
-    clReleaseMemObject(mem8);
-    clReleaseMemObject(mem16);
-    free( kernel_code );
-    free( results_data );
-
-    if (total_errors != 0)
-        return -1;
-    return 0;
-
-}
-
-
-int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kLocal );
-}
-
-int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kGlobal );
-}
-
-int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    // There is a class of approved OpenCL 1.0 conformant devices out there that in some circumstances
-    // are unable to meaningfully take (or more precisely use) the address of constant data by virtue
-    // of limitations in their ISA design. This feature was not tested in 1.0, so they were declared
-    // conformant by Khronos. The failure is however caught here.
-    //
-    // Unfortunately, determining whether or not these devices are 1.0 conformant is not the jurisdiction
-    // of the 1.1 tests -- We can't fail them from 1.1 conformance here because they are not 1.1
-    // devices. They are merely 1.0 conformant devices that interop with 1.1 devices in a 1.1 platform.
-    // To add new binding tests now to conformant 1.0 devices would violate the workingroup requirement
-    // of no new tests for 1.0 devices.  So certain allowances have to be made in intractable cases
-    // such as this one.
-    //
-    // There is some precedent. Similar allowances are made for other 1.0 hardware features such as
-    // local memory size.  The minimum required local memory size grew from 16 kB to 32 kB in OpenCL 1.1.
-
-    // Detect 1.0 devices
-    // Get CL_DEVICE_VERSION size
-    size_t string_size = 0;
-    int err;
-    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, 0, NULL, &string_size ) ) )
-    {
-        log_error( "FAILURE: Unable to get size of CL_DEVICE_VERSION string!" );
-        return -1;
-    }
-
-    //Allocate storage to hold the version string
-    char *version_string = (char*) malloc(string_size);
-    if( NULL == version_string )
-    {
-        log_error( "FAILURE: Unable to allocate memory to hold CL_DEVICE_VERSION string!" );
-        return -1;
-    }
-
-    // Get CL_DEVICE_VERSION string
-    if( (err = clGetDeviceInfo( device, CL_DEVICE_VERSION, string_size, version_string, NULL ) ) )
-    {
-        log_error( "FAILURE: Unable to read CL_DEVICE_VERSION string!" );
-        return -1;
-    }
-
-    // easy out for 1.0 devices
-    const char *string_1_0 = "OpenCL 1.0 ";
-    if( 0 == strncmp( version_string, string_1_0, strlen(string_1_0)) )
-    {
-        log_info( "WARNING: Allowing device to escape testing of difficult constant memory alignment case.\n\tDevice is not a OpenCL 1.1 device. CL_DEVICE_VERSION: \"%s\"\n", version_string );
-        free(version_string);
-        return 0;
-    }
-    log_info( "Device version string: \"%s\"\n", version_string );
-    free(version_string);
-
-    // Everyone else is to be ground mercilessly under the wheels of progress
-    return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kConstant );
-}
-
-int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    return test_kernel_memory_alignment( device,  context,  queue,  n_elems, kPrivate );
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_local.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_local.c
@@ -1,372 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *barrier_with_localmem_kernel_code[] = {
-"__kernel void compute_sum_with_localmem(__global int *a, int n, __local int *tmp_sum, __global int *sum)\n"
-"{\n"
-"    int  tid = get_local_id(0);\n"
-"    int  lsize = get_local_size(0);\n"
-"    int  i;\n"
-"\n"
-"    tmp_sum[tid] = 0;\n"
-"    for (i=tid; i<n; i+=lsize)\n"
-"        tmp_sum[tid] += a[i];\n"
-"\n"
-"    if( lsize == 1 )\n"
-"    {\n"
-"       if( tid == 0 )\n"
-"           *sum = tmp_sum[0];\n"
-"       return;\n"
-"    }\n"
-"\n"
-"    do\n"
-"    {\n"
-"       barrier(CLK_LOCAL_MEM_FENCE);\n"
-"       if (tid < lsize/2)\n"
-"       {\n"
-"           int sum = tmp_sum[tid];\n"
-"           if( (lsize & 1) && tid == 0 )\n"
-"               sum += tmp_sum[tid + lsize - 1];\n"
-"           tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
-"       }\n"
-"       lsize = lsize/2; \n"
-"    }while( lsize );\n"
-"\n"
-"    if( tid == 0 )\n"
-"       *sum = tmp_sum[0];\n"
-"}\n",
-"__kernel void compute_sum_with_localmem(__global int *a, int n, __global int *sum)\n"
-"{\n"
-"     __local int tmp_sum[%d];\n"
-"    int  tid = get_local_id(0);\n"
-"    int  lsize = get_local_size(0);\n"
-"    int  i;\n"
-"\n"
-"    tmp_sum[tid] = 0;\n"
-"    for (i=tid; i<n; i+=lsize)\n"
-"        tmp_sum[tid] += a[i];\n"
-"\n"
-"    if( lsize == 1 )\n"
-"    {\n"
-"       if( tid == 0 )\n"
-"           *sum = tmp_sum[0];\n"
-"       return;\n"
-"    }\n"
-"\n"
-"    do\n"
-"    {\n"
-"       barrier(CLK_LOCAL_MEM_FENCE);\n"
-"       if (tid < lsize/2)\n"
-"       {\n"
-"           int sum = tmp_sum[tid];\n"
-"           if( (lsize & 1) && tid == 0 )\n"
-"               sum += tmp_sum[tid + lsize - 1];\n"
-"           tmp_sum[tid] = sum + tmp_sum[tid + lsize/2];\n"
-"       }\n"
-"       lsize = lsize/2; \n"
-"    }while( lsize );\n"
-"\n"
-"    if( tid == 0 )\n"
-"       *sum = tmp_sum[0];\n"
-"}\n"
-};
-
-
-static int
-verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
-{
-    int            r = 0;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r += inptr[i];
-    }
-
-    if (r != outptr[0])
-    {
-        log_error("LOCAL test failed: *%d vs %d\n", r, outptr[0] );
-        return -1;
-    }
-
-    log_info("LOCAL test passed\n");
-    return 0;
-}
-
-int test_local_arg_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    cl_program program;
-    cl_kernel kernel;
-
-    cl_int *input_ptr, *output_ptr, *tmp_ptr;
-    size_t global_threads[1], local_threads[1];
-    size_t wgsize, kwgsize;
-    size_t max_local_workgroup_size[3];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
-    if (err) {
-        log_error("clGetDeviceInfo failed, %d\n\n", err);
-        return -1;
-    }
-    wgsize/=2;
-    if (wgsize < 1)
-        wgsize = 1;
-
-    size_t in_length = sizeof(cl_int) * num_elements;
-    size_t out_length = sizeof(cl_int) * wgsize;
-
-    input_ptr = (cl_int *)malloc(in_length);
-    output_ptr = (cl_int *)malloc(out_length);
-    tmp_ptr = (cl_int *)malloc(out_length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = (int)genrand_int32(d);
-
-    free_mtdata(d); d = NULL;
-
-  err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueWriteBuffer failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_with_localmem_kernel_code[0], "compute_sum_with_localmem" );
-  if (err)
-    return -1;
-
-  err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
-  test_error(err, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
-
-  err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
-  test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-
-  // Pick the minimum of the device and the kernel
-  if (kwgsize > max_local_workgroup_size[0])
-    kwgsize = max_local_workgroup_size[0];
-
-  //    err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes);
-  err  = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
-  err |= clSetKernelArg(kernel, 2, wgsize * sizeof(cl_int), NULL);
-  err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    global_threads[0] = wgsize;
-    local_threads[0] = wgsize;
-
-  // Adjust the local thread size to fit and be a nice multiple.
-  if (kwgsize < wgsize) {
-    log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize);
-        local_threads[0] = kwgsize;
-  }
-  while (global_threads[0] % local_threads[0] != 0)
-    local_threads[0]--;
-
-  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueNDRangeKernel failed\n");
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueReadBuffer failed\n");
-    return -1;
-  }
-
-  err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(tmp_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-int test_local_kernel_def(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    cl_program program;
-    cl_kernel kernel;
-
-    cl_int *input_ptr, *output_ptr, *tmp_ptr;
-    size_t global_threads[1], local_threads[1];
-    size_t wgsize, kwgsize;
-    int err, i;
-    char *program_source = (char*)malloc(sizeof(char)*2048);
-    MTdata d = init_genrand( gRandomSeed );
-    size_t max_local_workgroup_size[3];
-    memset(program_source, 0, 2048);
-
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof wgsize, &wgsize, NULL);
-    if (err) {
-        log_error("clGetDeviceInfo failed, %d\n\n", err);
-        return -1;
-    }
-    wgsize/=2;
-    if (wgsize < 1)
-        wgsize = 1;
-
-    size_t in_length = sizeof(cl_int) * num_elements;
-    size_t out_length = sizeof(cl_int) * wgsize;
-
-    input_ptr = (cl_int *)malloc(in_length);
-    output_ptr = (cl_int *)malloc(out_length);
-    tmp_ptr = (cl_int *)malloc(out_length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, in_length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, out_length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = (cl_int) genrand_int32(d);
-
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, in_length, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    // Validate that created kernel doesn't violate local memory size allowed by the device
-    cl_ulong localMemSize = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMemSize), &localMemSize, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clGetDeviceInfo failed\n");
-        return -1;
-    }
-    if ( wgsize > (localMemSize / (sizeof(cl_int)*sizeof(cl_int))) )
-    {
-        wgsize = localMemSize / (sizeof(cl_int)*sizeof(cl_int));
-    }
-
-    sprintf(program_source, barrier_with_localmem_kernel_code[1], (int)(wgsize * sizeof(cl_int)));
-
-    err = create_single_kernel_helper(context, &program, &kernel, 1, (const char**)&program_source, "compute_sum_with_localmem" );
-    free(program_source);
-    if (err)
-        return -1;
-
-    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof kwgsize, &kwgsize, NULL);
-    test_error(err, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
-
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
-    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-
-    // Pick the minimum of the device and the kernel
-    if (kwgsize > max_local_workgroup_size[0])
-        kwgsize = max_local_workgroup_size[0];
-
-    //    err = clSetKernelArgs(context, kernel, 4, NULL, values, sizes);
-    err  = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
-    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
-    err |= clSetKernelArg(kernel, 2, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    global_threads[0] = wgsize;
-    local_threads[0] = wgsize;
-
-  // Adjust the local thread size to fit and be a nice multiple.
-  if (kwgsize < wgsize) {
-    log_info("Adjusting wgsize down from %lu to %lu.\n", wgsize, kwgsize);
-        local_threads[0] = kwgsize;
-  }
-  while (global_threads[0] % local_threads[0] != 0)
-    local_threads[0]--;
-
-  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueNDRangeKernel failed\n");
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, out_length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueReadBuffer failed\n");
-    return -1;
-  }
-
-  err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(tmp_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_local_kernel_scope.cpp
@@ -1,138 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-#define MAX_LOCAL_STORAGE_SIZE  256
-#define MAX_LOCAL_STORAGE_SIZE_STRING "256"
-
-const char *kernelSource[] = {
-    "__kernel void test( __global unsigned int * input, __global unsigned int *outMaxes )\n"
-    "{\n"
-    "   __local unsigned int localStorage[ " MAX_LOCAL_STORAGE_SIZE_STRING " ];\n"
-    "   unsigned int theValue = input[ get_global_id( 0 ) ];\n"
-    "\n"
-    "   // If we just write linearly, there's no verification that the items in a group share local data\n"
-    "   // So we write reverse-linearly, which requires items to read the local data written by at least one\n"
-    "   // different item\n"
-    "   localStorage[ get_local_size( 0 ) - get_local_id( 0 ) - 1 ] = theValue;\n"
-    "\n"
-    "   // The barrier ensures that all local items have written to the local storage\n"
-    "   barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "\n"
-    "   // Now we loop back through the local storage and look for the max value. We only do this if\n"
-    "   // we're the first item in a group\n"
-    "   unsigned int max = 0;\n"
-    "   if( get_local_id( 0 ) == 0 )\n"
-    "   {\n"
-    "       for( size_t i = 0; i < get_local_size( 0 ); i++ )\n"
-    "       {\n"
-    "           if( localStorage[ i ] > max )\n"
-    "               max = localStorage[ i ];\n"
-    "       }\n"
-    "       outMaxes[ get_group_id( 0 ) ] = max;\n"
-    "   }\n"
-    "}\n"
-};
-
-int test_local_kernel_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_int error;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[ 2 ];
-    MTdata randSeed = init_genrand( gRandomSeed );
-
-    // Create a test kernel
-    error = create_single_kernel_helper( context, &program, &kernel, 1, kernelSource, "test" );
-    test_error( error, "Unable to create test kernel" );
-
-
-    // Determine an appropriate test size
-    size_t workGroupSize;
-    error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workGroupSize ), &workGroupSize, NULL );
-    test_error( error, "Unable to obtain kernel work group size" );
-
-    // Make sure the work group size doesn't overrun our local storage size in the kernel
-    while( workGroupSize > MAX_LOCAL_STORAGE_SIZE )
-        workGroupSize >>= 1;
-
-    size_t testSize = workGroupSize;
-    while( testSize < 1024 )
-        testSize += workGroupSize;
-    size_t numGroups = testSize / workGroupSize;
-    log_info( "\tTesting with %ld groups, %ld elements per group...\n", numGroups, workGroupSize );
-
-    // Create two buffers for operation
-    cl_uint *inputData = (cl_uint*)malloc( testSize * sizeof(cl_uint) );
-    generate_random_data( kUInt, testSize, randSeed, inputData );
-    free_mtdata( randSeed );
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, testSize * sizeof(cl_uint), inputData, &error );
-    test_error( error, "Unable to create input buffer" );
-
-    cl_uint *outputData = (cl_uint*)malloc( numGroups *sizeof(cl_uint) );
-    streams[ 1 ] = clCreateBuffer( context, CL_MEM_WRITE_ONLY, numGroups * sizeof(cl_uint), NULL, &error );
-    test_error( error, "Unable to create output buffer" );
-
-
-    // Set up the kernel args and run
-    error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
-    test_error( error, "Unable to set kernel arg" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
-    test_error( error, "Unable to set kernel arg" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &testSize, &workGroupSize, 0, NULL, NULL );
-    test_error( error, "Unable to enqueue kernel" );
-
-
-    // Read results and verify
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, numGroups * sizeof(cl_uint), outputData, 0, NULL, NULL );
-    test_error( error, "Unable to read output data" );
-
-    // MingW compiler seems to have a bug that otimizes the code below incorrectly.
-    // adding the volatile keyword to size_t decleration to avoid aggressive optimization by the compiler.
-    for( volatile size_t i = 0; i < numGroups; i++ )
-    {
-        // Determine the max in our case
-        cl_uint localMax = 0;
-        for( volatile size_t j = 0; j < workGroupSize; j++ )
-        {
-            if( inputData[ i * workGroupSize + j ] > localMax )
-                localMax = inputData[ i * workGroupSize + j ];
-        }
-
-        if( outputData[ i ] != localMax )
-        {
-            log_error( "ERROR: Local max validation failed! (expected %u, got %u for i=%lu)\n", localMax, outputData[ i ] , i );
-            free(inputData);
-            free(outputData);
-            return -1;
-        }
-    }
-
-    free(inputData);
-    free(outputData);
-    return 0;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_loop.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_loop.c
@@ -1,184 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *loop_kernel_code =
-"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    int  n = get_global_size(0);\n"
-"    int  i, j;\n"
-"\n"
-"    dst[tid] = 0;\n"
-"    for (i=0,j=loopindx[tid]; i<loopcnt[tid]; i++,j++)\n"
-"    {\n"
-"        if (j >= n)\n"
-"            j = 0;\n"
-"        dst[tid] += src[j];\n"
-"    }\n"
-"\n"
-"}\n";
-
-
-int
-verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n)
-{
-    int     r, i, j, k;
-
-    for (i=0; i<n; i++)
-    {
-        r = 0;
-        for (j=0,k=loopindx[i]; j<loopcnt[i]; j++,k++)
-        {
-            if (k >= n)
-                k = 0;
-            r += inptr[k];
-        }
-
-        if (r != outptr[i])
-        {
-            log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r);
-            return -1;
-        }
-    }
-
-    log_info("LOOP test passed\n");
-    return 0;
-}
-
-int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[1];
-    int err, i;
-
-    size_t length = sizeof(cl_int) * num_elements;
-    input_ptr  = (cl_int*)malloc(length);
-    loop_indx  = (cl_int*)malloc(length);
-    loop_cnt   = (cl_int*)malloc(length);
-    output_ptr = (cl_int*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    MTdata d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        input_ptr[i] = (int)genrand_int32(d);
-        loop_indx[i] = (int)get_random_float(0, num_elements-1, d);
-        loop_cnt[i] = (int)get_random_float(0, num_elements/32, d);
-    }
-    free_mtdata(d); d = NULL;
-
-  err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueWriteBuffer failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, loop_indx, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueWriteBuffer failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, loop_cnt, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueWriteBuffer failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &loop_kernel_code, "test_loop" );
-  if (err)
-    return -1;
-
-  err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (unsigned int)num_elements;
-  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueNDRangeKernel failed\n");
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clReadArray failed\n");
-    return -1;
-  }
-
-  err = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(loop_indx);
-    free(loop_cnt);
-    free(output_ptr);
-
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_pointercast.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_pointercast.c
@@ -1,140 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static const char *pointer_cast_kernel_code =
-"__kernel void test_pointer_cast(__global unsigned char *src, __global unsigned int *dst)\n"
-"{\n"
-"    int          tid = get_global_id(0);\n"
-"    __global unsigned int *p = (__global unsigned int *)src;\n"
-"\n"
-"    dst[tid] = p[tid];\n"
-"\n"
-"}\n";
-
-
-int
-verify_pointer_cast(unsigned char *inptr, unsigned int *outptr, int n)
-{
-    unsigned int    *p = (unsigned int *)inptr;
-    int             i;
-    cl_uint r;
-
-    for (i=0; i<n; i++)
-    {
-        r = p[i];
-
-        if (r != outptr[i])
-        {
-            log_error("POINTER_CAST test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("POINTER_CAST test passed\n");
-    return 0;
-}
-
-int test_pointer_cast(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    unsigned char *input_ptr;
-    unsigned int *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(int) * num_elements;
-    input_ptr  = (unsigned char*)malloc(length);
-    output_ptr = (unsigned int*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    for (i=0; i<num_elements*4; i++)
-        input_ptr[i] = (unsigned char)genrand_int32(d);
-
-    free_mtdata(d);
-    d = NULL;
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper(context, &program, &kernel, 1, &pointer_cast_kernel_code, "test_pointer_cast" );
-    if (err)
-        return -1;
-
-    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (unsigned int)num_elements;
-  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueNDRangeKernel failed\n");
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clReadArray failed\n");
-    return -1;
-  }
-
-  err = verify_pointer_cast(input_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_vec_type_hint.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_vec_type_hint.c
@@ -1,97 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-
-static const char *sample_kernel = {
-  "%s\n" // optional pragma string
-  "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n"
-  "{\n"
-  "    int  tid = get_global_id(0);\n"
-  "     dst[tid] = src[tid];\n"
-  "\n"
-  "}\n"
-};
-
-int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-  int error;
-  int vec_type_index, vec_size_index;
-
-  ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
-    const char *size_names[] = {"", "2", "4", "8", "16"};
-    char *program_source;
-
-  program_source = (char*)malloc(sizeof(char)*4096);
-
-  for (vec_type_index=0; vec_type_index<10; vec_type_index++) {
-    if (vecType[vec_type_index] == kDouble) {
-      if (!is_extension_available(deviceID, "cl_khr_fp64")) {
-        log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
-        continue;
-      }
-      log_info("Testing doubles.\n");
-    }
-
-    for (vec_size_index=0; vec_size_index<5; vec_size_index++) {
-      clProgramWrapper program;
-      clKernelWrapper kernel;
-      clMemWrapper in, out;
-      size_t global[] = {1,1,1};
-
-      log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);
-
-      program_source[0] = '\0';
-      sprintf(program_source, sample_kernel,
-              (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-              get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);
-
-      error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" );
-      if( error != 0 )
-        return error;
-
-      in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error);
-      test_error(error, "clCreateBuffer failed");
-      out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error);
-      test_error(error, "clCreateBuffer failed");
-
-      error = clSetKernelArg(kernel, 0, sizeof(in), &in);
-      test_error(error, "clSetKernelArg failed");
-      error = clSetKernelArg(kernel, 1, sizeof(out), &out);
-      test_error(error, "clSetKernelArg failed");
-
-      error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL);
-      test_error(error, "clEnqueueNDRangeKernel failed");
-
-      error = clFinish(queue);
-      test_error(error, "clFinish failed");
-    }
-  }
-
-  free(program_source);
-
-  return 0;
-}
--- a/test_conformance/compatibility/test_conformance/basic/test_vloadstore.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_vloadstore.c
@@ -1,985 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-#include "harness/errorHelpers.h"
-
-// Outputs debug information for stores
-#define DEBUG 0
-// Forces stores/loads to be done with offsets = tid
-#define LINEAR_OFFSETS 0
-#define NUM_LOADS    512
-
-static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-
-#pragma mark -------------------- vload harness --------------------------
-
-typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize );
-
-int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
-               create_vload_program_fn createFn, size_t bufferSize, MTdata d )
-{
-    int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[ 4 ];
-    const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS;
-
-    if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128;
-
-    size_t threads[ 1 ], localThreads[ 1 ];
-    clProtectedArray inBuffer( bufferSize );
-    char programSrc[ 10240 ];
-    cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ];
-    size_t numElements, typeSize, i;
-    unsigned int outVectorSize;
-
-
-    typeSize = get_explicit_type_size( type );
-    numElements = bufferSize / ( typeSize * vecSize );
-    bufferSize = numElements * typeSize * vecSize;    // To account for rounding
-
-    if (DEBUG) log_info("Testing: numLoads: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numLoads, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);
-
-    // Create some random input data and random offsets to load from
-    generate_random_data( type, numElements * vecSize, d, (void *)inBuffer );
-    for( i = 0; i < numLoads; i++ )
-    {
-        offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 1, d );
-        if( offsets[ i ] < numElements - 2 )
-            alignmentOffsets[ i ] = (cl_uint)random_in_range( 0, (int)vecSize - 1, d );
-        else
-            alignmentOffsets[ i ] = 0;
-        if (LINEAR_OFFSETS) offsets[i] = (cl_uint)i;
-    }
-    if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n");
-
-    // 32-bit fixup
-    outVectorSize = vecSize;
-
-    // Declare output buffers now
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    char outBuffer[ numLoads * typeSize * outVectorSize ];
-    char referenceBuffer[ numLoads * typeSize * vecSize ];
-#else
-    char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char));
-    char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char));
-#endif
-
-    // Create the program
-
-
-    createFn( programSrc, numElements, type, vecSize, outVectorSize);
-
-    // Create our kernel
-    const char *ptr = programSrc;
-
-    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
-    test_error( error, "Unable to create testing kernel" );
-    if (DEBUG) log_info("Kernel: \n%s\n", programSrc);
-
-    // Get the number of args to differentiate the kernels with local storage. (They have 5)
-    cl_uint numArgs;
-    error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
-    test_error( error, "clGetKernelInfo failed");
-
-    // Set up parameters
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, bufferSize, (void *)inBuffer, &error );
-    test_error( error, "Unable to create kernel stream" );
-    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(offsets[0]), offsets, &error );
-    test_error( error, "Unable to create kernel stream" );
-    streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error );
-    test_error( error, "Unable to create kernel stream" );
-    streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error );
-    test_error( error, "Unable to create kernel stream" );
-
-    // Set parameters and run
-    if (numArgs == 5) {
-        // We need to set the size of the local storage
-        error = clSetKernelArg(kernel, 0, bufferSize, NULL);
-        test_error( error, "clSetKernelArg for buffer failed");
-        for( i = 0; i < 4; i++ )
-        {
-            error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
-            test_error( error, "Unable to set kernel argument" );
-        }
-    } else {
-        // No local storage
-        for( i = 0; i < 4; i++ )
-        {
-            error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
-            test_error( error, "Unable to set kernel argument" );
-        }
-    }
-
-    threads[ 0 ] = numLoads;
-    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
-    test_error( error, "Unable to get local thread size" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Unable to exec kernel" );
-
-    // Get the results
-    error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL );
-    test_error( error, "Unable to read results" );
-
-
-    // Create the reference results
-    memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char));
-    for( i = 0; i < numLoads; i++ )
-    {
-        memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize,
-               typeSize * vecSize );
-    }
-
-    // Validate the results now
-    char *expected = referenceBuffer;
-    char *actual = outBuffer;
-    char *in = (char *)(void *)inBuffer;
-
-    if (DEBUG) {
-        log_info("Memory contents:\n");
-        for (i=0; i<numElements; i++) {
-            char  inString[1024];
-            char expectedString[ 1024 ], actualString[ 1024 ];
-            if (i < numLoads) {
-                log_info("buffer %3d: input: %s expected: %s got: %s (load offset %3d, alignment offset %3d)", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
-                         GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
-                         GetDataVectorString( &(actual[i*typeSize*outVectorSize]), typeSize, vecSize, actualString ),
-                         offsets[i], alignmentOffsets[i]);
-                if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*outVectorSize]), typeSize * vecSize) != 0)
-                    log_error(" << ERROR\n");
-                else
-                    log_info("\n");
-            } else {
-                log_info("buffer %3d: input: %s expected: %s got: %s\n", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
-                         GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
-                         GetDataVectorString( &(actual[i*typeSize*outVectorSize]), typeSize, vecSize, actualString ));
-            }
-        }
-    }
-
-    for( i = 0; i < numLoads; i++ )
-    {
-        if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
-        {
-            char expectedString[ 1024 ], actualString[ 1024 ];
-            log_error( "ERROR: Data sample %d for vload of %s%d did not validate (expected {%s}, got {%s}, loaded from offset %d)\n",
-                      (int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
-                      GetDataVectorString( actual, typeSize, vecSize, actualString ), (int)offsets[ i ] );
-            return 1;
-        }
-        expected += typeSize * vecSize;
-        actual += typeSize * outVectorSize;
-    }
-
-    return 0;
-}
-
-int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queue, create_vload_program_fn createFn, size_t bufferSize )
-{
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
-    unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 };
-    const char *size_names[] = { "2", "3", "4", "8", "16"};
-    unsigned int typeIdx, sizeIdx;
-    int error = 0;
-    MTdata mtData = init_genrand( gRandomSeed );
-
-    log_info("Testing with buffer size of %d.\n", (int)bufferSize);
-
-    for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ )
-    {
-
-        if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
-            continue;
-
-        if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong )
-            continue;
-
-        for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
-        {
-            log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]);
-
-            int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData );
-            if (error_this_type) {
-                error += error_this_type;
-                log_error("Failure; skipping further sizes for this type.");
-                break;
-            }
-        }
-    }
-
-    free_mtdata(mtData);
-
-    return error;
-}
-
-#pragma mark -------------------- vload test cases --------------------------
-
-void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
-{
-    const char *pattern =
-    "%s%s"
-    "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "    %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
-    "   results[ tid ] = tmp;\n"
-    "}\n";
-
-    const char *patternV3 =
-    "%s%s"
-    "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "    %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
-    "   results[ 3*tid ] = tmp.s0;\n"
-    "   results[ 3*tid+1 ] = tmp.s1;\n"
-    "   results[ 3*tid+2 ] = tmp.s2;\n"
-    "}\n";
-
-    const char *typeName = get_explicit_type_name(type);
-    if(inVectorSize == 3) {
-        sprintf( destBuffer, patternV3,
-                type == kDouble ? doubleExtensionPragma : "",
-                "",
-                typeName, typeName, typeName, typeName );
-    } else {
-        sprintf( destBuffer, pattern, type == kDouble ? doubleExtensionPragma : "",
-                "",
-                typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize,
-                (int)inVectorSize, typeName );
-    }
-}
-
-int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    return test_vloadset( device, context, queue, create_global_load_code, 10240 );
-}
-
-
-void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
-{
-    const char *pattern =
-    "%s%s"
-    //"   __local %s%d sSharedStorage[ %d ];\n"
-    "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "   int lid = get_local_id( 0 );\n"
-    "\n"
-    "    if( lid == 0 )\n"
-    "    {\n"
-    "        for( int i = 0; i < %d; i++ )\n"
-    "           sSharedStorage[ i ] = src[ i ];\n"
-    "    }\n"
-    //  Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all
-    //  threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be
-    //  updated on all threads at that point
-    "   barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "\n"
-    "    %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
-    "   results[ tid ] = tmp;\n"
-    "}\n";
-
-    const char *patternV3 =
-    "%s%s"
-    //"   __local %s%d sSharedStorage[ %d ];\n"
-    "__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "   int lid = get_local_id( 0 );\n"
-    "\n"
-    "    if( lid == 0 )\n"
-    "    {\n"
-    "        for( int i = 0; i < %d; i++ ) {\n"
-    "           sSharedStorage[ 3*i   ] = src[ 3*i   ];\n"
-    "           sSharedStorage[ 3*i +1] = src[ 3*i +1];\n"
-    "           sSharedStorage[ 3*i +2] = src[ 3*i +2];\n"
-    "        }\n"
-    "    }\n"
-    //  Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all
-    //  threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be
-    //  updated on all threads at that point
-    "   barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "\n"
-    "    %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
-    "   results[ 3*tid   ] = tmp.s0;\n"
-    "   results[ 3*tid +1] = tmp.s1;\n"
-    "   results[ 3*tid +2] = tmp.s2;\n"
-    "}\n";
-
-    const char *typeName = get_explicit_type_name(type);
-    if(inVectorSize == 3) {
-        sprintf( destBuffer, patternV3,
-                type == kDouble  ? doubleExtensionPragma : "",
-                "",
-                typeName, /*(int)inBufferSize,*/
-                typeName, typeName,
-                (int)inBufferSize,
-                typeName, typeName );
-    } else {
-        sprintf( destBuffer, pattern,
-                type == kDouble  ? doubleExtensionPragma : "",
-                "",
-                typeName, (int)inVectorSize, /*(int)inBufferSize,*/
-                typeName, (int)inVectorSize, typeName, (int)outVectorSize,
-                (int)inBufferSize,
-                typeName, (int)inVectorSize, (int)inVectorSize, typeName );
-    }
-}
-
-int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    // Determine the max size of a local buffer that we can test against
-    cl_ulong localSize;
-    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL );
-    test_error( error, "Unable to get max size of local memory buffer" );
-    if( localSize > 10240 )
-        localSize = 10240;
-    if (localSize > 4096)
-        localSize -= 2048;
-    else
-        localSize /= 2;
-
-    return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize );
-}
-
-
-void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
-{
-    const char *pattern =
-    "%s%s"
-    "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "    %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
-    "   results[ tid ] = tmp;\n"
-    "}\n";
-
-    const char *patternV3 =
-    "%s%s"
-    "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "    %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
-    "   results[ 3*tid   ] = tmp.s0;\n"
-    "   results[ 3*tid+1 ] = tmp.s1;\n"
-    "   results[ 3*tid+2 ] = tmp.s2;\n"
-    "}\n";
-
-    const char *typeName = get_explicit_type_name(type);
-    if(inVectorSize == 3) {
-        sprintf( destBuffer, patternV3,
-                type == kDouble ? doubleExtensionPragma : "",
-                "",
-                typeName, typeName,  typeName,
-                typeName );
-    } else {
-        sprintf( destBuffer, pattern,
-                type == kDouble ? doubleExtensionPragma : "",
-                "",
-                typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize,
-                (int)inVectorSize, typeName );
-    }
-}
-
-int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    // Determine the max size of a local buffer that we can test against
-    cl_ulong maxSize;
-    int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL );
-    test_error( error, "Unable to get max size of constant memory buffer" );
-    if( maxSize > 10240 )
-        maxSize = 10240;
-    if (maxSize > 4096)
-        maxSize -= 2048;
-    else
-        maxSize /= 2;
-
-    return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize );
-}
-
-
-void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
-{
-    const char *pattern =
-    "%s%s"
-    // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
-    // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
-    "#define PRIV_TYPE %s%d\n"
-    "#define PRIV_SIZE %d\n"
-    "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
-    "{\n"
-    "    __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
-    "    int tid = get_global_id( 0 );\n"
-    "\n"
-    "    for( int i = 0; i < %d; i++ )\n"
-    "      sPrivateStorage[ i ] = src[ i ];\n"
-    //    Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for
-    //  anybody else to sync up
-    "\n"
-    "    %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
-    "   results[ tid ] = tmp;\n"
-    "}\n";
-
-    const char *patternV3 =
-    "%s%s"
-    // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
-    // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
-    "#define PRIV_TYPE %s\n"
-    "#define PRIV_SIZE %d\n"
-    "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
-    "{\n"
-    "    __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
-    "    int tid = get_global_id( 0 );\n"
-    "\n"
-    "    for( int i = 0; i < PRIV_SIZE; i++ )\n"
-    "    {\n"
-    "        sPrivateStorage[ i ] = src[ i ];\n"
-    "    }\n"
-    //    Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for
-    //  anybody else to sync up
-    "\n"
-    "    %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
-    "   results[ 3*tid   ] = tmp.s0;\n"
-    "   results[ 3*tid+1 ] = tmp.s1;\n"
-    "   results[ 3*tid+2 ] = tmp.s2;\n"
-    "}\n";
-
-    const char *typeName = get_explicit_type_name(type);
-    if(inVectorSize ==3) {
-        sprintf( destBuffer, patternV3,
-                type == kDouble ? doubleExtensionPragma : "",
-                "",
-                typeName, 3*((int)inBufferSize),
-                typeName, typeName,
-                typeName );
-        // log_info("Src is \"\n%s\n\"\n", destBuffer);
-    } else {
-        sprintf( destBuffer, pattern,
-                type == kDouble ? doubleExtensionPragma : "",
-                "",
-                typeName, (int)inVectorSize, (int)inBufferSize,
-                typeName, (int)inVectorSize, typeName, (int)outVectorSize,
-                (int)inBufferSize,
-                typeName, (int)inVectorSize, (int)inVectorSize, typeName );
-    }
-}
-
-int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    // We have no idea how much actual private storage is available, so just pick a reasonable value,
-    // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes
-    return test_vloadset( device, context, queue, create_private_load_code, 256 );
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-#pragma mark -------------------- vstore harness --------------------------
-
-typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize );
-
-int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
-                create_vstore_program_fn createFn, size_t bufferSize, MTdata d )
-{
-    int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[ 3 ];
-
-    size_t threads[ 1 ], localThreads[ 1 ];
-
-    size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS;
-
-    if (DEBUG)
-        bufferSize = (bufferSize < 128) ? bufferSize : 128;
-
-    typeSize = get_explicit_type_size( type );
-    numElements = bufferSize / ( typeSize * vecSize );
-    bufferSize = numElements * typeSize * vecSize;    // To account for rounding
-    if( numStores > numElements * 2 / 3 )
-    {
-        // Note: unlike load, we have to restrict the # of stores here, since all offsets must be unique for our test
-        // (Plus, we leave some room for extra values to make sure didn't get written)
-        numStores = numElements * 2 / 3;
-        if( numStores < 1 )
-            numStores = 1;
-    }
-    if (DEBUG)
-        log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    cl_uint offsets[ numStores ];
-#else
-    cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint));
-#endif
-    char programSrc[ 10240 ];
-    size_t i;
-
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    char inBuffer[ numStores * typeSize * vecSize ];
-#else
-    char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char));
-#endif
-    clProtectedArray outBuffer( numElements * typeSize * vecSize );
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    char referenceBuffer[ numElements * typeSize * vecSize ];
-#else
-    char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char));
-#endif
-
-    // Create some random input data and random offsets to load from
-    generate_random_data( type, numStores * vecSize, d, (void *)inBuffer );
-
-    // Note: make sure no two offsets are the same, otherwise the output would depend on
-    // the order that threads ran in, and that would be next to impossible to verify
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    char flags[ numElements ];
-#else
-    char* flags = (char*)_malloca( numElements * sizeof(char));
-#endif
-
-    memset( flags, 0, numElements * sizeof(char) );
-    for( i = 0; i < numStores; i++ )
-    {
-        do
-        {
-            offsets[ i ] = (cl_uint)random_in_range( 0, (int)numElements - 2, d );    // Note: keep it one vec below the end for offset testing
-        } while( flags[ offsets[ i ] ] != 0 );
-        flags[ offsets[ i ] ] = -1;
-        if (LINEAR_OFFSETS)
-            offsets[i] = (int)i;
-    }
-    if (LINEAR_OFFSETS)
-        log_info("Offsets set to thread IDs to simplify output.\n");
-
-    createFn( programSrc, numElements, type, vecSize );
-
-    // Create our kernel
-    const char *ptr = programSrc;
-    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
-    test_error( error, "Unable to create testing kernel" );
-    if (DEBUG) log_info("Kernel: \n%s\n", programSrc);
-
-    // Get the number of args to differentiate the kernels with local storage. (They have 5)
-    cl_uint numArgs;
-    error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
-    test_error( error, "clGetKernelInfo failed");
-
-    // Set up parameters
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error );
-    test_error( error, "Unable to create kernel stream" );
-    streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error );
-    test_error( error, "Unable to create kernel stream" );
-    streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error );
-    test_error( error, "Unable to create kernel stream" );
-
-    // Set parameters and run
-    if (numArgs == 5)
-    {
-        // We need to set the size of the local storage
-        error = clSetKernelArg(kernel, 0, bufferSize, NULL);
-        test_error( error, "clSetKernelArg for buffer failed");
-        for( i = 0; i < 3; i++ )
-        {
-            error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
-            test_error( error, "Unable to set kernel argument" );
-        }
-    }
-    else
-    {
-        // No local storage
-        for( i = 0; i < 3; i++ )
-        {
-            error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
-            if (error)
-                log_info("%s\n", programSrc);
-            test_error( error, "Unable to set kernel argument" );
-        }
-    }
-
-    threads[ 0 ] = numStores;
-    error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
-    test_error( error, "Unable to get local thread size" );
-
-    // Run in a loop, changing the address offset from 0 to ( vecSize - 1 ) each time, since
-    // otherwise stores might overlap each other, and it'd be a nightmare to test!
-    for( cl_uint addressOffset = 0; addressOffset < vecSize; addressOffset++ )
-    {
-        if (DEBUG)
-            log_info("\tstore addressOffset is %d, executing with threads %d\n", addressOffset, (int)threads[0]);
-
-        // Clear the results first
-        memset( outBuffer, 0, numElements * typeSize * vecSize );
-        error = clEnqueueWriteBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
-        test_error( error, "Unable to erase result stream" );
-
-        // Set up the new offset and run
-        if (numArgs == 5)
-            error = clSetKernelArg( kernel, 3+1, sizeof( cl_uint ), &addressOffset );
-        else
-            error = clSetKernelArg( kernel, 3, sizeof( cl_uint ), &addressOffset );
-        test_error( error, "Unable to set address offset argument" );
-
-        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-        test_error( error, "Unable to exec kernel" );
-
-        // Get the results
-        error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
-        test_error( error, "Unable to read results" );
-
-
-        // Create the reference results
-        memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) );
-        for( i = 0; i < numStores; i++ )
-        {
-            memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize );
-        }
-
-        // Validate the results now
-        char *expected = referenceBuffer;
-        char *actual = (char *)(void *)outBuffer;
-
-        if (DEBUG)
-        {
-            log_info("Memory contents:\n");
-            for (i=0; i<numElements; i++)
-            {
-                char  inString[1024];
-                char expectedString[ 1024 ], actualString[ 1024 ];
-                if (i < numStores)
-                {
-                    log_info("buffer %3d: input: %s expected: %s got: %s (store offset %3d)", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ),
-                             GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
-                             GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ),
-                             offsets[i]);
-                    if (memcmp(&(expected[i*typeSize*vecSize]), &(actual[i*typeSize*vecSize]), typeSize * vecSize) != 0)
-                        log_error(" << ERROR\n");
-                    else
-                        log_info("\n");
-                }
-                else
-                {
-                    log_info("buffer %3d: input: %s expected: %s got: %s\n", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ),
-                             GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
-                             GetDataVectorString( &(actual[i*typeSize*vecSize]), typeSize, vecSize, actualString ));
-                }
-            }
-        }
-
-        for( i = 0; i < numElements; i++ )
-        {
-            if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
-            {
-                char expectedString[ 1024 ], actualString[ 1024 ];
-                log_error( "ERROR: Data sample %d for vstore of %s%d did not validate (expected {%s}, got {%s}",
-                          (int)i, get_explicit_type_name( type ), vecSize, GetDataVectorString( expected, typeSize, vecSize, expectedString ),
-                          GetDataVectorString( actual, typeSize, vecSize, actualString ) );
-                size_t j;
-                for( j = 0; j < numStores; j++ )
-                {
-                    if( offsets[ j ] == (cl_uint)i )
-                    {
-                        log_error( ", stored from store #%d (of %d, offset = %d) with address offset of %d", (int)j, (int)numStores, offsets[j], (int)addressOffset );
-                        break;
-                    }
-                }
-                if( j == numStores )
-                    log_error( ", supposed to be canary value" );
-                log_error( ")\n" );
-                return 1;
-            }
-            expected += typeSize * vecSize;
-            actual += typeSize * vecSize;
-        }
-    }
-
-    return 0;
-}
-
-int test_vstoreset(cl_device_id device, cl_context context, cl_command_queue queue, create_vstore_program_fn createFn, size_t bufferSize )
-{
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
-    unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 };
-    const char *size_names[] = { "2", "3", "4", "8", "16"};
-    unsigned int typeIdx, sizeIdx;
-    int error = 0;
-    MTdata d = init_genrand( gRandomSeed );
-
-    log_info("Testing with buffer size of %d.\n", (int)bufferSize);
-
-    for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ )
-    {
-        if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
-            continue;
-
-        if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong )
-            continue;
-
-        for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
-        {
-            log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]);
-
-            int error_this_type = test_vstore( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, d );
-            if (error_this_type)
-            {
-                log_error("Failure; skipping further sizes for this type.\n");
-                error += error_this_type;
-                break;
-            }
-        }
-    }
-
-    free_mtdata(d);
-    return error;
-}
-
-
-#pragma mark -------------------- vstore test cases --------------------------
-
-void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
-{
-    const char *pattern =
-    "%s"
-    "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "    vstore%d( srcValues[ tid ], offsets[ tid ], destBuffer + alignmentOffset );\n"
-    "}\n";
-
-    const char *patternV3 =
-    "%s"
-    "__kernel void test_fn( __global %s3 *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    "    if((tid&3) == 0) { // if \"tid\" is a multiple of 4 \n"
-    "      vstore3( srcValues[ 3*(tid>>2) ], offsets[ tid ], destBuffer + alignmentOffset );\n"
-    "    } else {\n"
-    "      vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n"
-    "    }\n"
-    "}\n";
-
-    const char *typeName = get_explicit_type_name(type);
-
-    if(inVectorSize == 3) {
-        sprintf( destBuffer, patternV3,
-                type == kDouble ? doubleExtensionPragma : "",
-                typeName, typeName, typeName);
-
-    } else {
-        sprintf( destBuffer, pattern,
-                type == kDouble ? doubleExtensionPragma : "",
-                typeName, (int)inVectorSize, typeName, (int)inVectorSize );
-    }
-    // if(inVectorSize == 3 || inVectorSize == 4) {
-    //     log_info("\n----\n%s\n----\n", destBuffer);
-    // }
-}
-
-int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    return test_vstoreset( device, context, queue, create_global_store_code, 10240 );
-}
-
-
-void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
-{
-    const char *pattern =
-    "%s"
-    "\n"
-    "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
-    " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n"
-    " sSharedStorage[ offsets[tid] +1 ] =  sSharedStorage[ offsets[tid] ];\n"
-    "   barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "\n"
-    "    vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n"
-    "\n"
-    // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
-    // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
-    "   barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "\n"
-    // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
-    // otherwise, local threads would be overwriting results from other local threads
-    "  int i;\n"
-    "  __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n"
-    "  __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
-    "  for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n"
-    "       dp[i] = sp[i];\n"
-    "}\n";
-
-    const char *patternV3 =
-    "%s"
-    "\n"
-    "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
-    "{\n"
-    "    int tid = get_global_id( 0 );\n"
-    // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
-    "    sSharedStorage[ 3*offsets[tid]   ] = (%s)0;\n"
-    "    sSharedStorage[ 3*offsets[tid] +1 ] =  \n"
-    "        sSharedStorage[ 3*offsets[tid] ];\n"
-    "    sSharedStorage[ 3*offsets[tid] +2 ] =  \n"
-    "        sSharedStorage[ 3*offsets[tid]];\n"
-    "    sSharedStorage[ 3*offsets[tid] +3 ] =  \n"
-    "        sSharedStorage[ 3*offsets[tid]];\n"
-    "    sSharedStorage[ 3*offsets[tid] +4 ] =  \n"
-    "        sSharedStorage[ 3*offsets[tid] ];\n"
-    "    sSharedStorage[ 3*offsets[tid] +5 ] =  \n"
-    "        sSharedStorage[ 3*offsets[tid]];\n"
-    "    barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "\n"
-    " vstore3( vload3(tid,srcValues), offsets[ tid ], sSharedStorage  + alignmentOffset );\n"
-    "\n"
-    // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
-    // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
-    "   barrier( CLK_LOCAL_MEM_FENCE );\n"
-    "\n"
-    // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
-    // otherwise, local threads would be overwriting results from other local threads
-    "  int i;\n"
-    "  __local %s *sp =  (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n"
-    "  __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n"
-    "  for( i = 0; i < 3; i++ ) \n"
-    "       dp[i] = sp[i];\n"
-    "}\n";
-
-    const char *typeName = get_explicit_type_name(type);
-    if(inVectorSize == 3) {
-        sprintf( destBuffer, patternV3,
-                type == kDouble ? doubleExtensionPragma : "",
-                typeName,
-                typeName,
-                typeName,  typeName,
-                typeName, typeName, typeName  );
-    } else {
-        sprintf( destBuffer, pattern,
-                type == kDouble ? doubleExtensionPragma : "",
-                typeName, (int)inVectorSize,
-                typeName, (int)inVectorSize, typeName, (int)inVectorSize,
-                typeName, (int)inVectorSize, typeName,
-                (int)inVectorSize, typeName, typeName,
-                typeName, typeName, typeName  );
-    }
-    // log_info(destBuffer);
-}
-
-int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    // Determine the max size of a local buffer that we can test against
-    cl_ulong localSize;
-    int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL );
-    test_error( error, "Unable to get max size of local memory buffer" );
-    if( localSize > 10240 )
-        localSize = 10240;
-    if (localSize > 4096)
-        localSize -= 2048;
-    else
-        localSize /= 2;
-    return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize );
-}
-
-
-void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
-{
-    const char *pattern =
-    "%s"
-    // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
-    // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
-    "\n"
-    "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
-    "{\n"
-    "    __private %s%d sPrivateStorage[ %d ];\n"
-    "    int tid = get_global_id( 0 );\n"
-    // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
-    " sPrivateStorage[tid] = (%s%d)(%s)0;\n"
-    "\n"
-    "   vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
-    "\n"
-    // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
-    // otherwise, local threads would be overwriting results from other local threads
-    "  uint i;\n"
-    "  __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n"
-    "  __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
-    "  for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n"
-    "       dp[i] = sp[i];\n"
-    "}\n";
-
-
-    const char *patternV3 =
-    "%s"
-    // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
-    // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
-    "\n"
-    "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n"
-    "{\n"
-    "    __private %s3 sPrivateStorage[ %d ];\n" // keep this %d
-    "    int tid = get_global_id( 0 );\n"
-    // We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
-    " sPrivateStorage[tid] = (%s3)(%s)0;\n"
-    "\n"
-
-    "   vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
-    "\n"
-    // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
-    // otherwise, local threads would be overwriting results from other local threads
-    "  uint i;\n"
-    "  __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n"
-    "  __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n"
-    "  for( i = 0; i < 3; i++ ) \n"
-    "       dp[i] = sp[i];\n"
-    "}\n";
-
-    const char *typeName = get_explicit_type_name(type);
-    if(inVectorSize == 3) {
-        sprintf( destBuffer, patternV3,
-                type == kDouble ? doubleExtensionPragma : "",
-                typeName,  typeName,
-                typeName, (int)inBufferSize,
-                typeName, typeName,
-                typeName, typeName, typeName, typeName, typeName );
-    } else {
-        sprintf( destBuffer, pattern,
-                type == kDouble ? doubleExtensionPragma : "",
-                typeName, (int)inVectorSize, typeName, (int)inVectorSize,
-                typeName, (int)inVectorSize, (int)inBufferSize,
-                typeName, (int)inVectorSize, typeName,
-                (int)inVectorSize, typeName, typeName, typeName, typeName, typeName );
-    }
-}
-
-int test_vstore_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
-{
-    // We have no idea how much actual private storage is available, so just pick a reasonable value,
-    // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes
-    return test_vstoreset( device, context, queue, create_private_store_code, 256 );
-}
-
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp
+++ b/test_conformance/compatibility/test_conformance/basic/test_work_item_functions.cpp
@@ -1,177 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-typedef struct work_item_data
-{
-    cl_uint workDim;
-    cl_uint globalSize[ 3 ];
-    cl_uint globalID[ 3 ];
-    cl_uint localSize[ 3 ];
-    cl_uint localID[ 3 ];
-    cl_uint numGroups[ 3 ];
-    cl_uint groupID[ 3 ];
-};
-
-static const char *workItemKernelCode =
-"typedef struct {\n"
-"    uint workDim;\n"
-"    uint globalSize[ 3 ];\n"
-"    uint globalID[ 3 ];\n"
-"    uint localSize[ 3 ];\n"
-"    uint localID[ 3 ];\n"
-"    uint numGroups[ 3 ];\n"
-"    uint groupID[ 3 ];\n"
-" } work_item_data;\n"
-"\n"
-"__kernel void sample_kernel( __global work_item_data *outData )\n"
-"{\n"
-"    int id = get_global_id(0);\n"
-"   outData[ id ].workDim = (uint)get_work_dim();\n"
-"    for( uint i = 0; i < get_work_dim(); i++ )\n"
-"   {\n"
-"       outData[ id ].globalSize[ i ] = (uint)get_global_size( i );\n"
-"       outData[ id ].globalID[ i ] = (uint)get_global_id( i );\n"
-"       outData[ id ].localSize[ i ] = (uint)get_local_size( i );\n"
-"       outData[ id ].localID[ i ] = (uint)get_local_id( i );\n"
-"       outData[ id ].numGroups[ i ] = (uint)get_num_groups( i );\n"
-"       outData[ id ].groupID[ i ] = (uint)get_group_id( i );\n"
-"   }\n"
-"}";
-
-#define NUM_TESTS 1
-
-int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    int error;
-
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper outData;
-    work_item_data    testData[ 10240 ];
-    size_t threads[3], localThreads[3];
-    MTdata d;
-
-
-    error = create_single_kernel_helper( context, &program, &kernel, 1, &workItemKernelCode, "sample_kernel" );
-    test_error( error, "Unable to create testing kernel" );
-
-    outData = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( testData ), NULL, &error );
-    test_error( error, "Unable to create output buffer" );
-
-    error = clSetKernelArg( kernel, 0, sizeof( outData ), &outData );
-    test_error( error, "Unable to set kernel arg" );
-
-    d = init_genrand( gRandomSeed );
-    for( size_t dim = 1; dim <= 3; dim++ )
-    {
-        for( int i = 0; i < NUM_TESTS; i++  )
-        {
-            size_t numItems = 1;
-            for( size_t j = 0; j < dim; j++ )
-            {
-                // All of our thread sizes should be within the max local sizes, since they're all <= 20
-                threads[ j ] = (size_t)random_in_range( 1, 20, d );
-                localThreads[ j ] = threads[ j ] / (size_t)random_in_range( 1, (int)threads[ j ], d );
-                while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) )
-                    localThreads[ j ]--;
-
-                numItems *= threads[ j ];
-
-                // Hack for now: localThreads > 1 are iffy
-                localThreads[ j ] = 1;
-            }
-            error = clEnqueueNDRangeKernel( queue, kernel, (cl_uint)dim, NULL, threads, localThreads, 0, NULL, NULL );
-            test_error( error, "Unable to run kernel" );
-
-            error = clEnqueueReadBuffer( queue, outData, CL_TRUE, 0, sizeof( testData ), testData, 0, NULL, NULL );
-            test_error( error, "Unable to read results" );
-
-            // Validate
-            for( size_t q = 0; q < threads[0]; q++ )
-            {
-                // We can't really validate the actual value of each one, but we can validate that they're within a sane range
-                if( testData[ q ].workDim != (cl_uint)dim )
-                {
-                    log_error( "ERROR: get_work_dim() did not return proper value for %d dimensions (expected %d, got %d)\n", (int)dim, (int)dim, (int)testData[ q ].workDim );
-                    free_mtdata(d);
-                    return -1;
-                }
-                for( size_t j = 0; j < dim; j++ )
-                {
-                    if( testData[ q ].globalSize[ j ] != (cl_uint)threads[ j ] )
-                    {
-                        log_error( "ERROR: get_global_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
-                                    (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalSize[ j ] );
-                        free_mtdata(d);
-                        return -1;
-                    }
-                    if( testData[ q ].globalID[ j ] < 0 || testData[ q ].globalID[ j ] >= (cl_uint)threads[ j ] )
-                    {
-                        log_error( "ERROR: get_global_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
-                                  (int)j, (int)dim, (int)threads[ j ], (int)testData[ q ].globalID[ j ] );
-                        free_mtdata(d);
-                        return -1;
-                    }
-                    if( testData[ q ].localSize[ j ] != (cl_uint)localThreads[ j ] )
-                    {
-                        log_error( "ERROR: get_local_size(%d) did not return proper value for %d dimensions (expected %d, got %d)\n",
-                                  (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localSize[ j ] );
-                        free_mtdata(d);
-                        return -1;
-                    }
-                    if( testData[ q ].localID[ j ] < 0 && testData[ q ].localID[ j ] >= (cl_uint)localThreads[ j ] )
-                    {
-                        log_error( "ERROR: get_local_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
-                                  (int)j, (int)dim, (int)localThreads[ j ], (int)testData[ q ].localID[ j ] );
-                        free_mtdata(d);
-                        return -1;
-                    }
-                    size_t groupCount = ( threads[ j ] + localThreads[ j ] - 1 ) / localThreads[ j ];
-                    if( testData[ q ].numGroups[ j ] != (cl_uint)groupCount )
-                    {
-                        log_error( "ERROR: get_num_groups(%d) did not return proper value for %d dimensions (expected %d with global dim %d and local dim %d, got %d)\n",
-                                  (int)j, (int)dim, (int)groupCount, (int)threads[ j ], (int)localThreads[ j ], (int)testData[ q ].numGroups[ j ] );
-                        free_mtdata(d);
-                        return -1;
-                    }
-                    if( testData[ q ].groupID[ j ] < 0 || testData[ q ].groupID[ j ] >= (cl_uint)groupCount )
-                    {
-                        log_error( "ERROR: get_group_id(%d) did not return proper value for %d dimensions (max %d, got %d)\n",
-                                  (int)j, (int)dim, (int)groupCount, (int)testData[ q ].groupID[ j ] );
-                        free_mtdata(d);
-                        return -1;
-                    }
-                }
-            }
-        }
-    }
-
-    free_mtdata(d);
-    return 0;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_writeimage_fp32.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_writeimage_fp32.c
@@ -1,188 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-
-static const char *rgbaFFFF_write_kernel_code =
-"__kernel void test_rgbaFFFF_write(__global float *src, write_only image2d_t dstimg)\n"
-"{\n"
-"    int            tid_x = get_global_id(0);\n"
-"    int            tid_y = get_global_id(1);\n"
-"    int            indx = tid_y * get_image_width(dstimg) + tid_x;\n"
-"    float4         color;\n"
-"\n"
-"    indx *= 4;\n"
-"    color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);\n"
-"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
-"\n"
-"}\n";
-
-
-static float *
-generate_float_image(int w, int h, MTdata d)
-{
-    float   *ptr = (float*)malloc(w * h * 4 * sizeof(float));
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
-
-    return ptr;
-}
-
-static int
-verify_float_image(const char *string, float *image, float *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("%s failed\n", string);
-            return -1;
-        }
-    }
-
-    log_info("%s passed\n", string);
-    return 0;
-}
-
-int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[3];
-    cl_program program;
-    cl_kernel kernel[2];
-    cl_image_format    img_format;
-    float *input_ptr, *output_ptr;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int i, err, any_err = 0;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(float);
-    MTdata d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    d = init_genrand( gRandomSeed );
-    input_ptr = generate_float_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (float*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateArray failed\n");
-        return -1;
-    }
-
-  err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-  err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgbaFFFF_write_kernel_code, "test_rgbaFFFF_write" );
-  if (err)
-    return -1;
-    kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)img_width;
-  threads[1] = (unsigned int)img_height;
-
-    for (i=0; i<2; i++)
-    {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clExecuteKernel failed\n");
-            return -1;
-        }
-
-    err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clReadImage failed\n");
-            return -1;
-        }
-
-        err = verify_float_image((i == 0) ? "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_READ_WRITE" :
-                             "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_WRITE_ONLY",
-                             input_ptr, output_ptr, img_width, img_height);
-        any_err |= err;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseKernel(kernel[0]);
-    clReleaseKernel(kernel[1]);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return any_err;
-}
-
-
--- a/test_conformance/compatibility/test_conformance/basic/test_writeimage_int16.c
+++ b/test_conformance/compatibility/test_conformance/basic/test_writeimage_int16.c
@@ -1,194 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static const char *rgba16_write_kernel_code =
-"__kernel void test_rgba16_write(__global unsigned short *src, write_only image2d_t dstimg)\n"
-"{\n"
-"    int            tid_x = get_global_id(0);\n"
-"    int            tid_y = get_global_id(1);\n"
-"    int            indx = tid_y * get_image_width(dstimg) + tid_x;\n"
-"    float4         color;\n"
-"\n"
-"    indx *= 4;\n"
-"    color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
-"    color /= 65535.0f;\n"
-"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
-"\n"
-"}\n";
-
-
-static unsigned short *
-generate_16bit_image(int w, int h, MTdata d)
-{
-    cl_ushort  *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort));
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (cl_ushort)genrand_int32(d);
-
-    return ptr;
-}
-
-// normalized 16bit ints ... get dived by 64k then muled by 64k...
-// give the poor things some tolerance
-#define MAX_ERR 1
-
-static int
-verify_16bit_image(const char *string, cl_ushort *image, cl_ushort *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (abs(outptr[i] - image[i]) > MAX_ERR)
-        {
-            log_error("%s failed\n", string);
-            return -1;
-        }
-    }
-
-    log_info("%s passed\n", string);
-    return 0;
-}
-
-int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[3];
-    cl_program program;
-    cl_kernel kernel[2];
-    cl_image_format    img_format;
-    cl_ushort *input_ptr, *output_ptr;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int i, err, any_err = 0;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(cl_ushort);
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    MTdata d = init_genrand( gRandomSeed );
-    input_ptr = generate_16bit_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (cl_ushort*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateArray failed\n");
-        return -1;
-    }
-
-  err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-  err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" );
-  if (err)
-    return -1;
-    kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)img_width;
-  threads[1] = (unsigned int)img_height;
-
-    for (i=0; i<2; i++)
-    {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clExecuteKernel failed\n");
-            return -1;
-        }
-
-    err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clReadImage failed\n");
-            return -1;
-        }
-
-        err = verify_16bit_image((i == 0) ? "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" :
-                             "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY",
-                             input_ptr, output_ptr, img_width, img_height);
-        any_err |= err;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseKernel(kernel[0]);
-    clReleaseKernel(kernel[1]);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return any_err;
-}
-
-