Synchronise with the Khronos-private GitLab branch

Maintenance of the conformance tests is moving to GitHub. This commit contains all the changes made in GitLab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
2026-03-22 15:19:02 +00:00 · 2019-02-20 16:36:05 +00:00
parent 95196e7fb4
commit d8733efc0f
576 changed files with 212486 additions and 191776 deletions
--- a/test_conformance/commonfns/Jamfile
+++ b/test_conformance/commonfns/Jamfile
@@ -1,33 +1,33 @@
-project
-    : requirements
-      <toolset>gcc:<cflags>-xc++
-      <toolset>msvc:<cflags>"/TP"
-    ;
-
-exe test_commonfns
-    : main.c
-      test_binary_fn.c
-      test_clamp.c
-      test_degrees.c
-      test_fmax.c
-      test_fmaxf.c
-      test_fmin.c
-      test_fminf.c
-      test_max.c
-      test_maxf.c
-      test_min.c
-      test_minf.c
-      test_mix.c
-      test_radians.c
-      test_sign.c
-      test_smoothstep.c
-      test_smoothstepf.c
-      test_step.c
-      test_stepf.c
-    ;
-
-install dist
-    : test_commonfns
-    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/commonfns
-      <variant>release:<location>$(DIST)/release/tests/test_conformance/commonfns
-    ;
+project  # requirements for every target in this Jamfile: compile the .c sources as C++ (gcc -xc++, msvc /TP)
+    : requirements
+      <toolset>gcc:<cflags>-xc++
+      <toolset>msvc:<cflags>"/TP"
+    ;
+# The test executable: main.c plus one source file per test, and the shared binary-function driver.
+exe test_commonfns
+    : main.c
+      test_binary_fn.c
+      test_clamp.c
+      test_degrees.c
+      test_fmax.c
+      test_fmaxf.c
+      test_fmin.c
+      test_fminf.c
+      test_max.c
+      test_maxf.c
+      test_min.c
+      test_minf.c
+      test_mix.c
+      test_radians.c
+      test_sign.c
+      test_smoothstep.c
+      test_smoothstepf.c
+      test_step.c
+      test_stepf.c
+    ;
+# Install test_commonfns under $(DIST), in a separate directory for the debug and the release variant.
+install dist
+    : test_commonfns
+    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/commonfns
+      <variant>release:<location>$(DIST)/release/tests/test_conformance/commonfns
+    ;
--- a/test_conformance/commonfns/Makefile
+++ b/test_conformance/commonfns/Makefile
@@ -1,45 +1,45 @@
-ifdef BUILD_WITH_ATF
-ATF = -framework ATF
-USE_ATF = -DUSE_ATF
-endif
-
-SRCS = main.c test_clamp.c test_degrees.c \
-			test_max.c test_maxf.c test_min.c test_minf.c \
-            test_mix.c test_radians.c test_step.c test_stepf.c\
-            test_smoothstep.c test_smoothstepf.c test_sign.c \
-			test_fmax.c test_fmin.c test_fmaxf.c test_fminf.c test_binary_fn.c \
-		  ../../test_common/harness/errorHelpers.c \
-		  ../../test_common/harness/threadTesting.c \
-		  ../../test_common/harness/testHarness.c \
-                  ../../test_common/harness/conversions.c \
-                  ../../test_common/harness/mt19937.c \
-		  ../../test_common/harness/kernelHelpers.c 
-
-SOURCES = $(abspath $(SRCS))
-
-LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
-LIBPATH += -L.
-FRAMEWORK = $(SOURCES)
-HEADERS = 
-TARGET = test_commonfns
-INCLUDE = 
-COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
-CC = c++
-CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF}
-CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
-LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${RC_CFLAGS} ${ATF}
-
-OBJECTS := ${SOURCES:.c=.o}
-OBJECTS := ${OBJECTS:.cpp=.o}
-
-TARGETOBJECT =
-all: $(TARGET)
-
-$(TARGET): $(OBJECTS)
-	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
-
-clean:
-	rm -f $(TARGET) $(OBJECTS)
-
-.DEFAULT:
-	@echo The target \"$@\" does not exist in Makefile.
+ifdef BUILD_WITH_ATF
+ATF = -framework ATF
+USE_ATF = -DUSE_ATF
+endif  # BUILD_WITH_ATF: only when it is set do the ATF framework (link) and -DUSE_ATF (compile) get added
+# SRCS: the commonfns test sources followed by the shared harness sources they are built with.
+SRCS = main.c test_clamp.c test_degrees.c \
+			test_max.c test_maxf.c test_min.c test_minf.c \
+            test_mix.c test_radians.c test_step.c test_stepf.c\
+            test_smoothstep.c test_smoothstepf.c test_sign.c \
+			test_fmax.c test_fmin.c test_fmaxf.c test_fminf.c test_binary_fn.c \
+		  ../../test_common/harness/errorHelpers.c \
+		  ../../test_common/harness/threadTesting.c \
+		  ../../test_common/harness/testHarness.c \
+                  ../../test_common/harness/conversions.c \
+                  ../../test_common/harness/mt19937.c \
+		  ../../test_common/harness/kernelHelpers.c 
+# SOURCES: the same list as absolute paths; OBJECTS is derived from it, so objects sit next to their sources.
+SOURCES = $(abspath $(SRCS))
+
+# Library search path: the OpenCL framework's Libraries directory first, then the current directory.
+LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries -L.
+TARGET = test_commonfns
+FRAMEWORK = $(SOURCES)
+HEADERS =
+INCLUDE =
+# CC deliberately names the C++ driver (c++) rather than a C compiler.
+CC = c++
+COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
+CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) $(USE_ATF)
+CXXFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) $(USE_ATF) $(DEFINES:%=-D%) $(INCLUDE)
+LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit $(RC_CFLAGS) $(ATF)
+# One object per source file, next to it: a .c suffix, then a .cpp suffix, is swapped for .o.
+OBJECTS := $(patsubst %.cpp,%.o,$(patsubst %.c,%.o,$(SOURCES)))
+
+TARGETOBJECT =
+all: $(TARGET)  # default goal: build the test executable
+.PHONY: all clean  # both name actions, not files: a file called "all" or "clean" must never make them look up to date
+$(TARGET): $(OBJECTS)  # link step; no %.o rule is defined here, so the objects come from make's built-in compile rule
+	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
+# Delete the executable and every object file.
+clean:
+	rm -f $(TARGET) $(OBJECTS)
+# Fallback recipe for any target that has no rule: print a notice (echo succeeds, so make itself does not fail).
+.DEFAULT:
+	@echo The target \"$@\" does not exist in Makefile.
--- a/test_conformance/commonfns/main.c
+++ b/test_conformance/commonfns/main.c
@@ -1,95 +1,95 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <math.h>
-#include <string.h>
-#include "procs.h"
-
-#include "../../test_common/harness/compat.h"
-
-int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount];
-int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3};
-
-static void initVecSizes() {
-    int i;
-    for(i = 0; i < kVectorSizeCount; ++i) {
-	g_arrVecSizes[i] = (1<<i);
-    }
-    for(; i < kVectorSizeCount + kStrangeVectorSizeCount; ++i) {
-	g_arrVecSizes[i] = g_arrStrangeVectorSizes[i-kVectorSizeCount];
-    }
-}
-
-
-basefn	commonfn_list[] = {
-				test_clamp,
-				test_degrees,
-				test_fmax,
-				test_fmaxf,
-				test_fmin,
-				test_fminf,
-				test_max,
-				test_maxf,
-				test_min,
-				test_minf,
-				test_mix,
-				test_radians,
-				test_step,
-				test_stepf,
-				test_smoothstep,
-				test_smoothstepf,
-				test_sign,
-};
-
-const char *commonfn_names[] = {
-	"clamp",
-	"degrees",
-	"fmax",
-	"fmaxf",
-	"fmin",
-	"fminf",
-	"max",
-	"maxf",
-	"min",
-	"minf",
-	"mix",
-	"radians",
-	"step",
-	"stepf",
-	"smoothstep",
-	"smoothstepf",
-	"sign",
-	"all",
-};
-
-ct_assert((sizeof(commonfn_names) / sizeof(commonfn_names[0]) - 1) == (sizeof(commonfn_list) / sizeof(commonfn_list[0])));
-
-int	num_commonfns = sizeof(commonfn_names) / sizeof(char *);
-
-int
-main(int argc, const char *argv[])
-{
-    initVecSizes();
-    return runTestHarness( argc, argv, num_commonfns, commonfn_list, commonfn_names, false, false, 0 );
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <math.h>
+#include <string.h>
+#include "procs.h"
+
+#include "../../test_common/harness/compat.h"
+
+int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount];  // every vector width under test; filled in by initVecSizes()
+int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3};  // the widths that are not powers of two (currently just 3)
+
+// Fill g_arrVecSizes: the first kVectorSizeCount slots get 1, 2, 4, ..., the rest copy g_arrStrangeVectorSizes.
+static void initVecSizes() {
+    int slot, extra;
+    for (slot = 0; slot < kVectorSizeCount; ++slot)
+        g_arrVecSizes[slot] = 1 << slot;
+    // slot now equals kVectorSizeCount, so the odd widths land right after the powers of two
+    for (extra = 0; extra < kStrangeVectorSizeCount; ++extra)
+        g_arrVecSizes[slot + extra] = g_arrStrangeVectorSizes[extra];
+}
+
+
+basefn    commonfn_list[] = {  // test entry points handed to runTestHarness; presumably paired by index with commonfn_names (only the lengths are checked)
+                test_clamp,
+                test_degrees,
+                test_fmax,
+                test_fmaxf,
+                test_fmin,
+                test_fminf,
+                test_max,
+                test_maxf,
+                test_min,
+                test_minf,
+                test_mix,
+                test_radians,
+                test_step,
+                test_stepf,
+                test_smoothstep,
+                test_smoothstepf,
+                test_sign,
+};
+
+const char *commonfn_names[] = {  // test names, listed in the same order as commonfn_list, plus a trailing "all" that has no function of its own
+    "clamp",
+    "degrees",
+    "fmax",
+    "fmaxf",
+    "fmin",
+    "fminf",
+    "max",
+    "maxf",
+    "min",
+    "minf",
+    "mix",
+    "radians",
+    "step",
+    "stepf",
+    "smoothstep",
+    "smoothstepf",
+    "sign",
+    "all",
+};
+
+// The name table must hold exactly one entry more than the function table: the trailing "all".
+ct_assert((sizeof(commonfn_names) / sizeof(commonfn_names[0]) - 1) == (sizeof(commonfn_list) / sizeof(commonfn_list[0])));
+int    num_commonfns = sizeof(commonfn_names) / sizeof(char *);  // number of names, "all" included (one more than commonfn_list has functions)
+
+// Entry point: fill g_arrVecSizes, then pass argc/argv and the test tables to runTestHarness; its result is the exit status.
+int main(int argc, const char *argv[]) {
+    initVecSizes();
+    const int harnessStatus = runTestHarness( argc, argv, num_commonfns, commonfn_list, commonfn_names, false, false, 0 );
+    return harnessStatus;
+}
+
+
--- a/test_conformance/commonfns/procs.h
+++ b/test_conformance/commonfns/procs.h
@@ -1,54 +1,54 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "../../test_common/harness/testHarness.h"
-#include "../../test_common/harness/kernelHelpers.h"
-#include "../../test_common/harness/errorHelpers.h"
-#include "../../test_common/harness/conversions.h"
-#include "../../test_common/harness/mt19937.h"
-
-#define kVectorSizeCount 5
-#define kStrangeVectorSizeCount 1
-#define kTotalVecCount (kVectorSizeCount + kStrangeVectorSizeCount)
-
-extern int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount];
-// int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3};
-
-extern int		test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_max(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_min(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_step(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-extern int		test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
-
-typedef int     (*binary_verify_float_fn)( float *x, float *y, float *out, int numElements, int vecSize );
-typedef int     (*binary_verify_double_fn)( double *x, double *y, double *out, int numElements, int vecSize );
-
-extern int      test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
-						   const char *fnName, bool vectorSecondParam,
-						   binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn );
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/testHarness.h"
+#include "../../test_common/harness/kernelHelpers.h"
+#include "../../test_common/harness/errorHelpers.h"
+#include "../../test_common/harness/conversions.h"
+#include "../../test_common/harness/mt19937.h"
+
+#define kVectorSizeCount 5 // number of power-of-two vector widths (main.c stores 1<<i for i = 0..4)
+#define kStrangeVectorSizeCount 1 // number of widths that are not powers of two (main.c lists just 3)
+#define kTotalVecCount (kVectorSizeCount + kStrangeVectorSizeCount) // every width under test
+
+extern int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount];
+// Defined in main.c, which fills it with the powers of two followed by g_arrStrangeVectorSizes ({3}).
+// One entry point per test, all with the same signature; main.c collects them in commonfn_list.
+extern int        test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_max(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_min(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_step(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int        test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+
+typedef int     (*binary_verify_float_fn)( float *x, float *y, float *out, int numElements, int vecSize );  // host-side check of float results; nonzero means mismatch
+typedef int     (*binary_verify_double_fn)( double *x, double *y, double *out, int numElements, int vecSize );  // same check for double results
+// Shared driver: builds a kernel calling fnName for each vector width and checks the output with the verify callbacks.
+extern int      test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
+                           const char *fnName, bool vectorSecondParam,
+                           binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn );
+
+
--- a/test_conformance/commonfns/test_binary_fn.c
+++ b/test_conformance/commonfns/test_binary_fn.c
@@ -1,265 +1,265 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-const char *binary_fn_code_pattern = 
-"%s\n" /* optional pragma */
-"__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s *dst)\n"    
-"{\n"    
-"    int  tid = get_global_id(0);\n"    
-"\n"                                        
-"    dst[tid] = %s(x[tid], y[tid]);\n"    
-"}\n";
-
-const char *binary_fn_code_pattern_v3 = 
-"%s\n" /* optional pragma */
-"__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"    
-"{\n"    
-"    int  tid = get_global_id(0);\n"    
-"\n"                                        
-"    vstore3(%s(vload3(tid,x), vload3(tid,y) ), tid, dst);\n"    
-"}\n";
-
-const char *binary_fn_code_pattern_v3_scalar = 
-"%s\n" /* optional pragma */
-"__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"    
-"{\n"    
-"    int  tid = get_global_id(0);\n"    
-"\n"                                        
-"    vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n"    
-"}\n";
-
-int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
-                    const char *fnName, bool vectorSecondParam,
-                    binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn )
-{
-    cl_mem      streams[6];
-    cl_float      *input_ptr[2], *output_ptr;
-    cl_double     *input_ptr_double[2], *output_ptr_double=NULL;
-    cl_program  *program;
-    cl_kernel   *kernel;
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i, j;
-    MTdata d;
-    
-  	program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
-  	kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
-  
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-    
-    int test_double = 0;
-    if(is_extension_available( device, "cl_khr_fp64" )) 
-    {
-        log_info("Testing doubles.\n");
-        test_double = 1;
-    }  
-  
-    for( i = 0; i < 2; i++ )
-    {
-        input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-        if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    }
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    
-    for( i = 0; i < 3; i++ )
-    {
-        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, &err );
-        test_error( err, "clCreateBuffer failed");
-    }
-    
-    if (test_double) 
-        for( i = 3; i < 6; i++ )
-        {
-          streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, &err );
-          test_error( err, "clCreateBuffer failed");
-        }
-    
-    d = init_genrand( gRandomSeed );
-    for( j = 0; j < num_elements; j++ )
-    {
-        input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
-        input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
-        if (test_double) 
-        {
-            input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
-            input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
-        }
-    }
-    free_mtdata(d);     d = NULL;
-    
-    for( i = 0; i < 2; i++ )
-    {
-        err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
-        test_error( err, "Unable to write input buffer" );
-        
-        if (test_double) 
-        {
-          err = clEnqueueWriteBuffer( queue, streams[ 3 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
-          test_error( err, "Unable to write input buffer" );
-        }
-    }
-    
-    for( i = 0; i < kTotalVecCount; i++ )
-    {
-        char programSrc[ 10240 ];
-        char vecSizeNames[][ 3 ] = { "", "2", "4", "8", "16", "3" };
-        
-		if(i >= kVectorSizeCount) {
-			// do vec3 print
-		    
-		    if(vectorSecondParam) {
-			sprintf( programSrc,binary_fn_code_pattern_v3, "", "float", "float", "float", fnName );
-		} else  {
-			sprintf( programSrc,binary_fn_code_pattern_v3_scalar, "", "float", "float", "float", fnName );
-		    }
-		} else  {
-			// do regular
-			sprintf( programSrc, binary_fn_code_pattern, "", "float", vecSizeNames[ i ], "float", vectorSecondParam ? vecSizeNames[ i ] : "", "float", vecSizeNames[ i ], fnName );
-		}
-        const char *ptr = programSrc;
-        err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &ptr, "test_fn" );
-        test_error( err, "Unable to create kernel" );
-        
-        if (test_double) 
-        {
-	    if(i >= kVectorSizeCount) {
-		if(vectorSecondParam) {
-		    sprintf( programSrc, binary_fn_code_pattern_v3, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-            "double",  "double",  "double",  fnName );
-		} else {
-
-		sprintf( programSrc, binary_fn_code_pattern_v3_scalar, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-			     "double",  "double",  "double",  fnName );
-		}
-	    } else {
-		sprintf( programSrc, binary_fn_code_pattern, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-            "double", vecSizeNames[ i ], "double", vectorSecondParam ? vecSizeNames[ i ] : "", "double", vecSizeNames[ i ], fnName );
-	    }
-            ptr = programSrc;
-            err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &ptr, "test_fn" );
-            test_error( err, "Unable to create kernel" );
-        }
-    }
-    
-    for( i = 0; i < kTotalVecCount; i++ )
-    {
-        for( j = 0; j < 3; j++ )
-        {
-            err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
-            test_error( err, "Unable to set kernel argument" );
-        }
-        
-        threads[0] = (size_t)n_elems;
-        
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        test_error( err, "Unable to execute kernel" );
-        
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        test_error( err, "Unable to read results" );
-        
-		
-
-        if( floatVerifyFn( input_ptr[0], input_ptr[1], output_ptr, n_elems, ((g_arrVecSizes[i])) ) )
-        {
-            log_error(" float%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
-            err = -1;
-        }
-        else
-        {
-            log_info(" float%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
-            err = 0;
-        }
-        
-        if (err)
-            break;
-    }
-    
-    if (test_double) 
-    {
-        for( i = 0; i < kTotalVecCount; i++ )
-        {
-            for( j = 0; j < 3; j++ )
-            {
-                err = clSetKernelArg( kernel[ kTotalVecCount + i ], j, sizeof( streams[ 3 + j ] ), &streams[ 3 + j ] );
-                test_error( err, "Unable to set kernel argument" );
-            }
-
-            threads[0] = (size_t)n_elems;
-
-            err = clEnqueueNDRangeKernel( queue, kernel[kTotalVecCount + i], 1, NULL, threads, NULL, 0, NULL, NULL );
-            test_error( err, "Unable to execute kernel" );
-
-            err = clEnqueueReadBuffer( queue, streams[5], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
-            test_error( err, "Unable to read results" );
-
-            if( doubleVerifyFn( input_ptr_double[0], input_ptr_double[1], output_ptr_double, n_elems, ((g_arrVecSizes[i]))))
-            {
-                log_error(" double%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
-                err = -1;
-            }
-            else
-            {
-                log_info(" double%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
-                err = 0;
-            }
-
-            if (err)
-            break;
-        }
-    }
-    
-    
-    for( i = 0; i < ((test_double) ? 6 : 3); i++ )
-    {
-        clReleaseMemObject(streams[i]);
-    }
-    for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount) ; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-  	free(program);
-  	free(kernel);
-  
-    if (test_double) 
-    {
-        free(input_ptr_double[0]);
-        free(input_ptr_double[1]);
-        free(output_ptr_double);
-    }
-    
-    return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+const char *binary_fn_code_pattern =  // kernel template, dst[tid] = fn(x[tid], y[tid]); format args: pragma, then (type, width suffix) for x, y and dst, then the function name
+"%s\n" /* optional pragma */
+"__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = %s(x[tid], y[tid]);\n"
+"}\n";
+
+const char *binary_fn_code_pattern_v3 =  // 3-wide variant: both operands go through vload3, the result through vstore3; format args: pragma, element type of x, y and dst, function name
+"%s\n" /* optional pragma */
+"__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(%s(vload3(tid,x), vload3(tid,y) ), tid, dst);\n"
+"}\n";
+
+const char *binary_fn_code_pattern_v3_scalar =  // 3-wide variant whose second operand is the scalar y[tid]; same format args as binary_fn_code_pattern_v3
+"%s\n" /* optional pragma */
+"__kernel void test_fn(__global %s *x, __global %s *y, __global %s *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n"
+"}\n";
+
+int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
+                    const char *fnName, bool vectorSecondParam,
+                    binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn )
+{
+    cl_mem      streams[6];
+    cl_float      *input_ptr[2], *output_ptr;
+    cl_double     *input_ptr_double[2], *output_ptr_double=NULL;
+    cl_program  *program;
+    cl_kernel   *kernel;
+    size_t threads[1];
+    int num_elements;
+    int err;
+    int i, j;
+    MTdata d;
+
+      program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
+      kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
+
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+
+    int test_double = 0;
+    if(is_extension_available( device, "cl_khr_fp64" ))
+    {
+        log_info("Testing doubles.\n");
+        test_double = 1;
+    }
+
+    for( i = 0; i < 2; i++ )
+    {
+        input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+        if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    }
+    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+
+    for( i = 0; i < 3; i++ )
+    {
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, &err );
+        test_error( err, "clCreateBuffer failed");
+    }
+
+    if (test_double)
+        for( i = 3; i < 6; i++ )
+        {
+          streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, &err );
+          test_error( err, "clCreateBuffer failed");
+        }
+
+    d = init_genrand( gRandomSeed );
+    for( j = 0; j < num_elements; j++ )
+    {
+        input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
+        input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
+        if (test_double)
+        {
+            input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
+        }
+    }
+    free_mtdata(d);     d = NULL;
+
+    for( i = 0; i < 2; i++ )
+    {
+        err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
+        test_error( err, "Unable to write input buffer" );
+
+        if (test_double)
+        {
+          err = clEnqueueWriteBuffer( queue, streams[ 3 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
+          test_error( err, "Unable to write input buffer" );
+        }
+    }
+
+    for( i = 0; i < kTotalVecCount; i++ )
+    {
+        char programSrc[ 10240 ];
+        char vecSizeNames[][ 3 ] = { "", "2", "4", "8", "16", "3" };
+
+        if(i >= kVectorSizeCount) {
+            // do vec3 print
+
+            if(vectorSecondParam) {
+            sprintf( programSrc,binary_fn_code_pattern_v3, "", "float", "float", "float", fnName );
+        } else  {
+            sprintf( programSrc,binary_fn_code_pattern_v3_scalar, "", "float", "float", "float", fnName );
+            }
+        } else  {
+            // do regular
+            sprintf( programSrc, binary_fn_code_pattern, "", "float", vecSizeNames[ i ], "float", vectorSecondParam ? vecSizeNames[ i ] : "", "float", vecSizeNames[ i ], fnName );
+        }
+        const char *ptr = programSrc;
+        err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &ptr, "test_fn" );
+        test_error( err, "Unable to create kernel" );
+
+        if (test_double)
+        {
+        if(i >= kVectorSizeCount) {
+        if(vectorSecondParam) {
+            sprintf( programSrc, binary_fn_code_pattern_v3, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
+            "double",  "double",  "double",  fnName );
+        } else {
+
+        sprintf( programSrc, binary_fn_code_pattern_v3_scalar, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
+                 "double",  "double",  "double",  fnName );
+        }
+        } else {
+        sprintf( programSrc, binary_fn_code_pattern, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
+            "double", vecSizeNames[ i ], "double", vectorSecondParam ? vecSizeNames[ i ] : "", "double", vecSizeNames[ i ], fnName );
+        }
+            ptr = programSrc;
+            err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &ptr, "test_fn" );
+            test_error( err, "Unable to create kernel" );
+        }
+    }
+
+    for( i = 0; i < kTotalVecCount; i++ )
+    {
+        for( j = 0; j < 3; j++ )
+        {
+            err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
+            test_error( err, "Unable to set kernel argument" );
+        }
+
+        threads[0] = (size_t)n_elems;
+
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( err, "Unable to execute kernel" );
+
+        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        test_error( err, "Unable to read results" );
+
+
+
+        if( floatVerifyFn( input_ptr[0], input_ptr[1], output_ptr, n_elems, ((g_arrVecSizes[i])) ) )
+        {
+            log_error(" float%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
+            err = -1;
+        }
+        else
+        {
+            log_info(" float%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
+            err = 0;
+        }
+
+        if (err)
+            break;
+    }
+
+    if (test_double)
+    {
+        for( i = 0; i < kTotalVecCount; i++ )
+        {
+            for( j = 0; j < 3; j++ )
+            {
+                err = clSetKernelArg( kernel[ kTotalVecCount + i ], j, sizeof( streams[ 3 + j ] ), &streams[ 3 + j ] );
+                test_error( err, "Unable to set kernel argument" );
+            }
+
+            threads[0] = (size_t)n_elems;
+
+            err = clEnqueueNDRangeKernel( queue, kernel[kTotalVecCount + i], 1, NULL, threads, NULL, 0, NULL, NULL );
+            test_error( err, "Unable to execute kernel" );
+
+            err = clEnqueueReadBuffer( queue, streams[5], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
+            test_error( err, "Unable to read results" );
+
+            if( doubleVerifyFn( input_ptr_double[0], input_ptr_double[1], output_ptr_double, n_elems, ((g_arrVecSizes[i]))))
+            {
+                log_error(" double%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
+                err = -1;
+            }
+            else
+            {
+                log_info(" double%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
+                err = 0;
+            }
+
+            if (err)
+            break;
+        }
+    }
+
+
+    for( i = 0; i < ((test_double) ? 6 : 3); i++ )
+    {
+        clReleaseMemObject(streams[i]);
+    }
+    for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount) ; i++)
+    {
+        clReleaseKernel(kernel[i]);
+        clReleaseProgram(program[i]);
+    }
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(output_ptr);
+      free(program);
+      free(kernel);
+
+    if (test_double)
+    {
+        free(input_ptr_double[0]);
+        free(input_ptr_double[1]);
+        free(output_ptr_double);
+    }
+
+    return err;
+}
+
+
--- a/test_conformance/commonfns/test_clamp.c
+++ b/test_conformance/commonfns/test_clamp.c
@@ -1,318 +1,318 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "../../test_common/harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
-#endif
-
-#define CLAMP_KERNEL( type )						\
-    const char *clamp_##type##_kernel_code =				\
-	EMIT_PRAGMA_DIRECTIVE						\
-	"__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
-	"{\n"								\
-	"    int  tid = get_global_id(0);\n"				\
-	"\n"								\
-	"    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"	\
-	"}\n";
-
-#define CLAMP_KERNEL_V( type, size)					\
-    const char *clamp_##type##size##_kernel_code =			\
-	EMIT_PRAGMA_DIRECTIVE						\
-	"__kernel void test_clamp(__global " #type #size " *x, __global " #type #size " *minval, __global " #type #size " *maxval, __global " #type #size " *dst)\n" \
-	"{\n"								\
-	"    int  tid = get_global_id(0);\n"				\
-	"\n"								\
-	"    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"	\
-	"}\n";
-
-#define CLAMP_KERNEL_V3( type, size)					\
-    const char *clamp_##type##size##_kernel_code =			\
-	EMIT_PRAGMA_DIRECTIVE						\
-	"__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
-	"{\n"								\
-	"    int  tid = get_global_id(0);\n"				\
-	"\n"								\
-	"    vstore3(clamp(vload3(tid, x), vload3(tid,minval), vload3(tid,maxval)), tid, dst);\n"	\
-	"}\n";
-
-#define EMIT_PRAGMA_DIRECTIVE " "
-CLAMP_KERNEL( float )
-CLAMP_KERNEL_V( float, 2 )
-CLAMP_KERNEL_V( float, 4 )
-CLAMP_KERNEL_V( float, 8 )
-CLAMP_KERNEL_V( float, 16 )
-CLAMP_KERNEL_V3( float, 3)
-#undef EMIT_PRAGMA_DIRECTIVE
-
-#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-CLAMP_KERNEL( double )
-CLAMP_KERNEL_V( double, 2 )
-CLAMP_KERNEL_V( double, 4 )
-CLAMP_KERNEL_V( double, 8 )
-CLAMP_KERNEL_V( double, 16 )
-CLAMP_KERNEL_V3( double, 3 )
-#undef EMIT_PRAGMA_DIRECTIVE
-
-const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, clamp_float16_kernel_code, clamp_float3_kernel_code };
-const char *clamp_double_codes[] = { clamp_double_kernel_code, clamp_double2_kernel_code, clamp_double4_kernel_code, clamp_double8_kernel_code, clamp_double16_kernel_code, clamp_double3_kernel_code };
-
-static int verify_clamp(float *x, float *minval, float *maxval, float *outptr, int n)
-{
-    float       t;
-    int         i;
-
-    for (i=0; i<n; i++)
-	{
-	    t = fminf( fmaxf( x[ i ], minval[ i ] ), maxval[ i ] );
-	    if (t != outptr[i])
-		{
-		    log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, x[i], minval[i], maxval[i], t, outptr[i] );
-		    return -1;
-		}
-	}
-
-    return 0;
-}
-
-static int verify_clamp_double(double *x, double *minval, double *maxval, double *outptr, int n)
-{
-    double       t;
-    int         i;
-	
-    for (i=0; i<n; i++)
-	{
-	    t = fmin( fmax( x[ i ], minval[ i ] ), maxval[ i ] );
-	    if (t != outptr[i])
-		{
-		    log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, x[i], minval[i], maxval[i], t, outptr[i] );
-		    return -1;
-		}
-	}
-	
-    return 0;
-}
-
-int
-test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[8];
-    cl_float      *input_ptr[3], *output_ptr;
-    cl_double     *input_ptr_double[3], *output_ptr_double = NULL;
-    cl_program  *program;
-    cl_kernel   *kernel;
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i, j;
-    MTdata d;
-  
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
-
-    num_elements = n_elems * (1 << (kVectorSizeCount-1));
-
-    int test_double = 0;
-    if(is_extension_available( device, "cl_khr_fp64" )) {
-	log_info("Testing doubles.\n");
-  	test_double = 1;
-    }
-  
-  
-    // why does this go from 0 to 2?? -- Oh, I see, there are four function
-    // arguments to the function, and 3 of them are inputs?
-    for( i = 0; i < 3; i++ )
-	{
-	    input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-	    if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-	}
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-
-    // why does this go from 0 to 3?
-    for( i = 0; i < 4; i++ )
-	{
-	    streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-	    if (!streams[0])
-		{
-		    log_error("clCreateBuffer failed\n");
-		    return -1;
-		}
-	}
-    if (test_double)
-	for( i = 4; i < 8; i++ )
-	    {
-		streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
-		if (!streams[0])
-		    {
-			log_error("clCreateBuffer failed\n");
-			return -1;
-		    }
-	    }
-  
-    d = init_genrand( gRandomSeed );
-    for( j = 0; j < num_elements; j++ )
-	{
-	    input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
-	    input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
-	    input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x20000000, d);
-		
-	    if (test_double) {
-		input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
-		input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
-		input_ptr_double[2][j] = get_random_double(input_ptr_double[1][j], 0x20000000, d);
-	    }
-	}
-    free_mtdata(d); d = NULL;
-  
-    for( i = 0; i < 3; i++ )
-	{
-	    err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
-	    test_error( err, "Unable to write input buffer" );
-
-	    if (test_double) {
-		err = clEnqueueWriteBuffer( queue, streams[ 4 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
-		test_error( err, "Unable to write input buffer" );
-	    }
-	}
-	
-    for( i = 0; i < kTotalVecCount; i++ )
-	{
-	    err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &clamp_float_codes[ i ], "test_clamp" );
-	    test_error( err, "Unable to create kernel" );
-
-		log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], i);
-		fflush(stdout);
-    
-	    if (test_double) {
-		err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &clamp_double_codes[ i ], "test_clamp" );
-		log_info("Just made a program for double, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], kTotalVecCount+i);
-		fflush(stdout);
-		test_error( err, "Unable to create kernel" );
-	    }
-	}
-  
-    for( i = 0; i < kTotalVecCount; i++ )
-	{
-	    for( j = 0; j < 4; j++ )
-		{
-		    err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
-		    test_error( err, "Unable to set kernel argument" );
-		}
-	
-	    threads[0] = (size_t)n_elems;
-		
-	    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-	    test_error( err, "Unable to execute kernel" );
-		
-	    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-	    test_error( err, "Unable to read results" );
-		
-	    if (verify_clamp(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems*((g_arrVecSizes[i]))))
-		{
-		    log_error("CLAMP float%d test failed\n", ((g_arrVecSizes[i])));
-		    err = -1;
-		}
-	    else
-		{
-		    log_info("CLAMP float%d test passed\n", ((g_arrVecSizes[i])));
-		    err = 0;
-		}
-
-	    
-		
-	    if (err)
-		break;
-	}
-
-    // If the device supports double precision then test that
-    if (test_double) 
-	{
-	    for( ; i < 2*kTotalVecCount; i++ )
-		{
-
-		    log_info("Start of test_double loop, i is %d\n", i);
-		    for( j = 0; j < 4; j++ )
-			{
-			    err = clSetKernelArg( kernel[i], j, sizeof( streams[j+4] ), &streams[j+4] );
-			    test_error( err, "Unable to set kernel argument" );
-			}
-          
-		    threads[0] = (size_t)n_elems;
-          
-		    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-		    test_error( err, "Unable to execute kernel" );
-          
-		    err = clEnqueueReadBuffer( queue, streams[7], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
-		    test_error( err, "Unable to read results" );
-          
-		    if (verify_clamp_double(input_ptr_double[0], input_ptr_double[1], input_ptr_double[2], output_ptr_double, n_elems*g_arrVecSizes[(i-kTotalVecCount)]))
-			{
-			    log_error("CLAMP double%d test failed\n", g_arrVecSizes[(i-kTotalVecCount)]);
-			    err = -1;
-			}
-		    else
-			{
-			    log_info("CLAMP double%d test passed\n", g_arrVecSizes[(i-kTotalVecCount)]);
-			    err = 0;
-			}
-          
-		    if (err)
-			break;
-		}
-	}
-	
-  
-    for( i = 0; i < ((test_double) ? 8 : 4); i++ )
-	{
-	    clReleaseMemObject(streams[i]);
-	}
-    for (i=0; i < ((test_double) ? kTotalVecCount * 2-1 : kTotalVecCount); i++)
-	{
-	    clReleaseKernel(kernel[i]);
-	    clReleaseProgram(program[i]);
-	}
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    free(program);
-    free(kernel);
-    if (test_double) {
-        free(input_ptr_double[0]);
-        free(input_ptr_double[1]);
-        free(input_ptr_double[2]);
-        free(output_ptr_double);
-    }
-	
-    return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "../../test_common/harness/compat.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846264338327950288
+#endif
+// CLAMP_KERNEL(type): defines clamp_<type>_kernel_code, the OpenCL C source (prefixed with EMIT_PRAGMA_DIRECTIVE, which must be defined at the point of use) of a scalar test_clamp kernel storing clamp(x, minval, maxval) per work-item.
+#define CLAMP_KERNEL( type )                        \
+    const char *clamp_##type##_kernel_code =                \
+    EMIT_PRAGMA_DIRECTIVE                        \
+    "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
+    "{\n"                                \
+    "    int  tid = get_global_id(0);\n"                \
+    "\n"                                \
+    "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"    \
+    "}\n";
+// CLAMP_KERNEL_V(type, size): same as the scalar form, but the kernel arguments are pointers to the vector type <type><size> and the variable is named clamp_<type><size>_kernel_code.
+#define CLAMP_KERNEL_V( type, size)                    \
+    const char *clamp_##type##size##_kernel_code =            \
+    EMIT_PRAGMA_DIRECTIVE                        \
+    "__kernel void test_clamp(__global " #type #size " *x, __global " #type #size " *minval, __global " #type #size " *maxval, __global " #type #size " *dst)\n" \
+    "{\n"                                \
+    "    int  tid = get_global_id(0);\n"                \
+    "\n"                                \
+    "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"    \
+    "}\n";
+// CLAMP_KERNEL_V3(type, size): 3-component variant; the kernel takes scalar <type> pointers and uses vload3/vstore3, so `size` only contributes to the variable name clamp_<type><size>_kernel_code.
+#define CLAMP_KERNEL_V3( type, size)                    \
+    const char *clamp_##type##size##_kernel_code =            \
+    EMIT_PRAGMA_DIRECTIVE                        \
+    "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
+    "{\n"                                \
+    "    int  tid = get_global_id(0);\n"                \
+    "\n"                                \
+    "    vstore3(clamp(vload3(tid, x), vload3(tid,minval), vload3(tid,maxval)), tid, dst);\n"    \
+    "}\n";
+// Float kernel sources: the directive slot is filled with a single space, i.e. no extension pragma is emitted.
+#define EMIT_PRAGMA_DIRECTIVE " "
+CLAMP_KERNEL( float )
+CLAMP_KERNEL_V( float, 2 )
+CLAMP_KERNEL_V( float, 4 )
+CLAMP_KERNEL_V( float, 8 )
+CLAMP_KERNEL_V( float, 16 )
+CLAMP_KERNEL_V3( float, 3)
+#undef EMIT_PRAGMA_DIRECTIVE
+// Double kernel sources: each one starts with the pragma that enables cl_khr_fp64.
+#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+CLAMP_KERNEL( double )
+CLAMP_KERNEL_V( double, 2 )
+CLAMP_KERNEL_V( double, 4 )
+CLAMP_KERNEL_V( double, 8 )
+CLAMP_KERNEL_V( double, 16 )
+CLAMP_KERNEL_V3( double, 3 )
+#undef EMIT_PRAGMA_DIRECTIVE
+// Kernel sources in the order scalar, 2, 4, 8, 16, 3; test_clamp indexes both tables with i in [0, kTotalVecCount) (presumably the same order as g_arrVecSizes -- TODO confirm).
+const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, clamp_float16_kernel_code, clamp_float3_kernel_code };
+const char *clamp_double_codes[] = { clamp_double_kernel_code, clamp_double2_kernel_code, clamp_double4_kernel_code, clamp_double8_kernel_code, clamp_double16_kernel_code, clamp_double3_kernel_code };
+
+// Host check for the float kernels: returns 0 if each of the n outputs equals fminf(fmaxf(x, minval), maxval), otherwise logs the first mismatch and returns -1.
+static int verify_clamp(float *x, float *minval, float *maxval, float *outptr, int n)
+{
+    int idx;
+
+    for (idx = 0; idx < n; ++idx)
+    {
+        const float expected = fminf( fmaxf( x[ idx ], minval[ idx ] ), maxval[ idx ] );
+        if (expected == outptr[idx])
+            continue;
+
+        log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", idx, x[idx], minval[idx], maxval[idx], expected, outptr[idx] );
+        return -1;
+    }
+
+    return 0;
+}
+
+// Host check for the double kernels: returns 0 if each of the n outputs equals fmin(fmax(x, minval), maxval), otherwise logs the first mismatch and returns -1.
+static int verify_clamp_double(double *x, double *minval, double *maxval, double *outptr, int n)
+{
+    int idx;
+
+    for (idx = 0; idx < n; ++idx)
+    {
+        const double expected = fmin( fmax( x[ idx ], minval[ idx ] ), maxval[ idx ] );
+        if (expected == outptr[idx])
+            continue;
+
+        log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", idx, x[idx], minval[idx], maxval[idx], expected, outptr[idx] );
+        return -1;
+    }
+
+    return 0;
+}
+
+// Runs clamp() on the device for float and, when cl_khr_fp64 is reported, double at each
+// of the kTotalVecCount vector widths and checks every result against a host fmin/fmax
+// reference.  Returns 0 when all widths pass, -1 on a mismatch or buffer-creation failure.
+int
+test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    cl_mem      streams[8];  // [0..2] float inputs, [3] float dst, [4..6] double inputs, [7] double dst
+    cl_float      *input_ptr[3], *output_ptr;
+    cl_double     *input_ptr_double[3], *output_ptr_double = NULL;
+    cl_program  *program;
+    cl_kernel   *kernel;
+    size_t threads[1];
+    int num_elements;
+    int err;
+    int i, j;
+    MTdata d;
+
+    // Slots [0, kTotalVecCount) hold float kernels, [kTotalVecCount, 2*kTotalVecCount) double ones.
+    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
+    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
+
+    // Buffers hold n_elems * 2^(kVectorSizeCount-1) scalars (presumably n_elems vectors of the widest type).
+    num_elements = n_elems * (1 << (kVectorSizeCount-1));
+
+    int test_double = 0;
+    if(is_extension_available( device, "cl_khr_fp64" )) {
+        log_info("Testing doubles.\n");
+        test_double = 1;
+    }
+
+    // Three host input arrays (x, minval, maxval) plus one output array per precision.
+    for( i = 0; i < 3; i++ )
+    {
+        input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+        if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    }
+    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+
+    // Four device buffers per precision: the three inputs and the kernel's dst.
+    for( i = 0; i < 4; i++ )
+    {
+        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+        if (!streams[ i ])  // test the buffer just created, not only streams[0]
+        {
+            log_error("clCreateBuffer failed\n");
+            return -1;
+        }
+    }
+    if (test_double)
+    {
+        for( i = 4; i < 8; i++ )
+        {
+            streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
+            if (!streams[ i ])
+            {
+                log_error("clCreateBuffer failed\n");
+                return -1;
+            }
+        }
+    }
+
+    // maxval is drawn from [minval, 0x20000000], keeping minval <= maxval for every element.
+    d = init_genrand( gRandomSeed );
+    for( j = 0; j < num_elements; j++ )
+    {
+        input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
+        input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
+        input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x20000000, d);
+        if (test_double) {
+            input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr_double[2][j] = get_random_double(input_ptr_double[1][j], 0x20000000, d);
+        }
+    }
+    free_mtdata(d); d = NULL;
+
+    for( i = 0; i < 3; i++ )
+    {
+        err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
+        test_error( err, "Unable to write input buffer" );
+
+        if (test_double) {
+            err = clEnqueueWriteBuffer( queue, streams[ 4 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
+            test_error( err, "Unable to write input buffer" );
+        }
+    }
+
+    for( i = 0; i < kTotalVecCount; i++ )
+    {
+        err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &clamp_float_codes[ i ], "test_clamp" );
+        test_error( err, "Unable to create kernel" );
+        log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], i);
+        fflush(stdout);
+
+        if (test_double) {
+            err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &clamp_double_codes[ i ], "test_clamp" );
+            test_error( err, "Unable to create kernel" );  // check the result before logging success
+            log_info("Just made a program for double, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], kTotalVecCount+i);
+            fflush(stdout);
+        }
+    }
+
+    for( i = 0; i < kTotalVecCount; i++ )
+    {
+        for( j = 0; j < 4; j++ )
+        {
+            err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
+            test_error( err, "Unable to set kernel argument" );
+        }
+
+        threads[0] = (size_t)n_elems;
+
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        test_error( err, "Unable to execute kernel" );
+
+        err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        test_error( err, "Unable to read results" );
+
+        if (verify_clamp(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems*((g_arrVecSizes[i]))))
+        {
+            log_error("CLAMP float%d test failed\n", ((g_arrVecSizes[i])));
+            err = -1;
+        }
+        else
+        {
+            log_info("CLAMP float%d test passed\n", ((g_arrVecSizes[i])));
+            err = 0;
+        }
+        if (err)
+            break;
+    }
+
+    // Start explicitly at the first double slot and skip this pass after a float failure, so a
+    // float kernel is never paired with the double buffers and the float result is not overwritten.
+    if (test_double && err == 0)
+    {
+        for( i = kTotalVecCount; i < 2*kTotalVecCount; i++ )
+        {
+            log_info("Start of test_double loop, i is %d\n", i);
+            for( j = 0; j < 4; j++ )
+            {
+                err = clSetKernelArg( kernel[i], j, sizeof( streams[j+4] ), &streams[j+4] );
+                test_error( err, "Unable to set kernel argument" );
+            }
+
+            threads[0] = (size_t)n_elems;
+
+            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+            test_error( err, "Unable to execute kernel" );
+
+            err = clEnqueueReadBuffer( queue, streams[7], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
+            test_error( err, "Unable to read results" );
+
+            if (verify_clamp_double(input_ptr_double[0], input_ptr_double[1], input_ptr_double[2], output_ptr_double, n_elems*g_arrVecSizes[(i-kTotalVecCount)]))
+            {
+                log_error("CLAMP double%d test failed\n", g_arrVecSizes[(i-kTotalVecCount)]);
+                err = -1;
+            }
+            else
+            {
+                log_info("CLAMP double%d test passed\n", g_arrVecSizes[(i-kTotalVecCount)]);
+                err = 0;
+            }
+            if (err)
+                break;
+        }
+    }
+
+    for( i = 0; i < ((test_double) ? 8 : 4); i++ )
+    {
+        clReleaseMemObject(streams[i]);
+    }
+    // Release every created slot, including the last double kernel/program.
+    for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount); i++)
+    {
+        clReleaseKernel(kernel[i]);
+        clReleaseProgram(program[i]);
+    }
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(input_ptr[2]);
+    free(output_ptr);
+    free(program);
+    free(kernel);
+    if (test_double) {
+        free(input_ptr_double[0]);
+        free(input_ptr_double[1]);
+        free(input_ptr_double[2]);
+        free(output_ptr_double);
+    }
+
+    return err;
+}
+
+
--- a/test_conformance/commonfns/test_degrees.c
+++ b/test_conformance/commonfns/test_degrees.c
@@ -1,477 +1,477 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
-#endif
-
-static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *degrees_kernel_code = 
-"__kernel void test_degrees(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees2_kernel_code = 
-"__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees4_kernel_code = 
-"__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees8_kernel_code = 
-"__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees16_kernel_code = 
-"__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees3_kernel_code = 
-"__kernel void test_degrees3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static int
-verify_degrees(float *inptr, float *outptr, int n)
-{
-	float error, max_error = 0.0f;
-	double   r, max_val = NAN;
-	int     i, j, max_index = 0;
-    
-	for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (180.0 / M_PI) * inptr[i];
-        error = Ulp_Error( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-    
-	log_info( "degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-    
-	return 0;
-}
-
-int
-test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-	cl_mem       streams[2];
-	cl_float     *input_ptr[1], *output_ptr, *p;
-	cl_program   *program;
-	cl_kernel    *kernel;
-	void        *values[2];
-	size_t threads[1];
-	int          num_elements;
-	int          err;
-	int          i;
-	MTdata        d;
-    
-	program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-	kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-    
-	num_elements = n_elems * (1 << (kTotalVecCount-1));
-    
-	input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-	output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-	streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-	if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    
-	streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-	if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    
-	p = input_ptr[0];
-	d = init_genrand( gRandomSeed );
-	for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
-    }
-	free_mtdata(d); d = NULL;
-    
-	err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-	if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    
-	err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code, "test_degrees" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code, "test_degrees2" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code, "test_degrees4" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code, "test_degrees8" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code, "test_degrees16" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code, "test_degrees3" );
-	if (err)
-		return -1;
-    
-	values[0] = streams[0];
-	values[1] = streams[1];
-	for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-    
-	for (i=0; i < kTotalVecCount; i++)
-    {
-        
-        // Line below is troublesome...
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-        
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-        
-        if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
-        }
-        
-        if (err)
-            break;
-    }
-    
-	clReleaseMemObject(streams[0]);
-	clReleaseMemObject(streams[1]);
-	for (i=0; i < kTotalVecCount; i++) {
-		clReleaseKernel(kernel[i]);
-		clReleaseProgram(program[i]);
-	}
-	free(program);
-	free(kernel);
-	free(input_ptr[0]);
-	free(output_ptr);
-    
-    if( err )
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ) )
-    {
-        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
-        return 0;
-    }
-
-    return test_degrees_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-const char *degrees_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees2_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees4_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees8_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees16_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees3_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static int
-verify_degrees_double(double *inptr, double *outptr, int n)
-{
-	float error, max_error = 0.0f;
-	double   r, max_val = NAN;
-	int     i, j, max_index = 0;
-    
-	for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
-        error = Ulp_Error_Double( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-    
-	log_info( "degreesd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-    
-	return 0;
-}
-
-static int
-test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-	cl_mem       streams[2];
-	cl_double    *input_ptr[1], *output_ptr, *p;
-	cl_program   *program;
-	cl_kernel    *kernel;
-	void        *values[2];
-	size_t threads[1];
-	int          num_elements;
-	int          err;
-	int          i;
-	MTdata        d;
-    
-	program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-	kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-    
-	// TODO: line below is clearly wrong
-	num_elements = n_elems * (1 << (kTotalVecCount-1));
-    
-	input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-	output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-	streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
-	if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    
-	streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
-	if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    
-	p = input_ptr[0];
-	d = init_genrand( gRandomSeed );
-	for (i=0; i<num_elements; i++)
-        p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
-    
-	free_mtdata(d); d = NULL;
-    
-	err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-	if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    
-	err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code_double, "test_degrees_double" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code_double, "test_degrees2_double" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code_double, "test_degrees4_double" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code_double, "test_degrees8_double" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code_double, "test_degrees16_double" );
-	if (err)
-		return -1;
-	err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code_double, "test_degrees3_double" );
-	if (err)
-		return -1;
-    
-	values[0] = streams[0];
-	values[1] = streams[1];
-	for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-    
-	for (i=0; i < kTotalVecCount; i++)
-    {
-        
-        // Line below is troublesome...
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-        
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-        
-        if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
-        }
-        
-        if (err)
-            break;
-    }
-    
-	clReleaseMemObject(streams[0]);
-	clReleaseMemObject(streams[1]);
-	for (i=0; i < kTotalVecCount; i++) {
-		clReleaseKernel(kernel[i]);
-		clReleaseProgram(program[i]);
-	}
-	free(program);
-	free(kernel);
-	free(input_ptr[0]);
-	free(output_ptr);
-    
-	return err;
-}
-
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846264338327950288
+#endif
+
+static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+
+
+const char *degrees_kernel_code =  // float: dst[tid] = degrees(src[tid]), tid = get_global_id(0)
+"__kernel void test_degrees(__global float *src, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees2_kernel_code =  // float2: same body, one 2-wide vector per tid
+"__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees4_kernel_code =  // float4: same body, one 4-wide vector per tid
+"__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees8_kernel_code =  // float8: same body, one 8-wide vector per tid
+"__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees16_kernel_code =  // float16: same body, one 16-wide vector per tid
+"__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees3_kernel_code =  // 3-wide: takes plain float pointers and goes through vload3/vstore3
+"__kernel void test_degrees3(__global float *src, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
+"}\n";
+
+
+#define MAX_ERR  2.0f
+
+// Checks outptr[i] against (180/pi)*inptr[i] for i in [0, n): returns 1 after logging the
+// first element off by more than MAX_ERR ulps, else logs the worst error and returns 0.
+static int verify_degrees(float *inptr, float *outptr, int n)
+{
+    float    error, max_error = 0.0f;
+    double   r, max_val = NAN;
+    int      i, max_index = 0;
+
+    for (i=0; i<n; i++)
+    {
+        r = (180.0 / M_PI) * inptr[i];
+        error = Ulp_Error( outptr[i], r );
+        // Rank by magnitude; a signed maximum goes negative and is then beaten by every later element.
+        if( fabsf(error) > fabsf(max_error))
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if( fabsf(error) > MAX_ERR)
+            {
+                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
+                return 1;
+            }
+        }
+    }
+    log_info( "degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
+    return 0;
+}
+
+// Float test for degrees(): runs one kernel per vector width over random input,
+// checks each width against a host reference, then chains to the double test
+// when the device reports cl_khr_fp64.
+//
+// device, context, queue - OpenCL objects owned by the caller.
+// n_elems - base element count; buffers hold n_elems * (1 << (kTotalVecCount-1)) values.
+//
+// Returns 0 on success (including a skipped double pass), non-zero otherwise.
+// Every exit goes through the cleanup label, so host memory and OpenCL objects
+// are released on failure as well as on success.
+int
+test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Kernel sources and entry points; slot k is launched with g_arrVecSizes[k].
+    const char **sources[] = { &degrees_kernel_code, &degrees2_kernel_code, &degrees4_kernel_code,
+                               &degrees8_kernel_code, &degrees16_kernel_code, &degrees3_kernel_code };
+    const char  *names[] = { "test_degrees", "test_degrees2", "test_degrees4",
+                             "test_degrees8", "test_degrees16", "test_degrees3" };
+    const int    num_kernels = (int)(sizeof(names) / sizeof(names[0]));
+    cl_mem       streams[2] = { NULL, NULL };
+    cl_float     *input_ptr[1] = { NULL }, *output_ptr = NULL, *p;
+    cl_program   *program = NULL;
+    cl_kernel    *kernel = NULL;
+    size_t       threads[1];
+    cl_uint      dead = 0xdeaddead;
+    int          num_elements;
+    int          result = -1;   // stays -1 unless every vector width passes
+    int          err;
+    int          i;
+    MTdata       d;
+
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+
+    // calloc leaves unused handle slots NULL so the cleanup code can skip them.
+    program = (cl_program*)calloc(kTotalVecCount, sizeof(cl_program));
+    kernel = (cl_kernel*)calloc(kTotalVecCount, sizeof(cl_kernel));
+    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    if (!program || !kernel || !input_ptr[0] || !output_ptr)
+    {
+        log_error("malloc failed\n");
+        goto cleanup;
+    }
+
+    for (i=0; i < 2; i++)
+    {
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+        if (!streams[i])
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+    }
+
+    p = input_ptr[0];
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+    {
+        p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
+    }
+    free_mtdata(d); d = NULL;
+
+    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        goto cleanup;
+    }
+
+    // NOTE(review): assumes kTotalVecCount matches the six table entries and that g_arrVecSizes uses the same order - confirm.
+    for (i=0; i < num_kernels && i < kTotalVecCount; i++)
+    {
+        err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, sources[i], names[i] );
+        if (err)
+            goto cleanup;
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            goto cleanup;
+        }
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        // One work-item per vector of g_arrVecSizes[i] elements (integer division).
+        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            goto cleanup;
+        }
+
+        // Fill the host buffer with a known pattern before the blocking read.
+        memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
+        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            goto cleanup;
+        }
+
+        if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
+        {
+            log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
+            goto cleanup;
+        }
+        log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
+    }
+    result = 0;
+
+cleanup:
+    for (i=0; i < 2; i++)
+        if (streams[i]) clReleaseMemObject(streams[i]);
+    for (i=0; program && kernel && i < kTotalVecCount; i++) {
+        if (kernel[i]) clReleaseKernel(kernel[i]);
+        if (program[i]) clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr[0]);
+    free(output_ptr);
+
+    if( result )
+        return result;
+
+    if( ! is_extension_available( device, "cl_khr_fp64" ) )
+    {
+        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
+        return 0;
+    }
+    return test_degrees_double( device, context, queue, n_elems);
+}
+
+#pragma mark -
+
+const char *degrees_kernel_code_double =  // double: dst[tid] = degrees(src[tid]); source enables cl_khr_fp64
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees2_kernel_code_double =  // double2: same body, one 2-wide vector per tid
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees4_kernel_code_double =  // double4: same body, one 4-wide vector per tid
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees8_kernel_code_double =  // double8: same body, one 8-wide vector per tid
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees16_kernel_code_double =  // double16: same body, one 16-wide vector per tid
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = degrees(src[tid]);\n"
+"}\n";
+
+const char *degrees3_kernel_code_double =  // 3-wide: takes plain double pointers and goes through vload3/vstore3
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
+"}\n";
+
+
+#define MAX_ERR  2.0f
+
+// Checks outptr[i] against (180/pi)*inptr[i] for i in [0, n): returns 1 after logging the
+// first element off by more than MAX_ERR ulps, else logs the worst error and returns 0.
+static int verify_degrees_double(double *inptr, double *outptr, int n)
+{
+    float    error, max_error = 0.0f;
+    double   r, max_val = NAN;
+    int      i, max_index = 0;
+
+    for (i=0; i<n; i++)
+    {
+        r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
+        error = Ulp_Error_Double( outptr[i], r );
+        // Rank by magnitude; a signed maximum goes negative and is then beaten by every later element.
+        if( fabsf(error) > fabsf(max_error))
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if( fabsf(error) > MAX_ERR)
+            {
+                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
+                return 1;
+            }
+        }
+    }
+    log_info( "degreesd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
+    return 0;
+}
+
+// Double-precision counterpart of test_degrees: same flow with cl_double buffers
+// and the *_double kernels. test_degrees calls it only after finding cl_khr_fp64.
+//
+// device, context, queue - OpenCL objects owned by the caller.
+// n_elems - base element count; buffers hold n_elems * (1 << (kTotalVecCount-1)) values.
+//
+// Returns 0 when every vector width verifies, -1 otherwise. Every exit goes
+// through the cleanup label, so nothing is leaked on failure.
+static int
+test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Kernel sources and entry points; slot k is launched with g_arrVecSizes[k].
+    const char **sources[] = { &degrees_kernel_code_double, &degrees2_kernel_code_double, &degrees4_kernel_code_double,
+                               &degrees8_kernel_code_double, &degrees16_kernel_code_double, &degrees3_kernel_code_double };
+    const char  *names[] = { "test_degrees_double", "test_degrees2_double", "test_degrees4_double",
+                             "test_degrees8_double", "test_degrees16_double", "test_degrees3_double" };
+    const int    num_kernels = (int)(sizeof(names) / sizeof(names[0]));
+    cl_mem       streams[2] = { NULL, NULL };
+    cl_double    *input_ptr[1] = { NULL }, *output_ptr = NULL, *p;
+    cl_program   *program = NULL;
+    cl_kernel    *kernel = NULL;
+    size_t       threads[1];
+    cl_uint      dead = 0xdeaddead;
+    int          num_elements;
+    int          result = -1;   // stays -1 unless every vector width passes
+    int          err;
+    int          i;
+    MTdata       d;
+
+    // TODO: line below is clearly wrong
+    // NOTE(review): the TODO above is inherited from the original source, which does not say what is wrong.
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+
+    // calloc leaves unused handle slots NULL so the cleanup code can skip them.
+    program = (cl_program*)calloc(kTotalVecCount, sizeof(cl_program));
+    kernel = (cl_kernel*)calloc(kTotalVecCount, sizeof(cl_kernel));
+    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    if (!program || !kernel || !input_ptr[0] || !output_ptr)
+    {
+        log_error("malloc failed\n");
+        goto cleanup;
+    }
+
+    for (i=0; i < 2; i++)
+    {
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
+        if (!streams[i])
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+    }
+
+    p = input_ptr[0];
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
+
+    free_mtdata(d); d = NULL;
+
+    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        goto cleanup;
+    }
+
+    // NOTE(review): assumes kTotalVecCount matches the six table entries and that g_arrVecSizes uses the same order - confirm.
+    for (i=0; i < num_kernels && i < kTotalVecCount; i++)
+    {
+        err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, sources[i], names[i] );
+        if (err)
+            goto cleanup;
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            goto cleanup;
+        }
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        // One work-item per vector of g_arrVecSizes[i] elements (integer division).
+        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            goto cleanup;
+        }
+
+        // Fill the host buffer with a known pattern before the blocking read.
+        memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
+        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            goto cleanup;
+        }
+
+        if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
+        {
+            log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
+            goto cleanup;
+        }
+        log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
+    }
+    result = 0;
+
+cleanup:
+    for (i=0; i < 2; i++)
+        if (streams[i]) clReleaseMemObject(streams[i]);
+    for (i=0; program && kernel && i < kTotalVecCount; i++) {
+        if (kernel[i]) clReleaseKernel(kernel[i]);
+        if (program[i]) clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr[0]);
+    free(output_ptr);
+
+    return result;
+}
+
+
+
--- a/test_conformance/commonfns/test_fmax.c
+++ b/test_conformance/commonfns/test_fmax.c
@@ -1,240 +1,240 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmax_kernel_code = 
-    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax2_kernel_code = 
-    "__kernel void test_fmax2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax4_kernel_code = 
-    "__kernel void test_fmax4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax8_kernel_code = 
-    "__kernel void test_fmax8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax16_kernel_code = 
-    "__kernel void test_fmax16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-
-static const char *fmax3_kernel_code = 
-    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmax(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmax(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-	
-    for (i=0; i<n; i++)
-	{
-	    r = (inptrA[i] >= inptrB[i]) ? inptrA[i] : inptrB[i];
-	    if (r != outptr[i])
-		return -1;
-	}
-	
-    return 0;
-}
-
-int
-test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float     *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-    
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-    
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-    
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[0])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[1])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[2])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-	{
-	    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-	}
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-	{
-	    p[i] = get_random_float(-0x20000000, 0x20000000,d );
-	}
-    free_mtdata(d); d = NULL;
-    
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-	
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmax_kernel_code, "test_fmax" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmax2_kernel_code, "test_fmax2" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmax4_kernel_code, "test_fmax4" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmax8_kernel_code, "test_fmax8" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmax16_kernel_code, "test_fmax16" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmax3_kernel_code, "test_fmax3" );
-    if (err)
-	return -1;
-    
-    
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	    err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clSetKernelArgs failed\n");
-		    return -1;
-		}
-	}
-	
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueNDRangeKernel failed\n");
-		    return -1;
-		}
-		
-	    err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, output_ptr, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueReadBuffer failed\n");
-		    return -1;
-		}
-		
-	    if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
-		{
-		    log_error("FMAX float%d test failed\n", (g_arrVecSizes[i]));
-		    err = -1;
-		}
-	    else
-		{
-		    log_info("FMAX float%d test passed\n", (g_arrVecSizes[i]));
-		    err = 0;
-		}
-		
-	    if (err)
-		break;
-	}
-	
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    clReleaseKernel(kernel[i]);
-	    clReleaseProgram(program[i]);
-	}
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-	
-    return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+static const char *fmax_kernel_code = // scalar: one float from srcA and srcB per work-item
+    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax2_kernel_code = // float2: element-wise fmax of one float2 pair per work-item
+    "__kernel void test_fmax2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax4_kernel_code = // float4: element-wise fmax of one float4 pair per work-item
+    "__kernel void test_fmax4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax8_kernel_code = // float8: element-wise fmax of one float8 pair per work-item
+    "__kernel void test_fmax8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax16_kernel_code = // float16: element-wise fmax of one float16 pair per work-item
+    "__kernel void test_fmax16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+
+static const char *fmax3_kernel_code = // 3-wide: buffers are plain float arrays accessed with vload3/vstore3 at index tid
+    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(fmax(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
+    "}\n";
+
+// Host reference check: outptr[k] must equal the larger of inptrA[k] and inptrB[k]
+// for every k in [0, n). Returns 0 when all entries match, -1 at the first difference.
+static int
+verify_fmax(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    for (int k = 0; k < n; ++k) {
+        float expected = inptrB[k];
+        if (inptrA[k] >= inptrB[k])
+            expected = inptrA[k];
+        if (!(expected == outptr[k]))
+            return -1;
+    }
+
+    return 0;
+}
+
+// Runs the fmax(floatN, floatN) test once per kernel variant built from this file's sources.
+//
+//   device  - not referenced in this function.
+//   context - context in which the buffers and programs are created.
+//   queue   - queue used for the blocking transfers and the kernel launches.
+//   n_elems - work-items per launch; n_elems * g_arrVecSizes[i] floats are verified.
+//
+// Returns 0 if every variant passes, -1 on the first failed API call or result mismatch.
+// Every exit path goes through the cleanup label, so nothing is leaked on error.
+int
+test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Kernel source / entry-point table; slot i is launched and reported as g_arrVecSizes[i].
+    const char **sources[] = { &fmax_kernel_code, &fmax2_kernel_code, &fmax4_kernel_code,
+                               &fmax8_kernel_code, &fmax16_kernel_code, &fmax3_kernel_code };
+    const char  *names[] = { "test_fmax", "test_fmax2", "test_fmax4",
+                             "test_fmax8", "test_fmax16", "test_fmax3" };
+    const int    source_count = (int)(sizeof(sources) / sizeof(sources[0]));
+    cl_mem       streams[3] = { NULL, NULL, NULL };
+    cl_float    *input_ptr[2] = { NULL, NULL };
+    cl_float    *output_ptr = NULL;
+    cl_program  *program = NULL;
+    cl_kernel   *kernel = NULL;
+    size_t       threads[1];
+    size_t       bytes;
+    int          num_elements;
+    int          err = -1;   // stays -1 for failures that produce no CL error code
+    int          i;
+    MTdata       d;
+
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+    bytes = sizeof(cl_float) * num_elements;
+
+    // Zero-initialised so that cleanup can tell which programs/kernels were actually created.
+    program = (cl_program*)calloc(kTotalVecCount, sizeof(cl_program));
+    kernel = (cl_kernel*)calloc(kTotalVecCount, sizeof(cl_kernel));
+    input_ptr[0] = (cl_float*)malloc(bytes);
+    input_ptr[1] = (cl_float*)malloc(bytes);
+    output_ptr = (cl_float*)malloc(bytes);
+    if (!program || !kernel || !input_ptr[0] || !input_ptr[1] || !output_ptr)
+    {
+        log_error("Unable to allocate host memory\n");
+        goto cleanup;
+    }
+
+    // streams[0] and streams[1] hold the two inputs, streams[2] receives the kernel output.
+    for (i=0; i<3; i++)
+    {
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), bytes, NULL, NULL );
+        if (!streams[i])
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+    }
+
+    // Both inputs are drawn from one generator: all of A first, then all of B.
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[0][i] = get_random_float(-0x20000000, 0x20000000, d);
+    for (i=0; i<num_elements; i++)
+        input_ptr[1][i] = get_random_float(-0x20000000, 0x20000000, d);
+    free_mtdata(d); d = NULL;
+
+    for (i=0; i<2; i++)
+    {
+        err = clEnqueueWriteBuffer( queue, streams[i], CL_TRUE, 0, bytes, (void *)input_ptr[i], 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clWriteArray failed\n");
+            err = -1;
+            goto cleanup;
+        }
+    }
+
+    // Build one program/kernel per table entry; any slot without a table entry stays NULL.
+    for (i=0; i < source_count && i < kTotalVecCount; i++)
+    {
+        err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, sources[i], names[i] );
+        if (err)
+        {
+            err = -1;
+            goto cleanup;
+        }
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            err = -1;
+            goto cleanup;
+        }
+    }
+
+    // n_elems work-items per launch; the whole output buffer is read back, a prefix is verified.
+    threads[0] = (size_t)n_elems;
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            err = -1;
+            goto cleanup;
+        }
+
+        err = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, bytes, output_ptr, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            err = -1;
+            goto cleanup;
+        }
+
+        if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
+        {
+            log_error("FMAX float%d test failed\n", (g_arrVecSizes[i]));
+            err = -1;
+            break;
+        }
+        log_info("FMAX float%d test passed\n", (g_arrVecSizes[i]));
+        err = 0;
+    }
+
+cleanup:
+    for (i=0; i<3; i++)
+    {
+        if (streams[i])
+            clReleaseMemObject(streams[i]);
+    }
+    for (i=0; program && kernel && i < kTotalVecCount; i++)
+    {
+        if (kernel[i])
+            clReleaseKernel(kernel[i]);
+        if (program[i])
+            clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(output_ptr);
+
+    return err;
+}
+
+
--- a/test_conformance/commonfns/test_fmaxf.c
+++ b/test_conformance/commonfns/test_fmaxf.c
@@ -1,248 +1,248 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmax_kernel_code = 
-    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax2_kernel_code = 
-    "__kernel void test_fmax2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax4_kernel_code = 
-    "__kernel void test_fmax4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax8_kernel_code = 
-    "__kernel void test_fmax8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax16_kernel_code = 
-    "__kernel void test_fmax16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax3_kernel_code = 
-    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmax(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmax(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-	
-    for (i=0; i<n; ) {
-		int ii = i/veclen;
-		for (j=0; j<veclen && i<n; ++j, ++i) {
-			r = (inptrA[i] >= inptrB[ii]) ? inptrA[i] : inptrB[ii];
-			if (r != outptr[i]) {
-				log_info("Verify noted discrepancy at %d (of %d) (vec %d, pos %d)\n",
-						 i,n,ii,j);
-				log_info("SHould be %f, is %f\n", r, outptr[i]);
-				log_info("Taking max of (%f,%f)\n", inptrA[i], inptrB[i]);
-				return -1;
-			}
-		}
-    }
-	
-    return 0;
-}
-
-int
-test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-  
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-	
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-	
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[0])
-		{
-			log_error("clCreateBuffer failed\n");
-			return -1;
-		}
-    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[1])
-		{
-			log_error("clCreateBuffer failed\n");
-			return -1;
-		}
-    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[2])
-		{
-			log_error("clCreateBuffer failed\n");
-			return -1;
-		}
-	
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-		{
-			p[i] = get_random_float(-0x20000000, 0x20000000, d);
-		}
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-		{
-			p[i] = get_random_float(-0x20000000, 0x20000000, d);
-		}
-    free_mtdata(d); d = NULL;
-    
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-								(void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-		{
-			log_error("clWriteArray failed\n");
-			return -1;
-		}
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-								(void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-		{
-			log_error("clWriteArray failed\n");
-			return -1;
-		}
-	
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmax_kernel_code, "test_fmax" );
-    if (err)
-		return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmax2_kernel_code, "test_fmax2" );
-    if (err)
-		return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmax4_kernel_code, "test_fmax4" );
-    if (err)
-		return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmax8_kernel_code, "test_fmax8" );
-    if (err)
-		return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmax16_kernel_code, "test_fmax16" );
-    if (err)
-		return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmax3_kernel_code, "test_fmax3" );
-    if (err)
-		return -1;
-    
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i < kTotalVecCount; i++)
-		{
-			err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-			err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-			err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-			if (err != CL_SUCCESS)
-				{
-					log_error("clSetKernelArgs failed\n");
-					return -1;
-				}
-		}
-	
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-		{
-			err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-			if (err != CL_SUCCESS)
-				{
-					log_error("clEnqueueNDRangeKernel failed\n");
-					return -1;
-				}
-		
-			err = clEnqueueReadBuffer(queue, streams[2], true, 0, sizeof(cl_float)*num_elements,
-									  output_ptr, 0, NULL, NULL);
-			if (err != CL_SUCCESS)
-				{
-					log_error("clEnqueueReadBuffer failed\n");
-					return -1;
-				}
-		
-			if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
-				{
-					log_error("FMAX float%d,float test failed\n", (g_arrVecSizes[i]));
-					err = -1;
-				}
-			else
-				{
-					log_info("FMAX float%d,float test passed\n", (g_arrVecSizes[i]));
-					err = 0;
-				}
-		
-			if (err)
-				break;
-		}
-	
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-		{
-			clReleaseKernel(kernel[i]);
-			clReleaseProgram(program[i]);
-		}
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-	
-    return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+static const char *fmax_kernel_code = // scalar: one float from srcA and srcB per work-item
+    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax2_kernel_code = // float2 from srcA combined with the scalar float srcB[tid]
+    "__kernel void test_fmax2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax4_kernel_code = // float4 from srcA combined with the scalar float srcB[tid]
+    "__kernel void test_fmax4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax8_kernel_code = // float8 from srcA combined with the scalar float srcB[tid]
+    "__kernel void test_fmax8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax16_kernel_code = // float16 from srcA combined with the scalar float srcB[tid]
+    "__kernel void test_fmax16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmax3_kernel_code = // 3-wide: vload3 from srcA, scalar srcB[tid], vstore3 to dst
+    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(fmax(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
+    "}\n";
+
+// Checks outptr[i] == max(inptrA[i], inptrB[i / veclen]) for i in [0, n): each run of
+// veclen consecutive elements of A is compared against a single scalar taken from B.
+// Returns 0 on success; on the first mismatch logs the details and returns -1.
+static int
+verify_fmax(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
+{
+    int         i, j;
+    for (i=0; i<n; ) {
+        int ii = i/veclen;
+        for (j=0; j<veclen && i<n; ++j, ++i) {
+            const float r = (inptrA[i] >= inptrB[ii]) ? inptrA[i] : inptrB[ii];
+            if (r != outptr[i]) {
+                log_info("Verify noted discrepancy at %d (of %d) (vec %d, pos %d)\n", i,n,ii,j);
+                log_info("SHould be %f, is %f\n", r, outptr[i]);
+                // Report the scalar operand that was actually compared (index ii, not i).
+                log_info("Taking max of (%f,%f)\n", inptrA[i], inptrB[ii]);
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
+
+// Runs the fmax(floatN, float) test once per kernel variant built from this file's sources.
+//
+//   device  - not referenced in this function.
+//   context - context in which the buffers and programs are created.
+//   queue   - queue used for the blocking transfers and the kernel launches.
+//   n_elems - work-items per launch; n_elems * g_arrVecSizes[i] floats are verified.
+//
+// Returns 0 if every variant passes, -1 on the first failed API call or result mismatch.
+// Every exit path goes through the cleanup label, so nothing is leaked on error.
+int
+test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Kernel source / entry-point table; slot i is launched and reported as g_arrVecSizes[i].
+    const char **sources[] = { &fmax_kernel_code, &fmax2_kernel_code, &fmax4_kernel_code,
+                               &fmax8_kernel_code, &fmax16_kernel_code, &fmax3_kernel_code };
+    const char  *names[] = { "test_fmax", "test_fmax2", "test_fmax4",
+                             "test_fmax8", "test_fmax16", "test_fmax3" };
+    const int    source_count = (int)(sizeof(sources) / sizeof(sources[0]));
+    cl_mem       streams[3] = { NULL, NULL, NULL };
+    cl_float    *input_ptr[2] = { NULL, NULL };
+    cl_float    *output_ptr = NULL;
+    cl_program  *program = NULL;
+    cl_kernel   *kernel = NULL;
+    size_t       threads[1];
+    size_t       bytes;
+    int          num_elements;
+    int          err = -1;   // stays -1 for failures that produce no CL error code
+    int          i;
+    MTdata       d;
+
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+    bytes = sizeof(cl_float) * num_elements;
+
+    // Zero-initialised so that cleanup can tell which programs/kernels were actually created.
+    program = (cl_program*)calloc(kTotalVecCount, sizeof(cl_program));
+    kernel = (cl_kernel*)calloc(kTotalVecCount, sizeof(cl_kernel));
+    input_ptr[0] = (cl_float*)malloc(bytes);
+    input_ptr[1] = (cl_float*)malloc(bytes);
+    output_ptr = (cl_float*)malloc(bytes);
+    if (!program || !kernel || !input_ptr[0] || !input_ptr[1] || !output_ptr)
+    {
+        log_error("Unable to allocate host memory\n");
+        goto cleanup;
+    }
+
+    // streams[0] and streams[1] hold the two inputs, streams[2] receives the kernel output.
+    for (i=0; i<3; i++)
+    {
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), bytes, NULL, NULL );
+        if (!streams[i])
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+    }
+
+    // Both inputs are drawn from one generator: all of A first, then all of B.
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[0][i] = get_random_float(-0x20000000, 0x20000000, d);
+    for (i=0; i<num_elements; i++)
+        input_ptr[1][i] = get_random_float(-0x20000000, 0x20000000, d);
+    free_mtdata(d); d = NULL;
+
+    for (i=0; i<2; i++)
+    {
+        err = clEnqueueWriteBuffer( queue, streams[i], CL_TRUE, 0, bytes,
+                                    (void *)input_ptr[i], 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clWriteArray failed\n");
+            err = -1;
+            goto cleanup;
+        }
+    }
+
+    // Build one program/kernel per table entry; any slot without a table entry stays NULL.
+    for (i=0; i < source_count && i < kTotalVecCount; i++)
+    {
+        err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, sources[i], names[i] );
+        if (err)
+        {
+            err = -1;
+            goto cleanup;
+        }
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            err = -1;
+            goto cleanup;
+        }
+    }
+
+    // n_elems work-items per launch; the whole output buffer is read back, a prefix is verified.
+    threads[0] = (size_t)n_elems;
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            err = -1;
+            goto cleanup;
+        }
+
+        err = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, bytes,
+                                   output_ptr, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            err = -1;
+            goto cleanup;
+        }
+
+        if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
+        {
+            log_error("FMAX float%d,float test failed\n", (g_arrVecSizes[i]));
+            err = -1;
+            break;
+        }
+        log_info("FMAX float%d,float test passed\n", (g_arrVecSizes[i]));
+        err = 0;
+    }
+
+cleanup:
+    for (i=0; i<3; i++)
+    {
+        if (streams[i])
+            clReleaseMemObject(streams[i]);
+    }
+    for (i=0; program && kernel && i < kTotalVecCount; i++)
+    {
+        if (kernel[i])
+            clReleaseKernel(kernel[i]);
+        if (program[i])
+            clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(output_ptr);
+
+    return err;
+}
+
+
--- a/test_conformance/commonfns/test_fmin.c
+++ b/test_conformance/commonfns/test_fmin.c
@@ -1,244 +1,244 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmin_kernel_code = 
-    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin2_kernel_code = 
-    "__kernel void test_fmin2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin4_kernel_code = 
-    "__kernel void test_fmin4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin8_kernel_code = 
-    "__kernel void test_fmin8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin16_kernel_code = 
-    "__kernel void test_fmin16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-
-static const char *fmin3_kernel_code = 
-    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmin(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-    "}\n";
-
-int
-verify_fmin(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-	
-    for (i=0; i<n; i++)
-	{
-	    r = (inptrA[i] > inptrB[i]) ? inptrB[i] : inptrA[i];
-	    if (r != outptr[i])
-		return -1;
-	}
-	
-    return 0;
-}
-
-int
-test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-	
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));;
-	
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[0])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[1])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-	
-    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[2])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-	
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-	{
-	    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-	}
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-	{
-	    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-	}
-    free_mtdata(d); d = NULL;
-	
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-				(void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-				(void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-	
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
-    if (err)
-	return -1;
-	
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i<kTotalVecCount; i++)
-	{
-	    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	    err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clSetKernelArgs failed\n");
-		    return -1;
-		}
-	}
-	
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-	{
-	    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueNDRangeKernel failed\n");
-		    return -1;
-		}
-		
-	    err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueReadBuffer failed\n");
-		    return -1;
-		}
-		
-	    if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
-		{
-		    log_error("FMIN float%d test failed\n", (g_arrVecSizes[i]));
-		    err = -1;
-		}
-	    else
-		{
-		    log_info("FMIN float%d test passed\n", (g_arrVecSizes[i]));
-		    err = 0;
-		}
-	}
-	
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-	{
-	    clReleaseKernel(kernel[i]);
-	    clReleaseProgram(program[i]);
-	}
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-	
-    return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+static const char *fmin_kernel_code = // scalar float: each work-item computes dst[tid] = fmin(srcA[tid], srcB[tid])
+    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin2_kernel_code = // float2: same body, buffers typed as float2
+    "__kernel void test_fmin2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin4_kernel_code = // float4: same body, buffers typed as float4
+    "__kernel void test_fmin4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin8_kernel_code = // float8: same body, buffers typed as float8
+    "__kernel void test_fmin8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin16_kernel_code = // float16: same body, buffers typed as float16
+    "__kernel void test_fmin16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+
+static const char *fmin3_kernel_code = // 3-wide variant: uses vload3/vstore3 on plain float pointers instead of a float3 buffer
+    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(fmin(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
+    "}\n";
+
+// Host reference check: outptr[k] must equal the smaller of inptrA[k] and
+// inptrB[k] for every k in [0, n). Returns 0 on success, -1 at the first mismatch.
+int
+verify_fmin(float *inptrA, float *inptrB, float *outptr, int n)
+{
+    int k = 0;
+    while (k < n)
+    {
+        float expected = (inptrA[k] > inptrB[k]) ? inptrB[k] : inptrA[k];
+        if (expected != outptr[k])
+            return -1;
+        k++;
+    }
+    return 0;
+}
+
+// Conformance test for fmin(floatN, floatN). For each of the kTotalVecCount
+// vector widths in g_arrVecSizes it launches kernel[i] over n_elems work-items,
+// reads the result back and checks it with verify_fmin().
+// Returns 0 when every width passes, -1 on any setup/OpenCL failure or result
+// mismatch; testing stops at the first width that fails verification.
+int
+test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    cl_mem       streams[3];
+    cl_float    *input_ptr[2], *output_ptr;
+    cl_program   *program;
+    cl_kernel    *kernel;
+    size_t threads[1];
+    int num_elements;
+    int err;
+    int i;
+    MTdata d;
+
+    // One program/kernel pair per tested vector width.
+    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
+    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
+
+    // Every host and device buffer holds n_elems * 2^(kTotalVecCount-1) floats.
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+
+    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    // NOTE(review): the early `return -1` paths below skip the cleanup at the end.
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[0])
+    {
+        log_error("clCreateBuffer failed\n");
+        return -1;
+    }
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[1])
+    {
+        log_error("clCreateBuffer failed\n");
+        return -1;
+    }
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[2])
+    {
+        log_error("clCreateBuffer failed\n");
+        return -1;
+    }
+
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[0][i] = get_random_float(-0x20000000, 0x20000000, d);
+    for (i=0; i<num_elements; i++)
+        input_ptr[1][i] = get_random_float(-0x20000000, 0x20000000, d);
+    free_mtdata(d); d = NULL;
+
+    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
+                (void *)input_ptr[0], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        return -1;
+    }
+    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
+                (void *)input_ptr[1], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        return -1;
+    }
+
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
+    if (err)
+        return -1;
+
+    for (i=0; i<kTotalVecCount; i++)
+    {
+        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            return -1;
+        }
+    }
+
+    // One work-item per vector: every kernel is launched over n_elems work-items.
+    threads[0] = (size_t)n_elems;
+    for (i=0; i<kTotalVecCount; i++)
+    {
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            return -1;
+        }
+
+        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            return -1;
+        }
+
+        if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
+        {
+            log_error("FMIN float%d test failed\n", (g_arrVecSizes[i]));
+            err = -1;
+            // Stop at the first failure so a later passing width cannot reset err to 0.
+            break;
+        }
+        else
+        {
+            log_info("FMIN float%d test passed\n", (g_arrVecSizes[i]));
+            err = 0;
+        }
+    }
+
+    clReleaseMemObject(streams[0]);
+    clReleaseMemObject(streams[1]);
+    clReleaseMemObject(streams[2]);
+    for (i=0; i<kTotalVecCount; i++)
+    {
+        clReleaseKernel(kernel[i]);
+        clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(output_ptr);
+
+    return err;
+}
+
+
--- a/test_conformance/commonfns/test_fminf.c
+++ b/test_conformance/commonfns/test_fminf.c
@@ -1,242 +1,242 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmin_kernel_code = 
-    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin2_kernel_code = 
-    "__kernel void test_fmin2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin4_kernel_code = 
-    "__kernel void test_fmin4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin8_kernel_code = 
-    "__kernel void test_fmin8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin16_kernel_code = 
-    "__kernel void test_fmin16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin3_kernel_code = 
-    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmin(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmin(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-	
-    for (i=0; i<n; ) {
-	int ii = i/veclen;
-	for (j=0; j<veclen && i<n; ++j, ++i) {
-	    r = (inptrA[i] > inptrB[ii]) ? inptrB[ii] : inptrA[i];
-	    if (r != outptr[i])
-		return -1;
-	}
-    }
-	
-    return 0;
-}
-
-int
-test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float     *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata      d;
-	
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-  
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-	
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[0])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[1])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[2])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-	
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-	{
-	    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-	}
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-	{
-	    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-	}
-    free_mtdata(d); d = NULL;
-    
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-				(void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-				(void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-	
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
-    if (err)
-	return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
-    if (err)
-	return -1;
-    
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	    err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clSetKernelArgs failed\n");
-		    return -1;
-		}
-	}
-	
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueNDRangeKernel failed\n");
-		    return -1;
-		}
-		
-	    err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, output_ptr, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueReadBuffer failed\n");
-		    return -1;
-		}
-		
-	    if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
-		{
-		    log_error("fmin float%d,float test failed\n", (g_arrVecSizes[i]));
-		    err = -1;
-		}
-	    else
-		{
-		    log_info("fmin float%d,float test passed\n", (g_arrVecSizes[i]));
-		    err = 0;
-		}
-		
-	    if (err)
-		break;
-	}
-	
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    clReleaseKernel(kernel[i]);
-	    clReleaseProgram(program[i]);
-	}
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-	
-    return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+static const char *fmin_kernel_code = // scalar float: each work-item computes dst[tid] = fmin(srcA[tid], srcB[tid])
+    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin2_kernel_code = // float2 vs scalar: srcB is a plain float buffer, so vector tid is paired with srcB[tid]
+    "__kernel void test_fmin2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin4_kernel_code = // float4 vs scalar: same pairing as the float2 kernel
+    "__kernel void test_fmin4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin8_kernel_code = // float8 vs scalar: same pairing as the float2 kernel
+    "__kernel void test_fmin8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin16_kernel_code = // float16 vs scalar: same pairing as the float2 kernel
+    "__kernel void test_fmin16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
+    "}\n";
+
+static const char *fmin3_kernel_code = // 3-wide vs scalar: vector side goes through vload3/vstore3 on a plain float pointer
+    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(fmin(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
+    "}\n";
+
+// Host reference for fmin(floatN, float): element pos of inptrA is compared with
+// the scalar inptrB[pos / veclen]. Returns 0 on success, -1 at the first mismatch.
+static int
+verify_fmin(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
+{
+    int pos = 0;
+    while (pos < n) {
+        const int vec = pos / veclen;   // index of the current vector and of its scalar operand
+        int lane;
+        for (lane = 0; lane < veclen && pos < n; ++lane, ++pos) {
+            float expected = (inptrA[pos] > inptrB[vec]) ? inptrB[vec] : inptrA[pos];
+            if (expected != outptr[pos])
+                return -1;
+        }
+    }
+    return 0;
+}
+
+// Conformance test for fmin(floatN, float). For each of the kTotalVecCount
+// vector widths in g_arrVecSizes it launches kernel[i] over n_elems work-items,
+// reads the result back and checks it with the 5-argument verify_fmin().
+// Returns 0 when every width passes, -1 on any setup/OpenCL failure or result
+// mismatch; testing stops at the first width that fails verification.
+int
+test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    cl_mem       streams[3];
+    cl_float     *input_ptr[2], *output_ptr;
+    cl_program   *program;
+    cl_kernel    *kernel;
+    size_t  threads[1];
+    int num_elements;
+    int err;
+    int i;
+    MTdata      d;
+
+    // One program/kernel pair per tested vector width.
+    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
+    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
+
+    // Every host and device buffer holds n_elems * 2^(kTotalVecCount-1) floats.
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+
+    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    // NOTE(review): the early `return -1` paths below skip the cleanup at the end.
+    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[0])
+    {
+        log_error("clCreateBuffer failed\n");
+        return -1;
+    }
+    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[1])
+    {
+        log_error("clCreateBuffer failed\n");
+        return -1;
+    }
+    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[2])
+    {
+        log_error("clCreateBuffer failed\n");
+        return -1;
+    }
+
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[0][i] = get_random_float(-0x20000000, 0x20000000, d);
+    for (i=0; i<num_elements; i++)
+        input_ptr[1][i] = get_random_float(-0x20000000, 0x20000000, d);
+    free_mtdata(d); d = NULL;
+
+    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
+                (void *)input_ptr[0], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        return -1;
+    }
+    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
+                (void *)input_ptr[1], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        return -1;
+    }
+
+    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
+    if (err)
+        return -1;
+    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
+    if (err)
+        return -1;
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            return -1;
+        }
+    }
+
+    // One work-item per vector: every kernel is launched over n_elems work-items.
+    threads[0] = (size_t)n_elems;
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            return -1;
+        }
+
+        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, output_ptr, 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            return -1;
+        }
+
+        if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
+        {
+            log_error("fmin float%d,float test failed\n", (g_arrVecSizes[i]));
+            err = -1;
+        }
+        else
+        {
+            log_info("fmin float%d,float test passed\n", (g_arrVecSizes[i]));
+            err = 0;
+        }
+
+        // Stop at the first width that fails so the failure is what gets returned.
+        if (err)
+            break;
+    }
+
+    clReleaseMemObject(streams[0]);
+    clReleaseMemObject(streams[1]);
+    clReleaseMemObject(streams[2]);
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        clReleaseKernel(kernel[i]);
+        clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr[0]);
+    free(input_ptr[1]);
+    free(output_ptr);
+
+    return err;
+}
+
+
--- a/test_conformance/commonfns/test_max.c
+++ b/test_conformance/commonfns/test_max.c
@@ -1,65 +1,65 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-	for( int i = 0; i < numElements * vecSize; i++ )
-	{
-		float v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ];
-		if( v != out[ i ] ) 
-        {
-            log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", 
-                i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-			return -1;
-        }
-	}
-	return 0;
-}
-
-static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-	for( int i = 0; i < numElements * vecSize; i++ )
-	{
-		double v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ];
-		if( v != out[ i ] ) 
-        {
-            log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", 
-                i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-			return -1;
-        }
-	}
-	return 0;
-}
-
-int test_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-	return test_binary_fn( device, context, queue, n_elems, "max", true, max_verify_float, max_verify_double );
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+// Checks out[] against the host-computed max of x[] and y[] over all numElements * vecSize lanes; returns 0 on a full match, -1 after logging the first mismatch.
+static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
+{
+    const int laneCount = numElements * vecSize;
+    for( int lane = 0; lane < laneCount; lane++ )
+    {
+        const float expected = ( x[ lane ] < y[ lane ] ) ? y[ lane ] : x[ lane ];
+        if( expected == out[ lane ] )
+            continue;
+        log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", lane, x[lane], lane, y[lane], lane, out[lane], expected, lane, lane/vecSize, lane%vecSize, vecSize);
+        return -1;
+    }
+    return 0;
+}
+
+// Double-precision counterpart: checks out[] against the host-computed max of x[] and y[] lane by lane; returns 0 on a full match, -1 after logging the first mismatch.
+static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
+{
+    const int laneCount = numElements * vecSize;
+    for( int lane = 0; lane < laneCount; lane++ )
+    {
+        const double expected = ( x[ lane ] < y[ lane ] ) ? y[ lane ] : x[ lane ];
+        if( expected == out[ lane ] )
+            continue;
+        log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", lane, x[lane], lane, y[lane], lane, out[lane], expected, lane, lane/vecSize, lane%vecSize, vecSize);
+        return -1;
+    }
+    return 0;
+}
+
+// Conformance entry point for max(): hands the element-wise verifiers defined in this file to the shared binary-function driver.
+int test_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    const int status = test_binary_fn( device, context, queue, n_elems, "max", true, max_verify_float, max_verify_double );
+    return status;
+}
--- a/test_conformance/commonfns/test_maxf.c
+++ b/test_conformance/commonfns/test_maxf.c
@@ -1,69 +1,69 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-	for( int i = 0; i < numElements; i++ )
-	{
-		for( int j = 0; j < vecSize; j++ )
-		{
-			float v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ];
-			if( v != out[ i * vecSize + j ] ) 
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-				return -1;
-            }
-		}
-	}
-	return 0;
-}
-
-static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-	for( int i = 0; i < numElements; i++ )
-	{
-		for( int j = 0; j < vecSize; j++ )
-		{
-			double v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ];
-			if(	v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-				return -1;
-            }
-		}
-	}
-	return 0;
-}
-
-int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-	return test_binary_fn( device, context, queue, n_elems, "max", false, max_verify_float, max_verify_double );
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+// Verifies max(vector, scalar): lane j of vector i must equal the larger of x[i*vecSize+j] and y[i]; returns 0 on a full match, -1 after logging the first mismatch.
+static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
+{
+    for( int vec = 0; vec < numElements; vec++ )
+    {
+        for( int lane = 0; lane < vecSize; lane++ )
+        {
+            const int idx = vec * vecSize + lane;
+            const float expected = ( x[ idx ] < y[ vec ] ) ? y[ vec ] : x[ idx ];
+            if( expected == out[ idx ] ) continue;
+            log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, vec, lane, x[ idx ], y[ vec ], expected,  out[ idx ] );
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Double-precision max(vector, scalar) check: lane j of vector i must equal the larger of x[i*vecSize+j] and y[i]; returns 0 on a full match, -1 after logging the first mismatch.
+static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
+{
+    for( int vec = 0; vec < numElements; vec++ )
+    {
+        for( int lane = 0; lane < vecSize; lane++ )
+        {
+            const int idx = vec * vecSize + lane;
+            const double expected = ( x[ idx ] < y[ vec ] ) ? y[ vec ] : x[ idx ];
+            if( expected == out[ idx ] ) continue;
+            log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, vec, lane, x[ idx ], y[ vec ], expected,  out[ idx ] );
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Conformance entry point for the vector/scalar form of max(): hands this file's verifiers to the shared binary-function driver.
+int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    const int status = test_binary_fn( device, context, queue, n_elems, "max", false, max_verify_float, max_verify_double );
+    return status;
+}
--- a/test_conformance/commonfns/test_min.c
+++ b/test_conformance/commonfns/test_min.c
@@ -1,61 +1,61 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-	for( int i = 0; i < numElements * vecSize; i++ )
-	{
-		float v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ];
-		if( v != out[ i ] ) {
-      log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-			return -1;
-    }
-	}
-	return 0;
-}
-
-static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-	for( int i = 0; i < numElements * vecSize; i++ )
-	{
-		double v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ];
-		if( v != out[ i ] ) {
-      log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-			return -1;
-    }
-	}
-	return 0;
-}
-
-int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-	return test_binary_fn( device, context, queue, n_elems, "min", true, min_verify_float, min_verify_double );
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+// Checks out[] against the host-computed min of x[] and y[] over all numElements * vecSize lanes; returns 0 on a full match, -1 after logging the first mismatch.
+static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
+{
+    for( int lane = 0; lane < numElements * vecSize; lane++ )
+    {
+        const float expected = ( y[ lane ] < x[ lane ] ) ? y[ lane ] : x[ lane ];
+        if( expected == out[ lane ] ) continue;
+        log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", lane, x[lane], lane, y[lane], lane, out[lane], expected, lane, lane/vecSize, lane%vecSize, vecSize);
+        return -1;
+    }
+    return 0;
+}
+
+// Double-precision counterpart: checks out[] against the host-computed min of x[] and y[] lane by lane; returns 0 on a full match, -1 after logging the first mismatch.
+static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
+{
+    for( int lane = 0; lane < numElements * vecSize; lane++ )
+    {
+        const double expected = ( y[ lane ] < x[ lane ] ) ? y[ lane ] : x[ lane ];
+        if( expected == out[ lane ] ) continue;
+        log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", lane, x[lane], lane, y[lane], lane, out[lane], expected, lane, lane/vecSize, lane%vecSize, vecSize);
+        return -1;
+    }
+    return 0;
+}
+
+// Conformance entry point for min(): hands the element-wise verifiers defined in this file to the shared binary-function driver.
+int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    const int status = test_binary_fn( device, context, queue, n_elems, "min", true, min_verify_float, min_verify_double );
+    return status;
+}
--- a/test_conformance/commonfns/test_minf.c
+++ b/test_conformance/commonfns/test_minf.c
@@ -1,75 +1,75 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-#include "../../test_common/harness/errorHelpers.h"
-
-static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-	for( int i = 0; i < numElements; i++ )
-	{
-		for( int j = 0; j < vecSize; j++ )
-		{
-			float v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ];
-			if( v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-				return -1;
-            }
-		}
-	}
-	return 0;
-}
-
-static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    int maxFail = 1;
-    int numFails = 0;
-	for( int i = 0; i < numElements; i++ )
-	{
-		for( int j = 0; j < vecSize; j++ )
-		{
-			double v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ];
-			if(	v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-			    ++numFails;
-			    if(numFails >= maxFail) {
-				return -1;
-            }
-		}
-	}
-	}
-	return 0;
-}
-
-int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-	return test_binary_fn( device, context, queue, n_elems, "min", false, min_verify_float, min_verify_double );
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+#include "../../test_common/harness/errorHelpers.h"
+
+// Verifies min(vector, scalar): lane j of vector i must equal the smaller of x[i*vecSize+j] and y[i]; returns 0 on a full match, -1 after logging the first mismatch.
+static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
+{
+    for( int vec = 0; vec < numElements; vec++ )
+    {
+        for( int lane = 0; lane < vecSize; lane++ )
+        {
+            const int idx = vec * vecSize + lane;
+            const float expected = ( y[ vec ] < x[ idx ] ) ? y[ vec ] : x[ idx ];
+            if( expected == out[ idx ] ) continue;
+            log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, vec, lane, x[ idx ], y[ vec ], expected,  out[ idx ] );
+            return -1;
+        }
+    }
+    return 0;
+}
+
+// Double-precision min(vector, scalar) check: lane j of vector i must equal the smaller of x[i*vecSize+j] and y[i].
+// Each mismatch is logged and counted; -1 is returned once the count reaches failureLimit (1), otherwise 0.
+static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
+{
+    const int failureLimit = 1;
+    int failures = 0;
+    for( int vec = 0; vec < numElements; vec++ )
+    {
+        for( int lane = 0; lane < vecSize; lane++ )
+        {
+            const int idx = vec * vecSize + lane;
+            const double expected = ( y[ vec ] < x[ idx ] ) ? y[ vec ] : x[ idx ];
+            if( expected == out[ idx ] ) continue;
+            log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, vec, lane, x[ idx ], y[ vec ], expected,  out[ idx ] );
+            failures++;
+            if( failures >= failureLimit )
+                return -1;
+        }
+    }
+    return 0;
+}
+
+// Conformance entry point for the vector/scalar form of min(): hands this file's verifiers to the shared binary-function driver.
+int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    const int status = test_binary_fn( device, context, queue, n_elems, "min", false, min_verify_float, min_verify_double );
+    return status;
+}
--- a/test_conformance/commonfns/test_mix.c
+++ b/test_conformance/commonfns/test_mix.c
@@ -1,200 +1,200 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-const char *mix_kernel_code = 
-"__kernel void test_mix(__global float *srcA, __global float *srcB, __global float *srcC, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid]);\n"
-"}\n";
-
-#define MAX_ERR 1e-3
-
-float
-verify_mix(float *inptrA, float *inptrB, float *inptrC, float *outptr, int n)
-{
-    float       r, delta, max_err = 0.0f;
-    int         i;
-    
-    for (i=0; i<n; i++)
-    {
-		r = inptrA[i] + ((inptrB[i] - inptrA[i]) * inptrC[i]);
-		delta = fabsf(r - outptr[i]) / r;
-        if(delta > max_err) max_err = delta;
-    }
-	return max_err;
-}
-
-int
-test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-	cl_mem			streams[4];
-	cl_float		*input_ptr[3], *output_ptr, *p;
-	cl_program		program;
-	cl_kernel		kernel;
-	void			*values[4];
-	size_t			lengths[1];
-	size_t	threads[1];
-	float			max_err;
-	int				err;
-	int				i;
-    MTdata          d;
-	
-	input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-	input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-	input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-	output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-	streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-	if (!streams[0])
-	{
-		log_error("clCreateBuffer failed\n");
-		return -1;
-	}
-	streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-	if (!streams[1])
-	{
-		log_error("clCreateBuffer failed\n");
-		return -1;
-	}
-	streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-	if (!streams[2])
-	{
-		log_error("clCreateBuffer failed\n");
-		return -1;
-	}
-	
-	streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-	if (!streams[3])
-	{
-		log_error("clCreateBuffer failed\n");
-		return -1;
-	}
-	
-	p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-	for (i=0; i<num_elements; i++)
-	{
-		p[i] =  (float) genrand_real1(d);
-	}
-	p = input_ptr[1];
-	for (i=0; i<num_elements; i++)
-	{
-		p[i] = (float) genrand_real1(d);
-	}
-	p = input_ptr[2];
-	for (i=0; i<num_elements; i++)
-	{
-		p[i] = (float) genrand_real1(d);
-	}
-    free_mtdata(d); d = NULL;
-		
-	err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-	if (err != CL_SUCCESS)
-	{
-		log_error("clWriteArray failed\n");
-		return -1;
-	}
-	err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-	if (err != CL_SUCCESS)
-	{
-		log_error("clWriteArray failed\n");
-		return -1;
-	}
-	err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-	if (err != CL_SUCCESS)
-	{
-		log_error("clWriteArray failed\n");
-		return -1;
-	}
-	
-	lengths[0] = strlen(mix_kernel_code);
-	err = create_single_kernel_helper( context, &program, &kernel, 1, &mix_kernel_code, "test_mix" );
-	test_error( err, "Unable to create test kernel" );
-
-	
-	values[0] = streams[0];
-	values[1] = streams[1];
-	values[2] = streams[2];
-	values[3] = streams[3];
-  err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
-  err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2] );
-  err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3] );
-	if (err != CL_SUCCESS)
-	{
-		log_error("clSetKernelArgs failed\n");
-		return -1;
-	}
-	
-	threads[0] = (size_t)num_elements;
-	err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-	if (err != CL_SUCCESS)
-	{
-		log_error("clEnqueueNDRangeKernel failed\n");
-		return -1;
-	}
-	
-	err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-	if (err != CL_SUCCESS)
-	{
-		log_error("clEnqueueReadBuffer failed\n");
-		return -1;
-	}
-    
-    max_err = verify_mix(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-	if (max_err > MAX_ERR)
-	{
-		log_error("MIX test failed %g max err\n", max_err);
-		err = -1;
-	}
-	else
-	{
-		log_info("MIX test passed %g max err\n", max_err);
-		err = 0;
-	}
-	
-	clReleaseMemObject(streams[0]);
-	clReleaseMemObject(streams[1]);
-	clReleaseMemObject(streams[2]);
-	clReleaseMemObject(streams[3]);
-	clReleaseKernel(kernel);
-	clReleaseProgram(program);
-	free(input_ptr[0]);
-	free(input_ptr[1]);
-	free(input_ptr[2]);
-	free(output_ptr);
-	
-	return err;
-}
-
-
-	
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+const char *mix_kernel_code =   // OpenCL C source: each work-item writes dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid])
+"__kernel void test_mix(__global float *srcA, __global float *srcB, __global float *srcC, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid]);\n"
+"}\n";
+
+#define MAX_ERR 1e-3   // pass threshold: test_mix fails when verify_mix() returns more than this
+
+// Returns the largest relative error |ref - out| / |ref| over n elements, where
+// ref = A + (B - A) * C. Dividing by |ref| rather than ref keeps the error
+// non-negative, so a mismatch against a negative reference is not masked.
+float verify_mix(float *inptrA, float *inptrB, float *inptrC, float *outptr, int n)
+{
+    float max_err = 0.0f;
+    for (int i = 0; i < n; i++)
+    {
+        float r = inptrA[i] + ((inptrB[i] - inptrA[i]) * inptrC[i]);
+        float delta = fabsf(r - outptr[i]) / fabsf(r);
+        if (delta > max_err) max_err = delta;   // a NaN delta (0/0) compares false and is skipped
+    }
+    return max_err;
+}
+
+// Performs every fallible step of the mix() test. Each resource is stored
+// through the caller-owned out-parameters as soon as it exists, so this helper
+// can return early on any error and test_mix() still releases everything.
+// Returns 0 when the largest relative error is within MAX_ERR, non-zero otherwise.
+static int
+run_mix_test(cl_context context, cl_command_queue queue, int num_elements,
+             cl_mem streams[4], cl_float *input_ptr[3], cl_float **output_ptr,
+             cl_program *program, cl_kernel *kernel)
+{
+    const size_t    bytes = sizeof(cl_float) * num_elements;
+    size_t          threads[1];
+    float           max_err;
+    int             err;
+    int             i, j;
+    MTdata          d;
+
+    // Host staging: three input arrays plus the array the result is read into.
+    for (i = 0; i < 3; i++)
+    {
+        input_ptr[i] = (cl_float*)malloc(bytes);
+        if (!input_ptr[i])
+        {
+            log_error("malloc failed\n");
+            return -1;
+        }
+    }
+    *output_ptr = (cl_float*)malloc(bytes);
+    if (!*output_ptr)
+    {
+        log_error("malloc failed\n");
+        return -1;
+    }
+
+    // Device buffers: streams[0..2] feed the kernel, streams[3] receives its output.
+    for (i = 0; i < 4; i++)
+    {
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  bytes, NULL, NULL );
+        if (!streams[i])
+        {
+            log_error("clCreateBuffer failed\n");
+            return -1;
+        }
+    }
+
+    // Fill the inputs one after another from a single generator seeded with gRandomSeed.
+    d = init_genrand( gRandomSeed );
+    for (j = 0; j < 3; j++)
+    {
+        for (i = 0; i < num_elements; i++)
+            input_ptr[j][i] = (float) genrand_real1(d);
+    }
+    free_mtdata(d); d = NULL;
+
+    // Blocking writes, so the host data is on the device before the kernel is enqueued.
+    for (i = 0; i < 3; i++)
+    {
+        err = clEnqueueWriteBuffer( queue, streams[i], true, 0, bytes, (void *)input_ptr[i], 0, NULL, NULL );
+        if (err != CL_SUCCESS)
+        {
+            log_error("clWriteArray failed\n");
+            return -1;
+        }
+    }
+
+    err = create_single_kernel_helper( context, program, kernel, 1, &mix_kernel_code, "test_mix" );
+    if (err)
+    {
+        // NOTE(review): the helper's ownership contract after a failure is not
+        // visible here, so its handles are dropped rather than released by the caller.
+        *program = NULL;
+        *kernel = NULL;
+    }
+    test_error( err, "Unable to create test kernel" );
+
+    err = clSetKernelArg(*kernel, 0, sizeof streams[0], &streams[0] );
+    err |= clSetKernelArg(*kernel, 1, sizeof streams[1], &streams[1] );
+    err |= clSetKernelArg(*kernel, 2, sizeof streams[2], &streams[2] );
+    err |= clSetKernelArg(*kernel, 3, sizeof streams[3], &streams[3] );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clSetKernelArgs failed\n");
+        return -1;
+    }
+
+    threads[0] = (size_t)num_elements;
+    err = clEnqueueNDRangeKernel( queue, *kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueNDRangeKernel failed\n");
+        return -1;
+    }
+
+    err = clEnqueueReadBuffer( queue, streams[3], true, 0, bytes, (void *)*output_ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueReadBuffer failed\n");
+        return -1;
+    }
+
+    max_err = verify_mix(input_ptr[0], input_ptr[1], input_ptr[2], *output_ptr, num_elements);
+    if (max_err > MAX_ERR)
+    {
+        log_error("MIX test failed %g max err\n", max_err);
+        return -1;
+    }
+
+    log_info("MIX test passed %g max err\n", max_err);
+    return 0;
+}
+
+// Conformance test for scalar-float mix(): runs the test_mix kernel over
+// num_elements pseudo-random inputs and compares the result with the host
+// reference. Returns 0 on success, non-zero on failure. `device` is not used.
+int
+test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+{
+    cl_mem          streams[4] = { NULL, NULL, NULL, NULL };
+    cl_float        *input_ptr[3] = { NULL, NULL, NULL };
+    cl_float        *output_ptr = NULL;
+    cl_program      program = NULL;
+    cl_kernel       kernel = NULL;
+    int             err;
+    int             i;
+
+    err = run_mix_test( context, queue, num_elements, streams, input_ptr, &output_ptr, &program, &kernel );
+
+    // Single cleanup point, reached on success and on every failure path;
+    // only handles that were actually created are released.
+    for (i = 0; i < 4; i++)
+        if (streams[i])
+            clReleaseMemObject(streams[i]);
+    if (kernel)
+        clReleaseKernel(kernel);
+    if (program)
+        clReleaseProgram(program);
+    for (i = 0; i < 3; i++)
+        free(input_ptr[i]);
+    free(output_ptr);
+
+    return err;
+}
+
+
+
+
+
--- a/test_conformance/commonfns/test_radians.c
+++ b/test_conformance/commonfns/test_radians.c
@@ -1,475 +1,475 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
-#endif
-
-static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *radians_kernel_code = 
-"__kernel void test_radians(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians2_kernel_code = 
-"__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians4_kernel_code = 
-"__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians8_kernel_code = 
-"__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians16_kernel_code = 
-"__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians3_kernel_code = 
-"__kernel void test_radians3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static float
-verify_radians(float *inptr, float *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-    
-    for (i=0,j=0; i<n; i++,j++)
-	{
-	    r = (M_PI / 180.0) * inptr[i];
-	    error = Ulp_Error( outptr[i], r );
-	    if( fabsf(error) > max_error)
-		{
-		    max_error = error;
-		    max_index = i;
-		    max_val = r;
-		    if( fabsf(error) > MAX_ERR)
-			{
-			    log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-			    return 1;
-			}
-		}
-	}
-    
-    log_info( "radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-    
-    return 0;
-}
-
-
-int
-test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_float     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void         *values[2];
-    size_t       threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata       d;
-    
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-    
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-    
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[0])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    
-    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-    if (!streams[1])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-	{
-	    p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
-	}
-    free_mtdata(d); d = NULL;
-    
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-    
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code, "test_radians" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code, "test_radians2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code, "test_radians4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code, "test_radians8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code, "test_radians16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code, "test_radians3" );
-    if (err)
-        return -1;
-    
-    values[0] = streams[0];
-    values[1] = streams[1];
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clSetKernelArgs failed\n");
-		    return -1;
-		}
-	}
-    
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-	    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueNDRangeKernel failed\n");
-		    return -1;
-		}
-        
-	    cl_uint dead = 0xdeaddead;
-	    memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
-	    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueReadBuffer failed\n");
-		    return -1;
-		}
-        
-	    if (verify_radians(input_ptr[0], output_ptr, n_elems*(i+1)))
-		{
-		    log_error("RADIANS float%d test failed\n",((g_arrVecSizes[i])));
-		    err = -1;
-		}
-	    else
-		{
-		    log_info("RADIANS float%d test passed\n", ((g_arrVecSizes[i])));
-		}
-        
-	    if (err)
-            break;
-	}
-    
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-    if( err )
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ) )
-    {
-        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
-        return 0;
-    }
-
-    return test_radians_double( device,  context,  queue,  n_elems);
-}
-
-
-
-#pragma mark -
-
-const char *radians_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians2_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians4_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians8_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians16_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians3_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static double
-verify_radians_double(double *inptr, double *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-    
-    for (i=0,j=0; i<n; i++,j++)
-	{
-	    r = (3.14159265358979323846264338327950288L / 180.0L) * inptr[i];
-	    error = Ulp_Error_Double( outptr[i], r );
-	    if( fabsf(error) > max_error)
-		{
-		    max_error = error;
-		    max_index = i;
-		    max_val = r;
-		    if( fabsf(error) > MAX_ERR)
-			{
-			    log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-			    return 1;
-			}
-		}
-	}
-    
-    log_info( "radiansd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-    
-    return 0;
-}
-
-
-int
-test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_double     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void         *values[2];
-    size_t       threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata       d;
-    
-    
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-    
-    //TODO: line below is clearly wrong
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-    
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
-    if (!streams[0])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    
-    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
-    if (!streams[1])
-	{
-	    log_error("clCreateBuffer failed\n");
-	    return -1;
-	}
-    
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-	    p[i] = get_random_double((float)(-100000.0 * M_PI), (float)(100000.0 * M_PI) ,d);
-    
-    free_mtdata(d); d = NULL;
-    
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-	{
-	    log_error("clWriteArray failed\n");
-	    return -1;
-	}
-    
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code_double, "test_radians_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code_double, "test_radians2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code_double, "test_radians4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code_double, "test_radians8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code_double, "test_radians16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code_double, "test_radians3_double" );
-    if (err)
-        return -1;
-    
-    values[0] = streams[0];
-    values[1] = streams[1];
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clSetKernelArgs failed\n");
-		    return -1;
-		}
-	}
-    
-    for (i=0; i < kTotalVecCount; i++)
-	{
-	    threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-	    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueNDRangeKernel failed\n");
-		    return -1;
-		}
-        
-	    cl_uint dead = 0xdeaddead;
-	    memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
-	    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-	    if (err != CL_SUCCESS)
-		{
-		    log_error("clEnqueueReadBuffer failed\n");
-		    return -1;
-		}
-        
-	    if (verify_radians_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-		{
-		    log_error("RADIANS double%d test failed\n",((g_arrVecSizes[i])));
-		    err = -1;
-		}
-	    else
-		{
-		    log_info("RADIANS double%d test passed\n", ((g_arrVecSizes[i])));
-		}
-        
-	    if (err)
-            break;
-	}
-    
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-    
-    return err;
-}
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846264338327950288
+#endif
+
+static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+
+// OpenCL C sources for the float radians() kernels: scalar, float2/4/8/16, and a 3-wide variant using vload3/vstore3.
+const char *radians_kernel_code =
+"__kernel void test_radians(__global float *src, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians2_kernel_code =
+"__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians4_kernel_code =
+"__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians8_kernel_code =
+"__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians16_kernel_code =
+"__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians3_kernel_code =
+"__kernel void test_radians3(__global float *src, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
+"}\n";
+
+
+#define MAX_ERR  2.0f   // largest ULP error magnitude the verifiers accept
+
+// Compares the first n device results with the host reference (M_PI / 180) * input.
+// Returns 1 at the first error above MAX_ERR ulps; otherwise logs the worst error and returns 0.
+static float
+verify_radians(float *inptr, float *outptr, int n)
+{
+    float    error, max_error = 0.0f;
+    double   r, max_val = NAN;
+    int      i, max_index = 0;
+
+    for (i=0; i<n; i++)
+    {
+        r = (M_PI / 180.0) * inptr[i];
+        error = Ulp_Error( outptr[i], r );
+        if( fabsf(error) > fabsf(max_error) )   // rank by magnitude; max_error keeps its sign for the log
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if( fabsf(error) > MAX_ERR)
+            {
+                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
+                return 1;
+            }
+        }
+    }
+    log_info( "radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
+    return 0;
+}
+
+
+// Runs the float radians() kernels for every vector width on random input and
+// verifies them on the host, then the double variants when cl_khr_fp64 is reported.
+// Returns 0 on success and non-zero on failure; resources are released on every path.
+int
+test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Entry i is launched below with num_elements / g_arrVecSizes[i] work-items.
+    static const struct { const char **source; const char *name; } tests[] = {
+        { &radians_kernel_code,   "test_radians"   },
+        { &radians2_kernel_code,  "test_radians2"  },
+        { &radians4_kernel_code,  "test_radians4"  },
+        { &radians8_kernel_code,  "test_radians8"  },
+        { &radians16_kernel_code, "test_radians16" },
+        { &radians3_kernel_code,  "test_radians3"  },
+    };
+    cl_mem       streams[2] = { NULL, NULL };
+    cl_float     *input_ptr = NULL, *output_ptr = NULL;
+    cl_program   *program = NULL;
+    cl_kernel    *kernel = NULL;
+    size_t       threads[1];
+    int          num_elements;
+    cl_int       status;
+    int          err = -1;   // stays -1 until every vector width has passed
+    int          i;
+    MTdata       d;
+
+    if ((int)(sizeof(tests) / sizeof(tests[0])) != kTotalVecCount)
+    {
+        log_error("radians kernel table does not match kTotalVecCount\n");
+        return -1;
+    }
+
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+
+    // Zero-initialised so the cleanup path can tell which handles were created.
+    program = (cl_program*)calloc(kTotalVecCount, sizeof(cl_program));
+    kernel = (cl_kernel*)calloc(kTotalVecCount, sizeof(cl_kernel));
+    input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+    if (!program || !kernel || !input_ptr || !output_ptr)
+    {
+        log_error("malloc failed\n");
+        goto cleanup;
+    }
+
+    for (i=0; i < 2; i++)
+    {
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+        if (!streams[i])
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+    }
+
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
+    free_mtdata(d); d = NULL;
+
+    status = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL );
+    if (status != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        goto cleanup;
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        if (create_single_kernel_helper( context, &program[i], &kernel[i], 1, tests[i].source, tests[i].name ))
+            goto cleanup;
+
+        status = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        status |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        if (status != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            goto cleanup;
+        }
+    }
+
+    // Launch each kernel with num_elements / g_arrVecSizes[i] work-items and verify the first n_elems*(i+1) results.
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        cl_uint dead = 0xdeaddead;
+
+        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
+        status = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (status != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            goto cleanup;
+        }
+
+        // Fill the host buffer with a recognisable pattern before the blocking read.
+        memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
+        status = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        if (status != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            goto cleanup;
+        }
+
+        if (verify_radians(input_ptr, output_ptr, n_elems*(i+1)))
+        {
+            log_error("RADIANS float%d test failed\n",((g_arrVecSizes[i])));
+            goto cleanup;
+        }
+        log_info("RADIANS float%d test passed\n", ((g_arrVecSizes[i])));
+    }
+    err = 0;
+
+cleanup:
+    for (i=0; i < 2; i++)
+        if (streams[i]) clReleaseMemObject(streams[i]);
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        if (kernel && kernel[i]) clReleaseKernel(kernel[i]);
+        if (program && program[i]) clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr);
+    free(output_ptr);
+
+    if( err )
+        return err;
+
+    if( ! is_extension_available( device, "cl_khr_fp64" ) )
+    {
+        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
+        return 0;
+    }
+
+    return test_radians_double( device,  context,  queue,  n_elems);
+}
+
+
+
+#pragma mark -
+// OpenCL C sources for the double radians() kernels (each enables cl_khr_fp64): scalar, double2/4/8/16, and a 3-wide vload3/vstore3 variant.
+const char *radians_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_radians_double(__global double *src, __global double *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians2_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_radians2_double(__global double2 *src, __global double2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians4_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_radians4_double(__global double4 *src, __global double4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians8_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_radians8_double(__global double8 *src, __global double8 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians16_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_radians16_double(__global double16 *src, __global double16 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = radians(src[tid]);\n"
+"}\n";
+
+const char *radians3_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_radians3_double(__global double *src, __global double *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
+"}\n";
+
+
+#define MAX_ERR  2.0f   // largest ULP error magnitude accepted (identical to the earlier definition in this file)
+
+// Compares the first n device results with the host reference (pi / 180) * input, evaluated in long double.
+// Returns 1 at the first error above MAX_ERR ulps; otherwise logs the worst error and returns 0.
+static double
+verify_radians_double(double *inptr, double *outptr, int n)
+{
+    float    error, max_error = 0.0f;
+    double   r, max_val = NAN;
+    int      i, max_index = 0;
+
+    for (i=0; i<n; i++)
+    {
+        r = (3.14159265358979323846264338327950288L / 180.0L) * inptr[i];
+        error = Ulp_Error_Double( outptr[i], r );
+        if( fabsf(error) > fabsf(max_error) )   // rank by magnitude; max_error keeps its sign for the log
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if( fabsf(error) > MAX_ERR)
+            {
+                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
+                return 1;
+            }
+        }
+    }
+    log_info( "radiansd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
+    return 0;
+}
+
+
+// Double-precision counterpart of test_radians: runs the cl_khr_fp64 radians()
+// kernels for every vector width on random input and verifies them on the host.
+// Returns 0 on success and non-zero on failure; resources are released on every path.
+int
+test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+    // Entry i is launched below with num_elements / g_arrVecSizes[i] work-items.
+    static const struct { const char **source; const char *name; } tests[] = {
+        { &radians_kernel_code_double,   "test_radians_double"   },
+        { &radians2_kernel_code_double,  "test_radians2_double"  },
+        { &radians4_kernel_code_double,  "test_radians4_double"  },
+        { &radians8_kernel_code_double,  "test_radians8_double"  },
+        { &radians16_kernel_code_double, "test_radians16_double" },
+        { &radians3_kernel_code_double,  "test_radians3_double"  },
+    };
+    cl_mem       streams[2] = { NULL, NULL };
+    cl_double    *input_ptr = NULL, *output_ptr = NULL;
+    cl_program   *program = NULL;
+    cl_kernel    *kernel = NULL;
+    size_t       threads[1];
+    int          num_elements;
+    cl_int       status;
+    int          err = -1;   // stays -1 until every vector width has passed
+    int          i;
+    MTdata       d;
+
+    if ((int)(sizeof(tests) / sizeof(tests[0])) != kTotalVecCount)
+    {
+        log_error("radians double kernel table does not match kTotalVecCount\n");
+        return -1;
+    }
+
+    //TODO: line below is clearly wrong
+    num_elements = n_elems * (1 << (kTotalVecCount-1));
+
+    // Zero-initialised so the cleanup path can tell which handles were created.
+    program = (cl_program*)calloc(kTotalVecCount, sizeof(cl_program));
+    kernel = (cl_kernel*)calloc(kTotalVecCount, sizeof(cl_kernel));
+    input_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    if (!program || !kernel || !input_ptr || !output_ptr)
+    {
+        log_error("malloc failed\n");
+        goto cleanup;
+    }
+
+    for (i=0; i < 2; i++)
+    {
+        streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
+        if (!streams[i])
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+    }
+
+    d = init_genrand( gRandomSeed );
+    for (i=0; i<num_elements; i++)
+        input_ptr[i] = get_random_double((float)(-100000.0 * M_PI), (float)(100000.0 * M_PI) ,d);
+    free_mtdata(d); d = NULL;
+
+    // Upload every cl_double element; sizing this with cl_float would copy only half of the input.
+    status = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr, 0, NULL, NULL );
+    if (status != CL_SUCCESS)
+    {
+        log_error("clWriteArray failed\n");
+        goto cleanup;
+    }
+
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        if (create_single_kernel_helper( context, &program[i], &kernel[i], 1, tests[i].source, tests[i].name ))
+            goto cleanup;
+
+        status = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+        status |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+        if (status != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            goto cleanup;
+        }
+    }
+
+    // Launch each kernel with num_elements / g_arrVecSizes[i] work-items and verify the first n_elems*(i+1) results.
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        cl_uint dead = 0xdeaddead;
+
+        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
+        status = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        if (status != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            goto cleanup;
+        }
+
+        // Fill the host buffer with a recognisable pattern before the blocking read.
+        memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
+        status = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        if (status != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            goto cleanup;
+        }
+
+        if (verify_radians_double(input_ptr, output_ptr, n_elems*(i+1)))
+        {
+            log_error("RADIANS double%d test failed\n",((g_arrVecSizes[i])));
+            goto cleanup;
+        }
+        log_info("RADIANS double%d test passed\n", ((g_arrVecSizes[i])));
+    }
+    err = 0;
+
+cleanup:
+    for (i=0; i < 2; i++)
+        if (streams[i]) clReleaseMemObject(streams[i]);
+    for (i=0; i < kTotalVecCount; i++)
+    {
+        if (kernel && kernel[i]) clReleaseKernel(kernel[i]);
+        if (program && program[i]) clReleaseProgram(program[i]);
+    }
+    free(program);
+    free(kernel);
+    free(input_ptr);
+    free(output_ptr);
+
+    return err;
+}
+
--- a/test_conformance/commonfns/test_sign.c
+++ b/test_conformance/commonfns/test_sign.c
@@ -1,445 +1,445 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int
-test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *sign_kernel_code = 
-"__kernel void test_sign(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign2_kernel_code = 
-"__kernel void test_sign2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign4_kernel_code = 
-"__kernel void test_sign4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign8_kernel_code = 
-"__kernel void test_sign8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign16_kernel_code = 
-"__kernel void test_sign16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign3_kernel_code = 
-"__kernel void test_sign3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
-"}\n";
-
-
-
-static int
-verify_sign(float *inptr, float *outptr, int n)
-{
-  float       r;
-  int         i;
-  
-  for (i=0; i<n; i++)
-  {
-    if (inptr[i] > 0.0f)
-      r = 1.0f;
-    else if (inptr[i] < 0.0f)
-      r = -1.0f;
-    else
-      r = 0.0f;
-    if (r != outptr[i])
-      return -1;
-  }
-  
-  return 0;
-}
-
-static const char *fn_names[] = { "SIGN float", "SIGN float2", "SIGN float4", "SIGN float8", "SIGN float16", "SIGN float3" };
-
-int
-test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[2];
-  cl_float    *input_ptr[1], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  void        *values[2];
-  size_t  threads[1];
-  int num_elements;
-  int err;
-  int i;
-  MTdata    d;
-  
-  num_elements = n_elems * 16;
-  
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-  }
-  free_mtdata(d);   d = NULL;
-  
-    
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &sign_kernel_code, "test_sign" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &sign2_kernel_code, "test_sign2" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &sign4_kernel_code, "test_sign4" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &sign8_kernel_code, "test_sign8" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &sign16_kernel_code, "test_sign16" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &sign3_kernel_code, "test_sign3" );
-  if (err)
-    return -1;
-  
-  values[0] = streams[0];
-  values[1] = streams[1];
-  for (i=0; i<kTotalVecCount; i++)
-  {
-	  err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	  err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	  if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-  
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++) // change this so we test all
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-  
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-    
-    if (verify_sign(input_ptr[0], output_ptr, n_elems*(i+1)))
-    {
-      log_error("%s test failed\n", fn_names[i]);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed\n", fn_names[i]);
-      err = 0;
-    }
-    
-    if (err)
-      break;
-  }
-  
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(output_ptr);
-  
-  if(err)
-    return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64"))
-    {
-        log_info( "skipping double test -- cl_khr_fp64 not supported.\n" );
-        return 0;
-    }
-    
-    return test_sign_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-const char *sign_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign2_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign4_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign8_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign16_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign3_kernel_code_double = 
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
-"}\n";
-
-
-static int
-verify_sign_double(double *inptr, double *outptr, int n)
-{
-  double       r;
-  int         i;
-  
-  for (i=0; i<n; i++)
-  {
-    if (inptr[i] > 0.0)
-      r = 1.0;
-    else if (inptr[i] < 0.0)
-      r = -1.0;
-    else
-      r = 0.0f;
-    if (r != outptr[i])
-      return -1;
-  }
-  
-  return 0;
-}
-
-static const char *fn_names_double[] = { "SIGN double", "SIGN double2", "SIGN double4", "SIGN double8", "SIGN double16", "SIGN double3" };
-
-int
-test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[2];
-  cl_double    *input_ptr[1], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  void        *values[2];
-  size_t  threads[1];
-  int num_elements;
-  int err;
-  int i;
-  MTdata    d;
-  
-  num_elements = n_elems * 16;
-  
-  input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-    p[i] = get_random_double(-0x20000000, 0x20000000, d);
-
-  free_mtdata(d);   d = NULL;
-  
-    
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &sign_kernel_code_double, "test_sign_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &sign2_kernel_code_double, "test_sign2_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &sign4_kernel_code_double, "test_sign4_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &sign8_kernel_code_double, "test_sign8_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &sign16_kernel_code_double, "test_sign16_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &sign3_kernel_code_double, "test_sign3_double" );
-  if (err)
-    return -1;
-  
-  values[0] = streams[0];
-  values[1] = streams[1];
-  for (i=0; i<kTotalVecCount; i++)
-  {
-	  err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	  err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	  if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-  
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++) // this hsould be changed
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-  
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-    
-    if (verify_sign_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-    {
-      log_error("%s test failed\n", fn_names_double[i]);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed\n", fn_names_double[i]);
-      err = 0;
-    }
-    
-    if (err)
-      break;
-  }
-  
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(output_ptr);
-  
-  return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+/* Forward declaration of the double-precision variant defined later in this
+ * file; test_sign() calls it after checking for the cl_khr_fp64 extension. */
+static int
+test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
+
+
+/*
+ * OpenCL C sources for the single-precision sign() kernels.
+ *
+ * Each kernel takes a source and a destination buffer; every work-item uses
+ * its global id as the index, applies sign() to one element of the kernel's
+ * element type (float, float2, float4, float8 or float16) and stores the
+ * result at the same index of dst.
+ */
+const char *sign_kernel_code =
+"__kernel void test_sign(__global float *src, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign2_kernel_code =
+"__kernel void test_sign2(__global float2 *src, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign4_kernel_code =
+"__kernel void test_sign4(__global float4 *src, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign8_kernel_code =
+"__kernel void test_sign8(__global float8 *src, __global float8 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign16_kernel_code =
+"__kernel void test_sign16(__global float16 *src, __global float16 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+/* The 3-wide variant takes plain float pointers and goes through
+ * vload3()/vstore3() instead of indexing a vector-typed buffer. */
+const char *sign3_kernel_code =
+"__kernel void test_sign3(__global float *src, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
+"}\n";
+
+
+
+/*
+ * Host-side reference check for the float sign() kernels.
+ *
+ * Walks the first n elements of inptr, derives the expected result
+ * (1.0f for positive input, -1.0f for negative input, 0.0f for anything
+ * else) and compares it with the matching element of outptr.
+ *
+ * Returns 0 when every element matches, -1 at the first mismatch.
+ */
+static int
+verify_sign(float *inptr, float *outptr, int n)
+{
+  int idx = 0;
+
+  while (idx < n)
+  {
+    const float in = inptr[idx];
+    /* Inputs that compare neither above nor below zero fall through to 0. */
+    const float expected = (in > 0.0f) ? 1.0f : ((in < 0.0f) ? -1.0f : 0.0f);
+
+    if (expected != outptr[idx])
+      return -1;
+
+    ++idx;
+  }
+
+  return 0;
+}
+
+/* Labels for the pass/fail log lines, in the same order as the kernels are
+ * built in test_sign(): scalar, 2, 4, 8, 16 and finally the 3-wide variant. */
+static const char *fn_names[] = { "SIGN float", "SIGN float2", "SIGN float4", "SIGN float8", "SIGN float16", "SIGN float3" };
+
+/*
+ * Single-precision sign() test.
+ *
+ * Fills a buffer of n_elems * 16 random floats, builds one kernel per vector
+ * width, runs each of them with n_elems work-items, reads the output buffer
+ * back and checks it with verify_sign().  When every variant passes and the
+ * device reports cl_khr_fp64, the double-precision test is run as well and
+ * its result is returned.
+ *
+ * Returns 0 on success (including when the double test is skipped) and a
+ * non-zero value on any failure.  All OpenCL objects and host buffers created
+ * here are released on every exit path.
+ */
+int
+test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+  /* Kernel sources and entry points, indexed like fn_names[]. */
+  const char **kernel_sources[kTotalVecCount] = { &sign_kernel_code, &sign2_kernel_code, &sign4_kernel_code,
+                                                  &sign8_kernel_code, &sign16_kernel_code, &sign3_kernel_code };
+  const char *kernel_names[kTotalVecCount] = { "test_sign", "test_sign2", "test_sign4",
+                                               "test_sign8", "test_sign16", "test_sign3" };
+  cl_mem      streams[2] = { NULL, NULL };
+  cl_float    *input_ptr = NULL, *output_ptr = NULL;
+  cl_program  program[kTotalVecCount];
+  cl_kernel   kernel[kTotalVecCount];
+  size_t      threads[1];
+  int         num_elements;
+  int         err = -1;
+  int         i;
+  MTdata      d;
+
+  /* Start from NULL handles so the cleanup code can tell what was created. */
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    program[i] = NULL;
+    kernel[i] = NULL;
+  }
+
+  /* The widest variant consumes 16 elements per work-item. */
+  num_elements = n_elems * 16;
+
+  input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
+  if (!input_ptr || !output_ptr)
+  {
+    log_error("malloc failed\n");
+    err = -1;
+    goto cleanup;
+  }
+
+  for (i=0; i<2; i++)
+  {
+    streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
+    if (!streams[i])
+    {
+      log_error("clCreateBuffer failed\n");
+      err = -1;
+      goto cleanup;
+    }
+  }
+
+  d = init_genrand( gRandomSeed );
+  for (i=0; i<num_elements; i++)
+  {
+    input_ptr[i] = get_random_float(-0x20000000, 0x20000000, d);
+  }
+  free_mtdata(d);   d = NULL;
+
+  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL );
+  if (err != CL_SUCCESS)
+  {
+    log_error("clWriteArray failed\n");
+    err = -1;
+    goto cleanup;
+  }
+
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    /* NOTE(review): assumes create_single_kernel_helper leaves either a valid
+     * or an untouched (NULL) handle behind when it fails - confirm. */
+    err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, kernel_sources[i], kernel_names[i] );
+    if (err)
+    {
+      err = -1;
+      goto cleanup;
+    }
+  }
+
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clSetKernelArgs failed\n");
+      err = -1;
+      goto cleanup;
+    }
+  }
+
+  threads[0] = (size_t)n_elems;
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueNDRangeKernel failed\n");
+      err = -1;
+      goto cleanup;
+    }
+
+    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueReadBuffer failed\n");
+      err = -1;
+      goto cleanup;
+    }
+
+    /* n_elems work-items each produce one vector, so check exactly
+     * n_elems * width elements.
+     * NOTE(review): assumes g_arrVecSizes[] is ordered like the kernel tables
+     * above (the sibling commonfns tests index it the same way) - confirm
+     * against procs.h. */
+    if (verify_sign(input_ptr, output_ptr, n_elems * g_arrVecSizes[i]))
+    {
+      log_error("%s test failed\n", fn_names[i]);
+      err = -1;
+      break;
+    }
+
+    log_info("%s test passed\n", fn_names[i]);
+    err = 0;
+  }
+
+cleanup:
+  for (i=0; i<2; i++)
+  {
+    if (streams[i])
+      clReleaseMemObject(streams[i]);
+  }
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    if (kernel[i])
+      clReleaseKernel(kernel[i]);
+    if (program[i])
+      clReleaseProgram(program[i]);
+  }
+  free(input_ptr);
+  free(output_ptr);
+
+  if (err)
+    return err;
+
+  if( ! is_extension_available( device, "cl_khr_fp64"))
+  {
+    log_info( "skipping double test -- cl_khr_fp64 not supported.\n" );
+    return 0;
+  }
+
+  return test_sign_double( device, context, queue, n_elems);
+}
+
+#pragma mark -
+
+/*
+ * OpenCL C sources for the double-precision sign() kernels.
+ *
+ * Same shape as the float kernels: every work-item uses its global id as the
+ * index, applies sign() to one element (double, double2, double4, double8 or
+ * double16) and stores the result at the same index of dst.  Each source
+ * starts by enabling the cl_khr_fp64 extension.
+ */
+const char *sign_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_sign_double(__global double *src, __global double *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign2_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_sign2_double(__global double2 *src, __global double2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign4_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_sign4_double(__global double4 *src, __global double4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign8_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_sign8_double(__global double8 *src, __global double8 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+const char *sign16_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_sign16_double(__global double16 *src, __global double16 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = sign(src[tid]);\n"
+"}\n";
+
+/* The 3-wide variant takes plain double pointers and goes through
+ * vload3()/vstore3() instead of indexing a vector-typed buffer. */
+const char *sign3_kernel_code_double =
+"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+"__kernel void test_sign3_double(__global double *src, __global double *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
+"}\n";
+
+
+/*
+ * Host-side reference check for the double sign() kernels.
+ *
+ * Walks the first n elements of inptr, derives the expected result
+ * (1.0 for positive input, -1.0 for negative input, 0.0 for anything else)
+ * and compares it with the matching element of outptr.
+ *
+ * Returns 0 when every element matches, -1 at the first mismatch.
+ */
+static int
+verify_sign_double(double *inptr, double *outptr, int n)
+{
+  int idx = 0;
+
+  while (idx < n)
+  {
+    const double in = inptr[idx];
+    /* Inputs that compare neither above nor below zero fall through to 0. */
+    const double expected = (in > 0.0) ? 1.0 : ((in < 0.0) ? -1.0 : 0.0);
+
+    if (expected != outptr[idx])
+      return -1;
+
+    ++idx;
+  }
+
+  return 0;
+}
+
+/* Labels for the pass/fail log lines, in the same order as the kernels are
+ * built in test_sign_double(): scalar, 2, 4, 8, 16 and finally the 3-wide
+ * variant. */
+static const char *fn_names_double[] = { "SIGN double", "SIGN double2", "SIGN double4", "SIGN double8", "SIGN double16", "SIGN double3" };
+
+/*
+ * Double-precision sign() test; test_sign() calls it once the cl_khr_fp64
+ * extension check has passed.
+ *
+ * Fills a buffer of n_elems * 16 random doubles, builds one kernel per vector
+ * width, runs each of them with n_elems work-items, reads the output buffer
+ * back and checks it with verify_sign_double().
+ *
+ * Returns 0 when every variant passes and -1 on any failure.  All OpenCL
+ * objects and host buffers created here are released on every exit path.
+ */
+int
+test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+  /* Kernel sources and entry points, indexed like fn_names_double[]. */
+  const char **kernel_sources[kTotalVecCount] = { &sign_kernel_code_double, &sign2_kernel_code_double, &sign4_kernel_code_double,
+                                                  &sign8_kernel_code_double, &sign16_kernel_code_double, &sign3_kernel_code_double };
+  const char *kernel_names[kTotalVecCount] = { "test_sign_double", "test_sign2_double", "test_sign4_double",
+                                               "test_sign8_double", "test_sign16_double", "test_sign3_double" };
+  cl_mem      streams[2] = { NULL, NULL };
+  cl_double   *input_ptr = NULL, *output_ptr = NULL;
+  cl_program  program[kTotalVecCount];
+  cl_kernel   kernel[kTotalVecCount];
+  size_t      threads[1];
+  int         num_elements;
+  int         err = -1;
+  int         i;
+  MTdata      d;
+
+  /* Start from NULL handles so the cleanup code can tell what was created. */
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    program[i] = NULL;
+    kernel[i] = NULL;
+  }
+
+  /* The widest variant consumes 16 elements per work-item. */
+  num_elements = n_elems * 16;
+
+  input_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+  output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+  if (!input_ptr || !output_ptr)
+  {
+    log_error("malloc failed\n");
+    err = -1;
+    goto cleanup;
+  }
+
+  for (i=0; i<2; i++)
+  {
+    streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
+    if (!streams[i])
+    {
+      log_error("clCreateBuffer failed\n");
+      err = -1;
+      goto cleanup;
+    }
+  }
+
+  d = init_genrand( gRandomSeed );
+  for (i=0; i<num_elements; i++)
+    input_ptr[i] = get_random_double(-0x20000000, 0x20000000, d);
+
+  free_mtdata(d);   d = NULL;
+
+  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr, 0, NULL, NULL );
+  if (err != CL_SUCCESS)
+  {
+    log_error("clWriteArray failed\n");
+    err = -1;
+    goto cleanup;
+  }
+
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    /* NOTE(review): assumes create_single_kernel_helper leaves either a valid
+     * or an untouched (NULL) handle behind when it fails - confirm. */
+    err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, kernel_sources[i], kernel_names[i] );
+    if (err)
+    {
+      err = -1;
+      goto cleanup;
+    }
+  }
+
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clSetKernelArgs failed\n");
+      err = -1;
+      goto cleanup;
+    }
+  }
+
+  threads[0] = (size_t)n_elems;
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueNDRangeKernel failed\n");
+      err = -1;
+      goto cleanup;
+    }
+
+    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueReadBuffer failed\n");
+      err = -1;
+      goto cleanup;
+    }
+
+    /* n_elems work-items each produce one vector, so check exactly
+     * n_elems * width elements.
+     * NOTE(review): assumes g_arrVecSizes[] is ordered like the kernel tables
+     * above (the sibling commonfns tests index it the same way) - confirm
+     * against procs.h. */
+    if (verify_sign_double(input_ptr, output_ptr, n_elems * g_arrVecSizes[i]))
+    {
+      log_error("%s test failed\n", fn_names_double[i]);
+      err = -1;
+      break;
+    }
+
+    log_info("%s test passed\n", fn_names_double[i]);
+    err = 0;
+  }
+
+cleanup:
+  for (i=0; i<2; i++)
+  {
+    if (streams[i])
+      clReleaseMemObject(streams[i]);
+  }
+  for (i=0; i<kTotalVecCount; i++)
+  {
+    if (kernel[i])
+      clReleaseKernel(kernel[i]);
+    if (program[i])
+      clReleaseProgram(program[i]);
+  }
+  free(input_ptr);
+  free(output_ptr);
+
+  return err;
+}
+
+
--- a/test_conformance/commonfns/test_smoothstep.c
+++ b/test_conformance/commonfns/test_smoothstep.c
@@ -1,283 +1,283 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *smoothstep_kernel_code = 
-"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep2_kernel_code = 
-"__kernel void test_smoothstep2(__global float2 *edge0, __global float2 *edge1, __global float2 *x, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep4_kernel_code = 
-"__kernel void test_smoothstep4(__global float4 *edge0, __global float4 *edge1, __global float4 *x, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep8_kernel_code = 
-"__kernel void test_smoothstep8(__global float8 *edge0, __global float8 *edge1, __global float8 *x, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep16_kernel_code = 
-"__kernel void test_smoothstep16(__global float16 *edge0, __global float16 *edge1, __global float16 *x, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep3_kernel_code = 
-"__kernel void test_smoothstep3(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(smoothstep(vload3(tid,edge0),vload3(tid,edge1),vload3(tid,x)), tid, dst);\n"
-"}\n";
-
-#define MAX_ERR (1e-5f)
-
-static float
-verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n)
-{
-  float       r, t, delta, max_err = 0.0f;
-  int         i;
-  
-  for (i=0; i<n; i++)
-  {
-    t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]);
-    if (t < 0.0f)
-      t = 0.0f;
-    else if (t > 1.0f)
-      t = 1.0f;
-    r = t * t * (3.0f - 2.0f * t);
-    delta = (float)fabs(r - outptr[i]);
-    if (delta > max_err)
-      max_err = delta;
-  }
-  
-  return max_err;
-}
-
-const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4", "SMOOTHSTEP float8", "SMOOTHSTEP float16", "SMOOTHSTEP float3" };
-
-int
-test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem	  streams[4];
-  cl_float    *input_ptr[3], *output_ptr, *p, *p_edge0;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  size_t  threads[1];
-  float max_err;
-  int num_elements;
-  int err;
-  int i;
-  MTdata d;
-  
-  num_elements = n_elems * 16;
-  
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[2])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[3])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  p = input_ptr[0];
-  d = init_genrand( gRandomSeed );
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00400000, 0x00400000, d);
-  }
-  
-  p = input_ptr[1];
-  p_edge0 = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    float edge0 = p_edge0[i];
-    float edge1;
-    do {
-      edge1 = get_random_float(-0x00400000, 0x00400000, d);
-      if (edge0 < edge1)
-        break;
-    } while (1);
-    p[i] = edge1;
-  }
-  
-  p = input_ptr[2];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00400000, 0x00400000, d);
-  }
-  free_mtdata(d);
-  d = NULL;
-  
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &smoothstep8_kernel_code, "test_smoothstep8" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &smoothstep16_kernel_code, "test_smoothstep16" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &smoothstep3_kernel_code, "test_smoothstep3" );
-  if (err)
-    return -1;
-  
-  for (i=0; i<kTotalVecCount; i++)
-  {
-	  err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	  err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	  err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-	  err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
-	  if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-  
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-    
-
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-    
-    max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * g_arrVecSizes[i]);
-    
-    if (max_err > MAX_ERR)
-    {
-      log_error("%s test failed %g max err\n", fn_names[i], max_err);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed %g max err\n", fn_names[i], max_err);
-      err = 0;
-    }
-    
-    if (err)
-      break;
-  }
-  
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  clReleaseMemObject(streams[2]);
-  clReleaseMemObject(streams[3]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(input_ptr[1]);
-  free(input_ptr[2]);
-  free(output_ptr);
-  
-  return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+static const char *smoothstep_kernel_code = // scalar float: each work-item handles one element
+"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+static const char *smoothstep2_kernel_code = // float2 edges, x and dst
+"__kernel void test_smoothstep2(__global float2 *edge0, __global float2 *edge1, __global float2 *x, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+static const char *smoothstep4_kernel_code = // float4 edges, x and dst
+"__kernel void test_smoothstep4(__global float4 *edge0, __global float4 *edge1, __global float4 *x, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+static const char *smoothstep8_kernel_code = // float8 edges, x and dst
+"__kernel void test_smoothstep8(__global float8 *edge0, __global float8 *edge1, __global float8 *x, __global float8 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+static const char *smoothstep16_kernel_code = // float16 edges, x and dst
+"__kernel void test_smoothstep16(__global float16 *edge0, __global float16 *edge1, __global float16 *x, __global float16 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+static const char *smoothstep3_kernel_code = // 3-component variant: vload3/vstore3 on plain float pointers
+"__kernel void test_smoothstep3(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(smoothstep(vload3(tid,edge0),vload3(tid,edge1),vload3(tid,x)), tid, dst);\n"
+"}\n";
+
+#define MAX_ERR (1e-5f) // largest absolute error from verify_smoothstep that test_smoothstep still reports as a pass
+
+// Host reference: returns the largest |expected - outptr[i]| over n elements, where
+// expected = t*t*(3 - 2t) and t = (x - edge0) / (edge1 - edge0) clamped to [0, 1].
+static float
+verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n)
+{
+  float worst = 0.0f;
+  int   idx = 0;
+
+  while (idx < n)
+  {
+    float t = (x[idx] - edge0[idx]) / (edge1[idx] - edge0[idx]);
+    float expected, diff;
+
+    t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);
+    expected = t * t * (3.0f - 2.0f * t);
+    diff = (float)fabs(expected - outptr[idx]);
+    worst = (diff > worst) ? diff : worst;
+    ++idx;
+  }
+  return worst;
+}
+
+// Log names, indexed by the same i as kernel[] and program[] in test_smoothstep.
+static const char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4", "SMOOTHSTEP float8", "SMOOTHSTEP float16", "SMOOTHSTEP float3" };
+
+// Runs smoothstep(edge0, edge1, x) with vector-typed edges for each variant named
+// in fn_names and compares the device output with verify_smoothstep on the host.
+//
+//   device   - not referenced by this function
+//   context  - context the buffers are created in and the kernels are built for
+//   queue    - queue used for every transfer and kernel launch
+//   n_elems  - global work size; each buffer holds n_elems * 16 floats
+//
+// Returns 0 if every variant stays within MAX_ERR, or -1 on the first API failure
+// or out-of-tolerance result. Every exit path releases the buffers, kernels,
+// programs and host arrays that were created up to that point.
+int
+test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+  cl_mem      streams[4];
+  cl_float    *input_ptr[3], *output_ptr = NULL;
+  cl_program  program[kTotalVecCount];
+  cl_kernel   kernel[kTotalVecCount];
+  size_t      threads[1];
+  size_t      buffer_bytes;
+  float       max_err;
+  int         num_elements;
+  int         result = -1;
+  int         err;
+  int         i;
+  MTdata      d;
+
+  // Null every handle up front so cleanup can tell what was actually created.
+  memset(streams, 0, sizeof(streams));
+  memset(input_ptr, 0, sizeof(input_ptr));
+  memset(program, 0, sizeof(program));
+  memset(kernel, 0, sizeof(kernel));
+
+  // 16 floats per work-item covers the widest variant (float16).
+  num_elements = n_elems * 16;
+  buffer_bytes = sizeof(cl_float) * num_elements;
+
+  // Host staging arrays; checked together before any of them is written to.
+  for (i = 0; i < 3; i++)
+    input_ptr[i] = (cl_float*)malloc(buffer_bytes);
+  output_ptr = (cl_float*)malloc(buffer_bytes);
+  if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
+  {
+    log_error("malloc failed\n");
+    goto cleanup;
+  }
+
+  // streams[0..2] carry edge0, edge1 and x; streams[3] receives the result.
+  for (i = 0; i < 4; i++)
+  {
+    streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_bytes, NULL, NULL );
+    if (!streams[i])
+    {
+      log_error("clCreateBuffer failed\n");
+      goto cleanup;
+    }
+  }
+
+  // Values are drawn in the order edge0, edge1, x; keeping that order keeps the
+  // inputs produced for a given gRandomSeed unchanged.
+  d = init_genrand( gRandomSeed );
+  for (i = 0; i < num_elements; i++)
+    input_ptr[0][i] = get_random_float(-0x00400000, 0x00400000, d);
+
+  for (i = 0; i < num_elements; i++)
+  {
+    // Redraw until edge0 < edge1 so the reference divides by a positive span.
+    float edge0 = input_ptr[0][i];
+    float edge1;
+    do {
+      edge1 = get_random_float(-0x00400000, 0x00400000, d);
+    } while (!(edge0 < edge1));
+    input_ptr[1][i] = edge1;
+  }
+
+  for (i = 0; i < num_elements; i++)
+    input_ptr[2][i] = get_random_float(-0x00400000, 0x00400000, d);
+  free_mtdata(d);
+
+  // Blocking writes: the inputs are on the device before any kernel is built.
+  for (i = 0; i < 3; i++)
+  {
+    err = clEnqueueWriteBuffer( queue, streams[i], true, 0, buffer_bytes, (void *)input_ptr[i], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clWriteArray failed\n");
+      goto cleanup;
+    }
+  }
+
+  // NOTE(review): cleanup assumes create_single_kernel_helper leaves *program
+  // and *kernel either untouched (NULL) or valid when it fails - confirm.
+  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
+  if (err)
+    goto cleanup;
+  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2" );
+  if (err)
+    goto cleanup;
+  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4" );
+  if (err)
+    goto cleanup;
+  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &smoothstep8_kernel_code, "test_smoothstep8" );
+  if (err)
+    goto cleanup;
+  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &smoothstep16_kernel_code, "test_smoothstep16" );
+  if (err)
+    goto cleanup;
+  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &smoothstep3_kernel_code, "test_smoothstep3" );
+  if (err)
+    goto cleanup;
+
+  // All variants are bound to the same four buffers.
+  for (i = 0; i < kTotalVecCount; i++)
+  {
+    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+    err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
+    err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clSetKernelArgs failed\n");
+      goto cleanup;
+    }
+  }
+
+  threads[0] = (size_t)n_elems;
+  for (i = 0; i < kTotalVecCount; i++)
+  {
+    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueNDRangeKernel failed\n");
+      goto cleanup;
+    }
+
+    // NOTE(review): reading right after the launch presumably relies on the
+    // queue executing commands in order - confirm.
+    err = clEnqueueReadBuffer( queue, streams[3], true, 0, buffer_bytes, (void *)output_ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueReadBuffer failed\n");
+      goto cleanup;
+    }
+
+    // Check only the n_elems * g_arrVecSizes[i] floats covered by this launch.
+    max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * g_arrVecSizes[i]);
+    if (max_err > MAX_ERR)
+    {
+      log_error("%s test failed %g max err\n", fn_names[i], max_err);
+      goto cleanup;
+    }
+    log_info("%s test passed %g max err\n", fn_names[i], max_err);
+  }
+  result = 0;
+
+cleanup:
+  // Reached on success and on every failure; result stays -1 unless all
+  // variants passed.
+  for (i = 0; i < 4; i++)
+  {
+    if (streams[i])
+      clReleaseMemObject(streams[i]);
+  }
+  for (i = 0; i < kTotalVecCount; i++)
+  {
+    if (kernel[i])
+      clReleaseKernel(kernel[i]);
+    if (program[i])
+      clReleaseProgram(program[i]);
+  }
+  free(input_ptr[0]);
+  free(input_ptr[1]);
+  free(input_ptr[2]);
+  free(output_ptr);
+
+  return result;
+}
+
+
--- a/test_conformance/commonfns/test_smoothstepf.c
+++ b/test_conformance/commonfns/test_smoothstepf.c
@@ -1,260 +1,260 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-
-#if !defined(_WIN32)
-#include <stdbool.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *smoothstep_kernel_code = 
-"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep2_kernel_code = 
-"__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep4_kernel_code = 
-"__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-#define MAX_ERR (1e-5f)
-
-extern "C" float
-verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n, int veclen)
-{
-  float       r, t, delta, max_err = 0.0f;
-  int         i, j;
-  
-  for (i = 0; i < n; ++i) {
-    int vi = i * veclen;
-    for (j = 0; j < veclen; ++j, ++vi) {
-      t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
-      if (t < 0.0f)
-        t = 0.0f;
-      else if (t > 1.0f)
-        t = 1.0f;
-      r = t * t * (3.0f - 2.0f * t);
-      delta = (float)fabs(r - outptr[vi]);
-      if (delta > max_err)
-        max_err = delta;
-    }
-  }
-  return max_err;
-}
-
-const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"};
-
-int
-test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem	  streams[4];
-  cl_float    *input_ptr[3], *output_ptr, *p, *p_edge0;
-  cl_program  program[3];
-  cl_kernel   kernel[3];
-  size_t  threads[1];
-  float max_err = 0.0f;
-  int num_elements;
-  int err;
-  int i;
-  MTdata d;
-  
-  num_elements = n_elems * 4;
-  
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[2])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
-  if (!streams[3])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00200000, 0x00200000, d);
-  }
-  
-  p = input_ptr[1];
-  p_edge0 = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    float edge0 = p_edge0[i];
-    float edge1;
-    do {
-      edge1 = get_random_float( -0x00200000, 0x00200000, d);
-      if (edge0 < edge1)
-        break;
-    } while (1);
-    p[i] = edge1;
-  }
-  
-  p = input_ptr[2];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00200000, 0x00200000, d);
-  }
-  free_mtdata(d);
-  d = NULL;
-  
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2f" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4f" );
-  if (err)
-    return -1;
-  
-  for (i=0; i<3; i++)
-  {
-	  err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-	  err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-	  err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-	  err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
-	  if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-  
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<3; i++)
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-    
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-    
-    switch (i)
-    {
-      case 0:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 1);
-        break;
-      case 1:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 2);
-        break;
-      case 2:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 4);
-        break;
-    }
-    
-    if (max_err > MAX_ERR)
-    {
-      log_error("%s test failed %g max err\n", fn_names[i], max_err);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed %g max err\n", fn_names[i], max_err);
-      err = 0;
-    }
-    
-    if (err)
-      break;
-  }
-  
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  clReleaseMemObject(streams[2]);
-  clReleaseMemObject(streams[3]);
-  for (i=0; i<3; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(input_ptr[1]);
-  free(input_ptr[2]);
-  free(output_ptr);
-  
-  return err;
-}
-
-
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+#if !defined(_WIN32)
+#include <stdbool.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "procs.h"
+
+static const char *smoothstep_kernel_code = // scalar float edges and scalar float x
+"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+static const char *smoothstep2_kernel_code = // scalar float edges applied to a float2 x
+"__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+static const char *smoothstep4_kernel_code = // scalar float edges applied to a float4 x
+"__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
+"}\n";
+
+#define MAX_ERR (1e-5f) // largest absolute error from verify_smoothstep that test_smoothstepf still reports as a pass
+
+// Host reference for the scalar-edge overloads: edge0[i]/edge1[i] apply to the
+// veclen consecutive x/outptr values of vector i. Returns the largest |error|.
+extern "C" float
+verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n, int veclen)
+{
+  float worst = 0.0f;
+  int   vec, lane;
+  for (vec = 0; vec < n; vec++)
+  {
+    for (lane = 0; lane < veclen; lane++)
+    {
+      const int k = vec * veclen + lane;
+      float t = (x[k] - edge0[vec]) / (edge1[vec] - edge0[vec]);
+      float expected, diff;
+      t = (t < 0.0f) ? 0.0f : ((t > 1.0f) ? 1.0f : t);
+      expected = t * t * (3.0f - 2.0f * t);
+      diff = (float)fabs(expected - outptr[k]);
+      worst = (diff > worst) ? diff : worst;
+    }
+  }
+  return worst;
+}
+
+// Log names, indexed by the same i as kernel[] and program[] in test_smoothstepf.
+static const char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"};
+
+// Runs smoothstep(edge0, edge1, x) with scalar float edges and a float, float2
+// or float4 x, and compares the device output with verify_smoothstep on the host.
+//
+//   device   - not referenced by this function
+//   context  - context the buffers are created in and the kernels are built for
+//   queue    - queue used for every transfer and kernel launch
+//   n_elems  - global work size; each buffer holds n_elems * 4 floats
+//
+// Returns 0 if every variant stays within MAX_ERR, or -1 on the first API failure
+// or out-of-tolerance result. Every exit path releases the buffers, kernels,
+// programs and host arrays that were created up to that point.
+int
+test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+{
+  cl_mem      streams[4];
+  cl_float    *input_ptr[3], *output_ptr = NULL;
+  cl_program  program[3];
+  cl_kernel   kernel[3];
+  size_t      threads[1];
+  size_t      buffer_bytes;
+  float       max_err = 0.0f;
+  int         num_elements;
+  int         result = -1;
+  int         err;
+  int         i;
+  MTdata      d;
+
+  // Null every handle up front so cleanup can tell what was actually created.
+  memset(streams, 0, sizeof(streams));
+  memset(input_ptr, 0, sizeof(input_ptr));
+  memset(program, 0, sizeof(program));
+  memset(kernel, 0, sizeof(kernel));
+
+  // 4 floats per work-item covers the widest variant (float4).
+  num_elements = n_elems * 4;
+  buffer_bytes = sizeof(cl_float) * num_elements;
+
+  // Host staging arrays; checked together before any of them is written to.
+  for (i = 0; i < 3; i++)
+    input_ptr[i] = (cl_float*)malloc(buffer_bytes);
+  output_ptr = (cl_float*)malloc(buffer_bytes);
+  if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
+  {
+    log_error("malloc failed\n");
+    goto cleanup;
+  }
+
+  // streams[0..2] carry edge0, edge1 and x; streams[3] receives the result.
+  for (i = 0; i < 4; i++)
+  {
+    streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  buffer_bytes, NULL, NULL );
+    if (!streams[i])
+    {
+      log_error("clCreateBuffer failed\n");
+      goto cleanup;
+    }
+  }
+
+  // Draw order (edge0, edge1, x) is kept so gRandomSeed yields the same inputs.
+  d = init_genrand( gRandomSeed );
+  for (i = 0; i < num_elements; i++)
+    input_ptr[0][i] = get_random_float(-0x00200000, 0x00200000, d);
+
+  for (i = 0; i < num_elements; i++)
+  {
+    // Redraw until edge0 < edge1 so the reference divides by a positive span.
+    float edge0 = input_ptr[0][i];
+    float edge1;
+    do {
+      edge1 = get_random_float( -0x00200000, 0x00200000, d);
+    } while (!(edge0 < edge1));
+    input_ptr[1][i] = edge1;
+  }
+
+  for (i = 0; i < num_elements; i++)
+    input_ptr[2][i] = get_random_float(-0x00200000, 0x00200000, d);
+  free_mtdata(d);
+
+  // Blocking writes: the inputs are on the device before any kernel is built.
+  for (i = 0; i < 3; i++)
+  {
+    err = clEnqueueWriteBuffer( queue, streams[i], true, 0, buffer_bytes, (void *)input_ptr[i], 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clWriteArray failed\n");
+      goto cleanup;
+    }
+  }
+
+  // NOTE(review): cleanup assumes create_single_kernel_helper leaves *program
+  // and *kernel either untouched (NULL) or valid when it fails - confirm.
+  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
+  if (err)
+    goto cleanup;
+  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2f" );
+  if (err)
+    goto cleanup;
+  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4f" );
+  if (err)
+    goto cleanup;
+
+  // All variants are bound to the same four buffers.
+  for (i = 0; i < 3; i++)
+  {
+    err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
+    err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
+    err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
+    err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clSetKernelArgs failed\n");
+      goto cleanup;
+    }
+  }
+
+  threads[0] = (size_t)n_elems;
+  for (i = 0; i < 3; i++)
+  {
+    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueNDRangeKernel failed\n");
+      goto cleanup;
+    }
+
+    // NOTE(review): reading right after the launch presumably relies on the
+    // queue executing commands in order - confirm.
+    err = clEnqueueReadBuffer( queue, streams[3], true, 0, buffer_bytes, (void *)output_ptr, 0, NULL, NULL );
+    if (err != CL_SUCCESS)
+    {
+      log_error("clEnqueueReadBuffer failed\n");
+      goto cleanup;
+    }
+
+    // Variant i works on vectors of 1, 2 or 4 floats per work-item.
+    switch (i)
+    {
+      case 0:
+        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 1);
+        break;
+      case 1:
+        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 2);
+        break;
+      case 2:
+        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 4);
+        break;
+    }
+    if (max_err > MAX_ERR)
+    {
+      log_error("%s test failed %g max err\n", fn_names[i], max_err);
+      goto cleanup;
+    }
+    log_info("%s test passed %g max err\n", fn_names[i], max_err);
+  }
+  result = 0;
+
+cleanup:
+  // Reached on success and on every failure; result is -1 unless all passed.
+  for (i = 0; i < 4; i++)
+  {
+    if (streams[i])
+      clReleaseMemObject(streams[i]);
+  }
+  for (i = 0; i < 3; i++)
+  {
+    if (kernel[i])
+      clReleaseKernel(kernel[i]);
+    if (program[i])
+      clReleaseProgram(program[i]);
+  }
+  free(input_ptr[0]);
+  free(input_ptr[1]);
+  free(input_ptr[2]);
+  free(output_ptr);
+
+  return result;
+}
+
+
--- a/test_conformance/commonfns/test_step.c
+++ b/test_conformance/commonfns/test_step.c
--- a/test_conformance/commonfns/test_stepf.c
+++ b/test_conformance/commonfns/test_stepf.c