diff --git a/test_conformance/commonfns/CMakeLists.txt b/test_conformance/commonfns/CMakeLists.txt index 5aa29250..bea20cf5 100644 --- a/test_conformance/commonfns/CMakeLists.txt +++ b/test_conformance/commonfns/CMakeLists.txt @@ -3,22 +3,10 @@ set(MODULE_NAME COMMONFNS) set(${MODULE_NAME}_SOURCES main.cpp test_clamp.cpp - test_degrees.cpp - test_max.cpp - test_maxf.cpp - test_min.cpp - test_minf.cpp + test_unary_fn.cpp test_mix.cpp - test_radians.cpp test_step.cpp - test_stepf.cpp test_smoothstep.cpp - test_smoothstepf.cpp - test_sign.cpp - test_fmax.cpp - test_fmin.cpp - test_fmaxf.cpp - test_fminf.cpp test_binary_fn.cpp ) diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp index b8364d5a..3e4b0b8e 100644 --- a/test_conformance/commonfns/main.cpp +++ b/test_conformance/commonfns/main.cpp @@ -13,11 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include #include #include "procs.h" +#include "test_base.h" + +std::map BaseFunctionTest::type2name; int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount]; int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3}; @@ -32,25 +34,13 @@ static void initVecSizes() { } } - test_definition test_list[] = { - ADD_TEST( clamp ), - ADD_TEST( degrees ), - ADD_TEST( fmax ), - ADD_TEST( fmaxf ), - ADD_TEST( fmin ), - ADD_TEST( fminf ), - ADD_TEST( max ), - ADD_TEST( maxf ), - ADD_TEST( min ), - ADD_TEST( minf ), - ADD_TEST( mix ), - ADD_TEST( radians ), - ADD_TEST( step ), - ADD_TEST( stepf ), - ADD_TEST( smoothstep ), - ADD_TEST( smoothstepf ), - ADD_TEST( sign ), + ADD_TEST(clamp), ADD_TEST(degrees), ADD_TEST(fmax), + ADD_TEST(fmaxf), ADD_TEST(fmin), ADD_TEST(fminf), + ADD_TEST(max), ADD_TEST(maxf), ADD_TEST(min), + ADD_TEST(minf), ADD_TEST(mix), ADD_TEST(mixf), + ADD_TEST(radians), ADD_TEST(step), ADD_TEST(stepf), + ADD_TEST(smoothstep), ADD_TEST(smoothstepf), ADD_TEST(sign), }; const int test_num = ARRAY_SIZE( test_list ); @@ -58,6 +48,14 @@ const int test_num = ARRAY_SIZE( test_list ); int main(int argc, const char *argv[]) { initVecSizes(); + + if (BaseFunctionTest::type2name.empty()) + { + BaseFunctionTest::type2name[sizeof(half)] = "half"; + BaseFunctionTest::type2name[sizeof(float)] = "float"; + BaseFunctionTest::type2name[sizeof(double)] = "double"; + } + return runTestHarness(argc, argv, test_num, test_list, false, 0); } diff --git a/test_conformance/commonfns/procs.h b/test_conformance/commonfns/procs.h index dada94f9..c1115ee7 100644 --- a/test_conformance/commonfns/procs.h +++ b/test_conformance/commonfns/procs.h @@ -37,6 +37,8 @@ extern int test_maxf(cl_device_id device, cl_context context, cl_command_ extern int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mixf(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_step(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); @@ -44,11 +46,4 @@ extern int test_smoothstep(cl_device_id device, cl_context context, cl_co extern int test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -typedef int (*binary_verify_float_fn)( float *x, float *y, float *out, int numElements, int vecSize ); -typedef int (*binary_verify_double_fn)( double *x, double *y, double *out, int numElements, int vecSize ); - -extern int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, - const char *fnName, bool vectorSecondParam, - binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn ); - diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h new file mode 100644 index 00000000..44291042 --- /dev/null +++ b/test_conformance/commonfns/test_base.h @@ -0,0 +1,193 @@ +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef TEST_COMMONFNS_BASE_H +#define TEST_COMMONFNS_BASE_H + +#include +#include +#include + +#include +#include + +#include "harness/deviceInfo.h" +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" + + +template +using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const, + const int num, const int vs, const int vp); + + +template +using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num); + + +using half = cl_half; + + +struct BaseFunctionTest +{ + BaseFunctionTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elems, const char *fn, + bool vsp) + : device(device), context(context), queue(queue), num_elems(num_elems), + fnName(fn), vecParam(vsp) + {} + + // Test body returning an OpenCL error code + virtual cl_int Run() = 0; + + cl_device_id device; + cl_context context; + cl_command_queue queue; + + int num_elems; + std::string fnName; + bool vecParam; + + static std::map type2name; +}; + + +struct MinTest : BaseFunctionTest +{ + MinTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct MaxTest : BaseFunctionTest +{ + MaxTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct ClampTest : BaseFunctionTest +{ + ClampTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct DegreesTest : BaseFunctionTest +{ + DegreesTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct RadiansTest : BaseFunctionTest +{ + RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct SignTest : BaseFunctionTest +{ + SignTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct SmoothstepTest : BaseFunctionTest +{ + SmoothstepTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elems, const char *fn, + bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct StepTest : BaseFunctionTest +{ + StepTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +struct MixTest : BaseFunctionTest +{ + MixTest(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elems, const char *fn, bool vsp) + : BaseFunctionTest(device, context, queue, num_elems, fn, vsp) + {} + + cl_int Run() override; +}; + + +template +std::string string_format(const std::string &format, Args... args) +{ + int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; + if (sformat <= 0) + throw std::runtime_error("string_format: string processing error."); + auto format_size = static_cast(sformat); + std::unique_ptr buffer(new char[format_size]); + std::snprintf(buffer.get(), format_size, format.c_str(), args...); + return std::string(buffer.get(), buffer.get() + format_size - 1); +} + + +template +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *fn = "", bool vsp = false) +{ + auto test_fixture = T(device, context, queue, num_elements, fn, vsp); + + cl_int error = test_fixture.Run(); + test_error_ret(error, "Test Failed", TEST_FAIL); + + return TEST_PASS; +} + +#endif // TEST_COMMONFNS_BASE_H diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index b40bf1f6..1eb12f73 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -13,14 +13,18 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include #include #include #include +#include + +#include "harness/deviceInfo.h" +#include "harness/typeWrappers.h" #include "procs.h" +#include "test_base.h" const char *binary_fn_code_pattern = "%s\n" /* optional pragma */ @@ -49,216 +53,286 @@ const char *binary_fn_code_pattern_v3_scalar = " vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n" "}\n"; -int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, - const char *fnName, bool vectorSecondParam, - binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn ) + +template +int test_binary_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, + const std::string& fnName, bool vecSecParam, + VerifyFuncBinary verifyFn) { - cl_mem streams[6]; - cl_float *input_ptr[2], *output_ptr; - cl_double *input_ptr_double[2], *output_ptr_double=NULL; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i, j; - MTdata d; + clMemWrapper streams[3]; + std::vector input_ptr[2], output_ptr; - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2); + std::vector programs; + std::vector kernels; + int err, i, j; + MTdataHolder d = MTdataHolder(gRandomSeed); - num_elements = n_elems * (1 << (kTotalVecCount-1)); + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; - int test_double = 0; - if(is_extension_available( device, "cl_khr_fp64" )) - { - log_info("Testing doubles.\n"); - test_double = 1; - } + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); - for( i = 0; i < 2; i++ ) - { - input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - } - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements); + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); + + for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); for( i = 0; i < 3; i++ ) { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, &err); + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); test_error( err, "clCreateBuffer failed"); } - if (test_double) - for( i = 3; i < 6; i++ ) - { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, &err); - test_error(err, "clCreateBuffer failed"); - } - - d = init_genrand( gRandomSeed ); - for( j = 0; j < num_elements; j++ ) + std::string pragma_str; + if (std::is_same::value) { - input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d); - input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d); - if (test_double) + for (j = 0; j < num_elements; j++) { - input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d); - input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d); + input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d); } } - free_mtdata(d); d = NULL; - - for( i = 0; i < 2; i++ ) + else if (std::is_same::value) { - err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); - - if (test_double) + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (j = 0; j < num_elements; j++) { - err = clEnqueueWriteBuffer( queue, streams[ 3 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); + input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); } } - for( i = 0; i < kTotalVecCount; i++ ) + for (i = 0; i < 2; i++) { - char programSrc[ 10240 ]; - char vecSizeNames[][ 3 ] = { "", "2", "4", "8", "16", "3" }; + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); + } - if(i >= kVectorSizeCount) { - // do vec3 print + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; - if(vectorSecondParam) { - sprintf( programSrc,binary_fn_code_pattern_v3, "", "float", "float", "float", fnName ); - } else { - sprintf( programSrc,binary_fn_code_pattern_v3_scalar, "", "float", "float", "float", fnName ); + for (i = 0; i < kTotalVecCount; i++) + { + std::string kernelSource; + if (i >= kVectorSizeCount) + { + if (vecSecParam) + { + std::string str = binary_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); + } + else + { + std::string str = binary_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } - } else { - // do regular - sprintf( programSrc, binary_fn_code_pattern, "", "float", vecSizeNames[ i ], "float", vectorSecondParam ? vecSizeNames[ i ] : "", "float", vecSizeNames[ i ], fnName ); } - const char *ptr = programSrc; - err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &ptr, "test_fn" ); - test_error( err, "Unable to create kernel" ); - - if (test_double) + else { - if(i >= kVectorSizeCount) { - if(vectorSecondParam) { - sprintf( programSrc, binary_fn_code_pattern_v3, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", - "double", "double", "double", fnName ); - } else { + // do regular + std::string str = binary_fn_code_pattern; + kernelSource = string_format( + str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], + tname.c_str(), vecSecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i], fnName.c_str()); + } + const char* programPtr = kernelSource.c_str(); + err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char**)&programPtr, "test_fn"); + test_error(err, "Unable to create kernel"); - sprintf( programSrc, binary_fn_code_pattern_v3_scalar, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", - "double", "double", "double", fnName ); - } - } else { - sprintf( programSrc, binary_fn_code_pattern, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable", - "double", vecSizeNames[ i ], "double", vectorSecondParam ? vecSizeNames[ i ] : "", "double", vecSizeNames[ i ], fnName ); - } - ptr = programSrc; - err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &ptr, "test_fn" ); - test_error( err, "Unable to create kernel" ); - } - } - - for( i = 0; i < kTotalVecCount; i++ ) - { for( j = 0; j < 3; j++ ) { - err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] ); + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); test_error( err, "Unable to set kernel argument" ); } - threads[0] = (size_t)n_elems; + size_t threads = (size_t)n_elems; - err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); test_error( err, "Unable to execute kernel" ); - err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); + err = clEnqueueReadBuffer(queue, streams[2], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); test_error( err, "Unable to read results" ); - - - if( floatVerifyFn( input_ptr[0], input_ptr[1], output_ptr, n_elems, ((g_arrVecSizes[i])) ) ) + if (verifyFn((T*)&input_ptr[0].front(), (T*)&input_ptr[1].front(), + &output_ptr[0], n_elems, g_arrVecSizes[i], + vecSecParam ? 1 : 0)) { - log_error(" float%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float"); + log_error("%s %s%d%s test failed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i])), + vecSecParam ? "" : std::string(", " + tname).c_str()); err = -1; } else { - log_info(" float%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float"); + log_info("%s %s%d%s test passed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i])), + vecSecParam ? "" : std::string(", " + tname).c_str()); err = 0; } if (err) break; } - - if (test_double) - { - for( i = 0; i < kTotalVecCount; i++ ) - { - for( j = 0; j < 3; j++ ) - { - err = clSetKernelArg( kernel[ kTotalVecCount + i ], j, sizeof( streams[ 3 + j ] ), &streams[ 3 + j ] ); - test_error( err, "Unable to set kernel argument" ); - } - - threads[0] = (size_t)n_elems; - - err = clEnqueueNDRangeKernel( queue, kernel[kTotalVecCount + i], 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( err, "Unable to execute kernel" ); - - err = clEnqueueReadBuffer( queue, streams[5], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL ); - test_error( err, "Unable to read results" ); - - if( doubleVerifyFn( input_ptr_double[0], input_ptr_double[1], output_ptr_double, n_elems, ((g_arrVecSizes[i])))) - { - log_error(" double%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double"); - err = -1; - } - else - { - log_info(" double%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double"); - err = 0; - } - - if (err) - break; - } - } - - - for( i = 0; i < ((test_double) ? 6 : 3); i++ ) - { - clReleaseMemObject(streams[i]); - } - for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount) ; i++) - { - clReleaseKernel(kernel[i]); - clReleaseProgram(program[i]); - } - free(input_ptr[0]); - free(input_ptr[1]); - free(output_ptr); - free(program); - free(kernel); - - if (test_double) - { - free(input_ptr_double[0]); - free(input_ptr_double[1]); - free(output_ptr_double); - } - return err; } +namespace { +template +int max_verify(const T* const x, const T* const y, const T* const out, + int numElements, int vecSize, int vecParam) +{ + for (int i = 0; i < numElements; i++) + { + for (int j = 0; j < vecSize; j++) + { + int k = i * vecSize + j; + int l = (k * vecParam + i * (1 - vecParam)); + T v = (x[k] < y[l]) ? y[l] : x[k]; + if (v != out[k]) + { + log_error( + "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + return -1; + } + } + } + return 0; +} + +template +int min_verify(const T* const x, const T* const y, const T* const out, + int numElements, int vecSize, int vecParam) +{ + for (int i = 0; i < numElements; i++) + { + for (int j = 0; j < vecSize; j++) + { + int k = i * vecSize + j; + int l = (k * vecParam + i * (1 - vecParam)); + T v = (x[k] > y[l]) ? y[l] : x[k]; + if (v != out[k]) + { + log_error( + "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + return -1; + } + } + } + return 0; +} + +} + +cl_int MaxTest::Run() +{ + cl_int error = CL_SUCCESS; + + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, max_verify); + test_error(error, "MaxTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + max_verify); + test_error(error, "MaxTest::Run failed"); + } + + return error; +} + +cl_int MinTest::Run() +{ + cl_int error = CL_SUCCESS; + + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, min_verify); + test_error(error, "MinTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + min_verify); + test_error(error, "MinTest::Run failed"); + } + + return error; +} + +int test_min(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "min", + true); +} + +int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "min", + false); +} + +int test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmin", + true); +} + +int test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmin", + false); +} + +int test_max(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "max", + true); +} + +int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "max", + false); +} + +int test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmax", + true); +} + +int test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "fmax", + false); +} diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp index bbb83645..0e96fb60 100644 --- a/test_conformance/commonfns/test_clamp.cpp +++ b/test_conformance/commonfns/test_clamp.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,303 +13,252 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include #include #include #include +#include + +#include "harness/deviceInfo.h" +#include "harness/typeWrappers.h" #include "procs.h" +#include "test_base.h" + #ifndef M_PI -#define M_PI 3.14159265358979323846264338327950288 +#define M_PI 3.14159265358979323846264338327950288 #endif -#define CLAMP_KERNEL( type ) \ - const char *clamp_##type##_kernel_code = \ - EMIT_PRAGMA_DIRECTIVE \ - "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ - "{\n" \ - " int tid = get_global_id(0);\n" \ - "\n" \ - " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ - "}\n"; -#define CLAMP_KERNEL_V( type, size) \ - const char *clamp_##type##size##_kernel_code = \ - EMIT_PRAGMA_DIRECTIVE \ - "__kernel void test_clamp(__global " #type #size " *x, __global " #type #size " *minval, __global " #type #size " *maxval, __global " #type #size " *dst)\n" \ - "{\n" \ - " int tid = get_global_id(0);\n" \ - "\n" \ - " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ - "}\n"; +#define CLAMP_KERNEL(type) \ + const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type " *x, __global " #type \ + " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ + "}\n"; + +#define CLAMP_KERNEL_V(type, size) \ + const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type #size \ + " *x, __global " #type #size " *minval, __global " #type #size \ + " *maxval, __global " #type #size " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n" \ + "}\n"; + +#define CLAMP_KERNEL_V3(type, size) \ + const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ + "__kernel void test_clamp(__global " #type " *x, __global " #type \ + " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ + "{\n" \ + " int tid = get_global_id(0);\n" \ + "\n" \ + " vstore3(clamp(vload3(tid, x), vload3(tid,minval), " \ + "vload3(tid,maxval)), tid, dst);\n" \ + "}\n"; -#define CLAMP_KERNEL_V3( type, size) \ - const char *clamp_##type##size##_kernel_code = \ - EMIT_PRAGMA_DIRECTIVE \ - "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \ - "{\n" \ - " int tid = get_global_id(0);\n" \ - "\n" \ - " vstore3(clamp(vload3(tid, x), vload3(tid,minval), vload3(tid,maxval)), tid, dst);\n" \ - "}\n"; #define EMIT_PRAGMA_DIRECTIVE " " -CLAMP_KERNEL( float ) -CLAMP_KERNEL_V( float, 2 ) -CLAMP_KERNEL_V( float, 4 ) -CLAMP_KERNEL_V( float, 8 ) -CLAMP_KERNEL_V( float, 16 ) -CLAMP_KERNEL_V3( float, 3) +CLAMP_KERNEL(float) +CLAMP_KERNEL_V(float, 2) +CLAMP_KERNEL_V(float, 4) +CLAMP_KERNEL_V(float, 8) +CLAMP_KERNEL_V(float, 16) +CLAMP_KERNEL_V3(float, 3) #undef EMIT_PRAGMA_DIRECTIVE #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" -CLAMP_KERNEL( double ) -CLAMP_KERNEL_V( double, 2 ) -CLAMP_KERNEL_V( double, 4 ) -CLAMP_KERNEL_V( double, 8 ) -CLAMP_KERNEL_V( double, 16 ) -CLAMP_KERNEL_V3( double, 3 ) +CLAMP_KERNEL(double) +CLAMP_KERNEL_V(double, 2) +CLAMP_KERNEL_V(double, 4) +CLAMP_KERNEL_V(double, 8) +CLAMP_KERNEL_V(double, 16) +CLAMP_KERNEL_V3(double, 3) #undef EMIT_PRAGMA_DIRECTIVE -const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, clamp_float16_kernel_code, clamp_float3_kernel_code }; -const char *clamp_double_codes[] = { clamp_double_kernel_code, clamp_double2_kernel_code, clamp_double4_kernel_code, clamp_double8_kernel_code, clamp_double16_kernel_code, clamp_double3_kernel_code }; +const char *clamp_float_codes[] = { + clamp_float_kernel_code, clamp_float2_kernel_code, + clamp_float4_kernel_code, clamp_float8_kernel_code, + clamp_float16_kernel_code, clamp_float3_kernel_code +}; +const char *clamp_double_codes[] = { + clamp_double_kernel_code, clamp_double2_kernel_code, + clamp_double4_kernel_code, clamp_double8_kernel_code, + clamp_double16_kernel_code, clamp_double3_kernel_code +}; -static int verify_clamp(float *x, float *minval, float *maxval, float *outptr, int n) +namespace { + + +template +int verify_clamp(const T *const x, const T *const minval, const T *const maxval, + const T *const outptr, int n) { - float t; - int i; - - for (i=0; i +int test_clamp_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) { - cl_mem streams[8]; - cl_float *input_ptr[3], *output_ptr; - cl_double *input_ptr_double[3], *output_ptr_double = NULL; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i, j; - MTdata d; + clMemWrapper streams[4]; + std::vector input_ptr[3], output_ptr; - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2); + std::vector programs; + std::vector kernels; - num_elements = n_elems * (1 << (kVectorSizeCount-1)); + int err, i, j; + MTdataHolder d = MTdataHolder(gRandomSeed); - int test_double = 0; - if(is_extension_available( device, "cl_khr_fp64" )) { - log_info("Testing doubles.\n"); - test_double = 1; + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); + + int num_elements = n_elems * (1 << (kVectorSizeCount - 1)); + + for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); + + for (i = 0; i < 4; i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); } - - // why does this go from 0 to 2?? -- Oh, I see, there are four function - // arguments to the function, and 3 of them are inputs? - for( i = 0; i < 3; i++ ) + if (std::is_same::value) { - input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - } - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements); - - // why does this go from 0 to 3? - for( i = 0; i < 4; i++ ) - { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) + for (j = 0; j < num_elements; j++) { - log_error("clCreateBuffer failed\n"); - return -1; + input_ptr[0][j] = get_random_float(-0x200000, 0x200000, d); + input_ptr[1][j] = get_random_float(-0x200000, 0x200000, d); + input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x200000, d); } } - if (test_double) - for( i = 4; i < 8; i++ ) + else if (std::is_same::value) + { + for (j = 0; j < num_elements; j++) { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - } - - d = init_genrand( gRandomSeed ); - for( j = 0; j < num_elements; j++ ) - { - input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d); - input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d); - input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x20000000, d); - - if (test_double) { - input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d); - input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d); - input_ptr_double[2][j] = get_random_double(input_ptr_double[1][j], 0x20000000, d); - } - } - free_mtdata(d); d = NULL; - - for( i = 0; i < 3; i++ ) - { - err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); - - if (test_double) { - err = clEnqueueWriteBuffer( queue, streams[ 4 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL ); - test_error( err, "Unable to write input buffer" ); + input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); + input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d); } } - for( i = 0; i < kTotalVecCount; i++ ) + for (i = 0; i < 3; i++) { - err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &clamp_float_codes[ i ], "test_clamp" ); - test_error( err, "Unable to create kernel" ); + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); + } - log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], i); + for (i = 0; i < kTotalVecCount; i++) + { + if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_float_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } + else if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_double_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } + + log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", + i, g_arrVecSizes[i], i); fflush(stdout); - if (test_double) { - err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &clamp_double_codes[ i ], "test_clamp" ); - log_info("Just made a program for double, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], kTotalVecCount+i); - fflush(stdout); - test_error( err, "Unable to create kernel" ); - } - } - - for( i = 0; i < kTotalVecCount; i++ ) - { - for( j = 0; j < 4; j++ ) + for (j = 0; j < 4; j++) { - err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] ); - test_error( err, "Unable to set kernel argument" ); + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); + test_error(err, "Unable to set kernel argument"); } - threads[0] = (size_t)n_elems; + size_t threads = (size_t)n_elems; - err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( err, "Unable to execute kernel" ); + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + test_error(err, "Unable to execute kernel"); - err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); - test_error( err, "Unable to read results" ); + err = clEnqueueReadBuffer(queue, streams[3], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + test_error(err, "Unable to read results"); - if (verify_clamp(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems*((g_arrVecSizes[i])))) + if (verify_clamp((T *)&input_ptr[0].front(), + (T *)&input_ptr[1].front(), + (T *)&input_ptr[2].front(), (T *)&output_ptr[0], + n_elems * ((g_arrVecSizes[i])))) { - log_error("CLAMP float%d test failed\n", ((g_arrVecSizes[i]))); + log_error("CLAMP %s%d test failed\n", tname.c_str(), + ((g_arrVecSizes[i]))); err = -1; } else { - log_info("CLAMP float%d test passed\n", ((g_arrVecSizes[i]))); + log_info("CLAMP %s%d test passed\n", tname.c_str(), + ((g_arrVecSizes[i]))); err = 0; } - - - if (err) - break; - } - - // If the device supports double precision then test that - if (test_double) - { - for( ; i < 2*kTotalVecCount; i++ ) - { - - log_info("Start of test_double loop, i is %d\n", i); - for( j = 0; j < 4; j++ ) - { - err = clSetKernelArg( kernel[i], j, sizeof( streams[j+4] ), &streams[j+4] ); - test_error( err, "Unable to set kernel argument" ); - } - - threads[0] = (size_t)n_elems; - - err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL ); - test_error( err, "Unable to execute kernel" ); - - err = clEnqueueReadBuffer( queue, streams[7], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL ); - test_error( err, "Unable to read results" ); - - if (verify_clamp_double(input_ptr_double[0], input_ptr_double[1], input_ptr_double[2], output_ptr_double, n_elems*g_arrVecSizes[(i-kTotalVecCount)])) - { - log_error("CLAMP double%d test failed\n", g_arrVecSizes[(i-kTotalVecCount)]); - err = -1; - } - else - { - log_info("CLAMP double%d test passed\n", g_arrVecSizes[(i-kTotalVecCount)]); - err = 0; - } - - if (err) - break; - } - } - - - for( i = 0; i < ((test_double) ? 8 : 4); i++ ) - { - clReleaseMemObject(streams[i]); - } - for (i=0; i < ((test_double) ? kTotalVecCount * 2-1 : kTotalVecCount); i++) - { - clReleaseKernel(kernel[i]); - clReleaseProgram(program[i]); - } - free(input_ptr[0]); - free(input_ptr[1]); - free(input_ptr[2]); - free(output_ptr); - free(program); - free(kernel); - if (test_double) { - free(input_ptr_double[0]); - free(input_ptr_double[1]); - free(input_ptr_double[2]); - free(output_ptr_double); + if (err) break; } return err; } +cl_int ClampTest::Run() +{ + cl_int error = CL_SUCCESS; + + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); + } + + return error; +} + + +int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems); +} diff --git a/test_conformance/commonfns/test_degrees.cpp b/test_conformance/commonfns/test_degrees.cpp deleted file mode 100644 index 17311ba8..00000000 --- a/test_conformance/commonfns/test_degrees.cpp +++ /dev/null @@ -1,470 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846264338327950288 -#endif - -static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -const char *degrees_kernel_code = -"__kernel void test_degrees(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees2_kernel_code = -"__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees4_kernel_code = -"__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees8_kernel_code = -"__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees16_kernel_code = -"__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = degrees(src[tid]);\n" -"}\n"; - -const char *degrees3_kernel_code = -"__kernel void test_degrees3(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(degrees(vload3(tid,src)),tid,dst);\n" -"}\n"; - - -#define MAX_ERR 2.0f - -static int -verify_degrees(float *inptr, float *outptr, int n) -{ - float error, max_error = 0.0f; - double r, max_val = NAN; - int i, j, max_index = 0; - - for (i=0,j=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - -int -test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_float *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "degreesd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - -static int -test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_double *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - // TODO: line below is clearly wrong - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmax_kernel_code = - "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax2_kernel_code = - "__kernel void test_fmax2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax4_kernel_code = - "__kernel void test_fmax4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax8_kernel_code = - "__kernel void test_fmax8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax16_kernel_code = - "__kernel void test_fmax16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - - -static const char *fmax3_kernel_code = - "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmax(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" - "}\n"; - -static int -verify_fmax(float *inptrA, float *inptrB, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i= inptrB[i]) ? inptrA[i] : inptrB[i]; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -int -test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmax_kernel_code = - "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax2_kernel_code = - "__kernel void test_fmax2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax4_kernel_code = - "__kernel void test_fmax4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax8_kernel_code = - "__kernel void test_fmax8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax16_kernel_code = - "__kernel void test_fmax16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmax(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmax3_kernel_code = - "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmax(vload3(tid,srcA), srcB[tid]),tid,dst);\n" - "}\n"; - -static int -verify_fmax(float *inptrA, float *inptrB, float *outptr, int n, int veclen) -{ - float r; - int i, j; - - for (i=0; i= inptrB[ii]) ? inptrA[i] : inptrB[ii]; - if (r != outptr[i]) { - log_info("Verify noted discrepancy at %d (of %d) (vec %d, pos %d)\n", - i,n,ii,j); - log_info("SHould be %f, is %f\n", r, outptr[i]); - log_info("Taking max of (%f,%f)\n", inptrA[i], inptrB[i]); - return -1; - } - } - } - - return 0; -} - -int -test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmin_kernel_code = - "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin2_kernel_code = - "__kernel void test_fmin2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin4_kernel_code = - "__kernel void test_fmin4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin8_kernel_code = - "__kernel void test_fmin8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin16_kernel_code = - "__kernel void test_fmin16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - "\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - - -static const char *fmin3_kernel_code = - "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmin(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" - "}\n"; - -int -verify_fmin(float *inptrA, float *inptrB, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i inptrB[i]) ? inptrB[i] : inptrA[i]; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -int -test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1));; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static const char *fmin_kernel_code = - "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin2_kernel_code = - "__kernel void test_fmin2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin4_kernel_code = - "__kernel void test_fmin4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin8_kernel_code = - "__kernel void test_fmin8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin16_kernel_code = - "__kernel void test_fmin16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = fmin(srcA[tid], srcB[tid]);\n" - "}\n"; - -static const char *fmin3_kernel_code = - "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " vstore3(fmin(vload3(tid,srcA), srcB[tid]),tid,dst);\n" - "}\n"; - -static int -verify_fmin(float *inptrA, float *inptrB, float *outptr, int n, int veclen) -{ - float r; - int i, j; - - for (i=0; i inptrB[ii]) ? inptrB[ii] : inptrA[i]; - if (r != outptr[i]) - return -1; - } - } - - return 0; -} - -int -test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - float v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) - { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", - i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - double v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) - { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", - i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -int test_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "max", true, max_verify_float, max_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_maxf.cpp b/test_conformance/commonfns/test_maxf.cpp deleted file mode 100644 index f96df7ea..00000000 --- a/test_conformance/commonfns/test_maxf.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - float v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - return -1; - } - } - } - return 0; -} - -static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - double v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - return -1; - } - } - } - return 0; -} - -int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "max", false, max_verify_float, max_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_min.cpp b/test_conformance/commonfns/test_min.cpp deleted file mode 100644 index 707e24b6..00000000 --- a/test_conformance/commonfns/test_min.cpp +++ /dev/null @@ -1,56 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - float v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements * vecSize; i++ ) - { - double v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ]; - if( v != out[ i ] ) { - log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize); - return -1; - } - } - return 0; -} - -int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "min", true, min_verify_float, min_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_minf.cpp b/test_conformance/commonfns/test_minf.cpp deleted file mode 100644 index 71b1fbe0..00000000 --- a/test_conformance/commonfns/test_minf.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" -#include "harness/errorHelpers.h" - -static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize ) -{ - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - float v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - return -1; - } - } - } - return 0; -} - -static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize ) -{ - int maxFail = 1; - int numFails = 0; - for( int i = 0; i < numElements; i++ ) - { - for( int j = 0; j < vecSize; j++ ) - { - double v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ]; - if( v != out[ i * vecSize + j ] ) - { - log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v, out[ i * vecSize + j ] ); - ++numFails; - if(numFails >= maxFail) { - return -1; - } - } - } - } - return 0; -} - -int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - return test_binary_fn( device, context, queue, n_elems, "min", false, min_verify_float, min_verify_double ); -} - - diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 5dedce3f..92c10100 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,179 +13,265 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - #include #include #include #include #include "procs.h" +#include "test_base.h" + + +const char *mix_fn_code_pattern = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s " + "*a, __global %s%s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = mix(x[tid], y[tid], a[tid]);\n" + "}\n"; + +const char *mix_fn_code_pattern_v3 = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(mix(vload3(tid, x), vload3(tid, y), vload3(tid, a)), tid, " + "dst);\n" + "}\n"; + +const char *mix_fn_code_pattern_v3_scalar = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n" + "}\n"; -const char *mix_kernel_code = -"__kernel void test_mix(__global float *srcA, __global float *srcB, __global float *srcC, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid]);\n" -"}\n"; #define MAX_ERR 1e-3 -float -verify_mix(float *inptrA, float *inptrB, float *inptrC, float *outptr, int n) +namespace { + + +template +int verify_mix(const T *const inptrX, const T *const inptrY, + const T *const inptrA, const T *const outptr, const int n, + const int veclen, const bool vecParam) { - float r, delta, max_err = 0.0f; - int i; + T r; + float delta = 0.0f; + int i; - for (i=0; i max_err) max_err = delta; - } - return max_err; -} - -int -test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem streams[4]; - cl_float *input_ptr[3], *output_ptr, *p; - cl_program program; - cl_kernel kernel; - size_t threads[1]; - float max_err; - int err; - int i; - MTdata d; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i MAX_ERR) - { - log_error("MIX test failed %g max err\n", max_err); - err = -1; + for (i = 0; i < n * veclen; i++) + { + r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]); + delta = fabs(double(r - outptr[i])) / r; + if (delta > MAX_ERR) + { + log_error( + "%d) verification error: mix(%a, %a, %a) = *%a vs. %a\n", i, + inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + return -1; + } + } } else { - log_info("MIX test passed %g max err\n", max_err); - err = 0; + for (int i = 0; i < n; ++i) + { + int ii = i / veclen; + int vi = i * veclen; + for (int j = 0; j < veclen; ++j, ++vi) + { + r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[i]); + delta = fabs(double(r - outptr[vi])) / r; + if (delta > MAX_ERR) + { + log_error("{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); + return -1; + } + } + } } - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr[0]); - free(input_ptr[1]); - free(input_ptr[2]); - free(output_ptr); + return 0; +} +} // namespace + + +template +int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems, bool vecParam) +{ + clMemWrapper streams[4]; + std::vector input_ptr[3], output_ptr; + + std::vector programs; + std::vector kernels; + + int err, i; + MTdataHolder d = MTdataHolder(gRandomSeed); + + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); + + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); + + + for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); + + for (i = 0; i < 4; i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); + } + + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = (T)genrand_real1(d); + input_ptr[1][i] = (T)genrand_real1(d); + input_ptr[2][i] = (T)genrand_real1(d); + } + + std::string pragma_str; + if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + } + + for (i = 0; i < 3; i++) + { + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); + } + + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + + for (i = 0; i < kTotalVecCount; i++) + { + std::string kernelSource; + if (i >= kVectorSizeCount) + { + if (vecParam) + { + std::string str = mix_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + else + { + std::string str = mix_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + } + else + { + // regular path + std::string str = mix_fn_code_pattern; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i], + tname.c_str(), vecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i]); + } + const char *programPtr = kernelSource.c_str(); + err = + create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char **)&programPtr, "test_fn"); + test_error(err, "Unable to create kernel"); + + for (int j = 0; j < 4; j++) + { + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); + test_error(err, "Unable to set kernel argument"); + } + + size_t threads = (size_t)n_elems; + + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + test_error(err, "Unable to execute kernel"); + + err = clEnqueueReadBuffer(queue, streams[3], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + test_error(err, "Unable to read results"); + + if (verify_mix(&input_ptr[0].front(), &input_ptr[1].front(), + &input_ptr[2].front(), &output_ptr.front(), n_elems, + g_arrVecSizes[i], vecParam)) + { + log_error("mix %s%d%s test failed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = -1; + } + else + { + log_info("mix %s%d%s test passed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = 0; + } + + if (err) break; + } return err; } +cl_int MixTest::Run() +{ + cl_int error = CL_SUCCESS; + + error = test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = + test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); + } + + return error; +} +int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "mix", + true); +} + +int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "mix", + false); +} diff --git a/test_conformance/commonfns/test_radians.cpp b/test_conformance/commonfns/test_radians.cpp deleted file mode 100644 index 2eb0500f..00000000 --- a/test_conformance/commonfns/test_radians.cpp +++ /dev/null @@ -1,468 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -#ifndef M_PI -#define M_PI 3.14159265358979323846264338327950288 -#endif - -static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -const char *radians_kernel_code = -"__kernel void test_radians(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians2_kernel_code = -"__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians4_kernel_code = -"__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians8_kernel_code = -"__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians16_kernel_code = -"__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = radians(src[tid]);\n" -"}\n"; - -const char *radians3_kernel_code = -"__kernel void test_radians3(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(radians(vload3(tid,src)),tid,dst);\n" -"}\n"; - - -#define MAX_ERR 2.0f - -static float -verify_radians(float *inptr, float *outptr, int n) -{ - float error, max_error = 0.0f; - double r, max_val = NAN; - int i, j, max_index = 0; - - for (i=0,j=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - - -int -test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_float *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i max_error) - { - max_error = error; - max_index = i; - max_val = r; - if( fabsf(error) > MAX_ERR) - { - log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error ); - return 1; - } - } - } - - log_info( "radiansd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] ); - - return 0; -} - - -int -test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_double *input_ptr[1], *output_ptr, *p; - cl_program *program; - cl_kernel *kernel; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - - program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount); - kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount); - - //TODO: line below is clearly wrong - num_elements = n_elems * (1 << (kTotalVecCount-1)); - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - -static int -test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -const char *sign_kernel_code = -"__kernel void test_sign(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign2_kernel_code = -"__kernel void test_sign2(__global float2 *src, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign4_kernel_code = -"__kernel void test_sign4(__global float4 *src, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign8_kernel_code = -"__kernel void test_sign8(__global float8 *src, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign16_kernel_code = -"__kernel void test_sign16(__global float16 *src, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = sign(src[tid]);\n" -"}\n"; - -const char *sign3_kernel_code = -"__kernel void test_sign3(__global float *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(sign(vload3(tid,src)), tid, dst);\n" -"}\n"; - - - -static int -verify_sign(float *inptr, float *outptr, int n) -{ - float r; - int i; - - for (i=0; i 0.0f) - r = 1.0f; - else if (inptr[i] < 0.0f) - r = -1.0f; - else - r = 0.0f; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -static const char *fn_names[] = { "SIGN float", "SIGN float2", "SIGN float4", "SIGN float8", "SIGN float16", "SIGN float3" }; - -int -test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_float *input_ptr[1], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 16; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i 0.0) - r = 1.0; - else if (inptr[i] < 0.0) - r = -1.0; - else - r = 0.0f; - if (r != outptr[i]) - return -1; - } - - return 0; -} - -static const char *fn_names_double[] = { "SIGN double", "SIGN double2", "SIGN double4", "SIGN double8", "SIGN double16", "SIGN double3" }; - -int -test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_double *input_ptr[1], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 16; - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i #include #include #include #include "procs.h" +#include "test_base.h" -static const char *smoothstep_kernel_code = -"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; -static const char *smoothstep2_kernel_code = -"__kernel void test_smoothstep2(__global float2 *edge0, __global float2 *edge1, __global float2 *x, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; +const char *smoothstep_fn_code_pattern = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s " + "*x, __global %s%s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = smoothstep(e0[tid], e1[tid], x[tid]);\n" + "}\n"; -static const char *smoothstep4_kernel_code = -"__kernel void test_smoothstep4(__global float4 *edge0, __global float4 *edge1, __global float4 *x, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; +const char *smoothstep_fn_code_pattern_v3 = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(smoothstep(vload3(tid,e0), vload3(tid,e1), vload3(tid,x)), " + "tid, dst);\n" + "}\n"; -static const char *smoothstep8_kernel_code = -"__kernel void test_smoothstep8(__global float8 *edge0, __global float8 *edge1, __global float8 *x, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; +const char *smoothstep_fn_code_pattern_v3_scalar = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, " + "__global %s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n" + "}\n"; -static const char *smoothstep16_kernel_code = -"__kernel void test_smoothstep16(__global float16 *edge0, __global float16 *edge1, __global float16 *x, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep3_kernel_code = -"__kernel void test_smoothstep3(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(smoothstep(vload3(tid,edge0),vload3(tid,edge1),vload3(tid,x)), tid, dst);\n" -"}\n"; #define MAX_ERR (1e-5f) -static float -verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n) +namespace { + + +template +int verify_smoothstep(const T *const edge0, const T *const edge1, + const T *const x, const T *const outptr, const int n, + const int veclen, const bool vecParam) { - float r, t, delta, max_err = 0.0f; - int i; + T r, t; + float delta = 0; - for (i=0; i 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[i]); - if (delta > max_err) - max_err = delta; - } - - return max_err; -} - -const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4", "SMOOTHSTEP float8", "SMOOTHSTEP float16", "SMOOTHSTEP float3" }; - -int -test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[4]; - cl_float *input_ptr[3], *output_ptr, *p, *p_edge0; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - float max_err; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 16; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i MAX_ERR) - { - log_error("%s test failed %g max err\n", fn_names[i], max_err); - err = -1; + for (int i = 0; i < n * veclen; i++) + { + t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]); + if (t < 0.0f) + t = 0.0f; + else if (t > 1.0f) + t = 1.0f; + r = t * t * (3.0f - 2.0f * t); + delta = (float)fabs(r - outptr[i]); + if (delta > MAX_ERR) + { + log_error("%d) verification error: smoothstep(%a, %a, %a) = " + "*%a vs. %a\n", + i, x[i], edge0[i], edge1[i], r, outptr[i]); + return -1; + } + } } else { - log_info("%s test passed %g max err\n", fn_names[i], max_err); - err = 0; + for (int i = 0; i < n; ++i) + { + int ii = i / veclen; + int vi = i * veclen; + for (int j = 0; j < veclen; ++j, ++vi) + { + t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); + if (t < 0.0f) + t = 0.0f; + else if (t > 1.0f) + t = 1.0f; + r = t * t * (3.0f - 2.0f * t); + delta = (float)fabs(r - outptr[vi]); + if (delta > MAX_ERR) + { + log_error("{%d, element %d}) verification error: " + "smoothstep(%a, %a, %a) = *%a vs. %a\n", + ii, j, x[vi], edge0[i], edge1[i], r, outptr[vi]); + return -1; + } + } + } } + return 0; +} - if (err) - break; - } - - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - for (i=0; i +int test_smoothstep_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, bool vecParam) +{ + clMemWrapper streams[4]; + std::vector input_ptr[3], output_ptr; + + std::vector programs; + std::vector kernels; + + int err, i; + MTdataHolder d = MTdataHolder(gRandomSeed); + + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); + + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); + + for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); + + for (i = 0; i < 4; i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); + } + + std::string pragma_str; + if (std::is_same::value) + { + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = get_random_float(-0x00200000, 0x00010000, d); + input_ptr[1][i] = get_random_float(input_ptr[0][i], 0x00200000, d); + input_ptr[2][i] = get_random_float(-0x20000000, 0x20000000, d); + } + } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = get_random_double(-0x00200000, 0x00010000, d); + input_ptr[1][i] = get_random_double(input_ptr[0][i], 0x00200000, d); + input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d); + } + } + + for (i = 0; i < 3; i++) + { + err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, + sizeof(T) * num_elements, + &input_ptr[i].front(), 0, NULL, NULL); + test_error(err, "Unable to write input buffer"); + } + + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + + for (i = 0; i < kTotalVecCount; i++) + { + std::string kernelSource; + if (i >= kVectorSizeCount) + { + if (vecParam) + { + std::string str = smoothstep_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + else + { + std::string str = smoothstep_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); + } + } + else + { + // regular path + std::string str = smoothstep_fn_code_pattern; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + } + const char *programPtr = kernelSource.c_str(); + err = + create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char **)&programPtr, "test_fn"); + test_error(err, "Unable to create kernel"); + + for (int j = 0; j < 4; j++) + { + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); + test_error(err, "Unable to set kernel argument"); + } + + size_t threads = (size_t)n_elems; + + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + test_error(err, "Unable to execute kernel"); + + err = clEnqueueReadBuffer(queue, streams[3], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + test_error(err, "Unable to read results"); + + if (verify_smoothstep((T *)&input_ptr[0].front(), + (T *)&input_ptr[1].front(), + (T *)&input_ptr[2].front(), &output_ptr[0], + n_elems, g_arrVecSizes[i], vecParam)) + { + log_error("smoothstep %s%d%s test failed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = -1; + } + else + { + log_info("smoothstep %s%d%s test passed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = 0; + } + + if (err) break; + } + + return err; +} + + +cl_int SmoothstepTest::Run() +{ + cl_int error = CL_SUCCESS; + + error = + test_smoothstep_fn(device, context, queue, num_elems, vecParam); + test_error(error, "SmoothstepTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_smoothstep_fn(device, context, queue, num_elems, + vecParam); + test_error(error, "SmoothstepTest::Run failed"); + } + + return error; +} + + +int test_smoothstep(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, + "smoothstep", true); +} + + +int test_smoothstepf(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, + "smoothstep", false); +} diff --git a/test_conformance/commonfns/test_smoothstepf.cpp b/test_conformance/commonfns/test_smoothstepf.cpp deleted file mode 100644 index ac09e9ec..00000000 --- a/test_conformance/commonfns/test_smoothstepf.cpp +++ /dev/null @@ -1,259 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static const char *smoothstep_kernel_code = -"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep2_kernel_code = -"__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -static const char *smoothstep4_kernel_code = -"__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n" -"}\n"; - -#define MAX_ERR (1e-5f) - -float verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, - int n, int veclen) -{ - float r, t, delta, max_err = 0.0f; - int i, j; - - for (i = 0; i < n; ++i) { - int vi = i * veclen; - for (j = 0; j < veclen; ++j, ++vi) { - t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[vi]); - if (delta > max_err) - max_err = delta; - } - } - return max_err; -} - -const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"}; - -int -test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[4]; - cl_float *input_ptr[3], *output_ptr, *p, *p_edge0; - cl_program program[3]; - cl_kernel kernel[3]; - size_t threads[1]; - float max_err = 0.0f; - int num_elements; - int err; - int i; - MTdata d; - - num_elements = n_elems * 4; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - p = input_ptr[0]; - for (i=0; i MAX_ERR) - { - log_error("%s test failed %g max err\n", fn_names[i], max_err); - err = -1; - } - else - { - log_info("%s test passed %g max err\n", fn_names[i], max_err); - err = 0; - } - - if (err) - break; - } - - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - for (i=0; i<3; i++) - { - clReleaseKernel(kernel[i]); - clReleaseProgram(program[i]); - } - free(input_ptr[0]); - free(input_ptr[1]); - free(input_ptr[2]); - free(output_ptr); - - return err; -} - - diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index ed5bc418..dc91766e 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,524 +13,252 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - #include #include #include #include #include "procs.h" - -static int -test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); +#include "test_base.h" -const char *step_kernel_code = -"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; +const char *step_fn_code_pattern = "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s%s *edge, " + "__global %s%s *x, __global %s%s *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = step(edge[tid], x[tid]);\n" + "}\n"; -const char *step2_kernel_code = -"__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; +const char *step_fn_code_pattern_v3 = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s " + "*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(step(vload3(tid,edge), vload3(tid,x)), tid, dst);\n" + "}\n"; -const char *step4_kernel_code = -"__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step8_kernel_code = -"__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step16_kernel_code = -"__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -const char *step3_kernel_code = -"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n" -"}\n"; +const char *step_fn_code_pattern_v3_scalar = + "%s\n" /* optional pragma */ + "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s " + "*dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n" + "}\n"; -int -verify_step(float *inptrA, float *inptrB, float *outptr, int n) +namespace { + +template +int verify_step(const T *const inptrA, const T *const inptrB, + const T *const outptr, const int n, const int veclen, + const bool vecParam) { - float r; - int i; + T r; - for (i=0; i *%a " + "vs %a\n", + ii, j, inptrA[ii], inptrB[i], r, outptr[i]); + return -1; + } + } + } } return 0; } -int -test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) +} + + +template +int test_step_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, bool vecParam) { - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - num_elements = n_elems * 16; + clMemWrapper streams[3]; + std::vector input_ptr[2], output_ptr; - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) + std::vector programs; + std::vector kernels; + + int err, i; + MTdataHolder d = MTdataHolder(gRandomSeed); + + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); + + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); + + for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements); + output_ptr.resize(num_elements); + + for (i = 0; i < 3; i++) { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); } - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i::value) { - p[i] = get_random_float(-0x40000000, 0x40000000, d); - } - p = input_ptr[1]; - for (i=0; i::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = get_random_double(-0x40000000, 0x40000000, d); + input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d); } } - threads[0] = (size_t)n_elems; - for (i=0; i= kVectorSizeCount) { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; + if (vecParam) + { + std::string str = step_fn_code_pattern_v3; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); + } + else + { + std::string str = step_fn_code_pattern_v3_scalar; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); + } + } + else + { + // regular path + std::string str = step_fn_code_pattern; + kernelSource = + string_format(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + } + const char *programPtr = kernelSource.c_str(); + err = + create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char **)&programPtr, "test_fn"); + test_error(err, "Unable to create kernel"); + + for (int j = 0; j < 3; j++) + { + err = + clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]); + test_error(err, "Unable to set kernel argument"); } - err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); - if (err != CL_SUCCESS) + size_t threads = (size_t)n_elems; + + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + test_error(err, "Unable to execute kernel"); + + err = clEnqueueReadBuffer(queue, streams[2], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + test_error(err, "Unable to read results"); + + err = verify_step(&input_ptr[0].front(), &input_ptr[1].front(), + &output_ptr.front(), n_elems, g_arrVecSizes[i], + vecParam); + if (err) { - log_error("clEnqueueReadBuffer failed\n"); - return -1; + log_error("step %s%d%s test failed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = -1; } - - switch (i) + else { - case 0: - err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems); - if (err) - log_error("STEP float test failed\n"); - else - log_info("STEP float test passed\n"); - break; - - case 1: - err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2); - if (err) - log_error("STEP float2 test failed\n"); - else - log_info("STEP float2 test passed\n"); - break; - - case 2: - err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4); - if (err) - log_error("STEP float4 test failed\n"); - else - log_info("STEP float4 test passed\n"); - break; - - case 3: - err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8); - if (err) - log_error("STEP float8 test failed\n"); - else - log_info("STEP float8 test passed\n"); - break; - - case 4: - err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16); - if (err) - log_error("STEP float16 test failed\n"); - else - log_info("STEP float16 test passed\n"); - break; - - case 5: - err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3); - if (err) - log_error("STEP float3 test failed\n"); - else - log_info("STEP float3 test passed\n"); - break; + log_info("step %s%d%s test passed\n", tname.c_str(), + ((g_arrVecSizes[i])), + vecParam ? "" : std::string(", " + tname).c_str()); + err = 0; } if (err) break; } - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - for (i=0; i(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = + test_step_fn(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); + } + + return error; +} + + +int test_step(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "step", + true); +} + + +int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "step", + false); +} diff --git a/test_conformance/commonfns/test_stepf.cpp b/test_conformance/commonfns/test_stepf.cpp deleted file mode 100644 index efada227..00000000 --- a/test_conformance/commonfns/test_stepf.cpp +++ /dev/null @@ -1,546 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - -static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems); - - -static const char *step_kernel_code = -"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step2_kernel_code = -"__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step4_kernel_code = -"__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step8_kernel_code = -"__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step16_kernel_code = -"__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = step(srcA[tid], srcB[tid]);\n" -"}\n"; - -static const char *step3_kernel_code = -"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n" -"}\n"; - - -static int -verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen) -{ - float r; - int i, j; - - for (i=0; i *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] ); - return -1; - } - } - } - - return 0; -} - -int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_float *input_ptr[2], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - num_elements = n_elems * 16; - - input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] ); - return -1; - } - } - } - - return 0; -} - -int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[3]; - cl_double *input_ptr[2], *output_ptr, *p; - cl_program program[kTotalVecCount]; - cl_kernel kernel[kTotalVecCount]; - size_t threads[1]; - int num_elements; - int err; - int i; - MTdata d; - num_elements = n_elems * 16; - - input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements); - output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_double) * num_elements, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i +#include +#include +#include + +#include + +#include "harness/deviceInfo.h" +#include "harness/typeWrappers.h" + +#include "procs.h" +#include "test_base.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338327950288 +#endif + + +// clang-format off +const char *unary_fn_code_pattern = +"%s\n" /* optional pragma */ +"__kernel void test_fn(__global %s%s *src, __global %s%s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" dst[tid] = %s(src[tid]);\n" +"}\n"; + +const char *unary_fn_code_pattern_v3 = +"%s\n" /* optional pragma */ +"__kernel void test_fn(__global %s *src, __global %s *dst)\n" +"{\n" +" int tid = get_global_id(0);\n" +"\n" +" vstore3(%s(vload3(tid,src)), tid, dst);\n" +"}\n"; +// clang-format on + + +#define MAX_ERR 2.0f + +namespace { + + +template float UlpFn(const T &val, const double &r) +{ + if (std::is_same::value) + return Ulp_Error_Double(val, r); + else if (std::is_same::value) + return Ulp_Error(val, r); + else if (std::is_same::value) + return Ulp_Error(val, r); +} + + +template +int verify_degrees(const T *const inptr, const T *const outptr, int n) +{ + float error, max_error = 0.0f; + double r, max_val = NAN; + int max_index = 0; + + for (int i = 0, j = 0; i < n; i++, j++) + { + r = (180.0 / M_PI) * inptr[i]; + + error = UlpFn(outptr[i], r); + + if (fabsf(error) > max_error) + { + max_error = error; + max_index = i; + max_val = r; + if (fabsf(error) > MAX_ERR) + { + log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", + i, inptr[i], r, outptr[i], r, outptr[i], error); + return 1; + } + } + } + + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); + + return 0; +} + + +template +int verify_radians(const T *const inptr, const T *const outptr, int n) +{ + float error, max_error = 0.0f; + double r, max_val = NAN; + int max_index = 0; + + for (int i = 0, j = 0; i < n; i++, j++) + { + r = (M_PI / 180.0) * inptr[i]; + error = Ulp_Error(outptr[i], r); + if (fabsf(error) > max_error) + { + max_error = error; + max_index = i; + max_val = r; + if (fabsf(error) > MAX_ERR) + { + log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", + i, inptr[i], r, outptr[i], r, outptr[i], error); + return 1; + } + } + } + + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); + + return 0; +} + + +template +int verify_sign(const T *const inptr, const T *const outptr, int n) +{ + T r = 0; + for (int i = 0; i < n; i++) + { + if (inptr[i] > 0.0f) + r = 1.0; + else if (inptr[i] < 0.0f) + r = -1.0; + else + r = 0.0; + if (r != outptr[i]) return -1; + } + return 0; +} + +} + + +template +int test_unary_fn(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems, + const std::string &fnName, VerifyFuncUnary verifyFn) +{ + clMemWrapper streams[2]; + std::vector input_ptr, output_ptr; + + std::vector programs; + std::vector kernels; + + int err, i; + MTdataHolder d = MTdataHolder(gRandomSeed); + + assert(BaseFunctionTest::type2name.find(sizeof(T)) + != BaseFunctionTest::type2name.end()); + auto tname = BaseFunctionTest::type2name[sizeof(T)]; + + programs.resize(kTotalVecCount); + kernels.resize(kTotalVecCount); + + int num_elements = n_elems * (1 << (kTotalVecCount - 1)); + + input_ptr.resize(num_elements); + output_ptr.resize(num_elements); + + for (i = 0; i < 2; i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * num_elements, NULL, &err); + test_error(err, "clCreateBuffer failed"); + } + + std::string pragma_str; + if (std::is_same::value) + { + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = get_random_float((float)(-100000.f * M_PI), + (float)(100000.f * M_PI), d); + } + } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = + get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d); + } + } + + err = clEnqueueWriteBuffer(queue, streams[0], true, 0, + sizeof(T) * num_elements, &input_ptr.front(), 0, + NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueWriteBuffer failed\n"); + return -1; + } + + for (i = 0; i < kTotalVecCount; i++) + { + std::string kernelSource; + char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + + if (i >= kVectorSizeCount) + { + std::string str = unary_fn_code_pattern_v3; + kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), fnName.c_str()); + } + else + { + std::string str = unary_fn_code_pattern; + kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), + vecSizeNames[i], fnName.c_str()); + } + + /* Create kernels */ + const char *programPtr = kernelSource.c_str(); + err = + create_single_kernel_helper(context, &programs[i], &kernels[i], 1, + (const char **)&programPtr, "test_fn"); + + err = clSetKernelArg(kernels[i], 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernels[i], 1, sizeof streams[1], &streams[1]); + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } + + // Line below is troublesome... + size_t threads = (size_t)num_elements / ((g_arrVecSizes[i])); + err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueNDRangeKernel failed\n"); + return -1; + } + + cl_uint dead = 42; + memset_pattern4(&output_ptr[0], &dead, sizeof(T) * num_elements); + err = clEnqueueReadBuffer(queue, streams[1], true, 0, + sizeof(T) * num_elements, &output_ptr[0], 0, + NULL, NULL); + if (err != CL_SUCCESS) + { + log_error("clEnqueueReadBuffer failed\n"); + return -1; + } + + if (verifyFn((T *)&input_ptr.front(), (T *)&output_ptr.front(), + n_elems * (i + 1))) + { + log_error("%s %s%d test failed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i]))); + err = -1; + } + else + { + log_info("%s %s%d test passed\n", fnName.c_str(), tname.c_str(), + ((g_arrVecSizes[i]))); + } + + if (err) break; + } + + return err; +} + + +cl_int DegreesTest::Run() +{ + cl_int error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + } + + return error; +} + + +cl_int RadiansTest::Run() +{ + cl_int error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + } + + return error; +} + + +cl_int SignTest::Run() +{ + cl_int error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + + if (is_extension_available(device, "cl_khr_fp64")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + } + + return error; +} + + +int test_degrees(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, + "degrees"); +} + + +int test_radians(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, + "radians"); +} + + +int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, + int n_elems) +{ + return MakeAndRunTest(device, context, queue, n_elems, "sign"); +}