From c58ead9aeaa7baaf8bcbec8642e79d3ce1cc1e09 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 30 May 2023 17:52:06 +0200 Subject: [PATCH] Added cl_khr_fp16 extension support for test_astype from basic (#1706) * Added support for cl_khr_fp16 extenstion in test_astype from basic (issue #142, basic) * Added correction to iterate over vector of types * Fixed case with both fp16 and fp64 supported * Cosmetic corrections due to code review * Cosmetic corrections due to code review --- test_conformance/basic/test_astype.cpp | 203 +++++++++++-------------- test_conformance/basic/utils.h | 41 +++++ 2 files changed, 126 insertions(+), 118 deletions(-) create mode 100644 test_conformance/basic/utils.h diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 7281f904..08a4cb85 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -15,61 +15,39 @@ // #include "harness/compat.h" +#include #include #include -#include #include #include +#include - -#include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" +#include "procs.h" +#include "utils.h" -static const char *astype_kernel_pattern = -"%s\n" +// clang-format off + +static char extension[128] = { 0 }; +static char strLoad[128] = { 0 }; +static char strStore[128] = { 0 }; +static const char *regLoad = "as_%s%s(src[tid]);\n"; +static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n"; +static const char *regStore = "dst[tid] = tmp;\n"; +static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n"; + +static const char* astype_kernel_pattern[] = { +extension, "__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" "{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( src[ tid ] );\n" -" dst[ tid ] = tmp;\n" -"}\n"; - -static const char *astype_kernel_pattern_V3srcV3dst = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the third and fifth argument, each of which -// should be a "3", when copying from the printf for astype_kernel_pattern - -static const char *astype_kernel_pattern_V3dst = -"%s\n" -"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s3 tmp = as_%s3( src[ tid ] );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the fifth argument, which -// should be a "3", when copying from the printf for astype_kernel_pattern - - -static const char *astype_kernel_pattern_V3src = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" dst[ tid ] = tmp;\n" -"}\n"; -// in the printf, remove the third argument, which -// should be a "3", when copying from the printf for astype_kernel_pattern +" int tid = get_global_id( 0 );\n", +" %s%s tmp = ", strLoad, +" ", strStore, +"}\n"}; +// clang-format on int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType, unsigned int vecSize, unsigned int outVecSize, @@ -81,68 +59,60 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q clKernelWrapper kernel; clMemWrapper streams[ 2 ]; - char programSrc[ 10240 ]; size_t threads[ 1 ], localThreads[ 1 ]; size_t typeSize = get_explicit_type_size( inVecType ); size_t outTypeSize = get_explicit_type_size(outVecType); char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - MTdata d; + MTdataHolder d(gRandomSeed); + std::ostringstream sstr; + if (outVecType == kDouble || inVecType == kDouble) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + if (outVecType == kHalf || inVecType == kHalf) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; - // Create program - if(outVecSize == 3 && vecSize == 3) { - // astype_kernel_pattern_V3srcV3dst - sprintf( programSrc, astype_kernel_pattern_V3srcV3dst, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), // sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] ); - } else if(outVecSize == 3) { - // astype_kernel_pattern_V3dst - sprintf( programSrc, astype_kernel_pattern_V3dst, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType )); + strcpy(extension, sstr.str().c_str()); - } else if(vecSize == 3) { - // astype_kernel_pattern_V3src - sprintf( programSrc, astype_kernel_pattern_V3src, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ),// sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } else { - sprintf( programSrc, astype_kernel_pattern, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } + if (vecSize == 3) + std::snprintf(strLoad, sizeof(strLoad), v3Load, + get_explicit_type_name(outVecType), sizeNames[outVecSize], + get_explicit_type_name(inVecType)); + else + std::snprintf(strLoad, sizeof(strLoad), regLoad, + get_explicit_type_name(outVecType), + sizeNames[outVecSize]); - const char *ptr = programSrc; + if (outVecSize == 3) + std::snprintf(strStore, sizeof(strStore), v3Store, + get_explicit_type_name(outVecType)); + else + std::snprintf(strStore, sizeof(strStore), "%s", regStore); + + auto str = + concat_kernel(astype_kernel_pattern, + sizeof(astype_kernel_pattern) / sizeof(const char *)); + std::string kernelSource = + str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize], + get_explicit_type_name(outVecType), sizeNames[outVecSize], + get_explicit_type_name(outVecType), sizeNames[outVecSize]); + + const char *ptr = kernelSource.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); test_error( error, "Unable to create testing kernel" ); - // Create some input values size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize; - char *inBuffer = (char*)malloc( inBufferSize ); + std::vector inBuffer(inBufferSize); size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize; - char *outBuffer = (char*)malloc( outBufferSize ); + std::vector outBuffer(outBufferSize); - d = init_genrand( gRandomSeed ); - generate_random_data( inVecType, numElements * vecSize, - d, inBuffer ); - free_mtdata(d); d = NULL; + generate_random_data(inVecType, numElements * vecSize, d, + &inBuffer.front()); // Create I/O streams and set arguments - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error ); + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, + &inBuffer.front(), &error); test_error( error, "Unable to create I/O stream" ); streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error ); test_error( error, "Unable to create I/O stream" ); @@ -161,15 +131,15 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); test_error( error, "Unable to run kernel" ); - // Get the results and compare // The beauty is that astype is supposed to return the bit pattern as a different type, which means // the output should have the exact same bit pattern as the input. No interpretation necessary! - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize, + &outBuffer.front(), 0, NULL, NULL); test_error( error, "Unable to read results" ); - char *expected = inBuffer; - char *actual = outBuffer; + char *expected = &inBuffer.front(); + char *actual = &outBuffer.front(); size_t compSize = typeSize*vecSize; if(outTypeSize*outVecSize < compSize) { compSize = outTypeSize*outVecSize; @@ -178,8 +148,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q if(outVecSize == 4 && vecSize == 3) { // as_type4(vec3) should compile but produce undefined results?? - free(inBuffer); - free(outBuffer); return 0; } @@ -188,8 +156,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q // as_typen(vecm) should compile and run but produce // implementation-defined results for m != n // and n*sizeof(type) = sizeof(vecm) - free(inBuffer); - free(outBuffer); return 0; } @@ -203,17 +169,14 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q GetDataVectorString( expected, typeSize, vecSize, expectedString ), GetDataVectorString( actual, typeSize, vecSize, actualString ) ); log_error("Src is :\n%s\n----\n%d threads %d localthreads\n", - programSrc, (int)threads[0],(int) localThreads[0]); - free(inBuffer); - free(outBuffer); + kernelSource.c_str(), (int)threads[0], + (int)localThreads[0]); return 1; } expected += typeSize * vecSize; actual += outTypeSize * outVecSize; } - free(inBuffer); - free(outBuffer); return 0; } @@ -223,31 +186,39 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way // for us to verify what is "valid". So the only thing we can test are types that match in size independent // of the element count (char -> uchar, etc) - ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + const std::vector vecTypes = { kChar, kUChar, kShort, + kUShort, kInt, kUInt, + kLong, kULong, kFloat, + kHalf, kDouble }; + const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx; size_t inTypeSize, outTypeSize; int error = 0; - for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ ) + bool fp16Support = is_extension_available(device, "cl_khr_fp16"); + bool fp64Support = is_extension_available(device, "cl_khr_fp64"); + + auto skip_type = [&](ExplicitType et) { + if ((et == kLong || et == kULong) && !gHasLong) + return true; + else if (et == kDouble && !fp64Support) + return true; + else if (et == kHalf && !fp16Support) + return true; + return false; + }; + + for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++) { inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]); - if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) - continue; + if (skip_type(vecTypes[inTypeIdx])) continue; - if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong ) - continue; - - for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ ) + for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++) { outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]); - if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) { - continue; - } - if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong ) - continue; + if (skip_type(vecTypes[outTypeIdx])) continue; // change this check if( inTypeIdx == outTypeIdx ) { @@ -259,7 +230,6 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) { - for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++) { if(vecSizes[sizeIdx]*inTypeSize != @@ -268,10 +238,7 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, continue; } error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems ); - - } - } if(get_explicit_type_size(vecTypes[inTypeIdx]) == get_explicit_type_size(vecTypes[outTypeIdx])) { diff --git a/test_conformance/basic/utils.h b/test_conformance/basic/utils.h new file mode 100644 index 00000000..3f6bf64d --- /dev/null +++ b/test_conformance/basic/utils.h @@ -0,0 +1,41 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef BASIC_UTILS_H +#define BASIC_UTILS_H + +#include +#include + +inline std::string concat_kernel(const char *sstr[], int num) +{ + std::string res; + for (int i = 0; i < num; i++) res += std::string(sstr[i]); + return res; +} + +template +inline std::string str_sprintf(const std::string &str, Args... args) +{ + int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) + 1; + if (str_size <= 0) throw std::runtime_error("Formatting error."); + size_t s = static_cast(str_size); + std::unique_ptr buffer(new char[s]); + std::snprintf(buffer.get(), s, str.c_str(), args...); + return std::string(buffer.get(), buffer.get() + s - 1); +} + +#endif // BASIC_UTIL_H