Added cl_khr_fp16 extension support for test_astype from basic (#1706)

* Added support for cl_khr_fp16 extenstion in test_astype from basic (issue #142, basic)

* Added correction to iterate over vector of types

* Fixed case with both fp16 and fp64 supported

* Cosmetic corrections due to code review

* Cosmetic corrections due to code review
This commit is contained in:
Marcin Hajder
2023-05-30 17:52:06 +02:00
committed by GitHub
parent 9692380505
commit c58ead9aea
2 changed files with 126 additions and 118 deletions

View File

@@ -15,61 +15,39 @@
//
#include "harness/compat.h"
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <vector>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
#include "procs.h"
#include "utils.h"
static const char *astype_kernel_pattern =
"%s\n"
// clang-format off
static char extension[128] = { 0 };
static char strLoad[128] = { 0 };
static char strStore[128] = { 0 };
static const char *regLoad = "as_%s%s(src[tid]);\n";
static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n";
static const char *regStore = "dst[tid] = tmp;\n";
static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n";
static const char* astype_kernel_pattern[] = {
extension,
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( src[ tid ] );\n"
" dst[ tid ] = tmp;\n"
"}\n";
static const char *astype_kernel_pattern_V3srcV3dst =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// in the printf, remove the third and fifth argument, each of which
// should be a "3", when copying from the printf for astype_kernel_pattern
static const char *astype_kernel_pattern_V3dst =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s3 tmp = as_%s3( src[ tid ] );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// in the printf, remove the fifth argument, which
// should be a "3", when copying from the printf for astype_kernel_pattern
static const char *astype_kernel_pattern_V3src =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" dst[ tid ] = tmp;\n"
"}\n";
// in the printf, remove the third argument, which
// should be a "3", when copying from the printf for astype_kernel_pattern
" int tid = get_global_id( 0 );\n",
" %s%s tmp = ", strLoad,
" ", strStore,
"}\n"};
// clang-format on
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
unsigned int vecSize, unsigned int outVecSize,
@@ -81,68 +59,60 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
char programSrc[ 10240 ];
size_t threads[ 1 ], localThreads[ 1 ];
size_t typeSize = get_explicit_type_size( inVecType );
size_t outTypeSize = get_explicit_type_size(outVecType);
char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
MTdata d;
MTdataHolder d(gRandomSeed);
std::ostringstream sstr;
if (outVecType == kDouble || inVecType == kDouble)
sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
if (outVecType == kHalf || inVecType == kHalf)
sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
// Create program
if(outVecSize == 3 && vecSize == 3) {
// astype_kernel_pattern_V3srcV3dst
sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
strcpy(extension, sstr.str().c_str());
if (vecSize == 3)
std::snprintf(strLoad, sizeof(strLoad), v3Load,
get_explicit_type_name(outVecType), sizeNames[outVecSize],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
} else if(outVecSize == 3) {
// astype_kernel_pattern_V3dst
sprintf( programSrc, astype_kernel_pattern_V3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ),
get_explicit_type_name(inVecType));
else
std::snprintf(strLoad, sizeof(strLoad), regLoad,
get_explicit_type_name(outVecType),
sizeNames[outVecSize]);
if (outVecSize == 3)
std::snprintf(strStore, sizeof(strStore), v3Store,
get_explicit_type_name(outVecType));
else
std::snprintf(strStore, sizeof(strStore), "%s", regStore);
} else if(vecSize == 3) {
// astype_kernel_pattern_V3src
sprintf( programSrc, astype_kernel_pattern_V3src,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
auto str =
concat_kernel(astype_kernel_pattern,
sizeof(astype_kernel_pattern) / sizeof(const char *));
std::string kernelSource =
str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize],
get_explicit_type_name(outVecType), sizeNames[outVecSize],
get_explicit_type_name(outVecType), sizeNames[outVecSize]);
} else {
sprintf( programSrc, astype_kernel_pattern,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
}
const char *ptr = programSrc;
const char *ptr = kernelSource.c_str();
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
// Create some input values
size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
char *inBuffer = (char*)malloc( inBufferSize );
std::vector<char> inBuffer(inBufferSize);
size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
char *outBuffer = (char*)malloc( outBufferSize );
std::vector<char> outBuffer(outBufferSize);
d = init_genrand( gRandomSeed );
generate_random_data( inVecType, numElements * vecSize,
d, inBuffer );
free_mtdata(d); d = NULL;
generate_random_data(inVecType, numElements * vecSize, d,
&inBuffer.front());
// Create I/O streams and set arguments
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
&inBuffer.front(), &error);
test_error( error, "Unable to create I/O stream" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
test_error( error, "Unable to create I/O stream" );
@@ -161,15 +131,15 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
// Get the results and compare
// The beauty is that astype is supposed to return the bit pattern as a different type, which means
// the output should have the exact same bit pattern as the input. No interpretation necessary!
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
&outBuffer.front(), 0, NULL, NULL);
test_error( error, "Unable to read results" );
char *expected = inBuffer;
char *actual = outBuffer;
char *expected = &inBuffer.front();
char *actual = &outBuffer.front();
size_t compSize = typeSize*vecSize;
if(outTypeSize*outVecSize < compSize) {
compSize = outTypeSize*outVecSize;
@@ -178,8 +148,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
if(outVecSize == 4 && vecSize == 3)
{
// as_type4(vec3) should compile but produce undefined results??
free(inBuffer);
free(outBuffer);
return 0;
}
@@ -188,8 +156,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
// as_typen(vecm) should compile and run but produce
// implementation-defined results for m != n
// and n*sizeof(type) = sizeof(vecm)
free(inBuffer);
free(outBuffer);
return 0;
}
@@ -203,17 +169,14 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
GetDataVectorString( expected, typeSize, vecSize, expectedString ),
GetDataVectorString( actual, typeSize, vecSize, actualString ) );
log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
programSrc, (int)threads[0],(int) localThreads[0]);
free(inBuffer);
free(outBuffer);
kernelSource.c_str(), (int)threads[0],
(int)localThreads[0]);
return 1;
}
expected += typeSize * vecSize;
actual += outTypeSize * outVecSize;
}
free(inBuffer);
free(outBuffer);
return 0;
}
@@ -223,31 +186,39 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
// legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
// for us to verify what is "valid". So the only thing we can test are types that match in size independent
// of the element count (char -> uchar, etc)
ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
const std::vector<ExplicitType> vecTypes = { kChar, kUChar, kShort,
kUShort, kInt, kUInt,
kLong, kULong, kFloat,
kHalf, kDouble };
const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx;
size_t inTypeSize, outTypeSize;
int error = 0;
for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ )
bool fp16Support = is_extension_available(device, "cl_khr_fp16");
bool fp64Support = is_extension_available(device, "cl_khr_fp64");
auto skip_type = [&](ExplicitType et) {
if ((et == kLong || et == kULong) && !gHasLong)
return true;
else if (et == kDouble && !fp64Support)
return true;
else if (et == kHalf && !fp16Support)
return true;
return false;
};
for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++)
{
inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]);
if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
continue;
if (skip_type(vecTypes[inTypeIdx])) continue;
if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong )
continue;
for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ )
for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++)
{
outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]);
if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) {
continue;
}
if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong )
continue;
if (skip_type(vecTypes[outTypeIdx])) continue;
// change this check
if( inTypeIdx == outTypeIdx ) {
@@ -259,7 +230,6 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
{
for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++)
{
if(vecSizes[sizeIdx]*inTypeSize !=
@@ -268,10 +238,7 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
continue;
}
error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems );
}
}
if(get_explicit_type_size(vecTypes[inTypeIdx]) ==
get_explicit_type_size(vecTypes[outTypeIdx])) {

View File

@@ -0,0 +1,41 @@
//
// Copyright (c) 2023 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef BASIC_UTILS_H
#define BASIC_UTILS_H
#include <memory>
#include <string>
inline std::string concat_kernel(const char *sstr[], int num)
{
std::string res;
for (int i = 0; i < num; i++) res += std::string(sstr[i]);
return res;
}
template <typename... Args>
inline std::string str_sprintf(const std::string &str, Args... args)
{
int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) + 1;
if (str_size <= 0) throw std::runtime_error("Formatting error.");
size_t s = static_cast<size_t>(str_size);
std::unique_ptr<char[]> buffer(new char[s]);
std::snprintf(buffer.get(), s, str.c_str(), args...);
return std::string(buffer.get(), buffer.get() + s - 1);
}
#endif // BASIC_UTIL_H