Added support for cl_khr_fp16 extension in test_async_copy from basic (issue #142, basic) (#1707)

This commit is contained in:
Marcin Hajder
2023-06-06 17:46:56 +02:00
committed by GitHub
parent 63a8cb6b9d
commit c467391680

View File

@@ -20,8 +20,7 @@
#include <string.h> #include <string.h>
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <vector>
#include "procs.h" #include "procs.h"
#include "harness/conversions.h" #include "harness/conversions.h"
@@ -86,8 +85,7 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
clKernelWrapper kernel; clKernelWrapper kernel;
clMemWrapper streams[ 2 ]; clMemWrapper streams[ 2 ];
size_t threads[ 1 ], localThreads[ 1 ]; size_t threads[ 1 ], localThreads[ 1 ];
void *inBuffer, *outBuffer; MTdataHolder d(gRandomSeed);
MTdata d;
char vecNameString[64]; vecNameString[0] = 0; char vecNameString[64]; vecNameString[0] = 0;
if (vecSize == 1) if (vecSize == 1)
sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
@@ -109,9 +107,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
char programSource[4096]; programSource[0]=0; char programSource[4096]; programSource[0]=0;
char *programPtr; char *programPtr;
sprintf(programSource, kernelCode, std::string extStr = "";
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", if (vecType == kDouble)
vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString); extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
else if (vecType == kHalf)
extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
sprintf(programSource, kernelCode, extStr.c_str(), vecNameString,
vecNameString, vecNameString, vecNameString,
get_explicit_type_name(vecType), vecNameString, vecNameString);
//log_info("program: %s\n", programSource); //log_info("program: %s\n", programSource);
programPtr = programSource; programPtr = programSource;
@@ -150,9 +154,10 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize; size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
inBuffer = (void*)malloc(globalBufferSize); std::vector<unsigned char> inBuffer(globalBufferSize);
outBuffer = (void*)malloc(globalBufferSize); std::vector<unsigned char> outBuffer(globalBufferSize);
memset(outBuffer, 0, globalBufferSize);
outBuffer.assign(globalBufferSize, 0);
cl_int copiesPerWorkItemInt, copiesPerWorkgroup; cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
@@ -164,13 +169,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
threads[0] = globalWorkgroupSize; threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize; localThreads[0] = localWorkgroupSize;
d = init_genrand( gRandomSeed ); generate_random_data(vecType,
generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); globalBufferSize / get_explicit_type_size(vecType), d,
free_mtdata(d); d = NULL; &inBuffer.front());
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
&inBuffer.front(), &error);
test_error( error, "Unable to create input buffer" ); test_error( error, "Unable to create input buffer" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
&outBuffer.front(), &error);
test_error( error, "Unable to create output buffer" ); test_error( error, "Unable to create output buffer" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
@@ -189,16 +196,18 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
test_error( error, "Unable to queue kernel" ); test_error( error, "Unable to queue kernel" );
// Read // Read
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize,
&outBuffer.front(), 0, NULL, NULL);
test_error( error, "Unable to read results" ); test_error( error, "Unable to read results" );
// Verify // Verify
int failuresPrinted = 0; int failuresPrinted = 0;
if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 ) if (memcmp(&inBuffer.front(), &outBuffer.front(), globalBufferSize) != 0)
{ {
size_t typeSize = get_explicit_type_size(vecType)* vecSize; size_t typeSize = get_explicit_type_size(vecType)* vecSize;
unsigned char * inchar = (unsigned char*)inBuffer; unsigned char *inchar = static_cast<unsigned char *>(&inBuffer.front());
unsigned char * outchar = (unsigned char*)outBuffer; unsigned char *outchar =
static_cast<unsigned char *>(&outBuffer.front());
for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) { for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 ) if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 )
{ {
@@ -226,26 +235,29 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
} }
} }
free(inBuffer);
free(outBuffer);
return failuresPrinted ? -1 : 0; return failuresPrinted ? -1 : 0;
} }
int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
kInt, kUInt, kLong, kULong,
kFloat, kHalf, kDouble };
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
unsigned int size, typeIndex; unsigned int size, typeIndex;
int errors = 0; int errors = 0;
for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
{ bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
continue;
for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++)
{
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
continue; continue;
else if (vecType[typeIndex] == kDouble && !fp64Support)
continue;
else if (vecType[typeIndex] == kHalf && !fp16Support)
continue;
for( size = 0; vecSizes[ size ] != 0; size++ ) for( size = 0; vecSizes[ size ] != 0; size++ )
{ {
@@ -259,9 +271,6 @@ int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_qu
return 0; return 0;
} }
int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{ {
return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel ); return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel );