mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-26 08:49:02 +00:00
Added cl_khr_fp16 extension support for test_async_strided_copy from basic (issue #142, basic) (#1711)
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2017 The Khronos Group Inc.
|
// Copyright (c) 2023 The Khronos Group Inc.
|
||||||
//
|
//
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
// you may not use this file except in compliance with the License.
|
// you may not use this file except in compliance with the License.
|
||||||
@@ -20,15 +20,16 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
|
||||||
#include "procs.h"
|
#include "procs.h"
|
||||||
#include "harness/conversions.h"
|
#include "harness/conversions.h"
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
|
||||||
static const char *async_strided_global_to_local_kernel =
|
static const char *async_strided_global_to_local_kernel =
|
||||||
"%s\n" // optional pragma string
|
"%s\n" // optional pragma string
|
||||||
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
|
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int i;\n"
|
" int i;\n"
|
||||||
// Zero the local storage first
|
// Zero the local storage first
|
||||||
@@ -46,7 +47,7 @@ static const char *async_strided_global_to_local_kernel =
|
|||||||
|
|
||||||
static const char *async_strided_local_to_global_kernel =
|
static const char *async_strided_local_to_global_kernel =
|
||||||
"%s\n" // optional pragma string
|
"%s\n" // optional pragma string
|
||||||
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
|
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int i;\n"
|
" int i;\n"
|
||||||
// Zero the local storage first
|
// Zero the local storage first
|
||||||
@@ -63,6 +64,7 @@ static const char *async_strided_local_to_global_kernel =
|
|||||||
" wait_group_events( 1, &event );\n"
|
" wait_group_events( 1, &event );\n"
|
||||||
"}\n" ;
|
"}\n" ;
|
||||||
|
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
|
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
|
||||||
{
|
{
|
||||||
@@ -71,8 +73,7 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
|||||||
clKernelWrapper kernel;
|
clKernelWrapper kernel;
|
||||||
clMemWrapper streams[ 2 ];
|
clMemWrapper streams[ 2 ];
|
||||||
size_t threads[ 1 ], localThreads[ 1 ];
|
size_t threads[ 1 ], localThreads[ 1 ];
|
||||||
void *inBuffer, *outBuffer;
|
MTdataHolder d(gRandomSeed);
|
||||||
MTdata d;
|
|
||||||
char vecNameString[64]; vecNameString[0] = 0;
|
char vecNameString[64]; vecNameString[0] = 0;
|
||||||
|
|
||||||
if (vecSize == 1)
|
if (vecSize == 1)
|
||||||
@@ -94,10 +95,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
|||||||
char programSource[4096]; programSource[0]=0;
|
char programSource[4096]; programSource[0]=0;
|
||||||
char *programPtr;
|
char *programPtr;
|
||||||
|
|
||||||
sprintf(programSource, kernelCode,
|
std::string extStr = "";
|
||||||
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
if (vecType == kDouble)
|
||||||
"",
|
extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
|
||||||
vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
|
else if (vecType == kHalf)
|
||||||
|
extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
|
||||||
|
|
||||||
|
sprintf(programSource, kernelCode, extStr.c_str(), vecNameString,
|
||||||
|
vecNameString, vecNameString, vecNameString,
|
||||||
|
get_explicit_type_name(vecType), vecNameString, vecNameString);
|
||||||
//log_info("program: %s\n", programSource);
|
//log_info("program: %s\n", programSource);
|
||||||
programPtr = programSource;
|
programPtr = programSource;
|
||||||
|
|
||||||
@@ -151,9 +157,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
|||||||
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
|
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
|
||||||
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
|
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
|
||||||
|
|
||||||
inBuffer = (void*)malloc(globalBufferSize);
|
std::vector<unsigned char> inBuffer(globalBufferSize);
|
||||||
outBuffer = (void*)malloc(globalBufferSize);
|
std::vector<unsigned char> outBuffer(globalBufferSize);
|
||||||
memset(outBuffer, 0, globalBufferSize);
|
memset(outBuffer.data(), 0, globalBufferSize);
|
||||||
|
|
||||||
cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
|
cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
|
||||||
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
|
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
|
||||||
@@ -165,13 +171,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
|||||||
threads[0] = globalWorkgroupSize;
|
threads[0] = globalWorkgroupSize;
|
||||||
localThreads[0] = localWorkgroupSize;
|
localThreads[0] = localWorkgroupSize;
|
||||||
|
|
||||||
d = init_genrand( gRandomSeed );
|
generate_random_data(vecType,
|
||||||
generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
|
globalBufferSize / get_explicit_type_size(vecType), d,
|
||||||
free_mtdata(d); d = NULL;
|
inBuffer.data());
|
||||||
|
|
||||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
|
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
|
||||||
|
inBuffer.data(), &error);
|
||||||
test_error( error, "Unable to create input buffer" );
|
test_error( error, "Unable to create input buffer" );
|
||||||
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
|
streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
|
||||||
|
outBuffer.data(), &error);
|
||||||
test_error( error, "Unable to create output buffer" );
|
test_error( error, "Unable to create output buffer" );
|
||||||
|
|
||||||
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
|
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
|
||||||
@@ -192,17 +200,20 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
|||||||
test_error( error, "Unable to queue kernel" );
|
test_error( error, "Unable to queue kernel" );
|
||||||
|
|
||||||
// Read
|
// Read
|
||||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
|
error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize,
|
||||||
|
outBuffer.data(), 0, NULL, NULL);
|
||||||
test_error( error, "Unable to read results" );
|
test_error( error, "Unable to read results" );
|
||||||
|
|
||||||
// Verify
|
// Verify
|
||||||
size_t typeSize = get_explicit_type_size(vecType)* vecSize;
|
size_t typeSize = get_explicit_type_size(vecType)* vecSize;
|
||||||
for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
|
for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
|
||||||
{
|
{
|
||||||
if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, typeSize) != 0 )
|
if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0)
|
||||||
{
|
{
|
||||||
unsigned char * inchar = (unsigned char*)inBuffer + i;
|
unsigned char *inchar =
|
||||||
unsigned char * outchar = (unsigned char*)outBuffer + i;
|
static_cast<unsigned char *>(inBuffer.data());
|
||||||
|
unsigned char *outchar =
|
||||||
|
static_cast<unsigned char *>(outBuffer.data());
|
||||||
char values[4096];
|
char values[4096];
|
||||||
values[0] = 0;
|
values[0] = 0;
|
||||||
|
|
||||||
@@ -215,34 +226,35 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
|||||||
sprintf(values + strlen( values), "%2x ", outchar[j]);
|
sprintf(values + strlen( values), "%2x ", outchar[j]);
|
||||||
sprintf(values + strlen(values), "]");
|
sprintf(values + strlen(values), "]");
|
||||||
log_error("%s\n", values);
|
log_error("%s\n", values);
|
||||||
free(inBuffer);
|
|
||||||
free(outBuffer);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
free(inBuffer);
|
|
||||||
free(outBuffer);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
|
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
|
||||||
{
|
{
|
||||||
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
|
const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
|
||||||
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
kInt, kUInt, kLong, kULong,
|
||||||
unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
|
kFloat, kHalf, kDouble };
|
||||||
|
const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
||||||
|
const unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
|
||||||
unsigned int size, typeIndex, stride;
|
unsigned int size, typeIndex, stride;
|
||||||
|
|
||||||
int errors = 0;
|
int errors = 0;
|
||||||
|
|
||||||
for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
|
bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
|
||||||
{
|
bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
|
||||||
if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
|
for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++)
|
||||||
|
{
|
||||||
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
|
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
|
||||||
continue;
|
continue;
|
||||||
|
else if (vecType[typeIndex] == kDouble && !fp64Support)
|
||||||
|
continue;
|
||||||
|
else if (vecType[typeIndex] == kHalf && !fp16Support)
|
||||||
|
continue;
|
||||||
|
|
||||||
for( size = 0; vecSizes[ size ] != 0; size++ )
|
for( size = 0; vecSizes[ size ] != 0; size++ )
|
||||||
{
|
{
|
||||||
@@ -260,9 +272,6 @@ int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_co
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||||
{
|
{
|
||||||
return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );
|
return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );
|
||||||
|
|||||||
Reference in New Issue
Block a user