mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Fix unused-function warnings and enable -Wunused-function (#1576)
Move functions in .h files to .cpp files where appropriate; align prototypes and definitions; and remove functions that are not used. Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com> Signed-off-by: Sven van Haastregt <sven.vanhaastregt@arm.com>
This commit is contained in:
committed by
GitHub
parent
58eb3d776d
commit
3cadff7115
@@ -90,6 +90,7 @@ endmacro(add_cxx_flag_if_supported)
|
|||||||
|
|
||||||
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
|
if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
|
||||||
add_cxx_flag_if_supported(-Wmisleading-indentation)
|
add_cxx_flag_if_supported(-Wmisleading-indentation)
|
||||||
|
add_cxx_flag_if_supported(-Wunused-function)
|
||||||
add_cxx_flag_if_supported(-Wunused-variable)
|
add_cxx_flag_if_supported(-Wunused-variable)
|
||||||
add_cxx_flag_if_supported(-Wno-narrowing)
|
add_cxx_flag_if_supported(-Wno-narrowing)
|
||||||
add_cxx_flag_if_supported(-Wno-format)
|
add_cxx_flag_if_supported(-Wno-format)
|
||||||
|
|||||||
@@ -94,18 +94,6 @@ struct structArg
|
|||||||
float f;
|
float f;
|
||||||
};
|
};
|
||||||
|
|
||||||
static unsigned char *
|
|
||||||
generate_8888_image(int w, int h, MTdata d)
|
|
||||||
{
|
|
||||||
unsigned char *ptr = (unsigned char*)malloc(w * h * 4);
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i=0; i<w*h*4; i++)
|
|
||||||
ptr[i] = (unsigned char)genrand_int32( d);
|
|
||||||
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
int test_image_arg_shallow_clone(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, void* pbufRes, clMemWrapper& bufOut)
|
int test_image_arg_shallow_clone(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, void* pbufRes, clMemWrapper& bufOut)
|
||||||
{
|
{
|
||||||
int error;
|
int error;
|
||||||
|
|||||||
@@ -409,8 +409,9 @@ static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
|
|||||||
|
|
||||||
static void l_set_randomly(cl_uchar* buf, size_t buf_size,
|
static void l_set_randomly(cl_uchar* buf, size_t buf_size,
|
||||||
RandomSeed& rand_state);
|
RandomSeed& rand_state);
|
||||||
static int l_compare(const cl_uchar* expected, const cl_uchar* received,
|
static int l_compare(const char* test_name, const cl_uchar* expected,
|
||||||
unsigned num_values, const TypeInfo& ti);
|
const cl_uchar* received, size_t num_values,
|
||||||
|
const TypeInfo& ti);
|
||||||
static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
|
static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
|
||||||
unsigned src_idx, const TypeInfo& ti);
|
unsigned src_idx, const TypeInfo& ti);
|
||||||
|
|
||||||
@@ -436,8 +437,7 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
|
|||||||
static int l_capacity(cl_device_id device, cl_context context,
|
static int l_capacity(cl_device_id device, cl_context context,
|
||||||
cl_command_queue queue, size_t max_size);
|
cl_command_queue queue, size_t max_size);
|
||||||
static int l_user_type(cl_device_id device, cl_context context,
|
static int l_user_type(cl_device_id device, cl_context context,
|
||||||
cl_command_queue queue, size_t max_size,
|
cl_command_queue queue, bool separate_compile);
|
||||||
bool separate_compilation);
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////
|
////////////////////
|
||||||
|
|||||||
@@ -124,10 +124,6 @@ static int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, size_t blockC
|
|||||||
void *FlushToZero( void );
|
void *FlushToZero( void );
|
||||||
void UnFlushToZero( void *);
|
void UnFlushToZero( void *);
|
||||||
|
|
||||||
static cl_program CreateImplicitConvertProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, char testName[256], cl_int *error );
|
|
||||||
static cl_program CreateStandardProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, char testName[256], cl_int *error );
|
|
||||||
|
|
||||||
|
|
||||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
|
||||||
// (that's x87 default state). This causes problems with the tests that
|
// (that's x87 default state). This causes problems with the tests that
|
||||||
// convert long and ulong to float and double or otherwise deal with values
|
// convert long and ulong to float and double or otherwise deal with values
|
||||||
|
|||||||
@@ -281,59 +281,6 @@ template <class T> int determine_validation_error( void *imagePtr, image_descrip
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
|
|
||||||
{
|
|
||||||
size_t i = 0;
|
|
||||||
if( gDisableOffsets )
|
|
||||||
{
|
|
||||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
|
||||||
{
|
|
||||||
for( size_t x = 0; x < imageInfo->width; x++, i++ )
|
|
||||||
{
|
|
||||||
xOffsets[ i ] = (float) (xfract + (double) x);
|
|
||||||
yOffsets[ i ] = (float) (yfract + (double) y);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
|
||||||
{
|
|
||||||
for( size_t x = 0; x < imageInfo->width; x++, i++ )
|
|
||||||
{
|
|
||||||
xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
|
|
||||||
yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
|
|
||||||
{
|
|
||||||
i = 0;
|
|
||||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
|
||||||
{
|
|
||||||
for( size_t x = 0; x < imageInfo->width; x++, i++ )
|
|
||||||
{
|
|
||||||
xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
|
|
||||||
yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if( normalized_coords )
|
|
||||||
{
|
|
||||||
i = 0;
|
|
||||||
for( size_t y = 0; y < imageInfo->height; y++ )
|
|
||||||
{
|
|
||||||
for( size_t x = 0; x < imageInfo->width; x++, i++ )
|
|
||||||
{
|
|
||||||
xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
|
|
||||||
yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
|
static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
|
||||||
{
|
{
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
|||||||
@@ -124,23 +124,6 @@ void dealloc(T *p)
|
|||||||
if (p) delete p;
|
if (p) delete p;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_dir_exits(const char* path)
|
|
||||||
{
|
|
||||||
assert(path && "NULL directory");
|
|
||||||
#if defined(_WIN32)
|
|
||||||
DWORD ftyp = GetFileAttributesA(path);
|
|
||||||
if (ftyp != INVALID_FILE_ATTRIBUTES && (ftyp & FILE_ATTRIBUTE_DIRECTORY))
|
|
||||||
return true;
|
|
||||||
#else // Linux assumed here.
|
|
||||||
if (DIR *pDir = opendir(path))
|
|
||||||
{
|
|
||||||
closedir(pDir);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void get_spir_version(cl_device_id device,
|
static void get_spir_version(cl_device_id device,
|
||||||
std::vector<Version> &versions)
|
std::vector<Version> &versions)
|
||||||
{
|
{
|
||||||
@@ -205,21 +188,6 @@ static void printError(const std::string& S){
|
|||||||
std::cerr << S << std::endl;
|
std::cerr << S << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool extractKernelAttribute(std::string& kernel_attributes,
|
|
||||||
const std::string& attribute, std::vector<std::string>& attribute_vector) {
|
|
||||||
size_t start = kernel_attributes.find(attribute + "(");
|
|
||||||
if (start == 0) {
|
|
||||||
size_t end = kernel_attributes.find(")", start);
|
|
||||||
if (end != std::string::npos) {
|
|
||||||
size_t length = end-start+1;
|
|
||||||
attribute_vector.push_back(kernel_attributes.substr(start, length));
|
|
||||||
kernel_attributes.erase(start, length);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extracts suite with the given name, and saves it to disk.
|
// Extracts suite with the given name, and saves it to disk.
|
||||||
static void extract_suite(const char *suiteName)
|
static void extract_suite(const char *suiteName)
|
||||||
{
|
{
|
||||||
@@ -6454,7 +6422,8 @@ std::vector<std::string> &split(const std::string &s, char delim, std::vector<st
|
|||||||
return elems;
|
return elems;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Temporarily disabled, see GitHub #1284
|
||||||
|
#if 0
|
||||||
static bool
|
static bool
|
||||||
test_kernel_attributes(cl_device_id device, cl_uint width, const char *folder)
|
test_kernel_attributes(cl_device_id device, cl_uint width, const char *folder)
|
||||||
{
|
{
|
||||||
@@ -6539,6 +6508,7 @@ test_kernel_attributes(cl_device_id device, cl_uint width, const char *folder)
|
|||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
return success;
|
return success;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static bool test_binary_type(cl_device_id device, cl_uint width, const char *folder)
|
static bool test_binary_type(cl_device_id device, cl_uint width, const char *folder)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -21,39 +21,6 @@
|
|||||||
#include "subhelpers.h"
|
#include "subhelpers.h"
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <random>
|
|
||||||
|
|
||||||
static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
|
|
||||||
const std::string &mask_type,
|
|
||||||
cl_uint max_sub_group_size)
|
|
||||||
{
|
|
||||||
bs128 mask128;
|
|
||||||
cl_uint4 mask;
|
|
||||||
cl_uint pos = subgroup_local_id;
|
|
||||||
if (mask_type == "eq") mask128.set(pos);
|
|
||||||
if (mask_type == "le" || mask_type == "lt")
|
|
||||||
{
|
|
||||||
for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
|
|
||||||
if (mask_type == "lt") mask128.reset(pos);
|
|
||||||
}
|
|
||||||
if (mask_type == "ge" || mask_type == "gt")
|
|
||||||
{
|
|
||||||
for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
|
|
||||||
if (mask_type == "gt") mask128.reset(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
// convert std::bitset<128> to uint4
|
|
||||||
auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
|
|
||||||
mask.s0 = (mask128 & uint_mask).to_ulong();
|
|
||||||
mask128 >>= 32;
|
|
||||||
mask.s1 = (mask128 & uint_mask).to_ulong();
|
|
||||||
mask128 >>= 32;
|
|
||||||
mask.s2 = (mask128 & uint_mask).to_ulong();
|
|
||||||
mask128 >>= 32;
|
|
||||||
mask.s3 = (mask128 & uint_mask).to_ulong();
|
|
||||||
|
|
||||||
return mask;
|
|
||||||
}
|
|
||||||
|
|
||||||
// DESCRIPTION :
|
// DESCRIPTION :
|
||||||
// sub_group_broadcast - each work_item registers it's own value.
|
// sub_group_broadcast - each work_item registers it's own value.
|
||||||
@@ -393,33 +360,6 @@ template <typename Ty> bool is_floating_point()
|
|||||||
|| std::is_same<Ty, subgroups::cl_half>::value;
|
|| std::is_same<Ty, subgroups::cl_half>::value;
|
||||||
}
|
}
|
||||||
|
|
||||||
// limit possible input values to avoid arithmetic rounding/overflow issues.
|
|
||||||
// for each subgroup values defined different values
|
|
||||||
// for rest of workitems set 1
|
|
||||||
// shuffle values
|
|
||||||
static void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
|
|
||||||
int sb_size)
|
|
||||||
{
|
|
||||||
// max product is 720, cl_half has enough precision for it
|
|
||||||
const std::vector<cl_ulong> non_one_values{ 2, 3, 4, 5, 6 };
|
|
||||||
|
|
||||||
if (sb_size <= non_one_values.size())
|
|
||||||
{
|
|
||||||
safe_values.assign(non_one_values.begin(),
|
|
||||||
non_one_values.begin() + sb_size);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
safe_values.assign(sb_size, 1);
|
|
||||||
std::copy(non_one_values.begin(), non_one_values.end(),
|
|
||||||
safe_values.begin());
|
|
||||||
}
|
|
||||||
|
|
||||||
std::mt19937 mersenne_twister_engine(10000);
|
|
||||||
std::shuffle(safe_values.begin(), safe_values.end(),
|
|
||||||
mersenne_twister_engine);
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Ty, ArithmeticOp operation>
|
template <typename Ty, ArithmeticOp operation>
|
||||||
void generate_inputs(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
void generate_inputs(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -16,6 +16,8 @@
|
|||||||
|
|
||||||
#include "subhelpers.h"
|
#include "subhelpers.h"
|
||||||
|
|
||||||
|
#include <random>
|
||||||
|
|
||||||
// Define operator<< for cl_ types, accessing the .s member.
|
// Define operator<< for cl_ types, accessing the .s member.
|
||||||
#define OP_OSTREAM(Ty, VecSize) \
|
#define OP_OSTREAM(Ty, VecSize) \
|
||||||
std::ostream& operator<<(std::ostream& os, const Ty##VecSize& val) \
|
std::ostream& operator<<(std::ostream& os, const Ty##VecSize& val) \
|
||||||
@@ -60,3 +62,168 @@ OP_OSTREAM_SUBGROUP(subgroups::cl_half, 2)
|
|||||||
OP_OSTREAM_SUBGROUP(subgroups::cl_half, 4)
|
OP_OSTREAM_SUBGROUP(subgroups::cl_half, 4)
|
||||||
OP_OSTREAM_SUBGROUP(subgroups::cl_half, 8)
|
OP_OSTREAM_SUBGROUP(subgroups::cl_half, 8)
|
||||||
OP_OSTREAM_SUBGROUP(subgroups::cl_half, 16)
|
OP_OSTREAM_SUBGROUP(subgroups::cl_half, 16)
|
||||||
|
|
||||||
|
bs128 cl_uint4_to_bs128(cl_uint4 v)
|
||||||
|
{
|
||||||
|
return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64)
|
||||||
|
| (bs128(v.s3) << 96);
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_uint4 bs128_to_cl_uint4(bs128 v)
|
||||||
|
{
|
||||||
|
bs128 bs128_ffffffff = 0xffffffffU;
|
||||||
|
|
||||||
|
cl_uint4 r;
|
||||||
|
r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong();
|
||||||
|
r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong();
|
||||||
|
r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong();
|
||||||
|
r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong();
|
||||||
|
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
|
||||||
|
const std::string &mask_type,
|
||||||
|
cl_uint max_sub_group_size)
|
||||||
|
{
|
||||||
|
bs128 mask128;
|
||||||
|
cl_uint4 mask;
|
||||||
|
cl_uint pos = subgroup_local_id;
|
||||||
|
if (mask_type == "eq") mask128.set(pos);
|
||||||
|
if (mask_type == "le" || mask_type == "lt")
|
||||||
|
{
|
||||||
|
for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
|
||||||
|
if (mask_type == "lt") mask128.reset(pos);
|
||||||
|
}
|
||||||
|
if (mask_type == "ge" || mask_type == "gt")
|
||||||
|
{
|
||||||
|
for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
|
||||||
|
if (mask_type == "gt") mask128.reset(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// convert std::bitset<128> to uint4
|
||||||
|
auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
|
||||||
|
mask.s0 = (mask128 & uint_mask).to_ulong();
|
||||||
|
mask128 >>= 32;
|
||||||
|
mask.s1 = (mask128 & uint_mask).to_ulong();
|
||||||
|
mask128 >>= 32;
|
||||||
|
mask.s2 = (mask128 & uint_mask).to_ulong();
|
||||||
|
mask128 >>= 32;
|
||||||
|
mask.s3 = (mask128 & uint_mask).to_ulong();
|
||||||
|
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *const operation_names(ArithmeticOp operation)
|
||||||
|
{
|
||||||
|
switch (operation)
|
||||||
|
{
|
||||||
|
case ArithmeticOp::add_: return "add";
|
||||||
|
case ArithmeticOp::max_: return "max";
|
||||||
|
case ArithmeticOp::min_: return "min";
|
||||||
|
case ArithmeticOp::mul_: return "mul";
|
||||||
|
case ArithmeticOp::and_: return "and";
|
||||||
|
case ArithmeticOp::or_: return "or";
|
||||||
|
case ArithmeticOp::xor_: return "xor";
|
||||||
|
case ArithmeticOp::logical_and: return "logical_and";
|
||||||
|
case ArithmeticOp::logical_or: return "logical_or";
|
||||||
|
case ArithmeticOp::logical_xor: return "logical_xor";
|
||||||
|
default: log_error("Unknown operation request\n"); break;
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *const operation_names(BallotOp operation)
|
||||||
|
{
|
||||||
|
switch (operation)
|
||||||
|
{
|
||||||
|
case BallotOp::ballot: return "ballot";
|
||||||
|
case BallotOp::inverse_ballot: return "inverse_ballot";
|
||||||
|
case BallotOp::ballot_bit_extract: return "bit_extract";
|
||||||
|
case BallotOp::ballot_bit_count: return "bit_count";
|
||||||
|
case BallotOp::ballot_inclusive_scan: return "inclusive_scan";
|
||||||
|
case BallotOp::ballot_exclusive_scan: return "exclusive_scan";
|
||||||
|
case BallotOp::ballot_find_lsb: return "find_lsb";
|
||||||
|
case BallotOp::ballot_find_msb: return "find_msb";
|
||||||
|
case BallotOp::eq_mask: return "eq";
|
||||||
|
case BallotOp::ge_mask: return "ge";
|
||||||
|
case BallotOp::gt_mask: return "gt";
|
||||||
|
case BallotOp::le_mask: return "le";
|
||||||
|
case BallotOp::lt_mask: return "lt";
|
||||||
|
default: log_error("Unknown operation request\n"); break;
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *const operation_names(ShuffleOp operation)
|
||||||
|
{
|
||||||
|
switch (operation)
|
||||||
|
{
|
||||||
|
case ShuffleOp::shuffle: return "shuffle";
|
||||||
|
case ShuffleOp::shuffle_up: return "shuffle_up";
|
||||||
|
case ShuffleOp::shuffle_down: return "shuffle_down";
|
||||||
|
case ShuffleOp::shuffle_xor: return "shuffle_xor";
|
||||||
|
case ShuffleOp::rotate: return "rotate";
|
||||||
|
case ShuffleOp::clustered_rotate: return "clustered_rotate";
|
||||||
|
default: log_error("Unknown operation request\n"); break;
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *const operation_names(NonUniformVoteOp operation)
|
||||||
|
{
|
||||||
|
switch (operation)
|
||||||
|
{
|
||||||
|
case NonUniformVoteOp::all: return "all";
|
||||||
|
case NonUniformVoteOp::all_equal: return "all_equal";
|
||||||
|
case NonUniformVoteOp::any: return "any";
|
||||||
|
case NonUniformVoteOp::elect: return "elect";
|
||||||
|
default: log_error("Unknown operation request\n"); break;
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *const operation_names(SubgroupsBroadcastOp operation)
|
||||||
|
{
|
||||||
|
switch (operation)
|
||||||
|
{
|
||||||
|
case SubgroupsBroadcastOp::broadcast: return "broadcast";
|
||||||
|
case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
|
||||||
|
case SubgroupsBroadcastOp::non_uniform_broadcast:
|
||||||
|
return "non_uniform_broadcast";
|
||||||
|
default: log_error("Unknown operation request\n"); break;
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups,
|
||||||
|
int subgroup_size, int &workgroup_size,
|
||||||
|
int &last_subgroup_size)
|
||||||
|
{
|
||||||
|
number_of_subgroups = 1 + non_uniform_size / subgroup_size;
|
||||||
|
last_subgroup_size = non_uniform_size % subgroup_size;
|
||||||
|
workgroup_size = non_uniform_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
|
||||||
|
int sb_size)
|
||||||
|
{
|
||||||
|
// max product is 720, cl_half has enough precision for it
|
||||||
|
const std::vector<cl_ulong> non_one_values{ 2, 3, 4, 5, 6 };
|
||||||
|
|
||||||
|
if (sb_size <= non_one_values.size())
|
||||||
|
{
|
||||||
|
safe_values.assign(non_one_values.begin(),
|
||||||
|
non_one_values.begin() + sb_size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
safe_values.assign(sb_size, 1);
|
||||||
|
std::copy(non_one_values.begin(), non_one_values.end(),
|
||||||
|
safe_values.begin());
|
||||||
|
}
|
||||||
|
|
||||||
|
std::mt19937 mersenne_twister_engine(10000);
|
||||||
|
std::shuffle(safe_values.begin(), safe_values.end(),
|
||||||
|
mersenne_twister_engine);
|
||||||
|
}
|
||||||
|
|||||||
@@ -34,24 +34,17 @@ extern MTdata gMTdata;
|
|||||||
typedef std::bitset<128> bs128;
|
typedef std::bitset<128> bs128;
|
||||||
extern cl_half_rounding_mode g_rounding_mode;
|
extern cl_half_rounding_mode g_rounding_mode;
|
||||||
|
|
||||||
static bs128 cl_uint4_to_bs128(cl_uint4 v)
|
bs128 cl_uint4_to_bs128(cl_uint4 v);
|
||||||
{
|
cl_uint4 bs128_to_cl_uint4(bs128 v);
|
||||||
return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64)
|
cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
|
||||||
| (bs128(v.s3) << 96);
|
const std::string &mask_type,
|
||||||
}
|
cl_uint max_sub_group_size);
|
||||||
|
|
||||||
static cl_uint4 bs128_to_cl_uint4(bs128 v)
|
// limit possible input values to avoid arithmetic rounding/overflow issues.
|
||||||
{
|
// for each subgroup values defined different values
|
||||||
bs128 bs128_ffffffff = 0xffffffffU;
|
// for rest of workitems set 1 shuffle values
|
||||||
|
void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
|
||||||
cl_uint4 r;
|
int sb_size);
|
||||||
r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong();
|
|
||||||
r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong();
|
|
||||||
r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong();
|
|
||||||
r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong();
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct WorkGroupParams
|
struct WorkGroupParams
|
||||||
{
|
{
|
||||||
@@ -270,87 +263,11 @@ enum class ArithmeticOp
|
|||||||
logical_xor
|
logical_xor
|
||||||
};
|
};
|
||||||
|
|
||||||
static const char *const operation_names(ArithmeticOp operation)
|
const char *const operation_names(ArithmeticOp operation);
|
||||||
{
|
const char *const operation_names(BallotOp operation);
|
||||||
switch (operation)
|
const char *const operation_names(ShuffleOp operation);
|
||||||
{
|
const char *const operation_names(NonUniformVoteOp operation);
|
||||||
case ArithmeticOp::add_: return "add";
|
const char *const operation_names(SubgroupsBroadcastOp operation);
|
||||||
case ArithmeticOp::max_: return "max";
|
|
||||||
case ArithmeticOp::min_: return "min";
|
|
||||||
case ArithmeticOp::mul_: return "mul";
|
|
||||||
case ArithmeticOp::and_: return "and";
|
|
||||||
case ArithmeticOp::or_: return "or";
|
|
||||||
case ArithmeticOp::xor_: return "xor";
|
|
||||||
case ArithmeticOp::logical_and: return "logical_and";
|
|
||||||
case ArithmeticOp::logical_or: return "logical_or";
|
|
||||||
case ArithmeticOp::logical_xor: return "logical_xor";
|
|
||||||
default: log_error("Unknown operation request\n"); break;
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char *const operation_names(BallotOp operation)
|
|
||||||
{
|
|
||||||
switch (operation)
|
|
||||||
{
|
|
||||||
case BallotOp::ballot: return "ballot";
|
|
||||||
case BallotOp::inverse_ballot: return "inverse_ballot";
|
|
||||||
case BallotOp::ballot_bit_extract: return "bit_extract";
|
|
||||||
case BallotOp::ballot_bit_count: return "bit_count";
|
|
||||||
case BallotOp::ballot_inclusive_scan: return "inclusive_scan";
|
|
||||||
case BallotOp::ballot_exclusive_scan: return "exclusive_scan";
|
|
||||||
case BallotOp::ballot_find_lsb: return "find_lsb";
|
|
||||||
case BallotOp::ballot_find_msb: return "find_msb";
|
|
||||||
case BallotOp::eq_mask: return "eq";
|
|
||||||
case BallotOp::ge_mask: return "ge";
|
|
||||||
case BallotOp::gt_mask: return "gt";
|
|
||||||
case BallotOp::le_mask: return "le";
|
|
||||||
case BallotOp::lt_mask: return "lt";
|
|
||||||
default: log_error("Unknown operation request\n"); break;
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char *const operation_names(ShuffleOp operation)
|
|
||||||
{
|
|
||||||
switch (operation)
|
|
||||||
{
|
|
||||||
case ShuffleOp::shuffle: return "shuffle";
|
|
||||||
case ShuffleOp::shuffle_up: return "shuffle_up";
|
|
||||||
case ShuffleOp::shuffle_down: return "shuffle_down";
|
|
||||||
case ShuffleOp::shuffle_xor: return "shuffle_xor";
|
|
||||||
case ShuffleOp::rotate: return "rotate";
|
|
||||||
case ShuffleOp::clustered_rotate: return "clustered_rotate";
|
|
||||||
default: log_error("Unknown operation request\n"); break;
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char *const operation_names(NonUniformVoteOp operation)
|
|
||||||
{
|
|
||||||
switch (operation)
|
|
||||||
{
|
|
||||||
case NonUniformVoteOp::all: return "all";
|
|
||||||
case NonUniformVoteOp::all_equal: return "all_equal";
|
|
||||||
case NonUniformVoteOp::any: return "any";
|
|
||||||
case NonUniformVoteOp::elect: return "elect";
|
|
||||||
default: log_error("Unknown operation request\n"); break;
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char *const operation_names(SubgroupsBroadcastOp operation)
|
|
||||||
{
|
|
||||||
switch (operation)
|
|
||||||
{
|
|
||||||
case SubgroupsBroadcastOp::broadcast: return "broadcast";
|
|
||||||
case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
|
|
||||||
case SubgroupsBroadcastOp::non_uniform_broadcast:
|
|
||||||
return "non_uniform_broadcast";
|
|
||||||
default: log_error("Unknown operation request\n"); break;
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
class subgroupsAPI {
|
class subgroupsAPI {
|
||||||
public:
|
public:
|
||||||
@@ -1732,15 +1649,9 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void set_last_workgroup_params(int non_uniform_size,
|
void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups,
|
||||||
int &number_of_subgroups,
|
int subgroup_size, int &workgroup_size,
|
||||||
int subgroup_size, int &workgroup_size,
|
int &last_subgroup_size);
|
||||||
int &last_subgroup_size)
|
|
||||||
{
|
|
||||||
number_of_subgroups = 1 + non_uniform_size / subgroup_size;
|
|
||||||
last_subgroup_size = non_uniform_size % subgroup_size;
|
|
||||||
workgroup_size = non_uniform_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Ty>
|
template <typename Ty>
|
||||||
static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset,
|
static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset,
|
||||||
|
|||||||
Reference in New Issue
Block a user