mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
add tests for cl_khr_integer_dot_product (#1276)
* cl_khr_integer_dot_product_tests * remove emulated codepaths * fix formatting * address code review comments * remove emulated codepaths again * address one more review comment
This commit is contained in:
91
test_common/harness/integer_ops_test_info.h
Normal file
91
test_common/harness/integer_ops_test_info.h
Normal file
@@ -0,0 +1,91 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
#ifndef INTEGER_OPS_TEST_INFO_H
|
||||
#define INTEGER_OPS_TEST_INFO_H
|
||||
|
||||
#include "conversions.h"
|
||||
|
||||
// TODO: expand usage to other tests.
|
||||
|
||||
template <typename T> struct TestInfo
|
||||
{
|
||||
};
|
||||
template <> struct TestInfo<cl_char>
|
||||
{
|
||||
static const ExplicitType explicitType = kChar;
|
||||
static constexpr const char* deviceTypeName = "char";
|
||||
static constexpr const char* deviceTypeNameSigned = "char";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "uchar";
|
||||
};
|
||||
template <> struct TestInfo<cl_uchar>
|
||||
{
|
||||
static const ExplicitType explicitType = kUChar;
|
||||
static constexpr const char* deviceTypeName = "uchar";
|
||||
static constexpr const char* deviceTypeNameSigned = "char";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "uchar";
|
||||
};
|
||||
template <> struct TestInfo<cl_short>
|
||||
{
|
||||
static const ExplicitType explicitType = kShort;
|
||||
static constexpr const char* deviceTypeName = "short";
|
||||
static constexpr const char* deviceTypeNameSigned = "short";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "ushort";
|
||||
};
|
||||
template <> struct TestInfo<cl_ushort>
|
||||
{
|
||||
static const ExplicitType explicitType = kUShort;
|
||||
static constexpr const char* deviceTypeName = "ushort";
|
||||
static constexpr const char* deviceTypeNameSigned = "short";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "ushort";
|
||||
};
|
||||
template <> struct TestInfo<cl_int>
|
||||
{
|
||||
static const ExplicitType explicitType = kInt;
|
||||
static constexpr const char* deviceTypeName = "int";
|
||||
static constexpr const char* deviceTypeNameSigned = "int";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "uint";
|
||||
};
|
||||
template <> struct TestInfo<cl_uint>
|
||||
{
|
||||
static const ExplicitType explicitType = kUInt;
|
||||
static constexpr const char* deviceTypeName = "uint";
|
||||
static constexpr const char* deviceTypeNameSigned = "int";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "uint";
|
||||
};
|
||||
template <> struct TestInfo<cl_long>
|
||||
{
|
||||
static const ExplicitType explicitType = kLong;
|
||||
static constexpr const char* deviceTypeName = "long";
|
||||
static constexpr const char* deviceTypeNameSigned = "long";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "ulong";
|
||||
};
|
||||
template <> struct TestInfo<cl_ulong>
|
||||
{
|
||||
static const ExplicitType explicitType = kULong;
|
||||
static constexpr const char* deviceTypeName = "ulong";
|
||||
static constexpr const char* deviceTypeNameSigned = "long";
|
||||
static constexpr const char* deviceTypeNameUnsigned = "ulong";
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
static void fill_vector_with_random_data(std::vector<T>& v)
|
||||
{
|
||||
MTdataHolder d(gRandomSeed);
|
||||
generate_random_data(TestInfo<T>::explicitType, v.size(), d, v.data());
|
||||
}
|
||||
|
||||
#endif /* INTEGER_OPS_TEST_INFO_H */
|
||||
@@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES
|
||||
test_unary_ops.cpp
|
||||
verification_and_generation_functions.cpp
|
||||
test_popcount.cpp
|
||||
test_integer_dot_product.cpp
|
||||
)
|
||||
|
||||
include(../CMakeCommon.txt)
|
||||
|
||||
@@ -25,127 +25,129 @@
|
||||
#endif
|
||||
|
||||
test_definition test_list[] = {
|
||||
ADD_TEST( integer_clz ),
|
||||
ADD_TEST_VERSION( integer_ctz, Version(2, 0)),
|
||||
ADD_TEST( integer_hadd ),
|
||||
ADD_TEST( integer_rhadd ),
|
||||
ADD_TEST( integer_mul_hi ),
|
||||
ADD_TEST( integer_rotate ),
|
||||
ADD_TEST( integer_clamp ),
|
||||
ADD_TEST( integer_mad_sat ),
|
||||
ADD_TEST( integer_mad_hi ),
|
||||
ADD_TEST( integer_min ),
|
||||
ADD_TEST( integer_max ),
|
||||
ADD_TEST( integer_upsample ),
|
||||
ADD_TEST(integer_clz),
|
||||
ADD_TEST_VERSION(integer_ctz, Version(2, 0)),
|
||||
ADD_TEST(integer_hadd),
|
||||
ADD_TEST(integer_rhadd),
|
||||
ADD_TEST(integer_mul_hi),
|
||||
ADD_TEST(integer_rotate),
|
||||
ADD_TEST(integer_clamp),
|
||||
ADD_TEST(integer_mad_sat),
|
||||
ADD_TEST(integer_mad_hi),
|
||||
ADD_TEST(integer_min),
|
||||
ADD_TEST(integer_max),
|
||||
ADD_TEST(integer_upsample),
|
||||
|
||||
ADD_TEST( integer_abs ),
|
||||
ADD_TEST( integer_abs_diff ),
|
||||
ADD_TEST( integer_add_sat ),
|
||||
ADD_TEST( integer_sub_sat ),
|
||||
ADD_TEST(integer_abs),
|
||||
ADD_TEST(integer_abs_diff),
|
||||
ADD_TEST(integer_add_sat),
|
||||
ADD_TEST(integer_sub_sat),
|
||||
|
||||
ADD_TEST( integer_addAssign ),
|
||||
ADD_TEST( integer_subtractAssign ),
|
||||
ADD_TEST( integer_multiplyAssign ),
|
||||
ADD_TEST( integer_divideAssign ),
|
||||
ADD_TEST( integer_moduloAssign ),
|
||||
ADD_TEST( integer_andAssign ),
|
||||
ADD_TEST( integer_orAssign ),
|
||||
ADD_TEST( integer_exclusiveOrAssign ),
|
||||
ADD_TEST(integer_addAssign),
|
||||
ADD_TEST(integer_subtractAssign),
|
||||
ADD_TEST(integer_multiplyAssign),
|
||||
ADD_TEST(integer_divideAssign),
|
||||
ADD_TEST(integer_moduloAssign),
|
||||
ADD_TEST(integer_andAssign),
|
||||
ADD_TEST(integer_orAssign),
|
||||
ADD_TEST(integer_exclusiveOrAssign),
|
||||
|
||||
ADD_TEST( unary_ops_increment ),
|
||||
ADD_TEST( unary_ops_decrement ),
|
||||
ADD_TEST( unary_ops_full ),
|
||||
ADD_TEST(unary_ops_increment),
|
||||
ADD_TEST(unary_ops_decrement),
|
||||
ADD_TEST(unary_ops_full),
|
||||
|
||||
ADD_TEST( integer_mul24 ),
|
||||
ADD_TEST( integer_mad24 ),
|
||||
ADD_TEST(integer_mul24),
|
||||
ADD_TEST(integer_mad24),
|
||||
|
||||
ADD_TEST( long_math ),
|
||||
ADD_TEST( long_logic ),
|
||||
ADD_TEST( long_shift ),
|
||||
ADD_TEST( long_compare ),
|
||||
ADD_TEST(long_math),
|
||||
ADD_TEST(long_logic),
|
||||
ADD_TEST(long_shift),
|
||||
ADD_TEST(long_compare),
|
||||
|
||||
ADD_TEST( ulong_math ),
|
||||
ADD_TEST( ulong_logic ),
|
||||
ADD_TEST( ulong_shift ),
|
||||
ADD_TEST( ulong_compare ),
|
||||
ADD_TEST(ulong_math),
|
||||
ADD_TEST(ulong_logic),
|
||||
ADD_TEST(ulong_shift),
|
||||
ADD_TEST(ulong_compare),
|
||||
|
||||
ADD_TEST( int_math ),
|
||||
ADD_TEST( int_logic ),
|
||||
ADD_TEST( int_shift ),
|
||||
ADD_TEST( int_compare ),
|
||||
ADD_TEST(int_math),
|
||||
ADD_TEST(int_logic),
|
||||
ADD_TEST(int_shift),
|
||||
ADD_TEST(int_compare),
|
||||
|
||||
ADD_TEST( uint_math ),
|
||||
ADD_TEST( uint_logic ),
|
||||
ADD_TEST( uint_shift ),
|
||||
ADD_TEST( uint_compare ),
|
||||
ADD_TEST(uint_math),
|
||||
ADD_TEST(uint_logic),
|
||||
ADD_TEST(uint_shift),
|
||||
ADD_TEST(uint_compare),
|
||||
|
||||
ADD_TEST( short_math ),
|
||||
ADD_TEST( short_logic ),
|
||||
ADD_TEST( short_shift ),
|
||||
ADD_TEST( short_compare ),
|
||||
ADD_TEST(short_math),
|
||||
ADD_TEST(short_logic),
|
||||
ADD_TEST(short_shift),
|
||||
ADD_TEST(short_compare),
|
||||
|
||||
ADD_TEST( ushort_math ),
|
||||
ADD_TEST( ushort_logic ),
|
||||
ADD_TEST( ushort_shift ),
|
||||
ADD_TEST( ushort_compare ),
|
||||
ADD_TEST(ushort_math),
|
||||
ADD_TEST(ushort_logic),
|
||||
ADD_TEST(ushort_shift),
|
||||
ADD_TEST(ushort_compare),
|
||||
|
||||
ADD_TEST( char_math ),
|
||||
ADD_TEST( char_logic ),
|
||||
ADD_TEST( char_shift ),
|
||||
ADD_TEST( char_compare ),
|
||||
ADD_TEST(char_math),
|
||||
ADD_TEST(char_logic),
|
||||
ADD_TEST(char_shift),
|
||||
ADD_TEST(char_compare),
|
||||
|
||||
ADD_TEST( uchar_math ),
|
||||
ADD_TEST( uchar_logic ),
|
||||
ADD_TEST( uchar_shift ),
|
||||
ADD_TEST( uchar_compare ),
|
||||
ADD_TEST(uchar_math),
|
||||
ADD_TEST(uchar_logic),
|
||||
ADD_TEST(uchar_shift),
|
||||
ADD_TEST(uchar_compare),
|
||||
|
||||
ADD_TEST( popcount ),
|
||||
ADD_TEST(popcount),
|
||||
|
||||
// Quick
|
||||
ADD_TEST( quick_long_math ),
|
||||
ADD_TEST( quick_long_logic ),
|
||||
ADD_TEST( quick_long_shift ),
|
||||
ADD_TEST( quick_long_compare ),
|
||||
ADD_TEST(quick_long_math),
|
||||
ADD_TEST(quick_long_logic),
|
||||
ADD_TEST(quick_long_shift),
|
||||
ADD_TEST(quick_long_compare),
|
||||
|
||||
ADD_TEST( quick_ulong_math ),
|
||||
ADD_TEST( quick_ulong_logic ),
|
||||
ADD_TEST( quick_ulong_shift ),
|
||||
ADD_TEST( quick_ulong_compare ),
|
||||
ADD_TEST(quick_ulong_math),
|
||||
ADD_TEST(quick_ulong_logic),
|
||||
ADD_TEST(quick_ulong_shift),
|
||||
ADD_TEST(quick_ulong_compare),
|
||||
|
||||
ADD_TEST( quick_int_math ),
|
||||
ADD_TEST( quick_int_logic ),
|
||||
ADD_TEST( quick_int_shift ),
|
||||
ADD_TEST( quick_int_compare ),
|
||||
ADD_TEST(quick_int_math),
|
||||
ADD_TEST(quick_int_logic),
|
||||
ADD_TEST(quick_int_shift),
|
||||
ADD_TEST(quick_int_compare),
|
||||
|
||||
ADD_TEST( quick_uint_math ),
|
||||
ADD_TEST( quick_uint_logic ),
|
||||
ADD_TEST( quick_uint_shift ),
|
||||
ADD_TEST( quick_uint_compare ),
|
||||
ADD_TEST(quick_uint_math),
|
||||
ADD_TEST(quick_uint_logic),
|
||||
ADD_TEST(quick_uint_shift),
|
||||
ADD_TEST(quick_uint_compare),
|
||||
|
||||
ADD_TEST( quick_short_math ),
|
||||
ADD_TEST( quick_short_logic ),
|
||||
ADD_TEST( quick_short_shift ),
|
||||
ADD_TEST( quick_short_compare ),
|
||||
ADD_TEST(quick_short_math),
|
||||
ADD_TEST(quick_short_logic),
|
||||
ADD_TEST(quick_short_shift),
|
||||
ADD_TEST(quick_short_compare),
|
||||
|
||||
ADD_TEST( quick_ushort_math ),
|
||||
ADD_TEST( quick_ushort_logic ),
|
||||
ADD_TEST( quick_ushort_shift ),
|
||||
ADD_TEST( quick_ushort_compare ),
|
||||
ADD_TEST(quick_ushort_math),
|
||||
ADD_TEST(quick_ushort_logic),
|
||||
ADD_TEST(quick_ushort_shift),
|
||||
ADD_TEST(quick_ushort_compare),
|
||||
|
||||
ADD_TEST( quick_char_math ),
|
||||
ADD_TEST( quick_char_logic ),
|
||||
ADD_TEST( quick_char_shift ),
|
||||
ADD_TEST( quick_char_compare ),
|
||||
ADD_TEST(quick_char_math),
|
||||
ADD_TEST(quick_char_logic),
|
||||
ADD_TEST(quick_char_shift),
|
||||
ADD_TEST(quick_char_compare),
|
||||
|
||||
ADD_TEST( quick_uchar_math ),
|
||||
ADD_TEST( quick_uchar_logic ),
|
||||
ADD_TEST( quick_uchar_shift ),
|
||||
ADD_TEST( quick_uchar_compare ),
|
||||
ADD_TEST(quick_uchar_math),
|
||||
ADD_TEST(quick_uchar_logic),
|
||||
ADD_TEST(quick_uchar_shift),
|
||||
ADD_TEST(quick_uchar_compare),
|
||||
|
||||
ADD_TEST( vector_scalar ),
|
||||
ADD_TEST(vector_scalar),
|
||||
|
||||
ADD_TEST(integer_dot_product),
|
||||
};
|
||||
|
||||
const int test_num = ARRAY_SIZE( test_list );
|
||||
const int test_num = ARRAY_SIZE(test_list);
|
||||
|
||||
void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d )
|
||||
{
|
||||
|
||||
@@ -141,3 +141,5 @@ extern int test_unary_ops_decrement(cl_device_id deviceID, cl_context context, c
|
||||
|
||||
extern int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_integer_dot_product(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements);
|
||||
|
||||
380
test_conformance/integer_ops/test_integer_dot_product.cpp
Normal file
380
test_conformance/integer_ops/test_integer_dot_product.cpp
Normal file
@@ -0,0 +1,380 @@
|
||||
//
|
||||
// Copyright (c) 2021 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
// This is needed for std::numeric_limits<>::min() and max() to work on Windows.
|
||||
#if defined(_WIN32)
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "procs.h"
|
||||
#include "harness/integer_ops_test_info.h"
|
||||
#include "harness/testHarness.h"
|
||||
|
||||
template <size_t N, typename DstType, typename SrcTypeA, typename SrcTypeB>
|
||||
static void
|
||||
calculate_reference(std::vector<DstType>& ref, const std::vector<SrcTypeA>& a,
|
||||
const std::vector<SrcTypeB>& b, const bool AccSat = false,
|
||||
const std::vector<DstType>& acc = {})
|
||||
{
|
||||
assert(a.size() == b.size());
|
||||
assert(AccSat == false || acc.size() == a.size() / N);
|
||||
|
||||
ref.resize(a.size() / N);
|
||||
for (size_t r = 0; r < ref.size(); r++)
|
||||
{
|
||||
cl_long result = AccSat ? acc[r] : 0;
|
||||
for (size_t c = 0; c < N; c++)
|
||||
{
|
||||
// OK to assume no overflow?
|
||||
result += a[r * N + c] * b[r * N + c];
|
||||
}
|
||||
if (AccSat && result > std::numeric_limits<DstType>::max())
|
||||
{
|
||||
result = std::numeric_limits<DstType>::max();
|
||||
}
|
||||
ref[r] = static_cast<DstType>(result);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename SrcTypeA, typename SrcTypeB>
|
||||
void generate_inputs_with_special_values(std::vector<SrcTypeA>& a,
|
||||
std::vector<SrcTypeB>& b)
|
||||
{
|
||||
const std::vector<SrcTypeA> specialValuesA(
|
||||
{ static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min()),
|
||||
static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min() + 1),
|
||||
static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::min() / 2), 0,
|
||||
static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max() / 2),
|
||||
static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max() - 1),
|
||||
static_cast<SrcTypeA>(std::numeric_limits<SrcTypeA>::max()) });
|
||||
const std::vector<SrcTypeB> specialValuesB(
|
||||
{ static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min()),
|
||||
static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min() + 1),
|
||||
static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::min() / 2), 0,
|
||||
static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max() / 2),
|
||||
static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max() - 1),
|
||||
static_cast<SrcTypeB>(std::numeric_limits<SrcTypeB>::max()) });
|
||||
|
||||
size_t count = 0;
|
||||
for (auto svA : specialValuesA)
|
||||
{
|
||||
for (auto svB : specialValuesB)
|
||||
{
|
||||
a[count] = svA;
|
||||
b[count] = svB;
|
||||
++count;
|
||||
}
|
||||
}
|
||||
|
||||
// Generate random data for the rest of the inputs:
|
||||
MTdataHolder d(gRandomSeed);
|
||||
generate_random_data(TestInfo<SrcTypeA>::explicitType, a.size() - count, d,
|
||||
a.data() + count);
|
||||
generate_random_data(TestInfo<SrcTypeB>::explicitType, b.size() - count, d,
|
||||
b.data() + count);
|
||||
}
|
||||
|
||||
template <typename SrcType>
|
||||
void generate_acc_sat_inputs(std::vector<SrcType>& acc)
|
||||
{
|
||||
// First generate random data:
|
||||
fill_vector_with_random_data(acc);
|
||||
|
||||
// Now go through the generated data, and make every other element large.
|
||||
// This ensures we have some elements that need saturation.
|
||||
for (size_t i = 0; i < acc.size(); i += 2)
|
||||
{
|
||||
acc[i] = std::numeric_limits<SrcType>::max() - acc[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> struct PackedTestInfo
|
||||
{
|
||||
static constexpr const char* deviceTypeName = "UNSUPPORTED";
|
||||
};
|
||||
template <> struct PackedTestInfo<cl_char>
|
||||
{
|
||||
static constexpr const char* deviceTypeName = "int";
|
||||
};
|
||||
template <> struct PackedTestInfo<cl_uchar>
|
||||
{
|
||||
static constexpr const char* deviceTypeName = "uint";
|
||||
};
|
||||
|
||||
static constexpr const char* kernel_source_dot = R"CLC(
|
||||
__kernel void test_dot(__global DSTTYPE* dst, __global SRCTYPEA* a, __global SRCTYPEB* b)
|
||||
{
|
||||
int index = get_global_id(0);
|
||||
dst[index] = DOT(a[index], b[index]);
|
||||
}
|
||||
)CLC";
|
||||
|
||||
static constexpr const char* kernel_source_dot_acc_sat = R"CLC(
|
||||
__kernel void test_dot_acc_sat(
|
||||
__global DSTTYPE* dst,
|
||||
__global SRCTYPEA* a, __global SRCTYPEB* b, __global DSTTYPE* acc)
|
||||
{
|
||||
int index = get_global_id(0);
|
||||
dst[index] = DOT_ACC_SAT(a[index], b[index], acc[index]);
|
||||
}
|
||||
)CLC";
|
||||
|
||||
template <typename DstType, typename SrcTypeA, typename SrcTypeB, size_t N>
|
||||
static int test_case_dot(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements, bool packed,
|
||||
bool sat)
|
||||
{
|
||||
log_info(" testing %s = dot%s%s(%s, %s)\n",
|
||||
std::numeric_limits<DstType>::is_signed ? "signed" : "unsigned",
|
||||
sat ? "_acc_sat" : "", packed ? "_packed" : "",
|
||||
std::numeric_limits<SrcTypeA>::is_signed ? "signed" : "unsigned",
|
||||
std::numeric_limits<SrcTypeB>::is_signed ? "signed" : "unsigned");
|
||||
|
||||
cl_int error = CL_SUCCESS;
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
std::string buildOptions;
|
||||
buildOptions += " -DDSTTYPE=";
|
||||
buildOptions += TestInfo<DstType>::deviceTypeName;
|
||||
buildOptions += " -DSRCTYPEA=";
|
||||
buildOptions += packed
|
||||
? PackedTestInfo<SrcTypeA>::deviceTypeName
|
||||
: TestInfo<SrcTypeA>::deviceTypeName + std::to_string(N);
|
||||
buildOptions += " -DSRCTYPEB=";
|
||||
buildOptions += packed
|
||||
? PackedTestInfo<SrcTypeB>::deviceTypeName
|
||||
: TestInfo<SrcTypeB>::deviceTypeName + std::to_string(N);
|
||||
std::string packedSuffix;
|
||||
packedSuffix += std::numeric_limits<SrcTypeA>::is_signed ? "s" : "u";
|
||||
packedSuffix += std::numeric_limits<SrcTypeB>::is_signed ? "s" : "u";
|
||||
packedSuffix += std::numeric_limits<DstType>::is_signed ? "_int" : "_uint";
|
||||
if (sat)
|
||||
{
|
||||
buildOptions += packed
|
||||
? " -DDOT_ACC_SAT=dot_acc_sat_4x8packed_" + packedSuffix
|
||||
: " -DDOT_ACC_SAT=dot_acc_sat";
|
||||
}
|
||||
else
|
||||
{
|
||||
buildOptions +=
|
||||
packed ? " -DDOT=dot_4x8packed_" + packedSuffix : " -DDOT=dot";
|
||||
}
|
||||
|
||||
std::vector<SrcTypeA> a(N * num_elements);
|
||||
std::vector<SrcTypeB> b(N * num_elements);
|
||||
generate_inputs_with_special_values(a, b);
|
||||
|
||||
std::vector<DstType> acc;
|
||||
if (sat)
|
||||
{
|
||||
acc.resize(num_elements);
|
||||
generate_acc_sat_inputs(acc);
|
||||
}
|
||||
|
||||
std::vector<DstType> reference(num_elements);
|
||||
calculate_reference<N>(reference, a, b, sat, acc);
|
||||
|
||||
const char* source = sat ? kernel_source_dot_acc_sat : kernel_source_dot;
|
||||
const char* name = sat ? "test_dot_acc_sat" : "test_dot";
|
||||
error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
|
||||
name, buildOptions.c_str());
|
||||
test_error(error, "Unable to create test kernel");
|
||||
|
||||
clMemWrapper dst = clCreateBuffer(
|
||||
context, 0, reference.size() * sizeof(DstType), NULL, &error);
|
||||
test_error(error, "Unable to create output buffer");
|
||||
|
||||
clMemWrapper srcA =
|
||||
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
||||
a.size() * sizeof(SrcTypeA), a.data(), &error);
|
||||
test_error(error, "Unable to create srcA buffer");
|
||||
|
||||
clMemWrapper srcB =
|
||||
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
||||
b.size() * sizeof(SrcTypeB), b.data(), &error);
|
||||
test_error(error, "Unable to create srcB buffer");
|
||||
|
||||
clMemWrapper srcAcc;
|
||||
if (sat)
|
||||
{
|
||||
srcAcc =
|
||||
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
|
||||
acc.size() * sizeof(DstType), acc.data(), &error);
|
||||
test_error(error, "Unable to create acc buffer");
|
||||
}
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
|
||||
test_error(error, "Unable to set output buffer kernel arg");
|
||||
|
||||
error = clSetKernelArg(kernel, 1, sizeof(srcA), &srcA);
|
||||
test_error(error, "Unable to set srcA buffer kernel arg");
|
||||
|
||||
error = clSetKernelArg(kernel, 2, sizeof(srcB), &srcB);
|
||||
test_error(error, "Unable to set srcB buffer kernel arg");
|
||||
|
||||
if (sat)
|
||||
{
|
||||
error = clSetKernelArg(kernel, 3, sizeof(srcAcc), &srcAcc);
|
||||
test_error(error, "Unable to set acc buffer kernel arg");
|
||||
}
|
||||
|
||||
size_t global_work_size[] = { reference.size() };
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
|
||||
NULL, 0, NULL, NULL);
|
||||
test_error(error, "Unable to enqueue test kernel");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed after test kernel");
|
||||
|
||||
std::vector<DstType> results(reference.size(), 99);
|
||||
error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0,
|
||||
results.size() * sizeof(DstType),
|
||||
results.data(), 0, NULL, NULL);
|
||||
test_error(error, "Unable to read data after test kernel");
|
||||
|
||||
if (results != reference)
|
||||
{
|
||||
log_error("Result buffer did not match reference buffer!\n");
|
||||
return TEST_FAIL;
|
||||
}
|
||||
|
||||
return TEST_PASS;
|
||||
}
|
||||
|
||||
template <typename SrcType, typename DstType, size_t N>
|
||||
static int test_vectype(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int result = TEST_PASS;
|
||||
|
||||
typedef typename std::make_signed<SrcType>::type SSrcType;
|
||||
typedef typename std::make_signed<DstType>::type SDstType;
|
||||
|
||||
typedef typename std::make_unsigned<SrcType>::type USrcType;
|
||||
typedef typename std::make_unsigned<DstType>::type UDstType;
|
||||
|
||||
// dot testing:
|
||||
result |= test_case_dot<UDstType, USrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, false);
|
||||
result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, false);
|
||||
result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, false);
|
||||
result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, false);
|
||||
|
||||
// dot_acc_sat testing:
|
||||
result |= test_case_dot<UDstType, USrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, true);
|
||||
result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, true);
|
||||
result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, true);
|
||||
result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, false, true);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename SrcType, typename DstType, size_t N>
|
||||
static int test_vectype_packed(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int result = TEST_PASS;
|
||||
|
||||
typedef typename std::make_signed<SrcType>::type SSrcType;
|
||||
typedef typename std::make_signed<DstType>::type SDstType;
|
||||
|
||||
typedef typename std::make_unsigned<SrcType>::type USrcType;
|
||||
typedef typename std::make_unsigned<DstType>::type UDstType;
|
||||
|
||||
// packed dot testing:
|
||||
result |= test_case_dot<UDstType, USrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, false);
|
||||
result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, false);
|
||||
result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, false);
|
||||
result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, false);
|
||||
|
||||
// packed dot_acc_sat testing:
|
||||
result |= test_case_dot<UDstType, USrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, true);
|
||||
result |= test_case_dot<SDstType, SSrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, true);
|
||||
result |= test_case_dot<SDstType, USrcType, SSrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, true);
|
||||
result |= test_case_dot<SDstType, SSrcType, USrcType, N>(
|
||||
deviceID, context, queue, num_elements, true, true);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int test_integer_dot_product(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
if (!is_extension_available(deviceID, "cl_khr_integer_dot_product"))
|
||||
{
|
||||
log_info("cl_khr_integer_dot_product is not supported\n");
|
||||
return TEST_SKIPPED_ITSELF;
|
||||
}
|
||||
|
||||
cl_int error = CL_SUCCESS;
|
||||
int result = TEST_PASS;
|
||||
|
||||
cl_device_integer_dot_product_capabilities_khr dotCaps = 0;
|
||||
error = clGetDeviceInfo(deviceID,
|
||||
CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR,
|
||||
sizeof(dotCaps), &dotCaps, NULL);
|
||||
test_error(
|
||||
error,
|
||||
"Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR");
|
||||
test_assert_error(
|
||||
dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR,
|
||||
"When cl_khr_integer_dot_product is supported "
|
||||
"CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be "
|
||||
"supported");
|
||||
|
||||
if (dotCaps
|
||||
& ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR
|
||||
| CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR))
|
||||
{
|
||||
log_info("NOTE: found an unknown / untested capability!\n");
|
||||
}
|
||||
|
||||
if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)
|
||||
{
|
||||
result |= test_vectype<cl_uchar, cl_uint, 4>(deviceID, context, queue,
|
||||
num_elements);
|
||||
}
|
||||
|
||||
if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR)
|
||||
{
|
||||
result |= test_vectype_packed<cl_uchar, cl_uint, 4>(
|
||||
deviceID, context, queue, num_elements);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
Reference in New Issue
Block a user