mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 22:19:02 +00:00
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
1779 lines
69 KiB
C++
1779 lines
69 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "testBase.h"
|
|
#include "../../test_common/harness/conversions.h"
|
|
|
|
#define TEST_SIZE 512
|
|
|
|
#ifndef MIN
|
|
#define MIN( _a, _b ) ((_a) < (_b) ? (_a) : (_b))
|
|
#endif
|
|
#ifndef MAX
|
|
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
|
|
#endif
|
|
|
|
const char *singleParamIntegerKernelSourcePattern =
|
|
"__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
" %s%s tmp = vload%s( tid, destValues );\n"
|
|
" tmp %s= %s( vload%s( tid, sourceA ) );\n"
|
|
" vstore%s( tmp, tid, destValues );\n"
|
|
"\n"
|
|
"}\n";
|
|
|
|
const char *singleParamSingleSizeIntegerKernelSourcePattern =
|
|
"__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
" destValues[tid] %s= %s( sourceA[tid] );\n"
|
|
"}\n";
|
|
|
|
typedef bool (*singleParamIntegerVerifyFn)( void *source, void *destination, ExplicitType vecType );
|
|
static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType );
|
|
bool verify_integer_divideAssign( void *source, void *destination, ExplicitType vecType );
|
|
bool verify_integer_moduloAssign( void *source, void *destination, ExplicitType vecType );
|
|
|
|
int test_single_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
|
|
ExplicitType vecType, size_t vecSize, singleParamIntegerVerifyFn verifyFn,
|
|
MTdata d, bool useOpKernel = false )
|
|
{
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
clMemWrapper streams[2];
|
|
cl_long inDataA[TEST_SIZE * 16], outData[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], expected;
|
|
int error, i;
|
|
size_t threads[1], localThreads[1];
|
|
char kernelSource[10240];
|
|
char *programPtr;
|
|
char sizeName[4];
|
|
|
|
if (! gHasLong && strstr(get_explicit_type_name(vecType),"long"))
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecType) );
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
/* Create the source */
|
|
if( vecSize == 1 )
|
|
sizeName[ 0 ] = 0;
|
|
else
|
|
sprintf( sizeName, "%d", (int)vecSize );
|
|
|
|
if( vecSize == 1 )
|
|
sprintf( kernelSource, singleParamSingleSizeIntegerKernelSourcePattern,
|
|
get_explicit_type_name( vecType ), get_explicit_type_name( vecType ),
|
|
useOpKernel ? fnName : "", useOpKernel ? "" : fnName );
|
|
else
|
|
sprintf( kernelSource, singleParamIntegerKernelSourcePattern,
|
|
get_explicit_type_name( vecType ), get_explicit_type_name( vecType ),
|
|
get_explicit_type_name( vecType ), sizeName, sizeName,
|
|
useOpKernel ? fnName : "", useOpKernel ? "" : fnName, sizeName,
|
|
sizeName );
|
|
|
|
/* Create kernels */
|
|
programPtr = kernelSource;
|
|
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
|
|
{
|
|
log_error("The program we attempted to compile was: \n%s\n", kernelSource);
|
|
return -1;
|
|
}
|
|
|
|
/* Generate some streams */
|
|
generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataA );
|
|
|
|
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
|
|
get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
|
|
inDataA, NULL);
|
|
if( streams[0] == NULL )
|
|
{
|
|
log_error("ERROR: Creating input array A failed!\n");
|
|
return -1;
|
|
}
|
|
|
|
if( useOpKernel )
|
|
{
|
|
// Op kernels use an r/w buffer for the second param, so we need to init it with data
|
|
generate_random_data( vecType, vecSize * TEST_SIZE, d, inDataB );
|
|
}
|
|
streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | ( useOpKernel ? CL_MEM_COPY_HOST_PTR : 0 )),
|
|
get_explicit_type_size( vecType ) * vecSize * TEST_SIZE,
|
|
( useOpKernel ) ? &inDataB : NULL, NULL );
|
|
if( streams[1] == NULL )
|
|
{
|
|
log_error("ERROR: Creating output array failed!\n");
|
|
return -1;
|
|
}
|
|
|
|
/* Assign streams and execute */
|
|
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
|
|
/* Run the kernel */
|
|
threads[0] = TEST_SIZE;
|
|
|
|
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
|
test_error( error, "Unable to get work group size to use" );
|
|
|
|
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
|
test_error( error, "Unable to execute test kernel" );
|
|
|
|
memset(outData, 0xFF, get_explicit_type_size( vecType ) * TEST_SIZE * vecSize );
|
|
|
|
/* Now get the results */
|
|
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0,
|
|
get_explicit_type_size( vecType ) * TEST_SIZE * vecSize,
|
|
outData, 0, NULL, NULL );
|
|
test_error( error, "Unable to read output array!" );
|
|
|
|
// deal with division by 0 -- any answer is allowed here
|
|
if( verifyFn == verify_integer_divideAssign || verifyFn == verify_integer_moduloAssign )
|
|
patchup_divide_results( outData, inDataA, inDataB, TEST_SIZE * vecSize, vecType );
|
|
|
|
/* And verify! */
|
|
char *p = (char *)outData;
|
|
char *in = (char *)inDataA;
|
|
char *in2 = (char *)inDataB;
|
|
for( i = 0; i < (int)TEST_SIZE; i++ )
|
|
{
|
|
for( size_t j = 0; j < vecSize; j++ )
|
|
{
|
|
if( useOpKernel )
|
|
memcpy( &expected, in2, get_explicit_type_size( vecType ) );
|
|
|
|
verifyFn( in, &expected, vecType );
|
|
if( memcmp( &expected, p, get_explicit_type_size( vecType ) ) != 0 )
|
|
{
|
|
switch( get_explicit_type_size( vecType ))
|
|
{
|
|
case 1:
|
|
if( useOpKernel )
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x)\n",
|
|
(int)i, (int)j,
|
|
((cl_uchar*)&expected)[0],
|
|
*( (cl_uchar *)p ),
|
|
*( (cl_uchar *)in ),
|
|
*( (cl_uchar *)in2 ) );
|
|
else
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x)\n",
|
|
(int)i, (int)j,
|
|
((cl_uchar*)&expected)[0],
|
|
*( (cl_uchar *)p ),
|
|
*( (cl_uchar *)in ) );
|
|
break;
|
|
|
|
case 2:
|
|
if( useOpKernel )
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x)\n",
|
|
(int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ),
|
|
*( (cl_ushort *)in ), *( (cl_ushort *)in2 ) );
|
|
else
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x)\n",
|
|
(int)i, (int)j, ((cl_ushort*)&expected)[0], *( (cl_ushort *)p ),
|
|
*( (cl_ushort *)in ) );
|
|
break;
|
|
|
|
case 4:
|
|
if( useOpKernel )
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n",
|
|
(int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ),
|
|
*( (cl_uint *)in ), *( (cl_uint *)in2 ) );
|
|
else
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x)\n",
|
|
(int)i, (int)j, ((cl_uint*)&expected)[0], *( (cl_uint *)p ),
|
|
*( (cl_uint *)in ) );
|
|
break;
|
|
|
|
case 8:
|
|
if( useOpKernel )
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n",
|
|
(int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ),
|
|
*( (cl_ulong *)in ), *( (cl_ulong *)in2 ) );
|
|
else
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx)\n",
|
|
(int)i, (int)j, ((cl_ulong*)&expected)[0], *( (cl_ulong *)p ),
|
|
*( (cl_ulong *)in ) );
|
|
break;
|
|
}
|
|
return -1;
|
|
}
|
|
p += get_explicit_type_size( vecType );
|
|
in += get_explicit_type_size( vecType );
|
|
in2 += get_explicit_type_size( vecType );
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_single_param_integer_fn( cl_command_queue queue, cl_context context, const char *fnName, singleParamIntegerVerifyFn verifyFn, bool useOpKernel = false )
|
|
{
|
|
ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
|
|
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO 3 not tested
|
|
unsigned int index, typeIndex;
|
|
int retVal = 0;
|
|
RandomSeed seed(gRandomSeed );
|
|
|
|
for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
|
|
{
|
|
for( index = 0; vecSizes[ index ] != 0; index++ )
|
|
{
|
|
if( test_single_param_integer_kernel(queue, context, fnName, types[ typeIndex ], vecSizes[ index ], verifyFn, seed, useOpKernel ) != 0 )
|
|
{
|
|
log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] );
|
|
retVal = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
bool verify_integer_clz( void *source, void *destination, ExplicitType vecType )
|
|
{
|
|
cl_long testValue;
|
|
int count;
|
|
int typeBits;
|
|
|
|
switch( vecType )
|
|
{
|
|
case kChar:
|
|
testValue = *( (cl_char *)source );
|
|
typeBits = 8 * sizeof( cl_char );
|
|
break;
|
|
case kUChar:
|
|
testValue = *( (cl_uchar *)source );
|
|
typeBits = 8 * sizeof( cl_uchar );
|
|
break;
|
|
case kShort:
|
|
testValue = *( (cl_short *)source );
|
|
typeBits = 8 * sizeof( cl_short );
|
|
break;
|
|
case kUShort:
|
|
testValue = *( (cl_ushort *)source );
|
|
typeBits = 8 * sizeof( cl_ushort );
|
|
break;
|
|
case kInt:
|
|
testValue = *( (cl_int *)source );
|
|
typeBits = 8 * sizeof( cl_int );
|
|
break;
|
|
case kUInt:
|
|
testValue = *( (cl_uint *)source );
|
|
typeBits = 8 * sizeof( cl_uint );
|
|
break;
|
|
case kLong:
|
|
testValue = *( (cl_long *)source );
|
|
typeBits = 8 * sizeof( cl_long );
|
|
break;
|
|
case kULong:
|
|
// Hack for now: just treat it as a signed cl_long, since it won't matter for bitcounting
|
|
testValue = *( (cl_ulong *)source );
|
|
typeBits = 8 * sizeof( cl_ulong );
|
|
break;
|
|
default:
|
|
// Should never happen
|
|
return false;
|
|
}
|
|
|
|
count = typeBits;
|
|
if( testValue )
|
|
{
|
|
testValue <<= 8 * sizeof( testValue ) - typeBits;
|
|
for( count = 0; 0 == (testValue & CL_LONG_MIN); count++ )
|
|
testValue <<= 1;
|
|
}
|
|
|
|
switch( vecType )
|
|
{
|
|
case kChar:
|
|
*( (cl_char *)destination ) = count;
|
|
break;
|
|
case kUChar:
|
|
*( (cl_uchar *)destination ) = count;
|
|
break;
|
|
case kShort:
|
|
*( (cl_short *)destination ) = count;
|
|
break;
|
|
case kUShort:
|
|
*( (cl_ushort *)destination ) = count;
|
|
break;
|
|
case kInt:
|
|
*( (cl_int *)destination ) = count;
|
|
break;
|
|
case kUInt:
|
|
*( (cl_uint *)destination ) = count;
|
|
break;
|
|
case kLong:
|
|
*( (cl_long *)destination ) = count;
|
|
break;
|
|
case kULong:
|
|
*( (cl_ulong *)destination ) = count;
|
|
break;
|
|
default:
|
|
// Should never happen
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_clz(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_single_param_integer_fn( queue, context, "clz", verify_integer_clz );
|
|
}
|
|
|
|
#define OP_CASE( op, sizeName, size ) \
|
|
case sizeName: \
|
|
{ \
|
|
cl_##size *d = (cl_##size *)destination; \
|
|
*d op##= *( (cl_##size *)source ); \
|
|
break; \
|
|
}
|
|
|
|
#define OP_CASES( op ) \
|
|
switch( vecType ) \
|
|
{ \
|
|
OP_CASE( op, kChar, char ) \
|
|
OP_CASE( op, kUChar, uchar ) \
|
|
OP_CASE( op, kShort, short ) \
|
|
OP_CASE( op, kUShort, ushort ) \
|
|
OP_CASE( op, kInt, int ) \
|
|
OP_CASE( op, kUInt, uint ) \
|
|
OP_CASE( op, kLong, long ) \
|
|
OP_CASE( op, kULong, ulong ) \
|
|
default: \
|
|
break; \
|
|
}
|
|
|
|
#define OP_TEST( op, opName ) \
|
|
bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \
|
|
{ \
|
|
OP_CASES( op ) \
|
|
return true; \
|
|
} \
|
|
int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \
|
|
{ \
|
|
return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \
|
|
}
|
|
|
|
OP_TEST( +, add )
|
|
OP_TEST( -, subtract )
|
|
OP_TEST( *, multiply )
|
|
OP_TEST( ^, exclusiveOr )
|
|
OP_TEST( |, or )
|
|
OP_TEST( &, and )
|
|
|
|
#define OP_CASE_GUARD( op, sizeName, size ) \
|
|
case sizeName: \
|
|
{ \
|
|
cl_##size *d = (cl_##size *)destination; \
|
|
cl_##size *s = (cl_##size *)source; \
|
|
if( *s == 0 ) \
|
|
*d = -1; \
|
|
else \
|
|
*d op##= *s; \
|
|
break; \
|
|
}
|
|
|
|
#define OP_CASE_GUARD_SIGNED( op, sizeName, size, MIN_VAL ) \
|
|
case sizeName: \
|
|
{ \
|
|
cl_##size *d = (cl_##size *)destination; \
|
|
cl_##size *s = (cl_##size *)source; \
|
|
if( *s == 0 || (*d == MIN_VAL && *s == -1)) \
|
|
*d = -1 - MIN_VAL; \
|
|
else \
|
|
*d op##= *s; \
|
|
break; \
|
|
}
|
|
|
|
#define OP_CASES_GUARD( op ) \
|
|
switch( vecType ) \
|
|
{ \
|
|
OP_CASE_GUARD_SIGNED( op, kChar, char, CL_CHAR_MIN ) \
|
|
OP_CASE_GUARD( op, kUChar, uchar ) \
|
|
OP_CASE_GUARD_SIGNED( op, kShort, short, CL_SHRT_MIN ) \
|
|
OP_CASE_GUARD( op, kUShort, ushort ) \
|
|
OP_CASE_GUARD_SIGNED( op, kInt, int, CL_INT_MIN ) \
|
|
OP_CASE_GUARD( op, kUInt, uint ) \
|
|
OP_CASE_GUARD_SIGNED( op, kLong, long, CL_LONG_MIN ) \
|
|
OP_CASE_GUARD( op, kULong, ulong ) \
|
|
default: \
|
|
break; \
|
|
}
|
|
|
|
#define OP_TEST_GUARD( op, opName ) \
|
|
bool verify_integer_##opName##Assign( void *source, void *destination, ExplicitType vecType ) \
|
|
{ \
|
|
OP_CASES_GUARD( op ) \
|
|
return true; \
|
|
} \
|
|
int test_integer_##opName##Assign(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) \
|
|
{ \
|
|
return test_single_param_integer_fn( queue, context, #op, verify_integer_##opName##Assign, true ); \
|
|
}
|
|
|
|
OP_TEST_GUARD( /, divide )
|
|
OP_TEST_GUARD( %, modulo )
|
|
|
|
#define PATCH_CASE( _out, _src, _dest, _count, _cl_type ) \
|
|
{ \
|
|
const _cl_type *denom = (const _cl_type* ) _src; \
|
|
_cl_type *result = (_cl_type* ) _out; \
|
|
for( size_t i = 0; i < _count; i++ ) \
|
|
if( denom[i] == 0 ) \
|
|
result[i] = (_cl_type) -1; \
|
|
}
|
|
|
|
#define PATCH_CASE_SIGNED( _out, _src, _dest, _count, _cl_type, _MIN_VAL ) \
|
|
{ \
|
|
const _cl_type *num = (const _cl_type* ) _dest; \
|
|
const _cl_type *denom = (const _cl_type* ) _src; \
|
|
_cl_type *result = (_cl_type* ) _out; \
|
|
for( size_t i = 0; i < _count; i++ ) \
|
|
if( denom[i] == 0 || ( num[i] == _MIN_VAL && denom[i] == -1)) \
|
|
result[i] = -1 - _MIN_VAL; \
|
|
}
|
|
|
|
static void patchup_divide_results( void *outData, const void *inDataA, const void *inDataB, size_t count, ExplicitType vecType )
|
|
{
|
|
switch( vecType )
|
|
{
|
|
case kChar:
|
|
PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_char, CL_CHAR_MIN )
|
|
break;
|
|
case kUChar:
|
|
PATCH_CASE( outData, inDataA, inDataB, count, cl_uchar )
|
|
break;
|
|
case kShort:
|
|
PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_short, CL_SHRT_MIN )
|
|
break;
|
|
case kUShort:
|
|
PATCH_CASE( outData, inDataA, inDataB, count, cl_ushort )
|
|
break;
|
|
case kInt:
|
|
PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_int, CL_INT_MIN )
|
|
break;
|
|
case kUInt:
|
|
PATCH_CASE( outData, inDataA, inDataB, count, cl_uint )
|
|
break;
|
|
case kLong:
|
|
PATCH_CASE_SIGNED( outData, inDataA, inDataB, count, cl_long, CL_LONG_MIN )
|
|
break;
|
|
case kULong:
|
|
PATCH_CASE( outData, inDataA, inDataB, count, cl_ulong )
|
|
break;
|
|
default:
|
|
log_error( "ERROR: internal test error -- unknown data type %d\n", vecType );
|
|
break;
|
|
}
|
|
}
|
|
|
|
const char *twoParamIntegerKernelSourcePattern =
|
|
"__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *destValues)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
" %s%s sA = %s;\n"
|
|
" %s%s sB = %s;\n"
|
|
" %s%s dst = %s( sA, sB );\n"
|
|
" %s;\n"
|
|
"\n"
|
|
"}\n";
|
|
|
|
typedef bool (*twoParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *destination, ExplicitType vecType );
|
|
|
|
static char * build_load_statement( char *outString, size_t vecSize, const char *name )
|
|
{
|
|
if( vecSize != 3 )
|
|
sprintf( outString, "%s[ tid ]", name );
|
|
else
|
|
sprintf( outString, "vload3( tid, %s )", name );
|
|
return outString;
|
|
}
|
|
|
|
static char * build_store_statement( char *outString, size_t vecSize, const char *name, const char *srcName )
|
|
{
|
|
if( vecSize != 3 )
|
|
sprintf( outString, "%s[ tid ] = %s", name, srcName );
|
|
else
|
|
sprintf( outString, "vstore3( %s, tid, %s )", srcName, name );
|
|
return outString;
|
|
}
|
|
|
|
int test_two_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
|
|
ExplicitType vecAType, ExplicitType vecBType, unsigned int vecSize, twoParamIntegerVerifyFn verifyFn, MTdata d )
|
|
{
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
clMemWrapper streams[3];
|
|
cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected;
|
|
int error, i;
|
|
size_t threads[1], localThreads[1];
|
|
char kernelSource[10240];
|
|
char *programPtr;
|
|
char sizeName[4], paramSizeName[4];
|
|
|
|
// embedded profiles don't support long/ulong datatypes
|
|
if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long"))
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) );
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
/* Create the source */
|
|
if( vecSize == 1 )
|
|
sizeName[ 0 ] = 0;
|
|
else
|
|
sprintf( sizeName, "%d", vecSize );
|
|
if( ( vecSize == 1 ) || ( vecSize == 3 ) )
|
|
paramSizeName[ 0 ] = 0;
|
|
else
|
|
sprintf( paramSizeName, "%d", vecSize );
|
|
|
|
char sourceALoad[ 128 ], sourceBLoad[ 128 ], destStore[ 128 ];
|
|
|
|
sprintf( kernelSource, twoParamIntegerKernelSourcePattern,
|
|
get_explicit_type_name( vecAType ), paramSizeName,
|
|
get_explicit_type_name( vecBType ), paramSizeName,
|
|
get_explicit_type_name( vecAType ), paramSizeName,
|
|
get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ),
|
|
get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ),
|
|
get_explicit_type_name( vecAType ), sizeName,
|
|
fnName,
|
|
build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" )
|
|
);
|
|
|
|
/* Create kernels */
|
|
programPtr = kernelSource;
|
|
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
|
|
{
|
|
log_error("The program we attempted to compile was: \n%s\n", kernelSource);
|
|
return -1;
|
|
}
|
|
|
|
/* Generate some streams */
|
|
generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
|
|
generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
|
|
|
|
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
|
|
get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE,
|
|
&inDataA, NULL);
|
|
if( streams[0] == NULL )
|
|
{
|
|
log_error("ERROR: Creating input array A failed!\n");
|
|
return -1;
|
|
}
|
|
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR),
|
|
get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE,
|
|
&inDataB, NULL);
|
|
if( streams[1] == NULL )
|
|
{
|
|
log_error("ERROR: Creating input array B failed!\n");
|
|
return -1;
|
|
}
|
|
streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
|
get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE,
|
|
NULL, NULL );
|
|
if( streams[2] == NULL )
|
|
{
|
|
log_error("ERROR: Creating output array failed!\n");
|
|
return -1;
|
|
}
|
|
|
|
/* Assign streams and execute */
|
|
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
|
|
/* Run the kernel */
|
|
threads[0] = TEST_SIZE;
|
|
|
|
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
|
test_error( error, "Unable to get work group size to use" );
|
|
|
|
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
|
test_error( error, "Unable to execute test kernel" );
|
|
|
|
memset(outData, 0xFF, get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize);
|
|
|
|
/* Now get the results */
|
|
error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0,
|
|
get_explicit_type_size( vecAType ) * TEST_SIZE * vecSize, outData, 0,
|
|
NULL, NULL );
|
|
test_error( error, "Unable to read output array!" );
|
|
|
|
/* And verify! */
|
|
char *inA = (char *)inDataA;
|
|
char *inB = (char *)inDataB;
|
|
char *out = (char *)outData;
|
|
for( i = 0; i < (int)TEST_SIZE; i++ )
|
|
{
|
|
for( size_t j = 0; j < vecSize; j++ )
|
|
{
|
|
bool test = verifyFn( inA, inB, &expected, vecAType );
|
|
if( test && ( memcmp( &expected, out, get_explicit_type_size( vecAType ) ) != 0 ) )
|
|
{
|
|
switch( get_explicit_type_size( vecAType ))
|
|
{
|
|
case 1:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x), TEST_SIZE %d\n",
|
|
(int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ),
|
|
*( (cl_uchar *)inA ),
|
|
*( (cl_uchar *)inB ) ,
|
|
TEST_SIZE);
|
|
break;
|
|
|
|
case 2:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x), TEST_SIZE %d\n",
|
|
(int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ),
|
|
*( (cl_ushort *)inA ),
|
|
*( (cl_ushort *)inB ),
|
|
TEST_SIZE);
|
|
break;
|
|
|
|
case 4:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x)\n",
|
|
(int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ),
|
|
*( (cl_uint *)inA ),
|
|
*( (cl_uint *)inB ) );
|
|
break;
|
|
|
|
case 8:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx)\n",
|
|
(int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ),
|
|
*( (cl_ulong *)inA ),
|
|
*( (cl_ulong *)inB ) );
|
|
break;
|
|
}
|
|
return -1;
|
|
}
|
|
inA += get_explicit_type_size( vecAType );
|
|
inB += get_explicit_type_size( vecBType );
|
|
out += get_explicit_type_size( vecAType );
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_two_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn)
|
|
{
|
|
ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
|
|
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; // TODO : 3 not tested
|
|
unsigned int index, typeIndex;
|
|
int retVal = 0;
|
|
RandomSeed seed(gRandomSeed );
|
|
|
|
for( typeIndex = 0; types[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
|
|
{
|
|
for( index = 0; vecSizes[ index ] != 0; index++ )
|
|
{
|
|
if( test_two_param_integer_kernel(queue, context, fnName, types[ typeIndex ], types[ typeIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
|
|
{
|
|
log_error( " Vector %s%d FAILED\n", get_explicit_type_name( types[ typeIndex ] ), vecSizes[ index ] );
|
|
retVal = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
int test_two_param_unmatched_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, twoParamIntegerVerifyFn verifyFn)
|
|
{
|
|
ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
|
|
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
|
|
unsigned int index, typeAIndex, typeBIndex;
|
|
int retVal = 0;
|
|
RandomSeed seed( gRandomSeed );
|
|
|
|
for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ )
|
|
{
|
|
for( typeBIndex = 0; types[ typeBIndex ] != kNumExplicitTypes; typeBIndex++ )
|
|
{
|
|
for( index = 0; vecSizes[ index ] != 0; index++ )
|
|
{
|
|
if( test_two_param_integer_kernel( queue, context, fnName, types[ typeAIndex ], types[ typeBIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
|
|
{
|
|
log_error( " Vector %s%d / %s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ], get_explicit_type_name( types[ typeBIndex ] ), vecSizes[ index ] );
|
|
retVal = -1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
bool verify_integer_hadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
|
|
{
|
|
cl_long testValueA, testValueB, overflow;
|
|
cl_ulong uValueA, uValueB, uOverflow;
|
|
|
|
switch( vecType )
|
|
{
|
|
case kChar:
|
|
testValueA = *( (cl_char *)sourceA );
|
|
testValueB = *( (cl_char *)sourceB );
|
|
*( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB ) >> 1 );
|
|
break;
|
|
case kUChar:
|
|
testValueA = *( (cl_uchar *)sourceA );
|
|
testValueB = *( (cl_uchar *)sourceB );
|
|
*( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB ) >> 1 );
|
|
break;
|
|
case kShort:
|
|
testValueA = *( (cl_short *)sourceA );
|
|
testValueB = *( (cl_short *)sourceB );
|
|
*( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB ) >> 1 );
|
|
break;
|
|
case kUShort:
|
|
testValueA = *( (cl_ushort *)sourceA );
|
|
testValueB = *( (cl_ushort *)sourceB );
|
|
*( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB ) >> 1 );
|
|
break;
|
|
case kInt:
|
|
testValueA = *( (cl_int *)sourceA );
|
|
testValueB = *( (cl_int *)sourceB );
|
|
*( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB ) >> 1 );
|
|
break;
|
|
case kUInt:
|
|
testValueA = *( (cl_uint *)sourceA );
|
|
testValueB = *( (cl_uint *)sourceB );
|
|
*( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB ) >> 1 );
|
|
break;
|
|
case kLong:
|
|
// The long way to avoid dropping bits
|
|
testValueA = *( (cl_long *)sourceA );
|
|
testValueB = *( (cl_long *)sourceB );
|
|
overflow = ( testValueA & 0x1 ) + ( testValueB & 0x1 );
|
|
*( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + ( overflow >> 1 );
|
|
break;
|
|
case kULong:
|
|
// The long way to avoid dropping bits
|
|
uValueA = *( (cl_ulong *)sourceA );
|
|
uValueB = *( (cl_ulong *)sourceB );
|
|
uOverflow = ( uValueA & 0x1 ) + ( uValueB & 0x1 );
|
|
*( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + ( uOverflow >> 1 );
|
|
break;
|
|
default:
|
|
// Should never happen
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_hadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_two_param_integer_fn( queue, context, "hadd", verify_integer_hadd );
|
|
}
|
|
|
|
bool verify_integer_rhadd( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
|
|
{
|
|
cl_long testValueA, testValueB, overflow;
|
|
cl_ulong uValueA, uValueB, uOverflow;
|
|
|
|
switch( vecType )
|
|
{
|
|
case kChar:
|
|
testValueA = *( (cl_char *)sourceA );
|
|
testValueB = *( (cl_char *)sourceB );
|
|
*( (cl_char *)destination ) = (cl_char)( ( testValueA + testValueB + 1 ) >> 1 );
|
|
break;
|
|
case kUChar:
|
|
testValueA = *( (cl_uchar *)sourceA );
|
|
testValueB = *( (cl_uchar *)sourceB );
|
|
*( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA + testValueB + 1 ) >> 1 );
|
|
break;
|
|
case kShort:
|
|
testValueA = *( (cl_short *)sourceA );
|
|
testValueB = *( (cl_short *)sourceB );
|
|
*( (cl_short *)destination ) = (cl_short)( ( testValueA + testValueB + 1 ) >> 1 );
|
|
break;
|
|
case kUShort:
|
|
testValueA = *( (cl_ushort *)sourceA );
|
|
testValueB = *( (cl_ushort *)sourceB );
|
|
*( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA + testValueB + 1 ) >> 1 );
|
|
break;
|
|
case kInt:
|
|
testValueA = *( (cl_int *)sourceA );
|
|
testValueB = *( (cl_int *)sourceB );
|
|
*( (cl_int *)destination ) = (cl_int)( ( testValueA + testValueB + 1 ) >> 1 );
|
|
break;
|
|
case kUInt:
|
|
testValueA = *( (cl_uint *)sourceA );
|
|
testValueB = *( (cl_uint *)sourceB );
|
|
*( (cl_uint *)destination ) = (cl_uint)( ( testValueA + testValueB + 1 ) >> 1 );
|
|
break;
|
|
case kLong:
|
|
// The long way to avoid dropping bits
|
|
testValueA = *( (cl_long *)sourceA );
|
|
testValueB = *( (cl_long *)sourceB );
|
|
overflow = ( testValueA | testValueB ) & 0x1;
|
|
*( (cl_long *)destination ) = ( ( testValueA >> 1 ) + ( testValueB >> 1 ) ) + overflow;
|
|
break;
|
|
case kULong:
|
|
// The long way to avoid dropping bits
|
|
uValueA = *( (cl_ulong *)sourceA );
|
|
uValueB = *( (cl_ulong *)sourceB );
|
|
uOverflow = ( uValueA | uValueB ) & 0x1;
|
|
*( (cl_ulong *)destination ) = ( ( uValueA >> 1 ) + ( uValueB >> 1 ) ) + uOverflow;
|
|
break;
|
|
default:
|
|
// Should never happen
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_rhadd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_two_param_integer_fn( queue, context, "rhadd", verify_integer_rhadd );
|
|
}
|
|
|
|
#define MIN_CASE( type, const ) \
|
|
case const : \
|
|
{ \
|
|
cl_##type valueA = *( (cl_##type *)sourceA ); \
|
|
cl_##type valueB = *( (cl_##type *)sourceB ); \
|
|
*( (cl_##type *)destination ) = (cl_##type)( valueB < valueA ? valueB : valueA ); \
|
|
break; \
|
|
}
|
|
|
|
bool verify_integer_min( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
|
|
{
|
|
switch( vecType )
|
|
{
|
|
MIN_CASE( char, kChar )
|
|
MIN_CASE( uchar, kUChar )
|
|
MIN_CASE( short, kShort )
|
|
MIN_CASE( ushort, kUShort )
|
|
MIN_CASE( int, kInt )
|
|
MIN_CASE( uint, kUInt )
|
|
MIN_CASE( long, kLong )
|
|
MIN_CASE( ulong, kULong )
|
|
default:
|
|
// Should never happen
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_two_param_integer_fn( queue, context, "min", verify_integer_min);
|
|
}
|
|
|
|
#define MAX_CASE( type, const ) \
|
|
case const : \
|
|
{ \
|
|
cl_##type valueA = *( (cl_##type *)sourceA ); \
|
|
cl_##type valueB = *( (cl_##type *)sourceB ); \
|
|
*( (cl_##type *)destination ) = (cl_##type)( valueA < valueB ? valueB : valueA ); \
|
|
break; \
|
|
}
|
|
|
|
bool verify_integer_max( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
|
|
{
|
|
switch( vecType )
|
|
{
|
|
MAX_CASE( char, kChar )
|
|
MAX_CASE( uchar, kUChar )
|
|
MAX_CASE( short, kShort )
|
|
MAX_CASE( ushort, kUShort )
|
|
MAX_CASE( int, kInt )
|
|
MAX_CASE( uint, kUInt )
|
|
MAX_CASE( long, kLong )
|
|
MAX_CASE( ulong, kULong )
|
|
default:
|
|
// Should never happen
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_two_param_integer_fn( queue, context, "max", verify_integer_max );
|
|
}
|
|
|
|
|
|
void multiply_unsigned_64_by_64( cl_ulong sourceA, cl_ulong sourceB, cl_ulong &destLow, cl_ulong &destHi )
|
|
{
|
|
cl_ulong lowA, lowB;
|
|
cl_ulong highA, highB;
|
|
|
|
// Split up the values
|
|
lowA = sourceA & 0xffffffff;
|
|
highA = sourceA >> 32;
|
|
lowB = sourceB & 0xffffffff;
|
|
highB = sourceB >> 32;
|
|
|
|
// Note that, with this split, our multiplication becomes:
|
|
// ( a * b )
|
|
// = ( ( aHI << 32 + aLO ) * ( bHI << 32 + bLO ) ) >> 64
|
|
// = ( ( aHI << 32 * bHI << 32 ) + ( aHI << 32 * bLO ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64
|
|
// = ( ( aHI * bHI << 64 ) + ( aHI * bLO << 32 ) + ( aLO * bHI << 32 ) + ( aLO * bLO ) ) >> 64
|
|
// = ( aHI * bHI ) + ( aHI * bLO >> 32 ) + ( aLO * bHI >> 32 ) + ( aLO * bLO >> 64 )
|
|
|
|
// Now, since each value is 32 bits, the max size of any multiplication is:
|
|
// ( 2 ^ 32 - 1 ) * ( 2 ^ 32 - 1 ) = 2^64 - 4^32 + 1 = 2^64 - 2^33 + 1, which fits within 64 bits
|
|
// Which means we can do each component within a 64-bit integer as necessary (each component above marked as AB1 - AB4)
|
|
cl_ulong aHibHi = highA * highB;
|
|
cl_ulong aHibLo = highA * lowB;
|
|
cl_ulong aLobHi = lowA * highB;
|
|
cl_ulong aLobLo = lowA * lowB;
|
|
|
|
// Assemble terms.
|
|
// We note that in certain cases, sums of products cannot overflow:
|
|
//
|
|
// The maximum product of two N-bit unsigned numbers is
|
|
//
|
|
// (2**N-1)^2 = 2**2N - 2**(N+1) + 1
|
|
//
|
|
// We note that we can add the maximum N-bit number to the 2N-bit product twice without overflow:
|
|
//
|
|
// (2**N-1)^2 + 2*(2**N-1) = 2**2N - 2**(N+1) + 1 + 2**(N+1) - 2 = 2**2N - 1
|
|
//
|
|
// If we breakdown the product of two numbers a,b into high and low halves of partial products as follows:
|
|
//
|
|
// a.hi a.lo
|
|
// x b.hi b.lo
|
|
//===============================================================================
|
|
// (b.hi*a.hi).hi (b.hi*a.hi).lo
|
|
// (b.lo*a.hi).hi (b.lo*a.hi).lo
|
|
// (b.hi*a.lo).hi (b.hi*a.lo).lo
|
|
// + (b.lo*a.lo).hi (b.lo*a.lo).lo
|
|
//===============================================================================
|
|
//
|
|
// The (b.lo*a.lo).lo term cannot cause a carry, so we can ignore them for now. We also know from above, that we can add (b.lo*a.lo).hi
|
|
// and (b.hi*a.lo).lo to the 2N bit term [(b.lo*a.hi).hi + (b.lo*a.hi).lo] without overflow. That takes care of all of the terms
|
|
// on the right half that might carry. Do that now.
|
|
//
|
|
cl_ulong aLobLoHi = aLobLo >> 32;
|
|
cl_ulong aLobHiLo = aLobHi & 0xFFFFFFFFULL;
|
|
aHibLo += aLobLoHi + aLobHiLo;
|
|
|
|
// That leaves us with these terms:
|
|
//
|
|
// a.hi a.lo
|
|
// x b.hi b.lo
|
|
//===============================================================================
|
|
// (b.hi*a.hi).hi (b.hi*a.hi).lo
|
|
// (b.hi*a.lo).hi
|
|
// [ (b.lo*a.hi).hi + (b.lo*a.hi).lo + other ]
|
|
// + (b.lo*a.lo).lo
|
|
//===============================================================================
|
|
|
|
// All of the overflow potential from the right half has now been accumulated into the [ (b.lo*a.hi).hi + (b.lo*a.hi).lo ] 2N bit term.
|
|
// We can safely separate into high and low parts. Per our rule above, we know we can accumulate the high part of that and (b.hi*a.lo).hi
|
|
// into the 2N bit term (b.lo*a.hi) without carry. The low part can be pieced together with (b.lo*a.lo).lo, to give the final low result
|
|
|
|
destHi = aHibHi + (aHibLo >> 32 ) + (aLobHi >> 32); // Cant overflow
|
|
destLow = (aHibLo << 32) | ( aLobLo & 0xFFFFFFFFULL );
|
|
}
|
|
|
|
void multiply_signed_64_by_64( cl_long sourceA, cl_long sourceB, cl_ulong &destLow, cl_long &destHi )
|
|
{
|
|
// Find sign of result
|
|
cl_long aSign = sourceA >> 63;
|
|
cl_long bSign = sourceB >> 63;
|
|
cl_long resultSign = aSign ^ bSign;
|
|
|
|
// take absolute values of the argument
|
|
sourceA = (sourceA ^ aSign) - aSign;
|
|
sourceB = (sourceB ^ bSign) - bSign;
|
|
|
|
cl_ulong hi;
|
|
multiply_unsigned_64_by_64( (cl_ulong) sourceA, (cl_ulong) sourceB, destLow, hi );
|
|
|
|
// Fix the sign
|
|
if( resultSign )
|
|
{
|
|
destLow ^= resultSign;
|
|
hi ^= resultSign;
|
|
destLow -= resultSign;
|
|
|
|
//carry if necessary
|
|
if( 0 == destLow )
|
|
hi -= resultSign;
|
|
}
|
|
|
|
destHi = (cl_long) hi;
|
|
}
|
|
|
|
bool verify_integer_mul_hi( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
|
|
{
|
|
cl_long testValueA, testValueB, highSigned;
|
|
cl_ulong highUnsigned, lowHalf;
|
|
|
|
switch( vecType )
|
|
{
|
|
case kChar:
|
|
testValueA = *( (cl_char *)sourceA );
|
|
testValueB = *( (cl_char *)sourceB );
|
|
*( (cl_char *)destination ) = (cl_char)( ( testValueA * testValueB ) >> 8 );
|
|
break;
|
|
case kUChar:
|
|
testValueA = *( (cl_uchar *)sourceA );
|
|
testValueB = *( (cl_uchar *)sourceB );
|
|
*( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA * testValueB ) >> 8 );
|
|
break;
|
|
case kShort:
|
|
testValueA = *( (cl_short *)sourceA );
|
|
testValueB = *( (cl_short *)sourceB );
|
|
*( (cl_short *)destination ) = (cl_short)( ( testValueA * testValueB ) >> 16 );
|
|
break;
|
|
case kUShort:
|
|
testValueA = *( (cl_ushort *)sourceA );
|
|
testValueB = *( (cl_ushort *)sourceB );
|
|
*( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA * testValueB ) >> 16 );
|
|
break;
|
|
case kInt:
|
|
testValueA = *( (cl_int *)sourceA );
|
|
testValueB = *( (cl_int *)sourceB );
|
|
*( (cl_int *)destination ) = (cl_int)( ( testValueA * testValueB ) >> 32 );
|
|
break;
|
|
case kUInt:
|
|
testValueA = *( (cl_uint *)sourceA );
|
|
testValueB = *( (cl_uint *)sourceB );
|
|
*( (cl_uint *)destination ) = (cl_uint)( ( testValueA * testValueB ) >> 32 );
|
|
break;
|
|
case kLong:
|
|
testValueA = *( (cl_long *)sourceA );
|
|
testValueB = *( (cl_long *)sourceB );
|
|
|
|
multiply_signed_64_by_64( testValueA, testValueB, lowHalf, highSigned );
|
|
*( (cl_long *)destination ) = highSigned;
|
|
break;
|
|
case kULong:
|
|
testValueA = *( (cl_ulong *)sourceA );
|
|
testValueB = *( (cl_ulong *)sourceB );
|
|
|
|
multiply_unsigned_64_by_64( testValueA, testValueB, lowHalf, highUnsigned );
|
|
*( (cl_ulong *)destination ) = highUnsigned;
|
|
break;
|
|
default:
|
|
// Should never happen
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_mul_hi(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_two_param_integer_fn( queue, context, "mul_hi", verify_integer_mul_hi );
|
|
}
|
|
|
|
bool verify_integer_rotate( void *sourceA, void *sourceB, void *destination, ExplicitType vecType )
|
|
{
|
|
cl_ulong testValueA;
|
|
char numBits;
|
|
|
|
switch( vecType )
|
|
{
|
|
case kChar:
|
|
case kUChar:
|
|
testValueA = *( (cl_uchar *)sourceA );
|
|
numBits = *( (cl_uchar *)sourceB );
|
|
numBits &= 7;
|
|
if ( numBits == 0 )
|
|
*( (cl_uchar *)destination ) = (cl_uchar)testValueA;
|
|
else
|
|
*( (cl_uchar *)destination ) = (cl_uchar)( ( testValueA << numBits ) | ( testValueA >> ( 8 - numBits ) ) );
|
|
break;
|
|
case kShort:
|
|
case kUShort:
|
|
testValueA = *( (cl_ushort *)sourceA );
|
|
numBits = *( (cl_ushort *)sourceB );
|
|
numBits &= 15;
|
|
if ( numBits == 0 )
|
|
*( (cl_ushort *)destination ) = (cl_ushort)testValueA;
|
|
else
|
|
*( (cl_ushort *)destination ) = (cl_ushort)( ( testValueA << numBits ) | ( testValueA >> ( 16 - numBits ) ) );
|
|
break;
|
|
case kInt:
|
|
case kUInt:
|
|
testValueA = *( (cl_uint *)sourceA );
|
|
numBits = *( (cl_uint *)sourceB );
|
|
numBits &= 31;
|
|
if ( numBits == 0 )
|
|
*( (cl_uint *)destination ) = (cl_uint) testValueA;
|
|
else
|
|
*( (cl_uint *)destination ) = (cl_uint)( ( testValueA << numBits ) | ( testValueA >> ( 32 - numBits ) ) );
|
|
break;
|
|
case kLong:
|
|
case kULong:
|
|
testValueA = *( (cl_ulong *)sourceA );
|
|
numBits = *( (cl_ulong *)sourceB );
|
|
numBits &= 63;
|
|
if ( numBits == 0 )
|
|
*( (cl_ulong *)destination ) = (cl_ulong)testValueA;
|
|
else
|
|
*( (cl_ulong *)destination ) = (cl_ulong)( ( testValueA << numBits ) | ( testValueA >> ( 64 - numBits ) ) );
|
|
break;
|
|
default:
|
|
// Should never happen
|
|
log_error( "Unknown type encountered in verify_integer_rotate. Test failed. Aborting...\n" );
|
|
abort();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_rotate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_two_param_integer_fn( queue, context, "rotate", verify_integer_rotate );
|
|
}
|
|
|
|
const char *threeParamIntegerKernelSourcePattern =
|
|
"__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
" %s%s sA = %s;\n"
|
|
" %s%s sB = %s;\n"
|
|
" %s%s sC = %s;\n"
|
|
" %s%s dst = %s( sA, sB, sC );\n"
|
|
" %s;\n"
|
|
"\n"
|
|
"}\n";
|
|
|
|
typedef bool (*threeParamIntegerVerifyFn)( void *sourceA, void *sourceB, void *sourceC, void *destination,
|
|
ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType );
|
|
|
|
int test_three_param_integer_kernel(cl_command_queue queue, cl_context context, const char *fnName,
|
|
ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType,
|
|
unsigned int vecSize, threeParamIntegerVerifyFn verifyFn, MTdata d )
|
|
{
|
|
clProgramWrapper program;
|
|
clKernelWrapper kernel;
|
|
clMemWrapper streams[4];
|
|
cl_long inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16], inDataC[TEST_SIZE * 16], outData[TEST_SIZE * 16], expected;
|
|
int error, i;
|
|
size_t threads[1], localThreads[1];
|
|
char kernelSource[10240];
|
|
char *programPtr;
|
|
char sizeName[4], paramSizeName[4];
|
|
|
|
if (! gHasLong && strstr(get_explicit_type_name(vecAType),"long"))
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", get_explicit_type_name(vecAType) );
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
|
|
/* Create the source */
|
|
if( vecSize == 1 )
|
|
sizeName[ 0 ] = 0;
|
|
else
|
|
sprintf( sizeName, "%d", vecSize );
|
|
if( ( vecSize == 1 ) || ( vecSize == 3 ) )
|
|
paramSizeName[ 0 ] = 0;
|
|
else
|
|
sprintf( paramSizeName, "%d", vecSize );
|
|
|
|
char sourceALoad[ 128 ], sourceBLoad[ 128 ], sourceCLoad[ 128 ], destStore[ 128 ];
|
|
|
|
sprintf( kernelSource, threeParamIntegerKernelSourcePattern,
|
|
get_explicit_type_name( vecAType ), paramSizeName,
|
|
get_explicit_type_name( vecBType ), paramSizeName,
|
|
get_explicit_type_name( vecCType ), paramSizeName,
|
|
get_explicit_type_name( destType ), paramSizeName,
|
|
get_explicit_type_name( vecAType ), sizeName, build_load_statement( sourceALoad, (size_t)vecSize, "sourceA" ),
|
|
get_explicit_type_name( vecBType ), sizeName, build_load_statement( sourceBLoad, (size_t)vecSize, "sourceB" ),
|
|
get_explicit_type_name( vecCType ), sizeName, build_load_statement( sourceCLoad, (size_t)vecSize, "sourceC" ),
|
|
get_explicit_type_name( destType ), sizeName,
|
|
fnName,
|
|
build_store_statement( destStore, (size_t)vecSize, "destValues", "dst" )
|
|
);
|
|
|
|
/* Create kernels */
|
|
programPtr = kernelSource;
|
|
if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
|
|
{
|
|
log_error("The program we attempted to compile was: \n%s\n", kernelSource);
|
|
return -1;
|
|
}
|
|
|
|
/* Generate some streams */
|
|
generate_random_data( vecAType, vecSize * TEST_SIZE, d, inDataA );
|
|
generate_random_data( vecBType, vecSize * TEST_SIZE, d, inDataB );
|
|
generate_random_data( vecCType, vecSize * TEST_SIZE, d, inDataC );
|
|
|
|
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecAType ) * vecSize * TEST_SIZE, &inDataA, NULL);
|
|
if( streams[0] == NULL )
|
|
{
|
|
log_error("ERROR: Creating input array A failed!\n");
|
|
return -1;
|
|
}
|
|
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecBType ) * vecSize * TEST_SIZE, &inDataB, NULL);
|
|
if( streams[1] == NULL )
|
|
{
|
|
log_error("ERROR: Creating input array B failed!\n");
|
|
return -1;
|
|
}
|
|
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), get_explicit_type_size( vecCType ) * vecSize * TEST_SIZE, &inDataC, NULL);
|
|
if( streams[2] == NULL )
|
|
{
|
|
log_error("ERROR: Creating input array C failed!\n");
|
|
return -1;
|
|
}
|
|
streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( destType ) * vecSize * TEST_SIZE, NULL, NULL );
|
|
if( streams[3] == NULL )
|
|
{
|
|
log_error("ERROR: Creating output array failed!\n");
|
|
return -1;
|
|
}
|
|
|
|
/* Assign streams and execute */
|
|
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
|
|
test_error( error, "Unable to set indexed kernel arguments" );
|
|
|
|
/* Run the kernel */
|
|
threads[0] = TEST_SIZE;
|
|
|
|
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
|
test_error( error, "Unable to get work group size to use" );
|
|
|
|
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
|
test_error( error, "Unable to execute test kernel" );
|
|
|
|
memset(outData, 0xFF, get_explicit_type_size( destType ) * TEST_SIZE * vecSize);
|
|
|
|
/* Now get the results */
|
|
error = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, get_explicit_type_size( destType ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
|
|
test_error( error, "Unable to read output array!" );
|
|
|
|
/* And verify! */
|
|
char *inA = (char *)inDataA;
|
|
char *inB = (char *)inDataB;
|
|
char *inC = (char *)inDataC;
|
|
char *out = (char *)outData;
|
|
for( i = 0; i < (int)TEST_SIZE; i++ )
|
|
{
|
|
for( size_t j = 0; j < vecSize; j++ )
|
|
{
|
|
bool test = verifyFn( inA, inB, inC, &expected, vecAType, vecBType, vecCType, destType );
|
|
if( test && ( memcmp( &expected, out, get_explicit_type_size( destType ) ) != 0 ) )
|
|
{
|
|
switch( get_explicit_type_size( vecAType ))
|
|
{
|
|
case 1:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%2.2x), got (0x%2.2x), sources (0x%2.2x, 0x%2.2x, 0x%2.2x)\n",
|
|
(int)i, (int)j, ((cl_uchar*)&expected)[ 0 ], *( (cl_uchar *)out ),
|
|
*( (cl_uchar *)inA ),
|
|
*( (cl_uchar *)inB ),
|
|
*( (cl_uchar *)inC ) );
|
|
break;
|
|
|
|
case 2:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%4.4x), got (0x%4.4x), sources (0x%4.4x, 0x%4.4x, 0x%4.4x)\n",
|
|
(int)i, (int)j, ((cl_ushort*)&expected)[ 0 ], *( (cl_ushort *)out ),
|
|
*( (cl_ushort *)inA ),
|
|
*( (cl_ushort *)inB ),
|
|
*( (cl_ushort *)inC ) );
|
|
break;
|
|
|
|
case 4:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%8.8x), got (0x%8.8x), sources (0x%8.8x, 0x%8.8x, 0x%8.8x)\n",
|
|
(int)i, (int)j, ((cl_uint*)&expected)[ 0 ], *( (cl_uint *)out ),
|
|
*( (cl_uint *)inA ),
|
|
*( (cl_uint *)inB ),
|
|
*( (cl_uint *)inC ) );
|
|
break;
|
|
|
|
case 8:
|
|
log_error( "ERROR: Data sample %d:%d does not validate! Expected (0x%16.16llx), got (0x%16.16llx), sources (0x%16.16llx, 0x%16.16llx, 0x%16.16llx)\n",
|
|
(int)i, (int)j, ((cl_ulong*)&expected)[ 0 ], *( (cl_ulong *)out ),
|
|
*( (cl_ulong *)inA ),
|
|
*( (cl_ulong *)inB ),
|
|
*( (cl_ulong *)inC ) );
|
|
break;
|
|
}
|
|
return -1;
|
|
}
|
|
inA += get_explicit_type_size( vecAType );
|
|
inB += get_explicit_type_size( vecBType );
|
|
inC += get_explicit_type_size( vecCType );
|
|
out += get_explicit_type_size( destType );
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int test_three_param_integer_fn(cl_command_queue queue, cl_context context, const char *fnName, threeParamIntegerVerifyFn verifyFn)
|
|
{
|
|
ExplicitType types[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kNumExplicitTypes };
|
|
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
|
|
unsigned int index, typeAIndex;
|
|
int retVal = 0;
|
|
RandomSeed seed(gRandomSeed);
|
|
|
|
for( typeAIndex = 0; types[ typeAIndex ] != kNumExplicitTypes; typeAIndex++ )
|
|
{
|
|
for( index = 0; vecSizes[ index ] != 0; index++ )
|
|
{
|
|
if( test_three_param_integer_kernel(queue, context, fnName, types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], types[ typeAIndex ], vecSizes[ index ], verifyFn, seed ) != 0 )
|
|
{
|
|
log_error( " Vector %s%d,%s%d,%s%d FAILED\n", get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ],
|
|
get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] ,
|
|
get_explicit_type_name( types[ typeAIndex ] ), vecSizes[ index ] );
|
|
retVal = -1;
|
|
}
|
|
}
|
|
}
|
|
|
|
return retVal;
|
|
}
|
|
|
|
bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *destination,
|
|
ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
|
|
{
|
|
if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
|
|
{
|
|
cl_ulong valueA, valueB, valueC;
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kULong:
|
|
valueA = ((cl_ulong*) sourceA)[0];
|
|
valueB = ((cl_ulong*) sourceB)[0];
|
|
valueC = ((cl_ulong*) sourceC)[0];
|
|
break;
|
|
case kUInt:
|
|
valueA = ((cl_uint*) sourceA)[0];
|
|
valueB = ((cl_uint*) sourceB)[0];
|
|
valueC = ((cl_uint*) sourceC)[0];
|
|
break;
|
|
case kUShort:
|
|
valueA = ((cl_ushort*) sourceA)[0];
|
|
valueB = ((cl_ushort*) sourceB)[0];
|
|
valueC = ((cl_ushort*) sourceC)[0];
|
|
break;
|
|
case kUChar:
|
|
valueA = ((cl_uchar*) sourceA)[0];
|
|
valueB = ((cl_uchar*) sourceB)[0];
|
|
valueC = ((cl_uchar*) sourceC)[0];
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
|
|
if(valueB > valueC) {
|
|
return false; // results are undefined : let expected alone.
|
|
}
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kULong:
|
|
((cl_ulong*) destination)[0] = MAX(MIN(valueA, valueC), valueB);
|
|
break;
|
|
case kUInt:
|
|
((cl_uint*) destination)[0] = (cl_uint)
|
|
(MAX(MIN(valueA, valueC), valueB));
|
|
break;
|
|
case kUShort:
|
|
((cl_ushort*) destination)[0] = (cl_ushort)
|
|
(MAX(MIN(valueA, valueC), valueB));
|
|
break;
|
|
case kUChar:
|
|
((cl_uchar*) destination)[0] = (cl_uchar)
|
|
(MAX(MIN(valueA, valueC), valueB));
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
else
|
|
{
|
|
cl_long valueA, valueB, valueC;
|
|
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kLong:
|
|
valueA = ((cl_long*) sourceA)[0];
|
|
valueB = ((cl_long*) sourceB)[0];
|
|
valueC = ((cl_long*) sourceC)[0];
|
|
break;
|
|
case kInt:
|
|
valueA = ((cl_int*) sourceA)[0];
|
|
valueB = ((cl_int*) sourceB)[0];
|
|
valueC = ((cl_int*) sourceC)[0];
|
|
break;
|
|
case kShort:
|
|
valueA = ((cl_short*) sourceA)[0];
|
|
valueB = ((cl_short*) sourceB)[0];
|
|
valueC = ((cl_short*) sourceC)[0];
|
|
break;
|
|
case kChar:
|
|
valueA = ((cl_char*) sourceA)[0];
|
|
valueB = ((cl_char*) sourceB)[0];
|
|
valueC = ((cl_char*) sourceC)[0];
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
if(valueB > valueC) {
|
|
return false; // undefined behavior : leave "expected" alone
|
|
}
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kLong:
|
|
((cl_long*) destination)[0] = MAX(MIN(valueA, valueC), valueB);
|
|
break;
|
|
case kInt:
|
|
((cl_int*) destination)[0] = (cl_int)
|
|
(MAX(MIN(valueA, valueC), valueB));
|
|
break;
|
|
case kShort:
|
|
((cl_short*) destination)[0] = (cl_short)
|
|
(MAX(MIN(valueA, valueC), valueB));
|
|
break;
|
|
case kChar:
|
|
((cl_char*) destination)[0] = (cl_char)
|
|
(MAX(MIN(valueA, valueC), valueB));
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_clamp(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_three_param_integer_fn( queue, context, "clamp", verify_integer_clamp );
|
|
}
|
|
|
|
bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void *destination,
|
|
ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
|
|
{
|
|
if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
|
|
{
|
|
cl_ulong valueA, valueB, valueC;
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kULong:
|
|
valueA = ((cl_ulong*) sourceA)[0];
|
|
valueB = ((cl_ulong*) sourceB)[0];
|
|
valueC = ((cl_ulong*) sourceC)[0];
|
|
break;
|
|
case kUInt:
|
|
valueA = ((cl_uint*) sourceA)[0];
|
|
valueB = ((cl_uint*) sourceB)[0];
|
|
valueC = ((cl_uint*) sourceC)[0];
|
|
break;
|
|
case kUShort:
|
|
valueA = ((cl_ushort*) sourceA)[0];
|
|
valueB = ((cl_ushort*) sourceB)[0];
|
|
valueC = ((cl_ushort*) sourceC)[0];
|
|
break;
|
|
case kUChar:
|
|
valueA = ((cl_uchar*) sourceA)[0];
|
|
valueB = ((cl_uchar*) sourceB)[0];
|
|
valueC = ((cl_uchar*) sourceC)[0];
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
cl_ulong multHi, multLo;
|
|
multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi );
|
|
|
|
multLo += valueC;
|
|
multHi += multLo < valueC; // carry if overflow
|
|
if( multHi )
|
|
multLo = 0xFFFFFFFFFFFFFFFFULL;
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kULong:
|
|
((cl_ulong*) destination)[0] = multLo;
|
|
break;
|
|
case kUInt:
|
|
((cl_uint*) destination)[0] = (cl_uint) MIN( multLo, (cl_ulong) CL_UINT_MAX );
|
|
break;
|
|
case kUShort:
|
|
((cl_ushort*) destination)[0] = (cl_ushort) MIN( multLo, (cl_ulong) CL_USHRT_MAX );
|
|
break;
|
|
case kUChar:
|
|
((cl_uchar*) destination)[0] = (cl_uchar) MIN( multLo, (cl_ulong) CL_UCHAR_MAX );
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
cl_long valueA, valueB, valueC;
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kLong:
|
|
valueA = ((cl_long*) sourceA)[0];
|
|
valueB = ((cl_long*) sourceB)[0];
|
|
valueC = ((cl_long*) sourceC)[0];
|
|
break;
|
|
case kInt:
|
|
valueA = ((cl_int*) sourceA)[0];
|
|
valueB = ((cl_int*) sourceB)[0];
|
|
valueC = ((cl_int*) sourceC)[0];
|
|
break;
|
|
case kShort:
|
|
valueA = ((cl_short*) sourceA)[0];
|
|
valueB = ((cl_short*) sourceB)[0];
|
|
valueC = ((cl_short*) sourceC)[0];
|
|
break;
|
|
case kChar:
|
|
valueA = ((cl_char*) sourceA)[0];
|
|
valueB = ((cl_char*) sourceB)[0];
|
|
valueC = ((cl_char*) sourceC)[0];
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
cl_long multHi;
|
|
cl_ulong multLo;
|
|
multiply_signed_64_by_64( valueA, valueB, multLo, multHi );
|
|
|
|
cl_ulong sum = multLo + valueC;
|
|
// carry if overflow
|
|
if( valueC >= 0 )
|
|
{
|
|
if( multLo > sum )
|
|
{
|
|
multHi++;
|
|
if( CL_LONG_MIN == multHi )
|
|
{
|
|
multHi = CL_LONG_MAX;
|
|
sum = CL_ULONG_MAX;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( multLo < sum )
|
|
{
|
|
multHi--;
|
|
if( CL_LONG_MAX == multHi )
|
|
{
|
|
multHi = CL_LONG_MIN;
|
|
sum = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
// saturate
|
|
if( multHi > 0 )
|
|
sum = CL_LONG_MAX;
|
|
else if( multHi < -1 )
|
|
sum = CL_LONG_MIN;
|
|
cl_long result = (cl_long) sum;
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kLong:
|
|
((cl_long*) destination)[0] = result;
|
|
break;
|
|
case kInt:
|
|
result = MIN( result, (cl_long) CL_INT_MAX );
|
|
result = MAX( result, (cl_long) CL_INT_MIN );
|
|
((cl_int*) destination)[0] = (cl_int) result;
|
|
break;
|
|
case kShort:
|
|
result = MIN( result, (cl_long) CL_SHRT_MAX );
|
|
result = MAX( result, (cl_long) CL_SHRT_MIN );
|
|
((cl_short*) destination)[0] = (cl_short) result;
|
|
break;
|
|
case kChar:
|
|
result = MIN( result, (cl_long) CL_CHAR_MAX );
|
|
result = MAX( result, (cl_long) CL_CHAR_MIN );
|
|
((cl_char*) destination)[0] = (cl_char) result;
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_mad_sat(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_three_param_integer_fn( queue, context, "mad_sat", verify_integer_mad_sat );
|
|
}
|
|
|
|
bool verify_integer_mad_hi( void *sourceA, void *sourceB, void *sourceC, void *destination,
|
|
ExplicitType vecAType, ExplicitType vecBType, ExplicitType vecCType, ExplicitType destType )
|
|
{
|
|
if( vecAType == kULong || vecAType == kUInt || vecAType == kUShort || vecAType == kUChar )
|
|
{
|
|
cl_ulong valueA, valueB, valueC;
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kULong:
|
|
valueA = ((cl_ulong*) sourceA)[0];
|
|
valueB = ((cl_ulong*) sourceB)[0];
|
|
valueC = ((cl_ulong*) sourceC)[0];
|
|
break;
|
|
case kUInt:
|
|
valueA = ((cl_uint*) sourceA)[0];
|
|
valueB = ((cl_uint*) sourceB)[0];
|
|
valueC = ((cl_uint*) sourceC)[0];
|
|
break;
|
|
case kUShort:
|
|
valueA = ((cl_ushort*) sourceA)[0];
|
|
valueB = ((cl_ushort*) sourceB)[0];
|
|
valueC = ((cl_ushort*) sourceC)[0];
|
|
break;
|
|
case kUChar:
|
|
valueA = ((cl_uchar*) sourceA)[0];
|
|
valueB = ((cl_uchar*) sourceB)[0];
|
|
valueC = ((cl_uchar*) sourceC)[0];
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
cl_ulong multHi, multLo;
|
|
multiply_unsigned_64_by_64( valueA, valueB, multLo, multHi );
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kULong:
|
|
((cl_ulong*) destination)[0] = multHi + valueC;
|
|
break;
|
|
case kUInt:
|
|
((cl_uint*) destination)[0] = (cl_uint) (( multLo >> 32) + valueC );
|
|
break;
|
|
case kUShort:
|
|
((cl_ushort*) destination)[0] = (cl_ushort) (( multLo >> 16) + valueC );
|
|
break;
|
|
case kUChar:
|
|
((cl_uchar*) destination)[0] = (cl_uchar) (( multLo >> 8) + valueC );
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
cl_long valueA, valueB, valueC;
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kLong:
|
|
valueA = ((cl_long*) sourceA)[0];
|
|
valueB = ((cl_long*) sourceB)[0];
|
|
valueC = ((cl_long*) sourceC)[0];
|
|
break;
|
|
case kInt:
|
|
valueA = ((cl_int*) sourceA)[0];
|
|
valueB = ((cl_int*) sourceB)[0];
|
|
valueC = ((cl_int*) sourceC)[0];
|
|
break;
|
|
case kShort:
|
|
valueA = ((cl_short*) sourceA)[0];
|
|
valueB = ((cl_short*) sourceB)[0];
|
|
valueC = ((cl_short*) sourceC)[0];
|
|
break;
|
|
case kChar:
|
|
valueA = ((cl_char*) sourceA)[0];
|
|
valueB = ((cl_char*) sourceB)[0];
|
|
valueC = ((cl_char*) sourceC)[0];
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
cl_long multHi;
|
|
cl_ulong multLo;
|
|
multiply_signed_64_by_64( valueA, valueB, multLo, multHi );
|
|
|
|
switch( vecAType )
|
|
{
|
|
case kLong:
|
|
((cl_long*) destination)[0] = multHi + valueC;
|
|
break;
|
|
case kInt:
|
|
((cl_int*) destination)[0] = (cl_int) ((multLo >> 32) + valueC);
|
|
break;
|
|
case kShort:
|
|
((cl_short*) destination)[0] = (cl_int) ((multLo >> 16) + valueC);
|
|
break;
|
|
case kChar:
|
|
((cl_char*) destination)[0] = (cl_char) (cl_int) ((multLo >> 8) + valueC);
|
|
break;
|
|
default:
|
|
//error -- should never get here
|
|
abort();
|
|
break;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int test_integer_mad_hi( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
return test_three_param_integer_fn( queue, context, "mad_hi", verify_integer_mad_hi );
|
|
}
|
|
|
|
|