mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
1125 lines
42 KiB
C++
1125 lines
42 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "testBase.h"
|
|
#include "../../test_common/harness/conversions.h"
|
|
#ifndef _WIN32
|
|
#include <unistd.h>
|
|
#endif
|
|
|
|
#define INT_TEST_VALUE 402258822
|
|
#define LONG_TEST_VALUE 515154531254381446LL
|
|
|
|
|
|
extern cl_uint gRandomSeed;
|
|
|
|
// Kernel-source scaffolding for atomic tests on __global memory.
// [0] is the prologue (signature + tid); the per-test operation line(s) are
// spliced in between, and [1] closes the kernel. Both %s placeholders in the
// prologue are substituted with the OpenCL type name under test.
const char *atomic_global_pattern[] = {
    "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    ,
    "\n"
    "}\n" };
|
|
|
|
// Kernel-source scaffolding for atomic tests on __local memory.
// The prologue copies the seeded values from the global buffer into local
// memory (every work-item writes the same data, then barriers); the epilogue
// barriers again and copies the final local values back out so the host can
// inspect them. The three %s placeholders take the OpenCL type name.
const char *atomic_local_pattern[] = {
    "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int dstItemIdx;\n"
    "\n"
    " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
    " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
    " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    ,
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    " // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
    " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
    " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
    "}\n" };
|
|
|
|
|
|
#define TEST_COUNT 128 * 1024
|
|
|
|
|
|
// Host-side description of one atomic operation under test: the seed values
// written to the destination buffer(s) before the kernel runs, plus optional
// callbacks used to validate the results. For each width (int/long/float)
// either an "expected final value" callback or a "verify the per-thread old
// values" callback is provided; test_atomic_function treats NULL entries as
// "not applicable" and dispatches on the data type's size.
struct TestFns
{
    // Start value stored into each destination item (32-bit tests)
    cl_int mIntStartValue;
    // Start value stored into each destination item (64-bit tests)
    cl_long mLongStartValue;

    // Optional: number of destination items for a given thread count and
    // type; when NULL the harness assumes a single destination item.
    size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );

    // Integer versions
    // Expected final value of destination item `whichDestValue`
    cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
    // Optional generator for per-thread starting reference values
    void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
    // Validates the per-thread "old" values returned by the atomic op
    bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );

    // Long versions
    cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
    void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
    bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );

    // Float versions
    cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
    void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
    bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
};
|
|
|
|
// Returns true when the device can run the requested atomic test.
// 32-bit int atomics are core from OpenCL 1.1 onward; before that (and for
// all 64-bit types) the matching cl_khr_* extension must be present. Float
// atomic_xchg only exists as the core 1.1 built-in.
//
// extensionNames is indexed as: +1 for extended ops, +2 for local memory,
// +4 for 64-bit types. The 64-bit extensions do not distinguish local from
// global, hence the duplicated final pair.
bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
{
    const char *extensionNames[8] = {
        "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
        "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics",
        "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics",
        "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one
    };

    size_t index = 0;
    if( extended )
        index += 1;
    if( isLocal )
        index += 2;

    size_t major, minor;

    int error = get_device_version(device, &major, &minor);
    test_error( error, "get_device_version" );

    switch (dataType)
    {
        case kInt:
        case kUInt:
            // Core behavior from 1.1; otherwise fall through to the
            // 32-bit extension query below.
            if( major * 10 + minor >= 11 )
                return true;
            break;
        case kLong:
        case kULong:
            index += 4;
            break;
        case kFloat: // this has to stay separate since the float atomics aren't in the 1.0 extensions
            return major * 10 + minor >= 11;
        default:
            log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType );
            return false;
    }

    return is_extension_available( device, extensionNames[index] );
}
|
|
|
|
// Runs one atomic-operation kernel and validates its results.
//
// Builds a kernel from (pragma + prologue pattern + programCore + epilogue
// pattern), seeds the destination buffer with the TestFns start value, runs
// it over `threadSize` work-items, then validates either the expected final
// value(s) (ExpectedValue*Fn) or the per-thread returned "old" values
// (VerifyRefs*Fn). Finally re-runs with a single work-item to confirm the
// atomic returns the pre-op value. Returns 0 on pass (or unsupported
// configuration), -1 on failure.
int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programCore,
                         TestFns testFns,
                         bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    size_t threads[1];
    clMemWrapper streams[2];           // [0] destination value(s), [1] per-thread old values
    void *refValues, *startRefValues;
    size_t threadSize, groupSize;
    const char *programLines[4];       // pragma, header, per-test core, epilogue
    char pragma[ 512 ];
    char programHeader[ 512 ];
    MTdata d;
    size_t typeSize = get_explicit_type_size( dataType );


    // Verify we can run first
    bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
    if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
    {
        // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
        if( dataType == kFloat )
            log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
        else if( !isUnsigned )
            log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
        // Since we don't support the operation, they implicitly pass
        return 0;
    }
    else
    {
        if( dataType == kFloat )
            log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
        else
            log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
                     (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
    }

    //// Set up the kernel code

    // Create the pragma line for this kernel
    bool isLong = ( dataType == kLong || dataType == kULong );
    sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
            isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
            extended ? "extended" : "base" );

    // Now create the program header
    const char *typeName = get_explicit_type_name( dataType );
    if( isLocal )
        sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
    else
        sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );

    // Set up our entire program now
    programLines[ 0 ] = pragma;
    programLines[ 1 ] = programHeader;
    programLines[ 2 ] = programCore;
    programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];

    if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
    {
        return -1;
    }

    //// Set up to actually run
    // NOTE(review): threadSize is unconditionally 1024, so the "== 0"
    // fallback to TEST_COUNT just below is dead code.
    threadSize = 1024;
    if( threadSize == 0 )
        threadSize = TEST_COUNT;

    error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
    test_error( error, "Unable to get thread group max size" );

    if( matchGroupSize )
        // HACK because xchg and cmpxchg apparently are limited by hardware
        threadSize = groupSize;

    if( isLocal )
    {
        // Local-memory variant runs as a single work-group
        size_t workSize;
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
        test_error( error, "Unable to obtain max work group size for device and kernel combo" );

        threadSize = groupSize = workSize;
    }


    log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );

    // NOTE(review): the early "return -1" / test_error paths below leak
    // refValues, startRefValues and destItems — only the success path frees
    // them. Consider goto-based cleanup.
    refValues = (cl_int *)malloc( typeSize * threadSize );

    if( testFns.GenerateRefsIntFn != NULL )
    {
        // We have a ref generator provided
        d = init_genrand( gRandomSeed );
        startRefValues = malloc( typeSize * threadSize );
        if( typeSize == 4 )
            testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
        else
            testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
        free_mtdata(d);
        d = NULL;
    }
    else
        startRefValues = NULL;

    // If we're given a num_results function, we need to determine how many result objects we need. If
    // we don't have it, we assume it's just 1
    size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;

    char * destItems = new char[ typeSize * numDestItems ];
    if( destItems == NULL )
    {
        log_error( "ERROR: Unable to allocate memory!\n" );
        return -1;
    }
    // Seed every destination item with the width-appropriate start value
    void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), typeSize * numDestItems, destItems, NULL);
    if (!streams[0])
    {
        log_error("ERROR: Creating output array failed!\n");
        return -1;
    }
    // When no start refs were generated the buffer is merely allocated and
    // filled by the kernel's returned old values.
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(( startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE )), typeSize * threadSize, startRefValues, NULL);
    if (!streams[1])
    {
        log_error("ERROR: Creating reference array failed!\n");
        return -1;
    }

    /* Set the arguments */
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    if( isLocal )
    {
        // Local variant takes an extra __local scratch buffer and its size
        error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
        test_error( error, "Unable to set indexed local kernel argument" );

        cl_int numDestItemsInt = (cl_int)numDestItems;
        error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
        test_error( error, "Unable to set indexed kernel argument" );
    }

    /* Run the kernel */
    threads[0] = threadSize;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to read result value!" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    // If we have an expectedFn, then we need to generate a final value to compare against. If we don't
    // have one, it's because we're comparing ref values only
    if( testFns.ExpectedValueIntFn != NULL )
    {
        for( size_t i = 0; i < numDestItems; i++ )
        {
            char expected[ 8 ];
            cl_int intVal;
            cl_long longVal;
            if( typeSize == 4 )
            {
                // Int version
                intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
                memcpy( expected, &intVal, sizeof( intVal ) );
            }
            else
            {
                // Long version
                longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
                memcpy( expected, &longVal, sizeof( longVal ) );
            }

            if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
            {
                // Mismatch: dump every thread's start ref and returned old
                // value to aid debugging, then fail.
                if( typeSize == 4 )
                {
                    cl_int *outValue = (cl_int *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
                    cl_int *startRefs = (cl_int *)startRefValues;
                    cl_int *refs = (cl_int *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %d\n", i, refs[i] );
                    }
                }
                else
                {
                    cl_long *outValue = (cl_long *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
                    cl_long *startRefs = (cl_long *)startRefValues;
                    cl_long *refs = (cl_long *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %lld\n", i, refs[i] );
                    }
                }
                return -1;
            }
        }
    }

    if( testFns.VerifyRefsIntFn != NULL )
    {
        /* Use the verify function to also check the results */
        // NOTE(review): "!fn(...) != 0" parses as "(!fn(...)) != 0", i.e.
        // "verification returned false" — correct but easy to misread.
        if( dataType == kFloat )
        {
            cl_float *outValue = (cl_float *)destItems;
            if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else if( typeSize == 4 )
        {
            cl_int *outValue = (cl_int *)destItems;
            if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else
        {
            cl_long *outValue = (cl_long *)destItems;
            if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
    }
    else if( testFns.ExpectedValueIntFn == NULL )
    {
        log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
        return -1;
    }


    /* Re-write the starting value */
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );
    error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to write starting values!" );

    /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
    threads[0] = 1;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    if( memcmp( refValues, destItems, typeSize ) != 0 )
    {
        if( typeSize == 4 )
        {
            cl_int *s = (cl_int *)destItems;
            cl_int *r = (cl_int *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                      " (should have been %d, returned %d)!\n", *s, *r );
        }
        else
        {
            cl_long *s = (cl_long *)destItems;
            cl_long *r = (cl_long *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                      " (should have been %lld, returned %lld)!\n", *s, *r );
        }
        return -1;
    }

    delete [] destItems;
    free( refValues );
    if( startRefValues != NULL )
        free( startRefValues );

    return 0;
}
|
|
|
|
// Runs one atomic-operation kernel across all applicable type/memory-space
// combinations: int and uint against global then local memory, and (for the
// 1.0 "atom_" spelling only) long and ulong likewise. Returns non-zero if
// any sub-test failed.
int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programCore,
                             TestFns testFns,
                             bool extended, bool matchGroupSize, bool usingAtomicPrefix )
{
    log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");

    int failures = 0;

    // 32-bit variants: (global int, global uint, local int, local uint)
    const ExplicitType intTypes[2] = { kInt, kUInt };
    for( int localPass = 0; localPass < 2; localPass++ )
    {
        for( int t = 0; t < 2; t++ )
            failures |= test_atomic_function( deviceID, context, queue, programCore, testFns, extended,
                                              localPass != 0, intTypes[t], matchGroupSize );
    }

    // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix.
    // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix.
    if (!usingAtomicPrefix)
    {
        const ExplicitType longTypes[2] = { kLong, kULong };
        for( int localPass = 0; localPass < 2; localPass++ )
        {
            for( int t = 0; t < 2; t++ )
                failures |= test_atomic_function( deviceID, context, queue, programCore, testFns, extended,
                                                  localPass != 0, longTypes[t], matchGroupSize );
        }
    }

    return failures;
}
|
|
|
|
#pragma mark ---- add
|
|
|
|
// Kernel core for add: each work-item performs four atomic adds of (tid + 3);
// only the first call's returned "old" value is recorded for verification.
const char atom_add_core[] =
    " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n";

// Same operation using the OpenCL 1.1 "atomic_" spelling.
const char atomic_add_core[] =
    " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n";
|
|
|
|
// Host reference for atom(ic)_add (int): each of `size` work-items adds
// (tid + 3) four times, so the final value is the sum of 4*(i + 3).
cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int sum = 0;
    for( size_t idx = 0; idx < size; idx++ )
    {
        sum += 4 * ( (cl_int)idx + 3 );
    }
    return sum;
}
|
|
|
|
// Host reference for atom_add (long): sum of 4*(i + 3) over all work-items.
cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long sum = 0;
    for( size_t idx = 0; idx < size; idx++ )
    {
        sum += ( ( idx + 3 ) * 4 );
    }
    return sum;
}
|
|
|
|
// Entry point for the add test: exercises both the 1.0 "atom_" and the
// 1.1 "atomic_" spellings, validating the expected final total.
int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };

    const int atomResult = test_atomic_function_set( deviceID, context, queue, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false );
    if( atomResult != 0 )
        return -1;

    const int atomicResult = test_atomic_function_set( deviceID, context, queue, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true );
    if( atomicResult != 0 )
        return -1;

    return 0;
}
|
|
|
|
#pragma mark ---- sub
|
|
|
|
// Kernel cores for sub: each work-item subtracts (tid + 3) once and records
// the returned "old" value. Both API spellings are exercised.
const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";

const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
|
|
|
|
// Host reference for atom(ic)_sub (int): start value minus the sum of
// (i + 3) over all work-items.
cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int result = INT_TEST_VALUE;
    for( size_t k = 0; k < size; k++ )
    {
        result -= (cl_int)k + 3;
    }
    return result;
}
|
|
|
|
// Host reference for atom_sub (long): start value minus the sum of (i + 3).
cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long result = LONG_TEST_VALUE;
    for( size_t k = 0; k < size; k++ )
    {
        result -= k + 3;
    }
    return result;
}
|
|
|
|
// Entry point for the sub test: both API spellings, expected-value checked.
int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };

    const int atomResult = test_atomic_function_set( deviceID, context, queue, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false );
    if( atomResult != 0 )
        return -1;

    const int atomicResult = test_atomic_function_set( deviceID, context, queue, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true );
    if( atomicResult != 0 )
        return -1;

    return 0;
}
|
|
|
|
#pragma mark ---- xchg
|
|
|
|
// Kernel cores for xchg: each work-item swaps its tid into the destination
// and records the previous value. The float variant uses the same source —
// the destination's type comes from the %s substitution in the scaffolding.
const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";

const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
|
|
|
|
bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
|
|
{
|
|
/* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
|
|
char *valids;
|
|
size_t i;
|
|
char originalValidCount = 0;
|
|
|
|
valids = (char *)malloc( sizeof( char ) * size );
|
|
memset( valids, 0, sizeof( char ) * size );
|
|
|
|
for( i = 0; i < size; i++ )
|
|
{
|
|
if( refValues[ i ] == INT_TEST_VALUE )
|
|
{
|
|
// Special initial value
|
|
originalValidCount++;
|
|
continue;
|
|
}
|
|
if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
|
|
{
|
|
log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
|
|
return false;
|
|
}
|
|
valids[ refValues[ i ] ] ++;
|
|
}
|
|
|
|
/* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
|
|
the final value outputted */
|
|
if( valids[ finalValue ] > 0 )
|
|
{
|
|
log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
|
|
return false;
|
|
}
|
|
else
|
|
valids[ finalValue ] = 1; // So the following loop will be okay
|
|
|
|
/* Now check that every entry has one and only one count */
|
|
if( originalValidCount != 1 )
|
|
{
|
|
log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
|
|
return false;
|
|
}
|
|
for( i = 0; i < size; i++ )
|
|
{
|
|
if( valids[ i ] != 1 )
|
|
{
|
|
log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
|
|
for( size_t j = 0; j < size; j++ )
|
|
log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
free( valids );
|
|
return true;
|
|
}
|
|
|
|
bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
|
|
{
|
|
/* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
|
|
char *valids;
|
|
size_t i;
|
|
char originalValidCount = 0;
|
|
|
|
valids = (char *)malloc( sizeof( char ) * size );
|
|
memset( valids, 0, sizeof( char ) * size );
|
|
|
|
for( i = 0; i < size; i++ )
|
|
{
|
|
if( refValues[ i ] == LONG_TEST_VALUE )
|
|
{
|
|
// Special initial value
|
|
originalValidCount++;
|
|
continue;
|
|
}
|
|
if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
|
|
{
|
|
log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
|
|
return false;
|
|
}
|
|
valids[ refValues[ i ] ] ++;
|
|
}
|
|
|
|
/* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
|
|
the final value outputted */
|
|
if( valids[ finalValue ] > 0 )
|
|
{
|
|
log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
|
|
return false;
|
|
}
|
|
else
|
|
valids[ finalValue ] = 1; // So the following loop will be okay
|
|
|
|
/* Now check that every entry has one and only one count */
|
|
if( originalValidCount != 1 )
|
|
{
|
|
log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
|
|
return false;
|
|
}
|
|
for( i = 0; i < size; i++ )
|
|
{
|
|
if( valids[ i ] != 1 )
|
|
{
|
|
log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
|
|
for( size_t j = 0; j < size; j++ )
|
|
log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
free( valids );
|
|
return true;
|
|
}
|
|
|
|
bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
|
|
{
|
|
/* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
|
|
char *valids;
|
|
size_t i;
|
|
char originalValidCount = 0;
|
|
|
|
valids = (char *)malloc( sizeof( char ) * size );
|
|
memset( valids, 0, sizeof( char ) * size );
|
|
|
|
for( i = 0; i < size; i++ )
|
|
{
|
|
cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
|
|
if( *intRefValue == INT_TEST_VALUE )
|
|
{
|
|
// Special initial value
|
|
originalValidCount++;
|
|
continue;
|
|
}
|
|
if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
|
|
{
|
|
log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
|
|
return false;
|
|
}
|
|
valids[ (int)refValues[ i ] ] ++;
|
|
}
|
|
|
|
/* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
|
|
the final value outputted */
|
|
if( valids[ (int)finalValue ] > 0 )
|
|
{
|
|
log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
|
|
return false;
|
|
}
|
|
else
|
|
valids[ (int)finalValue ] = 1; // So the following loop will be okay
|
|
|
|
/* Now check that every entry has one and only one count */
|
|
if( originalValidCount != 1 )
|
|
{
|
|
log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
|
|
return false;
|
|
}
|
|
for( i = 0; i < size; i++ )
|
|
{
|
|
if( valids[ i ] != 1 )
|
|
{
|
|
log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
|
|
for( size_t j = 0; j < size; j++ )
|
|
log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
free( valids );
|
|
return true;
|
|
}
|
|
|
|
// Entry point for the xchg test. Verification is done on the per-thread
// "old" values (no single expected total exists for xchg). The float
// variant is run directly for global and local memory; matchGroupSize is
// true because xchg is limited to one work-group on some hardware.
int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };

    int failures = 0;
    failures |= test_atomic_function_set( deviceID, context, queue, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false );
    failures |= test_atomic_function_set( deviceID, context, queue, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true );

    failures |= test_atomic_function( deviceID, context, queue, atomic_xchg_float_core, set, false, false, kFloat, true );
    failures |= test_atomic_function( deviceID, context, queue, atomic_xchg_float_core, set, false, true, kFloat, true );

    return failures;
}
|
|
|
|
|
|
#pragma mark ---- min
|
|
|
|
// Kernel cores for min: each work-item swaps in the minimum of the current
// destination and its own start value, recording the previous destination.
const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";

const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
|
|
|
|
// Host reference for atom(ic)_min (int): smallest of the generated start
// values (the destination is seeded with INT_MAX, so the pure min wins).
cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int smallest = 0x7fffffffL;
    for( size_t j = 0; j < size; j++ )
    {
        smallest = ( startRefValues[ j ] < smallest ) ? startRefValues[ j ] : smallest;
    }
    return smallest;
}
|
|
|
|
// Fills the start values with randoms in [0x3fffffff, 0x7ffffffd] so every
// candidate is positive and below the INT_MAX seed of the destination.
void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
{
    for( size_t j = 0; j < size; j++ )
    {
        startRefValues[ j ] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
    }
}
|
|
|
|
// Host reference for atom_min (long): smallest of the generated start
// values (the destination is seeded with LLONG_MAX).
cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long smallest = 0x7fffffffffffffffLL;
    for( size_t j = 0; j < size; j++ )
    {
        smallest = ( startRefValues[ j ] < smallest ) ? startRefValues[ j ] : smallest;
    }
    return smallest;
}
|
|
|
|
// Fills the start values with random non-negative longs built from two
// 32-bit draws.
// NOTE(review): the second draw is shifted by 16, so its bits overlap the
// first draw's upper half — presumably intentional (values stay positive and
// well under LLONG_MAX); confirm, since the max test uses the same pattern.
void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
}
|
|
|
|
// Entry point for the min test (an "extended" atomic): destination seeded
// with the type's max so any generated value can win.
int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };

    const int atomResult = test_atomic_function_set( deviceID, context, queue, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false );
    if( atomResult != 0 )
        return -1;

    const int atomicResult = test_atomic_function_set( deviceID, context, queue, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true );
    if( atomicResult != 0 )
        return -1;

    return 0;
}
|
|
|
|
|
|
#pragma mark ---- max
|
|
|
|
// Kernel cores for max: each work-item swaps in the maximum of the current
// destination and its own start value, recording the previous destination.
const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";

const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
|
|
|
|
// Host reference for atom(ic)_max (int): largest of the generated start
// values (the destination is seeded with 0).
cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int largest = 0;
    for( size_t j = 0; j < size; j++ )
    {
        largest = ( startRefValues[ j ] > largest ) ? startRefValues[ j ] : largest;
    }
    return largest;
}
|
|
|
|
// Fills the start values with randoms in [0x3fffffff, 0x7ffffffd] so every
// candidate is positive and above the zero seed of the destination.
void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
{
    for( size_t j = 0; j < size; j++ )
    {
        startRefValues[ j ] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
    }
}
|
|
|
|
// Host reference for atom_max (long): largest of the generated start
// values (the destination is seeded with 0).
cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long largest = 0;
    for( size_t j = 0; j < size; j++ )
    {
        largest = ( startRefValues[ j ] > largest ) ? startRefValues[ j ] : largest;
    }
    return largest;
}
|
|
|
|
// Fills the start values with random non-negative longs built from two
// 32-bit draws (same construction as test_atomic_min_gen_long).
// NOTE(review): the << 16 overlaps the low word's upper bits — presumably
// intentional; values remain non-negative, which the zero-seeded max needs.
void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
}
|
|
|
|
// Entry point for the max test (an "extended" atomic): destination seeded
// with 0 so any generated (non-negative) value can win.
int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };

    const int atomResult = test_atomic_function_set( deviceID, context, queue, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false );
    if( atomResult != 0 )
        return -1;

    const int atomicResult = test_atomic_function_set( deviceID, context, queue, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true );
    if( atomicResult != 0 )
        return -1;

    return 0;
}
|
|
|
|
|
|
#pragma mark ---- inc
|
|
|
|
// Kernel cores for inc: each work-item increments the destination once and
// records the previous value.
const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n";

const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
|
|
|
|
// Host reference for atom(ic)_inc (int): one increment per work-item.
cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    const cl_int increments = (cl_int)size;
    return INT_TEST_VALUE + increments;
}
|
|
|
|
// Host reference for atom_inc (long): one increment per work-item.
cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long result = LONG_TEST_VALUE + size;
    return result;
}
|
|
|
|
// Entry point for the inc test: both API spellings, expected-value checked.
int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };

    const int atomResult = test_atomic_function_set( deviceID, context, queue, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false );
    if( atomResult != 0 )
        return -1;

    const int atomicResult = test_atomic_function_set( deviceID, context, queue, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true );
    if( atomicResult != 0 )
        return -1;

    return 0;
}
|
|
|
|
|
|
#pragma mark ---- dec
|
|
|
|
// Kernel cores for dec: each work-item decrements the destination once and
// records the previous value.
const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n";

const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
|
|
|
|
// Host reference for atom(ic)_dec (int): one decrement per work-item.
cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    const cl_int decrements = (cl_int)size;
    return INT_TEST_VALUE - decrements;
}
|
|
|
|
// Host reference for atom_dec (long): one decrement per work-item.
cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long result = LONG_TEST_VALUE - size;
    return result;
}
|
|
|
|
// Entry point for the dec tests: runs the shared function-set driver once with
// the atom_ prefix kernel and once with the atomic_ prefix kernel.
int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );

    return failed ? -1 : 0;
}
|
|
|
|
|
|
#pragma mark ---- cmpxchg
|
|
|
|
/* We test cmpxchg by implementing (the long way) atom_add */
|
|
// 32-bit compare-exchange core (atom_ prefix): implements an atomic add of
// (tid + 2) via the classic cmpxchg retry loop, so the expected total matches
// test_atomic_cmpxchg_result_int.
const char atom_cmpxchg_core[] =
" int oldValue, origValue, newValue;\n"
" do { \n"
" origValue = destMemory[0];\n"
" newValue = origValue + tid + 2;\n"
// Retry until no other thread raced in between the read and the exchange.
" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
" } while( oldValue != origValue );\n"
" oldValues[tid] = oldValue;\n"
;
|
|
|
|
// 64-bit variant of the compare-exchange core: same retry-loop atomic add of
// (tid + 2), but operating on long destinations.
const char atom_cmpxchg64_core[] =
" long oldValue, origValue, newValue;\n"
" do { \n"
" origValue = destMemory[0];\n"
" newValue = origValue + tid + 2;\n"
" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
" } while( oldValue != origValue );\n"
" oldValues[tid] = oldValue;\n"
;
|
|
|
|
// 32-bit compare-exchange core using the core atomic_ prefix; otherwise
// identical to atom_cmpxchg_core (atomic add of tid + 2 via retry loop).
const char atomic_cmpxchg_core[] =
" int oldValue, origValue, newValue;\n"
" do { \n"
" origValue = destMemory[0];\n"
" newValue = origValue + tid + 2;\n"
" oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
" } while( oldValue != origValue );\n"
" oldValues[tid] = oldValue;\n"
;
|
|
|
|
// Reference result for the 32-bit cmpxchg test: each thread tid effectively
// adds (tid + 2) to the start value, so accumulate the same series here.
cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int expected = INT_TEST_VALUE;
    size_t tid = 0;
    while( tid < size )
    {
        expected += (cl_int)tid + 2;
        tid++;
    }
    return expected;
}
|
|
|
|
// Reference result for the 64-bit cmpxchg test: each thread tid effectively
// adds (tid + 2) to the start value, so accumulate the same series here.
cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long expected = LONG_TEST_VALUE;
    size_t tid = 0;
    while( tid < size )
    {
        expected += tid + 2;
        tid++;
    }
    return expected;
}
|
|
|
|
// Entry point for the cmpxchg tests. Unlike the other atomics, cmpxchg is
// driven through test_atomic_function directly, enumerating each type
// (signed/unsigned, 32/64-bit) and memory variant explicitly.
// NOTE(review): the two bool arguments appear to select test configuration,
// with the second flipping between the two memory variants (false/true pairs
// per type) -- confirm against test_atomic_function's signature.
int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };

    int errors = 0;

    // Extension-style atom_ prefix, 32-bit signed/unsigned in both variants.
    log_info(" Testing atom_ functions...\n");
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg_core, set, false, false, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg_core, set, false, false, kUInt, true );
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg_core, set, false, true, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg_core, set, false, true, kUInt, true );

    // 64-bit atom_cmpxchg, signed/unsigned in both variants.
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg64_core, set, false, false, kLong, true );
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg64_core, set, false, false, kULong, true );
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg64_core, set, false, true, kLong, true );
    errors |= test_atomic_function( deviceID, context, queue, atom_cmpxchg64_core, set, false, true, kULong, true );

    // Core atomic_ prefix is 32-bit only.
    log_info(" Testing atomic_ functions...\n");
    errors |= test_atomic_function( deviceID, context, queue, atomic_cmpxchg_core, set, false, false, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, atomic_cmpxchg_core, set, false, false, kUInt, true );
    errors |= test_atomic_function( deviceID, context, queue, atomic_cmpxchg_core, set, false, true, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, atomic_cmpxchg_core, set, false, true, kUInt, true );

    if( errors )
        return -1;

    return 0;
}
|
|
|
|
#pragma mark -------- Bitwise functions
|
|
|
|
// For the bitwise tests each destination value is touched by one thread per
// bit, so the number of destination values is threadCount divided by the bit
// width of the data type, rounded up.
size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
{
    const size_t bitsPerValue = 8 * get_explicit_type_size( dataType );
    return ( threadCount + bitsPerValue - 1 ) / bitsPerValue;
}
|
|
|
|
#pragma mark ---- and
|
|
|
|
// Kernel core (atom_ prefix): thread tid clears bit (tid % numBits) of
// destMemory[tid / numBits] via atom_and. The 1L literal keeps the mask wide
// enough for the 64-bit (long) variants of the test.
const char atom_and_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
;
|
|
|
|
// Kernel core (core atomic_ prefix): identical to atom_and_core -- each thread
// clears one bit of its destination value with atomic_and.
const char atomic_and_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
;
|
|
|
|
|
|
// Reference result for the 32-bit and test: 32 consecutive threads each clear
// one bit of a destination value, so every destination except possibly the
// last ends up zero. startRefValues is unused (start value is all-ones).
cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0;

    // Last item doesn't get and'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = (cl_int)0xffffffffL;
    for( size_t i = 0; i < numBits; i++ )
        bits &= ~( 1U << i ); // 1U: left-shifting signed 1 into bit 31 is UB

    return bits;
}
|
|
|
|
// Reference result for the 64-bit and test: 64 consecutive threads each clear
// one bit of a destination value, so every destination except possibly the
// last ends up zero. startRefValues is unused (start value is all-ones).
cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return 0;

    // Last item doesn't get and'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 64;
    cl_long bits = (cl_long)0xffffffffffffffffLL;
    for( size_t i = 0; i < numBits; i++ )
        bits &= ~( 1LL << i ); // was "1 << i": a 32-bit shift is UB for i >= 32
                               // and never clears the upper 32 bits

    return bits;
}
|
|
|
|
// Entry point for the and tests: destinations start all-ones and each thread
// clears one bit; one destination value is needed per type-width threads
// (test_bitwise_num_results).
int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
        test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );

    return failed ? -1 : 0;
}
|
|
|
|
|
|
#pragma mark ---- or
|
|
|
|
// Kernel core (atom_ prefix): thread tid sets bit (tid % numBits) of
// destMemory[tid / numBits] via atom_or. The 1L literal keeps the mask wide
// enough for the 64-bit (long) variants of the test.
const char atom_or_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
;
|
|
|
|
// Kernel core (core atomic_ prefix): identical to atom_or_core -- each thread
// sets one bit of its destination value with atomic_or.
const char atomic_or_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int whichResult = tid / numBits;\n"
" int bitIndex = tid - ( whichResult * numBits );\n"
"\n"
" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
;
|
|
|
|
// Reference result for the 32-bit or test: 32 consecutive threads each set one
// bit of a destination value, so every destination except possibly the last
// ends up all-ones. startRefValues is unused (start value is zero).
cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0xffffffff;

    // Last item doesn't get or'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = 0;
    for( size_t i = 0; i < numBits; i++ )
        bits |= ( 1U << i ); // 1U: left-shifting signed 1 into bit 31 is UB

    return bits;
}
|
|
|
|
// Reference result for the 64-bit or test: 64 consecutive threads each set one
// bit of a destination value, so every destination except possibly the last
// ends up all-ones. startRefValues is unused (start value is zero).
cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return 0x0ffffffffffffffffLL;

    // Only the remaining threads contribute bits to the final value.
    size_t numBits = (size_t)size - whichResult * 64;
    cl_long expected = 0;
    size_t bit = 0;
    while( bit < numBits )
    {
        expected |= ( 1LL << bit );
        bit++;
    }
    return expected;
}
|
|
|
|
// Entry point for the or tests: destinations start at zero and each thread
// sets one bit; one destination value per type-width threads
// (test_bitwise_num_results).
int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );

    return failed ? -1 : 0;
}
|
|
|
|
|
|
#pragma mark ---- xor
|
|
|
|
// Kernel core (atom_ prefix): every thread targets the single destination
// destMemory[0], toggling bit (tid % numBits) with atom_xor.
const char atom_xor_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int bitIndex = tid & ( numBits - 1 );\n"
"\n"
" oldValues[tid] = atom_xor( &destMemory[0], 1 << bitIndex );\n"
;
|
|
|
|
// Kernel core (core atomic_ prefix): identical to atom_xor_core -- toggles bit
// (tid % numBits) of destMemory[0] with atomic_xor.
const char atomic_xor_core[] =
" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
" int bitIndex = tid & ( numBits - 1 );\n"
"\n"
" oldValues[tid] = atomic_xor( &destMemory[0], 1 << bitIndex );\n"
;
|
|
|
|
// Reference result for the 32-bit xor test: starting from the seed value, each
// thread toggles bit (tid % 32) of the single destination exactly once.
// startRefValues/whichResult are unused.
cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    cl_int total = 0x2f08ab41;
    for( size_t i = 0; i < size; i++ )
        total ^= ( 1U << ( i & 31 ) ); // 1U: shifting signed 1 into bit 31 is UB
    return total;
}
|
|
|
|
// Reference result for the 64-bit xor test: starting from the seed value, each
// thread toggles bit (tid % 64) of the single destination exactly once.
// startRefValues/whichResult are unused.
cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    cl_long expected = 0x2f08ab418ba0541LL;
    size_t tid = 0;
    while( tid < size )
    {
        expected ^= ( 1LL << ( tid & 63 ) );
        tid++;
    }
    return expected;
}
|
|
|
|
// Entry point for the xor tests: single destination seeded with a fixed
// pattern; each thread flips one bit of it.
int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };

    int failed = ( test_atomic_function_set( deviceID, context, queue, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 );
    if( !failed )
        failed = ( test_atomic_function_set( deviceID, context, queue, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 );

    return failed ? -1 : 0;
}
|
|
|
|
|
|
|
|
|