mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Contributes to https://github.com/KhronosGroup/OpenCL-CTS/issues/2181. Signed-off-by: Michael Rizkalla <michael.rizkalla@arm.com>
1690 lines
62 KiB
C++
1690 lines
62 KiB
C++
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "testBase.h"
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
|
|
#include "harness/conversions.h"
|
|
#include "harness/ThreadPool.h"
|
|
|
|
#define NUM_TESTS 23
|
|
|
|
#define LONG_MATH_SHIFT_SIZE 26
|
|
#define QUICK_MATH_SHIFT_SIZE 16
|
|
|
|
static const char *kernel_code =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = srcA[tid] %s srcB[tid];\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_V3 =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( vload3( tid, srcA ) %s vload3( tid, srcB), tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_V3_scalar_vector =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( srcA[tid] %s vload3( tid, srcB), tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_V3_vector_scalar =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( vload3( tid, srcA ) %s srcB[tid], tid, dst );\n"
|
|
"}\n";
|
|
|
|
|
|
// Separate kernel here because it does not fit the pattern
|
|
static const char *not_kernel_code =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = %ssrcA[tid];\n"
|
|
"}\n";
|
|
|
|
static const char *not_kernel_code_V3 =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( %s vload3( tid, srcA ), tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_scalar_shift =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = srcA[tid] %s srcB[tid]%s;\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_scalar_shift_V3 =
|
|
"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( vload3( tid, srcA) %s vload3( tid, srcB )%s, tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_question_colon =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = (srcA[tid]%s < srcB[tid]%s) ? srcA[tid] : srcB[tid];\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_question_colon_V3 =
|
|
"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( (vload3( tid, srcA)%s < vload3(tid, srcB)%s) ? vload3( tid, srcA) : vload3( tid, srcB), tid, dst );\n"
|
|
"}\n";
|
|
|
|
|
|
|
|
|
|
// External verification and data generation functions
|
|
extern const char *tests[];
|
|
extern const char *test_names[];
|
|
extern int verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n);
|
|
extern void init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) ;
|
|
extern int verify_ulong(int test, size_t vector_size, cl_ulong *inptrA, cl_ulong *inptrB, cl_ulong *outptr, size_t n);
|
|
extern void init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) ;
|
|
extern int verify_int(int test, size_t vector_size, cl_int *inptrA, cl_int *inptrB, cl_int *outptr, size_t n);
|
|
extern void init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) ;
|
|
extern int verify_uint(int test, size_t vector_size, cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n);
|
|
extern void init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) ;
|
|
extern int verify_short(int test, size_t vector_size, cl_short *inptrA, cl_short *inptrB, cl_short *outptr, size_t n);
|
|
extern void init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) ;
|
|
extern int verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n);
|
|
extern void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) ;
|
|
extern int verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n);
|
|
extern void init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) ;
|
|
extern int verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n);
|
|
extern void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) ;
|
|
|
|
// Supported type list
|
|
const ExplicitType types[] = {
|
|
kChar,
|
|
kUChar,
|
|
kShort,
|
|
kUShort,
|
|
kInt,
|
|
kUInt,
|
|
kLong,
|
|
kULong,
|
|
};
|
|
|
|
enum TestStyle
|
|
{
|
|
kDontCare=0,
|
|
kBothVectors,
|
|
kInputAScalar,
|
|
kInputBScalar,
|
|
kVectorScalarScalar, // for the ?: operator only; indicates vector ? scalar : scalar.
|
|
kInputCAlsoScalar = 0x80 // Or'ed flag to indicate that the selector for the ?: operator is also scalar
|
|
};
|
|
|
|
typedef struct _perThreadData
|
|
{
|
|
cl_mem m_streams[3];
|
|
cl_int *m_input_ptr[2], *m_output_ptr;
|
|
size_t m_type_size;
|
|
cl_program m_program[NUM_TESTS];
|
|
cl_kernel m_kernel[NUM_TESTS];
|
|
} perThreadData;
|
|
|
|
|
|
perThreadData * perThreadDataNew()
|
|
{
|
|
perThreadData * pThis = (perThreadData *)malloc(sizeof(perThreadData));
|
|
|
|
|
|
memset(pThis->m_program, 0, sizeof(cl_program)*NUM_TESTS);
|
|
memset(pThis->m_kernel, 0, sizeof(cl_kernel)*NUM_TESTS);
|
|
|
|
pThis->m_input_ptr[0] = pThis->m_input_ptr[1] = NULL;
|
|
pThis->m_output_ptr = NULL;
|
|
|
|
return pThis;
|
|
}
|
|
|
|
|
|
void perThreadDataDestroy(perThreadData * pThis)
|
|
{
|
|
int i;
|
|
// cleanup
|
|
clReleaseMemObject(pThis->m_streams[0]);
|
|
clReleaseMemObject(pThis->m_streams[1]);
|
|
clReleaseMemObject(pThis->m_streams[2]);
|
|
for (i=0; i<NUM_TESTS; i++)
|
|
{
|
|
if (pThis->m_kernel[i] != NULL) clReleaseKernel(pThis->m_kernel[i]);
|
|
if (pThis->m_program[i] != NULL) clReleaseProgram(pThis->m_program[i]);
|
|
}
|
|
free(pThis->m_input_ptr[0]);
|
|
free(pThis->m_input_ptr[1]);
|
|
free(pThis->m_output_ptr);
|
|
|
|
free(pThis);
|
|
}
|
|
|
|
|
|
cl_int perThreadDataInit(perThreadData * pThis, ExplicitType type,
|
|
int num_elements, int vectorSize,
|
|
int inputAVecSize, int inputBVecSize,
|
|
cl_context context, int start_test_ID,
|
|
int end_test_ID, int testID)
|
|
{
|
|
int i;
|
|
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
|
|
const char *type_name = get_explicit_type_name(type);
|
|
pThis->m_type_size = get_explicit_type_size(type);
|
|
int err;
|
|
// Used for the && and || tests where the vector case returns a signed value
|
|
const char *signed_type_name;
|
|
switch (type) {
|
|
case kChar:
|
|
case kUChar:
|
|
signed_type_name = get_explicit_type_name(kChar);
|
|
break;
|
|
case kShort:
|
|
case kUShort:
|
|
signed_type_name = get_explicit_type_name(kShort);
|
|
break;
|
|
case kInt:
|
|
case kUInt:
|
|
signed_type_name = get_explicit_type_name(kInt);
|
|
break;
|
|
case kLong:
|
|
case kULong:
|
|
signed_type_name = get_explicit_type_name(kLong);
|
|
break;
|
|
default:
|
|
log_error("Invalid type.\n");
|
|
return -1;
|
|
break;
|
|
}
|
|
|
|
pThis->m_input_ptr[0] =
|
|
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
|
|
pThis->m_input_ptr[1] =
|
|
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
|
|
pThis->m_output_ptr =
|
|
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
|
|
pThis->m_streams[0] = clCreateBuffer(
|
|
context, CL_MEM_READ_WRITE,
|
|
pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
|
|
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
pThis->m_streams[1] = clCreateBuffer(
|
|
context, CL_MEM_READ_WRITE,
|
|
pThis->m_type_size * num_elements * inputBVecSize, NULL, &err);
|
|
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
pThis->m_streams[2] = clCreateBuffer(
|
|
context, CL_MEM_READ_WRITE,
|
|
pThis->m_type_size * num_elements * vectorSize, NULL, &err);
|
|
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
const char *vectorString = sizeNames[ vectorSize ];
|
|
const char *inputAVectorString = sizeNames[ inputAVecSize ];
|
|
const char *inputBVectorString = sizeNames[ inputBVecSize ];
|
|
|
|
if (testID == -1)
|
|
{
|
|
log_info("\tTesting %s%s (%d bytes)...\n", type_name, vectorString, (int)(pThis->m_type_size*vectorSize));
|
|
}
|
|
|
|
char programString[4096];
|
|
const char *ptr;
|
|
|
|
|
|
const char * kernel_code_base = ( vectorSize != 3 ) ? kernel_code : ( inputAVecSize == 1 ) ? kernel_code_V3_scalar_vector : ( inputBVecSize == 1 ) ? kernel_code_V3_vector_scalar : kernel_code_V3;
|
|
|
|
for (i=start_test_ID; i<end_test_ID; i++) {
|
|
switch (i) {
|
|
case 10:
|
|
case 11:
|
|
sprintf(programString, vectorSize == 3 ? kernel_code_scalar_shift_V3 : kernel_code_scalar_shift, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, tests[i], ((vectorSize == 1) ? "":".s0"));
|
|
break;
|
|
case 12:
|
|
sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, tests[i]);
|
|
break;
|
|
case 13:
|
|
sprintf(programString, vectorSize == 3 ? kernel_code_question_colon_V3 : kernel_code_question_colon,
|
|
type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, ((vectorSize == 1) ? "":".s0"), ((vectorSize == 1) ? "":".s0")) ;
|
|
break;
|
|
case 14:
|
|
case 15:
|
|
case 16:
|
|
case 17:
|
|
case 18:
|
|
case 19:
|
|
case 20:
|
|
case 21:
|
|
// Need an unsigned result here for vector sizes > 1
|
|
sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
|
|
break;
|
|
case 22:
|
|
// Need an unsigned result here for vector sizes > 1
|
|
sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
|
|
break;
|
|
default:
|
|
sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, tests[i]);
|
|
break;
|
|
}
|
|
|
|
//printf("kernel: %s\n", programString);
|
|
ptr = programString;
|
|
err = create_single_kernel_helper( context,
|
|
&(pThis->m_program[ i ]),
|
|
&(pThis->m_kernel[ i ]), 1,
|
|
&ptr, "test" );
|
|
test_error( err, "Unable to create test kernel" );
|
|
err = clSetKernelArg(pThis->m_kernel[i], 0,
|
|
sizeof pThis->m_streams[0],
|
|
&(pThis->m_streams[0]) );
|
|
err |= clSetKernelArg(pThis->m_kernel[i], 1,
|
|
sizeof pThis->m_streams[1],
|
|
&(pThis->m_streams[1]) );
|
|
err |= clSetKernelArg(pThis->m_kernel[i], 2,
|
|
sizeof pThis->m_streams[2],
|
|
&(pThis->m_streams[2]) );
|
|
test_error(err, "clSetKernelArgs failed");
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
typedef struct _globalThreadData
|
|
{
|
|
cl_device_id m_deviceID;
|
|
cl_context m_context;
|
|
// cl_command_queue m_queue;
|
|
int m_num_elements;
|
|
int m_threadcount;
|
|
int m_vectorSize;
|
|
int m_num_runs_shift;
|
|
TestStyle m_style;
|
|
ExplicitType m_type;
|
|
MTdata * m_pRandData;
|
|
uint64_t m_offset;
|
|
int m_testID;
|
|
perThreadData **m_arrPerThreadData;
|
|
} globalThreadData;
|
|
|
|
|
|
|
|
globalThreadData * globalThreadDataNew(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements,
|
|
int vectorSize, TestStyle style, int num_runs_shift,
|
|
ExplicitType type, int testID,
|
|
int threadcount)
|
|
{
|
|
int i;
|
|
globalThreadData * pThis = (globalThreadData *)malloc(sizeof(globalThreadData));
|
|
pThis->m_deviceID = deviceID;
|
|
pThis->m_context = context;
|
|
// pThis->m_queue = queue;
|
|
pThis->m_num_elements = num_elements;
|
|
pThis->m_num_runs_shift = num_runs_shift;
|
|
pThis->m_vectorSize = vectorSize;
|
|
pThis->m_style = style;
|
|
pThis->m_type = type;
|
|
pThis->m_offset = (uint64_t)0;
|
|
pThis->m_testID = testID;
|
|
pThis->m_arrPerThreadData = NULL;
|
|
pThis->m_threadcount = threadcount;
|
|
|
|
pThis->m_pRandData = (MTdata *)malloc(threadcount*sizeof(MTdata));
|
|
pThis->m_arrPerThreadData = (perThreadData **)
|
|
malloc(threadcount*sizeof(perThreadData *));
|
|
for(i=0; i < threadcount; ++i)
|
|
{
|
|
pThis->m_pRandData[i] = init_genrand(i+1);
|
|
pThis->m_arrPerThreadData[i] = NULL;
|
|
}
|
|
|
|
return pThis;
|
|
}
|
|
|
|
void globalThreadDataDestroy(globalThreadData * pThis)
|
|
{
|
|
int i;
|
|
|
|
for(i=0; i < pThis->m_threadcount; ++i)
|
|
{
|
|
free_mtdata(pThis->m_pRandData[i]);
|
|
if(pThis->m_arrPerThreadData[i] != NULL)
|
|
{
|
|
perThreadDataDestroy(pThis->m_arrPerThreadData[i]);
|
|
}
|
|
}
|
|
free(pThis->m_arrPerThreadData);
|
|
free(pThis->m_pRandData);
|
|
free(pThis);
|
|
}
|
|
|
|
int
|
|
test_integer_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID, MTdata randIn, uint64_t startIndx, uint64_t endIndx,
|
|
perThreadData ** ppThreadData);
|
|
|
|
|
|
cl_int test_integer_ops_do_thread( cl_uint job_id, cl_uint thread_id, void *userInfo )
|
|
{
|
|
cl_int error; cl_int result;
|
|
globalThreadData * threadInfoGlobal = (globalThreadData *)userInfo;
|
|
cl_command_queue queue;
|
|
|
|
#if THREAD_DEBUG
|
|
log_error("Thread %x (job %x) about to create command queue\n",
|
|
thread_id, job_id);
|
|
#endif
|
|
|
|
queue = clCreateCommandQueue (threadInfoGlobal->m_context,
|
|
threadInfoGlobal->m_deviceID,0,
|
|
&error);
|
|
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("Thread %x (job %x) could not create command queue\n",
|
|
thread_id, job_id);
|
|
return error; // should we clean up the queue too?
|
|
}
|
|
|
|
#if THREAD_DEBUG
|
|
log_error("Thread %x (job %x) created command queue\n",
|
|
thread_id, job_id);
|
|
#endif
|
|
|
|
result = test_integer_ops( threadInfoGlobal->m_deviceID,
|
|
threadInfoGlobal->m_context,
|
|
queue,
|
|
threadInfoGlobal->m_num_elements,
|
|
threadInfoGlobal->m_vectorSize, threadInfoGlobal->m_style,
|
|
threadInfoGlobal->m_num_runs_shift,
|
|
threadInfoGlobal->m_type, threadInfoGlobal->m_testID,
|
|
threadInfoGlobal->m_pRandData[thread_id],
|
|
threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*job_id,
|
|
threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*(job_id+1),
|
|
&(threadInfoGlobal->m_arrPerThreadData[thread_id])
|
|
);
|
|
|
|
if(result != 0)
|
|
{
|
|
log_error("Thread %x (job %x) failed test_integer_ops with result %x\n",
|
|
thread_id, job_id, result);
|
|
// return error;
|
|
}
|
|
|
|
|
|
error = clReleaseCommandQueue(queue);
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("Thread %x (job %x) could not release command queue\n",
|
|
thread_id, job_id);
|
|
return error;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
int
|
|
test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID)
|
|
{
|
|
globalThreadData * pThreadInfo = NULL;
|
|
cl_int result=0;
|
|
cl_uint threadcount = GetThreadCount();
|
|
|
|
// Check to see if we are using single threaded mode on other than a 1.0 device
|
|
if (getenv( "CL_TEST_SINGLE_THREADED" )) {
|
|
|
|
char device_version[1024] = { 0 };
|
|
result = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
|
|
if(result != CL_SUCCESS)
|
|
{
|
|
log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
|
|
return result;
|
|
}
|
|
|
|
if (strcmp("OpenCL 1.0 ",device_version)) {
|
|
log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
|
|
}
|
|
}
|
|
|
|
// This test will run threadcount threads concurrently; each thread will execute test_integer_ops()
|
|
// which will allocate 2 OpenCL buffers on the device; each buffer has size num_elements * type_size * vectorSize.
|
|
// We need to make sure that the total device memory allocated by all threads does not exceed the maximum
|
|
// memory on the device. If it does, we decrease num_elements until all threads combined will not
|
|
// over-subscribe device memory.
|
|
cl_ulong maxDeviceGlobalMem;
|
|
result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL);
|
|
if(result != CL_SUCCESS)
|
|
{
|
|
log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
|
|
return result;
|
|
}
|
|
|
|
if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) {
|
|
maxDeviceGlobalMem = (cl_ulong)SIZE_MAX;
|
|
}
|
|
|
|
// Let's not take all device memory - reduce by 75%
|
|
maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2;
|
|
// Now reduce num_elements so that the total device memory usage does not exceed 75% of global device memory.
|
|
size_t type_size = get_explicit_type_size(type);
|
|
while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize > maxDeviceGlobalMem)
|
|
{
|
|
num_elements >>= 1;
|
|
}
|
|
|
|
uint64_t startIndx = (uint64_t)0;
|
|
uint64_t endIndx = (1ULL<<num_runs_shift);
|
|
uint64_t jobcount = (endIndx-startIndx)/num_elements;
|
|
|
|
if(jobcount==0)
|
|
{
|
|
jobcount = 1;
|
|
}
|
|
|
|
pThreadInfo = globalThreadDataNew(deviceID, context, queue, num_elements,
|
|
vectorSize, style, num_runs_shift,
|
|
type, testID, threadcount);
|
|
|
|
|
|
pThreadInfo->m_offset = startIndx;
|
|
|
|
#if THREAD_DEBUG
|
|
log_error("Launching %llx jobs\n",
|
|
jobcount);
|
|
#endif
|
|
|
|
result = ThreadPool_Do(test_integer_ops_do_thread, (cl_uint)jobcount, (void *)pThreadInfo);
|
|
|
|
if(result != 0)
|
|
{
|
|
// cleanup ??
|
|
log_error("ThreadPool_Do return non-success value %d\n", result);
|
|
|
|
}
|
|
globalThreadDataDestroy(pThreadInfo);
|
|
return result;
|
|
}
|
|
|
|
|
|
|
|
int
|
|
test_integer_ops(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements,
|
|
int vectorSize, TestStyle style, int num_runs_shift,
|
|
ExplicitType type, int testID, MTdata randDataIn,
|
|
uint64_t startIndx, uint64_t endIndx,
|
|
perThreadData ** ppThreadData)
|
|
{
|
|
size_t threads[1];
|
|
int err;
|
|
int i;
|
|
int inputAVecSize, inputBVecSize;
|
|
|
|
|
|
|
|
inputAVecSize = inputBVecSize = vectorSize;
|
|
if( style == kInputAScalar )
|
|
inputAVecSize = 1;
|
|
else if( style == kInputBScalar )
|
|
inputBVecSize = 1;
|
|
|
|
/*
|
|
if( inputAVecSize != inputBVecSize )
|
|
log_info("Testing \"%s\" on %s%d (%s-%s inputs) (range %llx - %llx of 0-%llx)\n",
|
|
test_names[testID],
|
|
get_explicit_type_name(type), vectorSize,
|
|
( inputAVecSize == 1 ) ? "scalar" : "vector",
|
|
( inputBVecSize == 1 ) ? "scalar" : "vector",
|
|
startIndx, endIndx, (1ULL<<num_runs_shift) );
|
|
else
|
|
log_info("Testing \"%s\" on %s%d (range %llx - %llx of 0-%llx)\n",
|
|
test_names[testID],
|
|
get_explicit_type_name(type), vectorSize,
|
|
startIndx, endIndx, (1ULL<<num_runs_shift));
|
|
*/
|
|
|
|
|
|
// Figure out which sub-test to run, or all of them
|
|
int start_test_ID = 0;
|
|
int end_test_ID = NUM_TESTS;
|
|
if (testID != -1) {
|
|
start_test_ID = testID;
|
|
end_test_ID = testID+1;
|
|
}
|
|
if (testID > NUM_TESTS) {
|
|
log_error("Invalid test ID: %d\n", testID);
|
|
return -1;
|
|
}
|
|
|
|
if(*ppThreadData == NULL)
|
|
{
|
|
*ppThreadData = perThreadDataNew();
|
|
err = perThreadDataInit(*ppThreadData,
|
|
type, num_elements, vectorSize,
|
|
inputAVecSize, inputBVecSize,
|
|
context, start_test_ID,
|
|
end_test_ID, testID);
|
|
test_error(err, "failed to init per thread data\n");
|
|
}
|
|
|
|
perThreadData * pThreadData = *ppThreadData;
|
|
|
|
|
|
|
|
threads[0] = (size_t)num_elements;
|
|
int error_count = 0;
|
|
for (i=start_test_ID; i<end_test_ID; i++)
|
|
{
|
|
uint64_t indx;
|
|
|
|
|
|
if(startIndx >= endIndx)
|
|
{
|
|
startIndx = (uint64_t)0;
|
|
endIndx = (1ULL<<num_runs_shift);
|
|
}
|
|
for (indx=startIndx; indx < endIndx; indx+=num_elements)
|
|
{
|
|
|
|
switch (type) {
|
|
case kChar:
|
|
init_char_data(indx, num_elements * vectorSize, (cl_char**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kUChar:
|
|
init_uchar_data(indx, num_elements * vectorSize, (cl_uchar**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kShort:
|
|
init_short_data(indx, num_elements * vectorSize, (cl_short**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kUShort:
|
|
init_ushort_data(indx, num_elements * vectorSize, (cl_ushort**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kInt:
|
|
init_int_data(indx, num_elements * vectorSize, (cl_int**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kUInt:
|
|
init_uint_data(indx, num_elements * vectorSize, (cl_uint**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kLong:
|
|
init_long_data(indx, num_elements * vectorSize, (cl_long**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kULong:
|
|
init_ulong_data(indx, num_elements * vectorSize, (cl_ulong**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
default:
|
|
err = 1;
|
|
log_error("Invalid type.\n");
|
|
break;
|
|
}
|
|
|
|
|
|
err = clEnqueueWriteBuffer(queue, pThreadData->m_streams[0], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputAVecSize, (void *)pThreadData->m_input_ptr[0], 0, NULL, NULL);
|
|
test_error(err, "clEnqueueWriteBuffer failed");
|
|
err = clEnqueueWriteBuffer( queue, pThreadData->m_streams[1], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputBVecSize, (void *)pThreadData->m_input_ptr[1], 0, NULL, NULL );
|
|
test_error(err, "clEnqueueWriteBuffer failed");
|
|
|
|
err = clEnqueueNDRangeKernel( queue, pThreadData->m_kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueNDRangeKernel failed");
|
|
|
|
err = clEnqueueReadBuffer( queue, pThreadData->m_streams[2], CL_TRUE, 0, pThreadData->m_type_size*num_elements * vectorSize, (void *)pThreadData->m_output_ptr, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueReadBuffer failed");
|
|
|
|
// log_info("Performing verification\n");
|
|
|
|
// If one of the inputs are scalar, we need to extend the input values to vectors
|
|
// to accommodate the verify functions
|
|
if( vectorSize > 1 )
|
|
{
|
|
char * p = NULL;
|
|
if( style == kInputAScalar )
|
|
p = (char *)pThreadData->m_input_ptr[ 0 ];
|
|
else if( style == kInputBScalar )
|
|
p = (char *)pThreadData->m_input_ptr[ 1 ];
|
|
if( p != NULL )
|
|
{
|
|
for( int element = num_elements - 1; element >= 0; element-- )
|
|
{
|
|
for( int vec = ( element == 0 ) ? 1 : 0; vec < vectorSize; vec++ )
|
|
memcpy( p + ( element * vectorSize + vec ) * pThreadData->m_type_size, p + element * pThreadData->m_type_size, pThreadData->m_type_size );
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (type) {
|
|
case kChar:
|
|
err = verify_char(i, vectorSize, (cl_char*)pThreadData->m_input_ptr[0], (cl_char*)pThreadData->m_input_ptr[1], (cl_char*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kUChar:
|
|
err = verify_uchar(i, vectorSize, (cl_uchar*)pThreadData->m_input_ptr[0], (cl_uchar*)pThreadData->m_input_ptr[1], (cl_uchar*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kShort:
|
|
err = verify_short(i, vectorSize, (cl_short*)pThreadData->m_input_ptr[0], (cl_short*)pThreadData->m_input_ptr[1], (cl_short*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kUShort:
|
|
err = verify_ushort(i, vectorSize, (cl_ushort*)pThreadData->m_input_ptr[0], (cl_ushort*)pThreadData->m_input_ptr[1], (cl_ushort*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kInt:
|
|
err = verify_int(i, vectorSize, (cl_int*)pThreadData->m_input_ptr[0], (cl_int*)pThreadData->m_input_ptr[1], (cl_int*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kUInt:
|
|
err = verify_uint(i, vectorSize, (cl_uint*)pThreadData->m_input_ptr[0], (cl_uint*)pThreadData->m_input_ptr[1], (cl_uint*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kLong:
|
|
err = verify_long(i, vectorSize, (cl_long*)pThreadData->m_input_ptr[0], (cl_long*)pThreadData->m_input_ptr[1], (cl_long*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kULong:
|
|
err = verify_ulong(i, vectorSize, (cl_ulong*)pThreadData->m_input_ptr[0], (cl_ulong*)pThreadData->m_input_ptr[1], (cl_ulong*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
default:
|
|
err = 1;
|
|
log_error("Invalid type.\n");
|
|
break;
|
|
}
|
|
|
|
if (err) {
|
|
#if 0
|
|
log_error( "* inASize: %d inBSize: %d numElem: %d\n", inputAVecSize, inputBVecSize, num_elements );
|
|
cl_char *inP = (cl_char *)pThreadData->m_input_ptr[0];
|
|
log_error( "from 18:\n" );
|
|
for( int q = 18; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_input_ptr[1];
|
|
for( int q = 18; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_output_ptr;
|
|
for( int q = 18; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
log_error( "from 36:\n" );
|
|
inP = (cl_char *)pThreadData->m_input_ptr[0];
|
|
for( int q = 36; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_input_ptr[1];
|
|
for( int q = 36; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_output_ptr;
|
|
for( int q = 36; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
#endif
|
|
error_count++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
|
|
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
|
|
if (err) {
|
|
log_error("\t\t%s%s test %s failed (range %llx - %llx of 0-%llx)\n",
|
|
get_explicit_type_name(type), sizeNames[vectorSize],
|
|
test_names[i],
|
|
startIndx, endIndx,
|
|
(1ULL<<num_runs_shift));
|
|
} else {
|
|
log_info("\t\t%s%s test %s passed (range %llx - %llx of 0-%llx)\n",
|
|
get_explicit_type_name(type), sizeNames[vectorSize],
|
|
test_names[i],
|
|
startIndx, endIndx,
|
|
(1ULL<<num_runs_shift));
|
|
}
|
|
*/
|
|
}
|
|
|
|
|
|
|
|
return error_count;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Run all the vector sizes for a given test
|
|
int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID) {
|
|
int errors = 0;
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/1, 1, kBothVectors, num, type, testID);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/2, 2, kBothVectors, num, type, testID);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/3, 3, kBothVectors, num, type, testID);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/4, 4, kBothVectors, num, type, testID);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/8, 8, kBothVectors, num, type, testID);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/16, 16, kBothVectors, num, type, testID);
|
|
return errors;
|
|
}
|
|
|
|
// Run multiple tests for a given type
|
|
int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) {
|
|
int errors = 0;
|
|
|
|
if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) {
|
|
log_info("Detected CL_WIMPY_MODE env\n");
|
|
log_info("Skipping long test\n");
|
|
return 0;
|
|
}
|
|
|
|
int i;
|
|
for (i=0; i<total_tests; i++)
|
|
{
|
|
int localErrors;
|
|
log_info("Testing \"%s\" ", test_names[tests[i]]); fflush( stdout );
|
|
localErrors = run_specific_test(deviceID, context, queue, num_elements, type, num, tests[i]);
|
|
if( localErrors )
|
|
log_info( "FAILED\n" );
|
|
else
|
|
log_info( "passed\n" );
|
|
|
|
errors += localErrors;
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
// Run all the math tests for a given type
|
|
int run_test_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {0, 1, 2, 3, 4};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all the logic tests for a given type
|
|
int run_test_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {5, 6, 7, 12, 14, 15, 22};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all the shifting tests for a given type
|
|
int run_test_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {8, 9, 10, 11};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all the comparison tests for a given type
|
|
int run_test_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {13, 16, 17, 18, 19, 20, 21};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all tests for a given type
|
|
int run_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int errors = 0;
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 1, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 2, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 3, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 4, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 8, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 16, kBothVectors, num, type, -1);
|
|
return errors;
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Long tests
|
|
// -----------------
|
|
REGISTER_TEST(long_math)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_math(device, context, queue, num_elements, kLong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(long_logic)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_logic(device, context, queue, num_elements, kLong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(long_shift)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_shift(device, context, queue, num_elements, kLong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(long_compare)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_compare(device, context, queue, num_elements, kLong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_long_math)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_math(device, context, queue, num_elements, kLong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_long_logic)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_logic(device, context, queue, num_elements, kLong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_long_shift)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_shift(device, context, queue, num_elements, kLong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_long_compare)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_compare(device, context, queue, num_elements, kLong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// ULong tests
|
|
// -----------------
|
|
REGISTER_TEST(ulong_math)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_math(device, context, queue, num_elements, kULong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(ulong_logic)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_logic(device, context, queue, num_elements, kULong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(ulong_shift)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_shift(device, context, queue, num_elements, kULong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(ulong_compare)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_compare(device, context, queue, num_elements, kULong,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ulong_math)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_math(device, context, queue, num_elements, kULong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ulong_logic)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_logic(device, context, queue, num_elements, kULong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ulong_shift)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_shift(device, context, queue, num_elements, kULong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ulong_compare)
|
|
{
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test_compare(device, context, queue, num_elements, kULong,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Int tests
|
|
// -----------------
|
|
REGISTER_TEST(int_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(int_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(int_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(int_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_int_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_int_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_int_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_int_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// UInt tests
|
|
// -----------------
|
|
REGISTER_TEST(uint_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kUInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(uint_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kUInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(uint_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kUInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(uint_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kUInt,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uint_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kUInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uint_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kUInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uint_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kUInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uint_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kUInt,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Short tests
|
|
// -----------------
|
|
REGISTER_TEST(short_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(short_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(short_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(short_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_short_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_short_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_short_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_short_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// UShort tests
|
|
// -----------------
|
|
REGISTER_TEST(ushort_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kUShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(ushort_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kUShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(ushort_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kUShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(ushort_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kUShort,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ushort_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kUShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ushort_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kUShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ushort_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kUShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_ushort_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kUShort,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Char tests
|
|
// -----------------
|
|
REGISTER_TEST(char_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(char_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(char_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(char_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_char_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_char_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_char_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_char_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// UChar tests
|
|
// -----------------
|
|
REGISTER_TEST(uchar_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kUChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(uchar_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kUChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(uchar_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kUChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(uchar_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kUChar,
|
|
LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uchar_math)
|
|
{
|
|
return run_test_math(device, context, queue, num_elements, kUChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uchar_logic)
|
|
{
|
|
return run_test_logic(device, context, queue, num_elements, kUChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uchar_shift)
|
|
{
|
|
return run_test_shift(device, context, queue, num_elements, kUChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
REGISTER_TEST(quick_uchar_compare)
|
|
{
|
|
return run_test_compare(device, context, queue, num_elements, kUChar,
|
|
QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// These are kept for debugging if you want to run all the tests together.
|
|
|
|
int test_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
if (!gHasLong)
|
|
{
|
|
log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
|
|
return CL_SUCCESS;
|
|
}
|
|
return run_test(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
// Prototype for below
|
|
int test_question_colon_op(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
|
|
int vectorSize, TestStyle style, ExplicitType type );
|
|
|
|
// Run all the vector sizes for a given test in scalar-vector and vector-scalar modes
|
|
int run_test_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID)
|
|
{
|
|
int sizes[] = { 2, 3, 4, 8, 16, 0 };
|
|
int errors = 0;
|
|
|
|
for( int i = 0; sizes[ i ] != 0; i++ )
|
|
{
|
|
if( testID == 13 )
|
|
{
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kVectorScalarScalar, type );
|
|
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kBothVectors | kInputCAlsoScalar), type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputAScalar | kInputCAlsoScalar), type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputBScalar | kInputCAlsoScalar), type );
|
|
}
|
|
else
|
|
{
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, num, type, testID);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, num, type, testID);
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
// Run all the tests for scalar-vector and vector-scalar for a given type
|
|
int run_vector_scalar_tests( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num )
|
|
{
|
|
int errors = 0;
|
|
size_t i;
|
|
|
|
// Shift operators:
|
|
// a) cannot take scalars as first parameter and vectors as second
|
|
// b) have the vector >> scalar case tested by tests 10 and 11
|
|
// so they get skipped entirely
|
|
|
|
int testsToRun[] = { 0, 1, 2, 3, 4, 5, 6, 7,
|
|
13, 14, 15, 16, 17, 18, 19, 20, 21 };
|
|
for (i=0; i< sizeof(testsToRun)/sizeof(testsToRun[0]); i++)
|
|
{
|
|
errors += run_test_sizes(deviceID, context, queue, 2048, type, num, testsToRun[i]);
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
REGISTER_TEST(vector_scalar)
|
|
{
|
|
int errors = 0;
|
|
int numTypes = sizeof( types ) / sizeof( types[ 0 ] );
|
|
|
|
for( int t = 0; t < numTypes; t++ )
|
|
{
|
|
if ((types[ t ] == kLong || types[ t ] == kULong) && !gHasLong)
|
|
continue;
|
|
|
|
errors += run_vector_scalar_tests(device, context, queue, num_elements,
|
|
types[t], 1);
|
|
break;
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
void generate_random_bool_data( size_t count, MTdata d, cl_char *outData, size_t outDataSize )
|
|
{
|
|
cl_uint bits = genrand_int32(d);
|
|
cl_uint bitsLeft = 32;
|
|
|
|
memset( outData, 0, outDataSize * count );
|
|
|
|
for( size_t i = 0; i < count; i++ )
|
|
{
|
|
if( 0 == bitsLeft)
|
|
{
|
|
bits = genrand_int32(d);
|
|
bitsLeft = 32;
|
|
}
|
|
|
|
// Note: we will be setting just any bit non-zero for the type, so we can easily skip past
|
|
// and just write bytes (assuming the entire output buffer is already zeroed, which we did)
|
|
*outData = ( bits & 1 ) ? 0xff : 0;
|
|
|
|
bits >>= 1; bitsLeft -= 1;
|
|
|
|
outData += outDataSize;
|
|
}
|
|
}
|
|
|
|
static const char *kernel_question_colon_full =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *srcC, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" %s%s valA = %ssrcA%s"
|
|
" %s%s valB = %ssrcB%s"
|
|
" %s%s valC = %ssrcC%s"
|
|
" %s%s destVal = valC ? valA : valB;\n"
|
|
" %s"
|
|
"}\n";
|
|
|
|
static const char *kernel_qc_load_plain_prefix = "";
|
|
static const char *kernel_qc_load_plain_suffix = "[ tid ];\n";
|
|
|
|
static const char *kernel_qc_load_vec3_prefix = "vload3( tid, ";
|
|
static const char *kernel_qc_load_vec3_suffix = ");\n";
|
|
|
|
static const char *kernel_qc_store_plain = "dst[ tid ] = destVal;\n";
|
|
static const char *kernel_qc_store_vec3 = "vstore3( destVal, tid, dst );\n";
|
|
|
|
int test_question_colon_op(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements,
|
|
int vectorSize, TestStyle style, ExplicitType type )
|
|
{
|
|
cl_mem streams[4];
|
|
cl_int *input_ptr[3], *output_ptr;
|
|
cl_program program;
|
|
cl_kernel kernel;
|
|
size_t threads[1];
|
|
int err;
|
|
int inputAVecSize, inputBVecSize, inputCVecSize;
|
|
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
// Identical to sizeNames but with a blank for 3, since we use vload/store there
|
|
const char * paramSizeNames[] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
MTdata s_randStates;
|
|
|
|
inputAVecSize = inputBVecSize = inputCVecSize = vectorSize;
|
|
if( style & kInputCAlsoScalar )
|
|
{
|
|
style = (TestStyle)( style & ~kInputCAlsoScalar );
|
|
inputCVecSize = 1;
|
|
}
|
|
if( style == kInputAScalar )
|
|
inputAVecSize = 1;
|
|
else if( style == kInputBScalar )
|
|
inputBVecSize = 1;
|
|
else if( style == kVectorScalarScalar )
|
|
inputAVecSize = inputBVecSize = 1;
|
|
|
|
log_info("Testing \"?:\" on %s%d (%s?%s:%s inputs)\n",
|
|
get_explicit_type_name(type), vectorSize, ( inputCVecSize == 1 ) ? "scalar" : "vector",
|
|
( inputAVecSize == 1 ) ? "scalar" : "vector",
|
|
( inputBVecSize == 1 ) ? "scalar" : "vector" );
|
|
|
|
|
|
const char *type_name = get_explicit_type_name(type);
|
|
size_t type_size = get_explicit_type_size(type);
|
|
|
|
// Create and initialize I/O buffers
|
|
|
|
input_ptr[0] = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
input_ptr[1] = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
input_ptr[2] = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
output_ptr = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
|
|
s_randStates = init_genrand( gRandomSeed );
|
|
|
|
generate_random_data( type, num_elements * inputAVecSize, s_randStates, input_ptr[ 0 ] );
|
|
generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] );
|
|
generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size );
|
|
|
|
streams[0] = clCreateBuffer(
|
|
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
|
type_size * num_elements * inputAVecSize, input_ptr[0], &err);
|
|
test_error(err, "clCreateBuffer failed");
|
|
streams[1] = clCreateBuffer(
|
|
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
|
type_size * num_elements * inputBVecSize, input_ptr[1], &err);
|
|
test_error(err, "clCreateBuffer failed");
|
|
streams[2] = clCreateBuffer(
|
|
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
|
type_size * num_elements * inputCVecSize, input_ptr[2], &err);
|
|
test_error(err, "clCreateBuffer failed");
|
|
streams[3] =
|
|
clCreateBuffer(context, CL_MEM_WRITE_ONLY,
|
|
type_size * num_elements * vectorSize, NULL, &err);
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
const char *vectorString = sizeNames[ vectorSize ];
|
|
const char *inputAVectorString = sizeNames[ inputAVecSize ];
|
|
const char *inputBVectorString = sizeNames[ inputBVecSize ];
|
|
const char *inputCVectorString = sizeNames[ inputCVecSize ];
|
|
|
|
char programString[4096];
|
|
const char *ptr;
|
|
|
|
sprintf( programString, kernel_question_colon_full, type_name, paramSizeNames[ inputAVecSize ],
|
|
type_name, paramSizeNames[ inputBVecSize ],
|
|
type_name, paramSizeNames[ inputCVecSize ],
|
|
type_name, paramSizeNames[ vectorSize ],
|
|
// Loads
|
|
type_name, inputAVectorString, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
|
|
type_name, inputBVectorString, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
|
|
type_name, inputCVectorString, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
|
|
// Dest type
|
|
type_name, vectorString,
|
|
// Store
|
|
( vectorSize == 3 ) ? kernel_qc_store_vec3 : kernel_qc_store_plain );
|
|
|
|
ptr = programString;
|
|
err = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test" );
|
|
test_error( err, "Unable to create test kernel" );
|
|
|
|
err = clSetKernelArg( kernel, 0, sizeof streams[0], &streams[0] );
|
|
err |= clSetKernelArg( kernel, 1, sizeof streams[1], &streams[1] );
|
|
err |= clSetKernelArg( kernel, 2, sizeof streams[2], &streams[2] );
|
|
err |= clSetKernelArg( kernel, 3, sizeof streams[3], &streams[3] );
|
|
test_error(err, "clSetKernelArgs failed");
|
|
|
|
// Run
|
|
threads[0] = (size_t)num_elements;
|
|
|
|
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueNDRangeKernel failed");
|
|
|
|
// Read and verify results
|
|
err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, type_size*num_elements * vectorSize, (void *)output_ptr, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueReadBuffer failed");
|
|
|
|
// log_info("Performing verification\n");
|
|
int error_count = 0;
|
|
|
|
char *inputAPtr = (char *)input_ptr[ 0 ];
|
|
char *inputBPtr = (char *)input_ptr[ 1 ];
|
|
cl_char *inputCPtr = (cl_char *)input_ptr[ 2 ];
|
|
char *actualPtr = (char *)output_ptr;
|
|
|
|
for( int i = 0; i < num_elements; i++ )
|
|
{
|
|
for( int j = 0; j < vectorSize; j++ )
|
|
{
|
|
char *expectedPtr = ( *inputCPtr ) ? inputAPtr : inputBPtr;
|
|
if( memcmp( expectedPtr, actualPtr, type_size ) != 0 )
|
|
{
|
|
#if 0
|
|
char expectedStr[ 128 ], actualStr[ 128 ], inputAStr[ 128 ], inputBStr[ 128 ];
|
|
print_type_to_string( type, inputAPtr, inputAStr );
|
|
print_type_to_string( type, inputBPtr, inputBStr );
|
|
print_type_to_string( type, expectedPtr, expectedStr );
|
|
print_type_to_string( type, actualPtr, actualStr );
|
|
log_error( "cl_%s verification failed at element %d:%d (expected %s, got %s, inputs: %s, %s, %s)\n",
|
|
type_name, i, j, expectedStr, actualStr, inputAStr, inputBStr, ( *inputCPtr ) ? "true" : "false" );
|
|
#endif
|
|
error_count++;
|
|
}
|
|
// Advance for each element member. Note if any of the vec sizes are 1, they don't advance here
|
|
inputAPtr += ( inputAVecSize == 1 ) ? 0 : type_size;
|
|
inputBPtr += ( inputBVecSize == 1 ) ? 0 : type_size;
|
|
inputCPtr += ( inputCVecSize == 1 ) ? 0 : type_size;
|
|
actualPtr += ( vectorSize == 1 ) ? 0 : type_size;
|
|
}
|
|
// Reverse for the member advance. If the vec sizes are 1, we need to advance, but otherwise they're already correct
|
|
inputAPtr += ( inputAVecSize == 1 ) ? type_size : 0;
|
|
inputBPtr += ( inputBVecSize == 1 ) ? type_size : 0;
|
|
inputCPtr += ( inputCVecSize == 1 ) ? type_size : 0;
|
|
actualPtr += ( vectorSize == 1 ) ? type_size : 0;
|
|
}
|
|
|
|
// cleanup
|
|
clReleaseMemObject(streams[0]);
|
|
clReleaseMemObject(streams[1]);
|
|
clReleaseMemObject(streams[2]);
|
|
clReleaseMemObject(streams[3]);
|
|
clReleaseKernel(kernel);
|
|
clReleaseProgram(program);
|
|
free(input_ptr[0]);
|
|
free(input_ptr[1]);
|
|
free(input_ptr[2]);
|
|
free(output_ptr);
|
|
free_mtdata( s_randStates );
|
|
|
|
return error_count;
|
|
}
|