mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
1441 lines
62 KiB
C
1441 lines
62 KiB
C
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <math.h>
|
|
#include <string.h>
|
|
|
|
#if !defined(_WIN32)
|
|
#include <stdbool.h>
|
|
#endif
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
|
|
#include "procs.h"
|
|
#include "../../test_common/harness/conversions.h"
|
|
#include "../../test_common/harness/ThreadPool.h"
|
|
|
|
#define NUM_TESTS 23
|
|
|
|
#define LONG_MATH_SHIFT_SIZE 26
|
|
#define QUICK_MATH_SHIFT_SIZE 16
|
|
|
|
static const char *kernel_code =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = srcA[tid] %s srcB[tid];\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_V3 =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( vload3( tid, srcA ) %s vload3( tid, srcB), tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_V3_scalar_vector =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( srcA[tid] %s vload3( tid, srcB), tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_V3_vector_scalar =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( vload3( tid, srcA ) %s srcB[tid], tid, dst );\n"
|
|
"}\n";
|
|
|
|
|
|
// Separate kernel here because it does not fit the pattern
|
|
static const char *not_kernel_code =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = %ssrcA[tid];\n"
|
|
"}\n";
|
|
|
|
static const char *not_kernel_code_V3 =
|
|
"__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( %s vload3( tid, srcA ), tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_scalar_shift =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = srcA[tid] %s srcB[tid]%s;\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_scalar_shift_V3 =
|
|
"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( vload3( tid, srcA) %s vload3( tid, srcB )%s, tid, dst );\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_question_colon =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" dst[tid] = (srcA[tid]%s < srcB[tid]%s) ? srcA[tid] : srcB[tid];\n"
|
|
"}\n";
|
|
|
|
static const char *kernel_code_question_colon_V3 =
|
|
"__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" vstore3( (vload3( tid, srcA)%s < vload3(tid, srcB)%s) ? vload3( tid, srcA) : vload3( tid, srcB), tid, dst );\n"
|
|
"}\n";
|
|
|
|
|
|
|
|
|
|
// External verification and data generation functions
|
|
extern const char *tests[];
|
|
extern const char *test_names[];
|
|
extern int verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n);
|
|
extern void init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) ;
|
|
extern int verify_ulong(int test, size_t vector_size, cl_ulong *inptrA, cl_ulong *inptrB, cl_ulong *outptr, size_t n);
|
|
extern void init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) ;
|
|
extern int verify_int(int test, size_t vector_size, cl_int *inptrA, cl_int *inptrB, cl_int *outptr, size_t n);
|
|
extern void init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) ;
|
|
extern int verify_uint(int test, size_t vector_size, cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n);
|
|
extern void init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) ;
|
|
extern int verify_short(int test, size_t vector_size, cl_short *inptrA, cl_short *inptrB, cl_short *outptr, size_t n);
|
|
extern void init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) ;
|
|
extern int verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n);
|
|
extern void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) ;
|
|
extern int verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n);
|
|
extern void init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) ;
|
|
extern int verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n);
|
|
extern void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) ;
|
|
|
|
// Supported type list
|
|
const ExplicitType types[] = {
|
|
kChar,
|
|
kUChar,
|
|
kShort,
|
|
kUShort,
|
|
kInt,
|
|
kUInt,
|
|
kLong,
|
|
kULong,
|
|
};
|
|
|
|
enum Style
|
|
{
|
|
kDontCare=0,
|
|
kBothVectors,
|
|
kInputAScalar,
|
|
kInputBScalar,
|
|
kVectorScalarScalar, // for the ?: operator only; indicates vector ? scalar : scalar.
|
|
kInputCAlsoScalar = 0x80 // Or'ed flag to indicate that the selector for the ?: operator is also scalar
|
|
};
|
|
|
|
typedef struct _perThreadData
|
|
{
|
|
cl_mem m_streams[3];
|
|
cl_int *m_input_ptr[2], *m_output_ptr;
|
|
size_t m_type_size;
|
|
cl_program m_program[NUM_TESTS];
|
|
cl_kernel m_kernel[NUM_TESTS];
|
|
} perThreadData;
|
|
|
|
|
|
perThreadData * perThreadDataNew()
|
|
{
|
|
perThreadData * pThis = (perThreadData *)malloc(sizeof(perThreadData));
|
|
|
|
|
|
memset(pThis->m_program, 0, sizeof(cl_program)*NUM_TESTS);
|
|
memset(pThis->m_kernel, 0, sizeof(cl_kernel)*NUM_TESTS);
|
|
|
|
pThis->m_input_ptr[0] = pThis->m_input_ptr[1] = NULL;
|
|
pThis->m_output_ptr = NULL;
|
|
|
|
return pThis;
|
|
}
|
|
|
|
|
|
void perThreadDataDestroy(perThreadData * pThis)
|
|
{
|
|
int i;
|
|
// cleanup
|
|
clReleaseMemObject(pThis->m_streams[0]);
|
|
clReleaseMemObject(pThis->m_streams[1]);
|
|
clReleaseMemObject(pThis->m_streams[2]);
|
|
for (i=0; i<NUM_TESTS; i++)
|
|
{
|
|
if (pThis->m_kernel[i] != NULL) clReleaseKernel(pThis->m_kernel[i]);
|
|
if (pThis->m_program[i] != NULL) clReleaseProgram(pThis->m_program[i]);
|
|
}
|
|
free(pThis->m_input_ptr[0]);
|
|
free(pThis->m_input_ptr[1]);
|
|
free(pThis->m_output_ptr);
|
|
|
|
free(pThis);
|
|
}
|
|
|
|
|
|
cl_int perThreadDataInit(perThreadData * pThis, ExplicitType type,
|
|
int num_elements, int vectorSize,
|
|
int inputAVecSize, int inputBVecSize,
|
|
cl_context context, int start_test_ID,
|
|
int end_test_ID, int testID)
|
|
{
|
|
int i;
|
|
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
|
|
const char *type_name = get_explicit_type_name(type);
|
|
pThis->m_type_size = get_explicit_type_size(type);
|
|
int err;
|
|
// Used for the && and || tests where the vector case returns a signed value
|
|
const char *signed_type_name;
|
|
switch (type) {
|
|
case kChar:
|
|
case kUChar:
|
|
signed_type_name = get_explicit_type_name(kChar);
|
|
break;
|
|
case kShort:
|
|
case kUShort:
|
|
signed_type_name = get_explicit_type_name(kShort);
|
|
break;
|
|
case kInt:
|
|
case kUInt:
|
|
signed_type_name = get_explicit_type_name(kInt);
|
|
break;
|
|
case kLong:
|
|
case kULong:
|
|
signed_type_name = get_explicit_type_name(kLong);
|
|
break;
|
|
default:
|
|
log_error("Invalid type.\n");
|
|
return -1;
|
|
break;
|
|
}
|
|
|
|
pThis->m_input_ptr[0] =
|
|
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
|
|
pThis->m_input_ptr[1] =
|
|
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
|
|
pThis->m_output_ptr =
|
|
(cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
|
|
pThis->m_streams[0] =
|
|
clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
|
|
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
pThis->m_streams[1] =
|
|
clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * inputBVecSize, NULL, &err );
|
|
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
pThis->m_streams[2] =
|
|
clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), pThis->m_type_size * num_elements * vectorSize, NULL, &err );
|
|
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
const char *vectorString = sizeNames[ vectorSize ];
|
|
const char *inputAVectorString = sizeNames[ inputAVecSize ];
|
|
const char *inputBVectorString = sizeNames[ inputBVecSize ];
|
|
|
|
if (testID == -1)
|
|
{
|
|
log_info("\tTesting %s%s (%d bytes)...\n", type_name, vectorString, (int)(pThis->m_type_size*vectorSize));
|
|
}
|
|
|
|
char programString[4096];
|
|
const char *ptr;
|
|
|
|
|
|
const char * kernel_code_base = ( vectorSize != 3 ) ? kernel_code : ( inputAVecSize == 1 ) ? kernel_code_V3_scalar_vector : ( inputBVecSize == 1 ) ? kernel_code_V3_vector_scalar : kernel_code_V3;
|
|
|
|
for (i=start_test_ID; i<end_test_ID; i++) {
|
|
switch (i) {
|
|
case 10:
|
|
case 11:
|
|
sprintf(programString, vectorSize == 3 ? kernel_code_scalar_shift_V3 : kernel_code_scalar_shift, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, tests[i], ((vectorSize == 1) ? "":".s0"));
|
|
break;
|
|
case 12:
|
|
sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, tests[i]);
|
|
break;
|
|
case 13:
|
|
sprintf(programString, vectorSize == 3 ? kernel_code_question_colon_V3 : kernel_code_question_colon,
|
|
type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, ((vectorSize == 1) ? "":".s0"), ((vectorSize == 1) ? "":".s0")) ;
|
|
break;
|
|
case 14:
|
|
case 15:
|
|
case 16:
|
|
case 17:
|
|
case 18:
|
|
case 19:
|
|
case 20:
|
|
case 21:
|
|
// Need an unsigned result here for vector sizes > 1
|
|
sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
|
|
break;
|
|
case 22:
|
|
// Need an unsigned result here for vector sizes > 1
|
|
sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
|
|
break;
|
|
default:
|
|
sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
|
|
type_name, vectorString, tests[i]);
|
|
break;
|
|
}
|
|
|
|
//printf("kernel: %s\n", programString);
|
|
ptr = programString;
|
|
err = create_single_kernel_helper( context,
|
|
&(pThis->m_program[ i ]),
|
|
&(pThis->m_kernel[ i ]), 1,
|
|
&ptr, "test" );
|
|
test_error( err, "Unable to create test kernel" );
|
|
err = clSetKernelArg(pThis->m_kernel[i], 0,
|
|
sizeof pThis->m_streams[0],
|
|
&(pThis->m_streams[0]) );
|
|
err |= clSetKernelArg(pThis->m_kernel[i], 1,
|
|
sizeof pThis->m_streams[1],
|
|
&(pThis->m_streams[1]) );
|
|
err |= clSetKernelArg(pThis->m_kernel[i], 2,
|
|
sizeof pThis->m_streams[2],
|
|
&(pThis->m_streams[2]) );
|
|
test_error(err, "clSetKernelArgs failed");
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
typedef struct _globalThreadData
|
|
{
|
|
cl_device_id m_deviceID;
|
|
cl_context m_context;
|
|
// cl_command_queue m_queue;
|
|
int m_num_elements;
|
|
int m_threadcount;
|
|
int m_vectorSize;
|
|
int m_num_runs_shift;
|
|
Style m_style;
|
|
ExplicitType m_type;
|
|
MTdata * m_pRandData;
|
|
uint64_t m_offset;
|
|
int m_testID;
|
|
perThreadData **m_arrPerThreadData;
|
|
} globalThreadData;
|
|
|
|
|
|
|
|
globalThreadData * globalThreadDataNew(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements,
|
|
int vectorSize, Style style, int num_runs_shift,
|
|
ExplicitType type, int testID,
|
|
int threadcount)
|
|
{
|
|
int i;
|
|
globalThreadData * pThis = (globalThreadData *)malloc(sizeof(globalThreadData));
|
|
pThis->m_deviceID = deviceID;
|
|
pThis->m_context = context;
|
|
// pThis->m_queue = queue;
|
|
pThis->m_num_elements = num_elements;
|
|
pThis->m_num_runs_shift = num_runs_shift;
|
|
pThis->m_vectorSize = vectorSize;
|
|
pThis->m_style = style;
|
|
pThis->m_type = type;
|
|
pThis->m_offset = (uint64_t)0;
|
|
pThis->m_testID = testID;
|
|
pThis->m_arrPerThreadData = NULL;
|
|
pThis->m_threadcount = threadcount;
|
|
|
|
pThis->m_pRandData = (MTdata *)malloc(threadcount*sizeof(MTdata));
|
|
pThis->m_arrPerThreadData = (perThreadData **)
|
|
malloc(threadcount*sizeof(perThreadData *));
|
|
for(i=0; i < threadcount; ++i)
|
|
{
|
|
pThis->m_pRandData[i] = init_genrand(i+1);
|
|
pThis->m_arrPerThreadData[i] = NULL;
|
|
}
|
|
|
|
return pThis;
|
|
}
|
|
|
|
void globalThreadDataDestroy(globalThreadData * pThis)
|
|
{
|
|
int i;
|
|
|
|
for(i=0; i < pThis->m_threadcount; ++i)
|
|
{
|
|
free_mtdata(pThis->m_pRandData[i]);
|
|
if(pThis->m_arrPerThreadData[i] != NULL)
|
|
{
|
|
perThreadDataDestroy(pThis->m_arrPerThreadData[i]);
|
|
}
|
|
}
|
|
free(pThis->m_arrPerThreadData);
|
|
free(pThis->m_pRandData);
|
|
free(pThis);
|
|
}
|
|
|
|
int
|
|
test_integer_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, Style style, int num_runs_shift, ExplicitType type, int testID, MTdata randIn, uint64_t startIndx, uint64_t endIndx,
|
|
perThreadData ** ppThreadData);
|
|
|
|
|
|
cl_int test_integer_ops_do_thread( cl_uint job_id, cl_uint thread_id, void *userInfo )
|
|
{
|
|
cl_int error; cl_int result;
|
|
globalThreadData * threadInfoGlobal = (globalThreadData *)userInfo;
|
|
cl_command_queue queue;
|
|
|
|
#if THREAD_DEBUG
|
|
log_error("Thread %x (job %x) about to create command queue\n",
|
|
thread_id, job_id);
|
|
#endif
|
|
|
|
queue = clCreateCommandQueue (threadInfoGlobal->m_context,
|
|
threadInfoGlobal->m_deviceID,0,
|
|
&error);
|
|
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("Thread %x (job %x) could not create command queue\n",
|
|
thread_id, job_id);
|
|
return error; // should we clean up the queue too?
|
|
}
|
|
|
|
#if THREAD_DEBUG
|
|
log_error("Thread %x (job %x) created command queue\n",
|
|
thread_id, job_id);
|
|
#endif
|
|
|
|
result = test_integer_ops( threadInfoGlobal->m_deviceID,
|
|
threadInfoGlobal->m_context,
|
|
queue,
|
|
threadInfoGlobal->m_num_elements,
|
|
threadInfoGlobal->m_vectorSize, threadInfoGlobal->m_style,
|
|
threadInfoGlobal->m_num_runs_shift,
|
|
threadInfoGlobal->m_type, threadInfoGlobal->m_testID,
|
|
threadInfoGlobal->m_pRandData[thread_id],
|
|
threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*job_id,
|
|
threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*(job_id+1),
|
|
&(threadInfoGlobal->m_arrPerThreadData[thread_id])
|
|
);
|
|
|
|
if(result != 0)
|
|
{
|
|
log_error("Thread %x (job %x) failed test_integer_ops with result %x\n",
|
|
thread_id, job_id, result);
|
|
// return error;
|
|
}
|
|
|
|
|
|
error = clReleaseCommandQueue(queue);
|
|
if(error != CL_SUCCESS)
|
|
{
|
|
log_error("Thread %x (job %x) could not release command queue\n",
|
|
thread_id, job_id);
|
|
return error;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
int
|
|
test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, Style style, int num_runs_shift, ExplicitType type, int testID)
|
|
{
|
|
globalThreadData * pThreadInfo = NULL;
|
|
cl_int result=0;
|
|
cl_uint threadcount = GetThreadCount();
|
|
|
|
// This test will run threadcount threads concurrently; each thread will
|
|
// execute test_integer_ops() which will allocate 2 OpenCL buffers on the
|
|
// device; each buffer has size num_elements * type_size * vectorSize. We
|
|
// need to make sure that the total device memory allocated by all threads
|
|
// does not exceed the maximum memory on the device. If it does, we decrease
|
|
// num_elements until all threads combined will not over-subscribe device
|
|
// memory.
|
|
cl_ulong maxDeviceGlobalMem;
|
|
result =
|
|
clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
|
|
sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL);
|
|
if (result != CL_SUCCESS) {
|
|
log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n",
|
|
result);
|
|
return result;
|
|
}
|
|
|
|
if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) {
|
|
maxDeviceGlobalMem = (cl_ulong)SIZE_MAX;
|
|
}
|
|
|
|
// Let's not take all device memory - reduce by 75%
|
|
maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2;
|
|
// Now reduce num_elements so that the total device memory usage does not
|
|
// exceed 75% of global device memory.
|
|
size_t type_size = get_explicit_type_size(type);
|
|
while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize >
|
|
maxDeviceGlobalMem) {
|
|
num_elements >>= 1;
|
|
}
|
|
|
|
uint64_t startIndx = (uint64_t)0;
|
|
uint64_t endIndx = (1ULL<<num_runs_shift);
|
|
uint64_t jobcount = (endIndx-startIndx)/num_elements;
|
|
|
|
if(jobcount==0)
|
|
{
|
|
jobcount = 1;
|
|
}
|
|
|
|
pThreadInfo = globalThreadDataNew(deviceID, context, queue, num_elements,
|
|
vectorSize, style, num_runs_shift,
|
|
type, testID, threadcount);
|
|
|
|
|
|
pThreadInfo->m_offset = startIndx;
|
|
|
|
#if THREAD_DEBUG
|
|
log_error("Launching %llx jobs\n",
|
|
jobcount);
|
|
#endif
|
|
|
|
result = ThreadPool_Do(test_integer_ops_do_thread, (cl_uint)jobcount, (void *)pThreadInfo);
|
|
|
|
if(result != 0)
|
|
{
|
|
// cleanup ??
|
|
log_error("ThreadPool_Do return non-success value %d\n", result);
|
|
|
|
}
|
|
globalThreadDataDestroy(pThreadInfo);
|
|
return result;
|
|
}
|
|
|
|
|
|
|
|
int
|
|
test_integer_ops(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements,
|
|
int vectorSize, Style style, int num_runs_shift,
|
|
ExplicitType type, int testID, MTdata randDataIn,
|
|
uint64_t startIndx, uint64_t endIndx,
|
|
perThreadData ** ppThreadData)
|
|
{
|
|
size_t threads[1];
|
|
int err;
|
|
int i;
|
|
int inputAVecSize, inputBVecSize;
|
|
|
|
|
|
|
|
inputAVecSize = inputBVecSize = vectorSize;
|
|
if( style == kInputAScalar )
|
|
inputAVecSize = 1;
|
|
else if( style == kInputBScalar )
|
|
inputBVecSize = 1;
|
|
|
|
/*
|
|
if( inputAVecSize != inputBVecSize )
|
|
log_info("Testing \"%s\" on %s%d (%s-%s inputs) (range %llx - %llx of 0-%llx)\n",
|
|
test_names[testID],
|
|
get_explicit_type_name(type), vectorSize,
|
|
( inputAVecSize == 1 ) ? "scalar" : "vector",
|
|
( inputBVecSize == 1 ) ? "scalar" : "vector",
|
|
startIndx, endIndx, (1ULL<<num_runs_shift) );
|
|
else
|
|
log_info("Testing \"%s\" on %s%d (range %llx - %llx of 0-%llx)\n",
|
|
test_names[testID],
|
|
get_explicit_type_name(type), vectorSize,
|
|
startIndx, endIndx, (1ULL<<num_runs_shift));
|
|
*/
|
|
|
|
|
|
// Figure out which sub-test to run, or all of them
|
|
int start_test_ID = 0;
|
|
int end_test_ID = NUM_TESTS;
|
|
if (testID != -1) {
|
|
start_test_ID = testID;
|
|
end_test_ID = testID+1;
|
|
}
|
|
if (testID > NUM_TESTS) {
|
|
log_error("Invalid test ID: %d\n", testID);
|
|
return -1;
|
|
}
|
|
|
|
if(*ppThreadData == NULL)
|
|
{
|
|
*ppThreadData = perThreadDataNew();
|
|
err = perThreadDataInit(*ppThreadData,
|
|
type, num_elements, vectorSize,
|
|
inputAVecSize, inputBVecSize,
|
|
context, start_test_ID,
|
|
end_test_ID, testID);
|
|
test_error(err, "failed to init per thread data\n");
|
|
}
|
|
|
|
perThreadData * pThreadData = *ppThreadData;
|
|
|
|
|
|
|
|
threads[0] = (size_t)num_elements;
|
|
int error_count = 0;
|
|
for (i=start_test_ID; i<end_test_ID; i++)
|
|
{
|
|
uint64_t indx;
|
|
|
|
|
|
if(startIndx >= endIndx)
|
|
{
|
|
startIndx = (uint64_t)0;
|
|
endIndx = (1ULL<<num_runs_shift);
|
|
}
|
|
for (indx=startIndx; indx < endIndx; indx+=num_elements)
|
|
{
|
|
|
|
switch (type) {
|
|
case kChar:
|
|
init_char_data(indx, num_elements * vectorSize, (cl_char**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kUChar:
|
|
init_uchar_data(indx, num_elements * vectorSize, (cl_uchar**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kShort:
|
|
init_short_data(indx, num_elements * vectorSize, (cl_short**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kUShort:
|
|
init_ushort_data(indx, num_elements * vectorSize, (cl_ushort**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kInt:
|
|
init_int_data(indx, num_elements * vectorSize, (cl_int**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kUInt:
|
|
init_uint_data(indx, num_elements * vectorSize, (cl_uint**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kLong:
|
|
init_long_data(indx, num_elements * vectorSize, (cl_long**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
case kULong:
|
|
init_ulong_data(indx, num_elements * vectorSize, (cl_ulong**)(pThreadData->m_input_ptr), randDataIn);
|
|
break;
|
|
default:
|
|
err = 1;
|
|
log_error("Invalid type.\n");
|
|
break;
|
|
}
|
|
|
|
|
|
err = clEnqueueWriteBuffer(queue, pThreadData->m_streams[0], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputAVecSize, (void *)pThreadData->m_input_ptr[0], 0, NULL, NULL);
|
|
test_error(err, "clEnqueueWriteBuffer failed");
|
|
err = clEnqueueWriteBuffer( queue, pThreadData->m_streams[1], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputBVecSize, (void *)pThreadData->m_input_ptr[1], 0, NULL, NULL );
|
|
test_error(err, "clEnqueueWriteBuffer failed");
|
|
|
|
err = clEnqueueNDRangeKernel( queue, pThreadData->m_kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueNDRangeKernel failed");
|
|
|
|
err = clEnqueueReadBuffer( queue, pThreadData->m_streams[2], CL_TRUE, 0, pThreadData->m_type_size*num_elements * vectorSize, (void *)pThreadData->m_output_ptr, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueReadBuffer failed");
|
|
|
|
// log_info("Performing verification\n");
|
|
|
|
// If one of the inputs are scalar, we need to extend the input values to vectors
|
|
// to accommodate the verify functions
|
|
if( vectorSize > 1 )
|
|
{
|
|
char * p = NULL;
|
|
if( style == kInputAScalar )
|
|
p = (char *)pThreadData->m_input_ptr[ 0 ];
|
|
else if( style == kInputBScalar )
|
|
p = (char *)pThreadData->m_input_ptr[ 1 ];
|
|
if( p != NULL )
|
|
{
|
|
for( int element = num_elements - 1; element >= 0; element-- )
|
|
{
|
|
for( int vec = ( element == 0 ) ? 1 : 0; vec < vectorSize; vec++ )
|
|
memcpy( p + ( element * vectorSize + vec ) * pThreadData->m_type_size, p + element * pThreadData->m_type_size, pThreadData->m_type_size );
|
|
}
|
|
}
|
|
}
|
|
|
|
switch (type) {
|
|
case kChar:
|
|
err = verify_char(i, vectorSize, (cl_char*)pThreadData->m_input_ptr[0], (cl_char*)pThreadData->m_input_ptr[1], (cl_char*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kUChar:
|
|
err = verify_uchar(i, vectorSize, (cl_uchar*)pThreadData->m_input_ptr[0], (cl_uchar*)pThreadData->m_input_ptr[1], (cl_uchar*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kShort:
|
|
err = verify_short(i, vectorSize, (cl_short*)pThreadData->m_input_ptr[0], (cl_short*)pThreadData->m_input_ptr[1], (cl_short*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kUShort:
|
|
err = verify_ushort(i, vectorSize, (cl_ushort*)pThreadData->m_input_ptr[0], (cl_ushort*)pThreadData->m_input_ptr[1], (cl_ushort*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kInt:
|
|
err = verify_int(i, vectorSize, (cl_int*)pThreadData->m_input_ptr[0], (cl_int*)pThreadData->m_input_ptr[1], (cl_int*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kUInt:
|
|
err = verify_uint(i, vectorSize, (cl_uint*)pThreadData->m_input_ptr[0], (cl_uint*)pThreadData->m_input_ptr[1], (cl_uint*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kLong:
|
|
err = verify_long(i, vectorSize, (cl_long*)pThreadData->m_input_ptr[0], (cl_long*)pThreadData->m_input_ptr[1], (cl_long*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
case kULong:
|
|
err = verify_ulong(i, vectorSize, (cl_ulong*)pThreadData->m_input_ptr[0], (cl_ulong*)pThreadData->m_input_ptr[1], (cl_ulong*)pThreadData->m_output_ptr, num_elements * vectorSize);
|
|
break;
|
|
default:
|
|
err = 1;
|
|
log_error("Invalid type.\n");
|
|
break;
|
|
}
|
|
#if 0
|
|
if (err) {
|
|
log_error( "* inASize: %d inBSize: %d numElem: %d\n", inputAVecSize, inputBVecSize, num_elements );
|
|
cl_char *inP = (cl_char *)pThreadData->m_input_ptr[0];
|
|
log_error( "from 18:\n" );
|
|
for( int q = 18; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_input_ptr[1];
|
|
for( int q = 18; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_output_ptr;
|
|
for( int q = 18; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
log_error( "from 36:\n" );
|
|
inP = (cl_char *)pThreadData->m_input_ptr[0];
|
|
for( int q = 36; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_input_ptr[1];
|
|
for( int q = 36; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
inP = (cl_char *)pThreadData->m_output_ptr;
|
|
for( int q = 36; q < 64; q++ )
|
|
{
|
|
log_error( "%02x ", inP[ q ] );
|
|
}
|
|
log_error( "\n" );
|
|
error_count++;
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
|
|
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
|
|
if (err) {
|
|
log_error("\t\t%s%s test %s failed (range %llx - %llx of 0-%llx)\n",
|
|
get_explicit_type_name(type), sizeNames[vectorSize],
|
|
test_names[i],
|
|
startIndx, endIndx,
|
|
(1ULL<<num_runs_shift));
|
|
} else {
|
|
log_info("\t\t%s%s test %s passed (range %llx - %llx of 0-%llx)\n",
|
|
get_explicit_type_name(type), sizeNames[vectorSize],
|
|
test_names[i],
|
|
startIndx, endIndx,
|
|
(1ULL<<num_runs_shift));
|
|
}
|
|
*/
|
|
}
|
|
|
|
|
|
|
|
return error_count;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Run all the vector sizes for a given test
|
|
int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID) {
|
|
int errors = 0;
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/1, 1, kBothVectors, num, type, testID);
|
|
log_info("."); fflush(stdout);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/2, 2, kBothVectors, num, type, testID);
|
|
log_info("."); fflush(stdout);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/3, 3, kBothVectors, num, type, testID);
|
|
log_info("."); fflush(stdout);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/4, 4, kBothVectors, num, type, testID);
|
|
log_info("."); fflush(stdout);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/8, 8, kBothVectors, num, type, testID);
|
|
log_info("."); fflush(stdout);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/16, 16, kBothVectors, num, type, testID);
|
|
log_info("."); fflush(stdout);
|
|
return errors;
|
|
}
|
|
|
|
// Run multiple tests for a given type
|
|
int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) {
|
|
int errors = 0;
|
|
int i;
|
|
for (i=0; i<total_tests; i++)
|
|
{
|
|
int localErrors;
|
|
log_info("Testing \"%s\".", test_names[tests[i]]); fflush( stdout );
|
|
localErrors = run_specific_test(deviceID, context, queue, num_elements, type, num, tests[i]);
|
|
if( localErrors )
|
|
log_info( "FAILED.\n" );
|
|
else
|
|
log_info( "passed.\n" );
|
|
|
|
errors += localErrors;
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
// Run all the math tests for a given type
|
|
int run_test_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {0, 1, 2, 3, 4};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all the logic tests for a given type
|
|
int run_test_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {5, 6, 7, 12, 14, 15, 22};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all the shifting tests for a given type
|
|
int run_test_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {8, 9, 10, 11};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all the comparison tests for a given type
|
|
int run_test_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int tests[] = {13, 16, 17, 18, 19, 20, 21};
|
|
return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
|
|
}
|
|
|
|
// Run all tests for a given type
|
|
int run_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
|
|
int errors = 0;
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 1, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 2, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 3, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 4, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 8, kBothVectors, num, type, -1);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 16, kBothVectors, num, type, -1);
|
|
return errors;
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Long tests
|
|
// -----------------
|
|
int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// ULong tests
|
|
// -----------------
|
|
int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Int tests
|
|
// -----------------
|
|
int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// UInt tests
|
|
// -----------------
|
|
int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Short tests
|
|
// -----------------
|
|
int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// UShort tests
|
|
// -----------------
|
|
int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// Char tests
|
|
// -----------------
|
|
int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
// -----------------
|
|
// UChar tests
|
|
// -----------------
|
|
int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_math(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_logic(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_shift(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test_compare(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
|
|
|
|
// These are kept for debugging if you want to run all the tests together.
|
|
|
|
int test_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
int test_quick_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
|
return run_test(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
|
|
}
|
|
|
|
// Prototype for below
|
|
int test_question_colon_op(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
|
|
int vectorSize, Style style, ExplicitType type );
|
|
|
|
// Run all the vector sizes for a given test in scalar-vector and vector-scalar modes
|
|
int run_test_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID)
|
|
{
|
|
int sizes[] = { 2, 3, 4, 8, 16, 0 };
|
|
int errors = 0;
|
|
|
|
for( int i = 0; sizes[ i ] != 0; i++ )
|
|
{
|
|
if( testID == 13 )
|
|
{
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kVectorScalarScalar, type );
|
|
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (Style)(kBothVectors | kInputCAlsoScalar), type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (Style)(kInputAScalar | kInputCAlsoScalar), type );
|
|
errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (Style)(kInputBScalar | kInputCAlsoScalar), type );
|
|
}
|
|
else
|
|
{
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, num, type, testID);
|
|
errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, num, type, testID);
|
|
}
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
// Run all the tests for scalar-vector and vector-scalar for a given type
|
|
int run_vector_scalar_tests( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num )
|
|
{
|
|
int errors = 0;
|
|
size_t i;
|
|
|
|
// Shift operators:
|
|
// a) cannot take scalars as first parameter and vectors as second
|
|
// b) have the vector >> scalar case tested by tests 10 and 11
|
|
// so they get skipped entirely
|
|
|
|
int testsToRun[] = { 0, 1, 2, 3, 4, 5, 6, 7,
|
|
13, 14, 15, 16, 17, 18, 19, 20, 21 };
|
|
for (i=0; i< sizeof(testsToRun)/sizeof(testsToRun[0]); i++)
|
|
{
|
|
errors += run_test_sizes(deviceID, context, queue, 2048, type, num, testsToRun[i]);
|
|
}
|
|
return errors;
|
|
}
|
|
|
|
int test_vector_scalar_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
|
{
|
|
int errors = 0;
|
|
int numTypes = sizeof( types ) / sizeof( types[ 0 ] );
|
|
|
|
for( int t = 0; t < numTypes; t++ )
|
|
{
|
|
errors += run_vector_scalar_tests( deviceID, context, queue, num_elements, types[ t ], 1 );
|
|
break;
|
|
}
|
|
|
|
return errors;
|
|
}
|
|
|
|
void generate_random_bool_data( size_t count, MTdata d, cl_char *outData, size_t outDataSize )
|
|
{
|
|
cl_uint bits = genrand_int32(d);
|
|
cl_uint bitsLeft = 32;
|
|
|
|
memset( outData, 0, outDataSize * count );
|
|
|
|
for( size_t i = 0; i < count; i++ )
|
|
{
|
|
if( 0 == bitsLeft)
|
|
{
|
|
bits = genrand_int32(d);
|
|
bitsLeft = 32;
|
|
}
|
|
|
|
// Note: we will be setting just any bit non-zero for the type, so we can easily skip past
|
|
// and just write bytes (assuming the entire output buffer is already zeroed, which we did)
|
|
*outData = ( bits & 1 ) ? 0xff : 0;
|
|
|
|
bits >>= 1; bitsLeft -= 1;
|
|
|
|
outData += outDataSize;
|
|
}
|
|
}
|
|
|
|
static const char *kernel_question_colon_full =
|
|
"__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *srcC, __global %s%s *dst)\n"
|
|
"{\n"
|
|
" int tid = get_global_id(0);\n"
|
|
"\n"
|
|
" %s%s valA = %ssrcA%s"
|
|
" %s%s valB = %ssrcB%s"
|
|
" %s%s valC = %ssrcC%s"
|
|
" %s%s destVal = valC ? valA : valB;\n"
|
|
" %s"
|
|
"}\n";
|
|
|
|
static const char *kernel_qc_load_plain_prefix = "";
|
|
static const char *kernel_qc_load_plain_suffix = "[ tid ];\n";
|
|
|
|
static const char *kernel_qc_load_vec3_prefix = "vload3( tid, ";
|
|
static const char *kernel_qc_load_vec3_suffix = ");\n";
|
|
|
|
static const char *kernel_qc_store_plain = "dst[ tid ] = destVal;\n";
|
|
static const char *kernel_qc_store_vec3 = "vstore3( destVal, tid, dst );\n";
|
|
|
|
int test_question_colon_op(cl_device_id deviceID, cl_context context,
|
|
cl_command_queue queue, int num_elements,
|
|
int vectorSize, Style style, ExplicitType type )
|
|
{
|
|
cl_mem streams[4];
|
|
cl_int *input_ptr[3], *output_ptr;
|
|
cl_program program;
|
|
cl_kernel kernel;
|
|
size_t threads[1];
|
|
int err;
|
|
int inputAVecSize, inputBVecSize, inputCVecSize;
|
|
const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
// Identical to sizeNames but with a blank for 3, since we use vload/store there
|
|
const char * paramSizeNames[] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
|
|
MTdata s_randStates;
|
|
|
|
inputAVecSize = inputBVecSize = inputCVecSize = vectorSize;
|
|
if( style & kInputCAlsoScalar )
|
|
{
|
|
style = (Style)( style & ~kInputCAlsoScalar );
|
|
inputCVecSize = 1;
|
|
}
|
|
if( style == kInputAScalar )
|
|
inputAVecSize = 1;
|
|
else if( style == kInputBScalar )
|
|
inputBVecSize = 1;
|
|
else if( style == kVectorScalarScalar )
|
|
inputAVecSize = inputBVecSize = 1;
|
|
|
|
log_info("Testing \"?:\" on %s%d (%s?%s:%s inputs)\n",
|
|
get_explicit_type_name(type), vectorSize, ( inputCVecSize == 1 ) ? "scalar" : "vector",
|
|
( inputAVecSize == 1 ) ? "scalar" : "vector",
|
|
( inputBVecSize == 1 ) ? "scalar" : "vector" );
|
|
|
|
|
|
const char *type_name = get_explicit_type_name(type);
|
|
size_t type_size = get_explicit_type_size(type);
|
|
|
|
// Create and initialize I/O buffers
|
|
|
|
input_ptr[0] = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
input_ptr[1] = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
input_ptr[2] = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
output_ptr = (cl_int*)malloc(type_size * num_elements * vectorSize);
|
|
|
|
s_randStates = init_genrand( gRandomSeed );
|
|
|
|
generate_random_data( type, num_elements * inputAVecSize, s_randStates, input_ptr[ 0 ] );
|
|
generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] );
|
|
generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size );
|
|
|
|
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputAVecSize, input_ptr[0], &err);
|
|
test_error(err, "clCreateBuffer failed");
|
|
streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputBVecSize, input_ptr[1], &err );
|
|
test_error(err, "clCreateBuffer failed");
|
|
streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR), type_size * num_elements * inputCVecSize, input_ptr[2], &err );
|
|
test_error(err, "clCreateBuffer failed");
|
|
streams[3] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_WRITE_ONLY), type_size * num_elements * vectorSize, NULL, &err );
|
|
test_error(err, "clCreateBuffer failed");
|
|
|
|
const char *vectorString = sizeNames[ vectorSize ];
|
|
const char *inputAVectorString = sizeNames[ inputAVecSize ];
|
|
const char *inputBVectorString = sizeNames[ inputBVecSize ];
|
|
const char *inputCVectorString = sizeNames[ inputCVecSize ];
|
|
|
|
char programString[4096];
|
|
const char *ptr;
|
|
|
|
sprintf( programString, kernel_question_colon_full, type_name, paramSizeNames[ inputAVecSize ],
|
|
type_name, paramSizeNames[ inputBVecSize ],
|
|
type_name, paramSizeNames[ inputCVecSize ],
|
|
type_name, paramSizeNames[ vectorSize ],
|
|
// Loads
|
|
type_name, inputAVectorString, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
|
|
type_name, inputBVectorString, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
|
|
type_name, inputCVectorString, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
|
|
// Dest type
|
|
type_name, vectorString,
|
|
// Store
|
|
( vectorSize == 3 ) ? kernel_qc_store_vec3 : kernel_qc_store_plain );
|
|
|
|
ptr = programString;
|
|
err = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test" );
|
|
test_error( err, "Unable to create test kernel" );
|
|
|
|
err = clSetKernelArg( kernel, 0, sizeof streams[0], &streams[0] );
|
|
err |= clSetKernelArg( kernel, 1, sizeof streams[1], &streams[1] );
|
|
err |= clSetKernelArg( kernel, 2, sizeof streams[2], &streams[2] );
|
|
err |= clSetKernelArg( kernel, 3, sizeof streams[3], &streams[3] );
|
|
test_error(err, "clSetKernelArgs failed");
|
|
|
|
// Run
|
|
threads[0] = (size_t)num_elements;
|
|
|
|
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueNDRangeKernel failed");
|
|
|
|
// Read and verify results
|
|
err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, type_size*num_elements * vectorSize, (void *)output_ptr, 0, NULL, NULL );
|
|
test_error(err, "clEnqueueReadBuffer failed");
|
|
|
|
// log_info("Performing verification\n");
|
|
int error_count = 0;
|
|
|
|
char *inputAPtr = (char *)input_ptr[ 0 ];
|
|
char *inputBPtr = (char *)input_ptr[ 1 ];
|
|
cl_char *inputCPtr = (cl_char *)input_ptr[ 2 ];
|
|
char *actualPtr = (char *)output_ptr;
|
|
|
|
for( int i = 0; i < num_elements; i++ )
|
|
{
|
|
for( int j = 0; j < vectorSize; j++ )
|
|
{
|
|
char *expectedPtr = ( *inputCPtr ) ? inputAPtr : inputBPtr;
|
|
if( memcmp( expectedPtr, actualPtr, type_size ) != 0 )
|
|
{
|
|
#if 0
|
|
char expectedStr[ 128 ], actualStr[ 128 ], inputAStr[ 128 ], inputBStr[ 128 ];
|
|
print_type_to_string( type, inputAPtr, inputAStr );
|
|
print_type_to_string( type, inputBPtr, inputBStr );
|
|
print_type_to_string( type, expectedPtr, expectedStr );
|
|
print_type_to_string( type, actualPtr, actualStr );
|
|
log_error( "cl_%s verification failed at element %d:%d (expected %s, got %s, inputs: %s, %s, %s)\n",
|
|
type_name, i, j, expectedStr, actualStr, inputAStr, inputBStr, ( *inputCPtr ) ? "true" : "false" );
|
|
#endif
|
|
error_count++;
|
|
}
|
|
// Advance for each element member. Note if any of the vec sizes are 1, they don't advance here
|
|
inputAPtr += ( inputAVecSize == 1 ) ? 0 : type_size;
|
|
inputBPtr += ( inputBVecSize == 1 ) ? 0 : type_size;
|
|
inputCPtr += ( inputCVecSize == 1 ) ? 0 : type_size;
|
|
actualPtr += ( vectorSize == 1 ) ? 0 : type_size;
|
|
}
|
|
// Reverse for the member advance. If the vec sizes are 1, we need to advance, but otherwise they're already correct
|
|
inputAPtr += ( inputAVecSize == 1 ) ? type_size : 0;
|
|
inputBPtr += ( inputBVecSize == 1 ) ? type_size : 0;
|
|
inputCPtr += ( inputCVecSize == 1 ) ? type_size : 0;
|
|
actualPtr += ( vectorSize == 1 ) ? type_size : 0;
|
|
}
|
|
|
|
// cleanup
|
|
clReleaseMemObject(streams[0]);
|
|
clReleaseMemObject(streams[1]);
|
|
clReleaseMemObject(streams[2]);
|
|
clReleaseMemObject(streams[3]);
|
|
clReleaseKernel(kernel);
|
|
clReleaseProgram(program);
|
|
free(input_ptr[0]);
|
|
free(input_ptr[1]);
|
|
free(input_ptr[2]);
|
|
free(output_ptr);
|
|
free_mtdata( s_randStates );
|
|
|
|
return error_count;
|
|
}
|