mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Synchronise with Khronos-private Gitlab branch
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
This commit is contained in:
104
clean_tests.py
Executable file
104
clean_tests.py
Executable file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import sys, os, re
|
||||
from subprocess import Popen, PIPE
|
||||
from optparse import OptionParser
|
||||
|
||||
# trail_spaces: This method removes the trailing whitespaces and trailing tabs
|
||||
def trail_spaces(line):
    """Return *line* with trailing whitespace removed.

    The line's own terminator is preserved: a line ending in "\r\n" keeps
    "\r\n", a line ending in "\n" keeps "\n", and a line without a
    terminator (typically the last line of a file) stays unterminated
    instead of silently gaining a newline.

    All trailing whitespace (spaces, tabs, form feeds, a stray "\r", ...)
    in front of the terminator is dropped, so a whitespace-only line
    collapses to just its terminator.
    """
    # Work out the terminator first; rstrip() below would otherwise eat it.
    if line.endswith("\r\n"):
        ending = "\r\n"
    elif line.endswith("\n"):
        ending = "\n"
    else:
        ending = ""
    return line.rstrip() + ending
|
||||
|
||||
# convert_tabs: This method converts tabs to 4 spaces
|
||||
def convert_tabs(line):
    """Return *line* with every tab character replaced by four spaces.

    This is a plain substitution, not a tab-stop expansion: each tab
    becomes exactly four spaces regardless of its column. Lines without
    tabs are returned unchanged.
    """
    return line.replace("\t", "    ")
|
||||
|
||||
#convert_lineends: This method converts lineendings from DOS to Unix
|
||||
def convert_lineends(line):
    """Return *line* with DOS line endings ("\r\n") turned into Unix ("\n").

    Only the two-character sequence is rewritten; a lone "\r" is left
    alone. Lines that contain no "\r\n" are returned unchanged.
    """
    return line.replace("\r\n", "\n")
|
||||
|
||||
#processfile: This method processes each file passed to it depending
|
||||
# on the flags passed
|
||||
|
||||
def processfile(file, tabs, lineends, trails, verbose):
    """Clean one file in place.

    The whole file is read, each line is passed through the enabled
    clean-ups in the order tabs -> line endings -> trailing whitespace,
    and the result is written back over the same path.

    file     -- path of the file to rewrite
    tabs     -- when true, apply convert_tabs() to each line
    lineends -- when true, apply convert_lineends() to each line
    trails   -- when true, apply trail_spaces() to each line
    verbose  -- when true, print the path before processing it
    """
    if verbose:
        # A parenthesised single argument prints the same text on
        # Python 2 and Python 3.
        print("processing file: " + file)

    with open(file, 'r') as fr:
        data = fr.readlines()

    processed_data = []
    for line in data:
        if tabs:
            line = convert_tabs(line)
        if lineends:
            line = convert_lineends(line)
        if trails:
            line = trail_spaces(line)
        processed_data.append(line)

    # The file is only reopened for writing once every line has been
    # processed, so an error during processing leaves it untouched.
    with open(file, 'w') as fw:
        fw.writelines(processed_data)
|
||||
|
||||
#findfiles: This method finds all the code files present in current
|
||||
# directory and subdirectories.
|
||||
|
||||
def findfiles(tabs, lineends, trails, verbose):
    """Clean every C/C++ source file below the current directory.

    Walks "./" recursively, collects the files whose names end in .c,
    .cpp, .h or .hpp, and then runs processfile() on each of them with
    the given flags. The list is collected before any file is rewritten.
    """
    extensions = ('.c', '.cpp', '.h', '.hpp')

    testfiles = []
    for root, dirs, files in os.walk("./"):
        for name in files:
            # str.endswith accepts a tuple: true if any suffix matches.
            if name.endswith(extensions):
                testfiles.append(os.path.join(root, name))

    for path in testfiles:
        processfile(path, tabs, lineends, trails, verbose)
|
||||
|
||||
# Main function
|
||||
|
||||
def main():
    """Parse the command line and clean the selected files.

    With --git <SHA1>, the files named by `git show --name-only <SHA1>`
    are processed; otherwise every code file below the current directory
    is processed via findfiles(). The --notabs / --notrails / --nolineends
    switches disable the individual clean-ups and --verbose prints each
    file as it is processed.
    """
    parser = OptionParser()
    parser.add_option("--notabs", dest="tabs", action="store_false", default=True, help="Disable converting tabs to 4 spaces.")
    parser.add_option("--notrails", dest="trails", action="store_false", default=True, help="Disable removing trailing whitespaces and trailing tabs.")
    parser.add_option("--nolineends", dest="lineends", action="store_false", default=True, help=" Disable converting line endings to Unix from DOS.")
    parser.add_option("--verbose", dest="verbose", action="store_true", default=False, help="Prints out the files being processed.")
    parser.add_option("--git", dest="SHA1", default="", help="Processes only the files present in the particular <SHA1> commit.")
    parser.add_option('-o', action="store", default=True, help="Default: All the code files (.c,.cpp,.h,.hpp) in the current directory and subdirectories will be processed")

    (options, args) = parser.parse_args()

    if options.SHA1:
        # universal_newlines=True makes communicate() return text (not
        # bytes) on Python 3 as well, so the split below works on both.
        pl = Popen(["git", "show", "--pretty=format:", "--name-only", options.SHA1],
                   stdout=PIPE, universal_newlines=True)
        cmdout = pl.communicate()[0]
        # NOTE(review): git prints these paths relative to the repository
        # root, so this presumably expects to be run from there - confirm.
        for path in cmdout.split("\n"):
            print(path)
            if not path:
                continue
            if not os.path.isfile(path):
                # A commit can list files that are not in the working tree
                # (e.g. deleted ones); skip them instead of aborting the run.
                sys.stderr.write("skipping " + path + ": not a file in the working tree\n")
                continue
            processfile(path, options.tabs, options.lineends, options.trails, options.verbose)
    else:
        findfiles(options.tabs, options.lineends, options.trails, options.verbose)
|
||||
|
||||
# Start the process by calling main, but only when run as a script so
# that importing this module does not rewrite files as a side effect.
if __name__ == "__main__":
    main()
|
||||
@@ -30,9 +30,9 @@
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <GL/glew.h>
|
||||
#include <GL/glew.h>
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glext.h>
|
||||
#include <GL/glext.h>
|
||||
#ifdef _WIN32
|
||||
#include <GL/glut.h>
|
||||
#else
|
||||
@@ -48,5 +48,5 @@
|
||||
#endif
|
||||
|
||||
|
||||
#endif // __gl_headers_h
|
||||
#endif // __gl_headers_h
|
||||
|
||||
|
||||
@@ -121,85 +121,85 @@ extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
|
||||
|
||||
class glBufferWrapper
|
||||
{
|
||||
public:
|
||||
glBufferWrapper() { mBuffer = 0; }
|
||||
glBufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
|
||||
public:
|
||||
glBufferWrapper() { mBuffer = 0; }
|
||||
glBufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
|
||||
|
||||
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glTextureWrapper
|
||||
{
|
||||
public:
|
||||
glTextureWrapper() { mHandle = 0; }
|
||||
glTextureWrapper( GLuint b ) { mHandle = b; }
|
||||
~glTextureWrapper() {
|
||||
public:
|
||||
glTextureWrapper() { mHandle = 0; }
|
||||
glTextureWrapper( GLuint b ) { mHandle = b; }
|
||||
~glTextureWrapper() {
|
||||
if( mHandle != 0 ) glDeleteTextures( 1, &mHandle );
|
||||
}
|
||||
|
||||
glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; }
|
||||
operator GLuint() { return mHandle; }
|
||||
operator GLuint *() { return &mHandle; }
|
||||
glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; }
|
||||
operator GLuint() { return mHandle; }
|
||||
operator GLuint *() { return &mHandle; }
|
||||
|
||||
GLuint * operator&() { return &mHandle; }
|
||||
GLuint * operator&() { return &mHandle; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mHandle == rhs; }
|
||||
bool operator==( GLuint rhs ) { return mHandle == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
// The texture handle.
|
||||
GLuint mHandle;
|
||||
GLuint mHandle;
|
||||
};
|
||||
|
||||
class glRenderbufferWrapper
|
||||
{
|
||||
public:
|
||||
glRenderbufferWrapper() { mBuffer = 0; }
|
||||
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
|
||||
public:
|
||||
glRenderbufferWrapper() { mBuffer = 0; }
|
||||
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glFramebufferWrapper
|
||||
{
|
||||
public:
|
||||
glFramebufferWrapper() { mBuffer = 0; }
|
||||
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
|
||||
public:
|
||||
glFramebufferWrapper() { mBuffer = 0; }
|
||||
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
|
||||
@@ -229,7 +229,7 @@ extern void * CreateGLTexture1D(size_t width,
|
||||
extern void * CreateGLTexture2D( size_t width, size_t height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
|
||||
@@ -245,18 +245,18 @@ extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GL
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer );
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer );
|
||||
|
||||
extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer,
|
||||
int *outError, MTdata d, bool allocateMem );
|
||||
ExplicitType type,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer,
|
||||
int *outError, MTdata d, bool allocateMem );
|
||||
|
||||
extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
|
||||
GLenum attachment, GLenum glFormat,
|
||||
|
||||
@@ -32,15 +32,15 @@
|
||||
|
||||
class GLEnvironment
|
||||
{
|
||||
public:
|
||||
GLEnvironment() {}
|
||||
virtual ~GLEnvironment() {}
|
||||
public:
|
||||
GLEnvironment() {}
|
||||
virtual ~GLEnvironment() {}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
|
||||
virtual cl_context CreateCLContext( void ) = 0;
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
|
||||
virtual cl_context CreateCLContext( void ) = 0;
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
|
||||
|
||||
static GLEnvironment * Instance( void );
|
||||
static GLEnvironment * Instance( void );
|
||||
|
||||
|
||||
};
|
||||
|
||||
@@ -19,14 +19,14 @@
|
||||
|
||||
class OSXGLEnvironment : public GLEnvironment
|
||||
{
|
||||
public:
|
||||
OSXGLEnvironment()
|
||||
{
|
||||
public:
|
||||
OSXGLEnvironment()
|
||||
{
|
||||
mCGLContext = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 )
|
||||
{
|
||||
{
|
||||
if (!use_opengl_32) {
|
||||
|
||||
// Create a GLUT window to render into
|
||||
@@ -65,10 +65,10 @@ class OSXGLEnvironment : public GLEnvironment
|
||||
CGLSetCurrentContext(mCGLContext);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
int error;
|
||||
|
||||
@@ -135,22 +135,22 @@ class OSXGLEnvironment : public GLEnvironment
|
||||
found_valid_device = 1;
|
||||
}
|
||||
}
|
||||
return found_valid_device;
|
||||
return found_valid_device;
|
||||
}
|
||||
|
||||
virtual ~OSXGLEnvironment()
|
||||
{
|
||||
CGLDestroyContext( mCGLContext );
|
||||
}
|
||||
virtual ~OSXGLEnvironment()
|
||||
{
|
||||
CGLDestroyContext( mCGLContext );
|
||||
}
|
||||
|
||||
CGLContextObj mCGLContext;
|
||||
CGLContextObj mCGLContext;
|
||||
|
||||
};
|
||||
|
||||
GLEnvironment * GLEnvironment::Instance( void )
|
||||
{
|
||||
static OSXGLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new OSXGLEnvironment();
|
||||
return env;
|
||||
static OSXGLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new OSXGLEnvironment();
|
||||
return env;
|
||||
}
|
||||
|
||||
@@ -197,8 +197,8 @@ public:
|
||||
|
||||
GLEnvironment * GLEnvironment::Instance( void )
|
||||
{
|
||||
static WGLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new WGLEnvironment();
|
||||
return env;
|
||||
static WGLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new WGLEnvironment();
|
||||
return env;
|
||||
}
|
||||
|
||||
@@ -71,7 +71,7 @@ public:
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
int found_valid_device = 0;
|
||||
cl_platform_id platform;
|
||||
cl_platform_id platform;
|
||||
cl_device_id devices[64];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
@@ -115,8 +115,8 @@ public:
|
||||
|
||||
GLEnvironment * GLEnvironment::Instance( void )
|
||||
{
|
||||
static X11GLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new X11GLEnvironment();
|
||||
return env;
|
||||
static X11GLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new X11GLEnvironment();
|
||||
return env;
|
||||
}
|
||||
|
||||
@@ -32,6 +32,9 @@
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/errno.h>
|
||||
#ifdef __linux__
|
||||
#include <sched.h>
|
||||
#endif
|
||||
#endif // !_WIN32
|
||||
|
||||
// declarations
|
||||
@@ -251,7 +254,6 @@ void *ThreadPool_WorkerFunc( void *p )
|
||||
{
|
||||
cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
|
||||
cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
ThreadPool_AtomicAdd( &gRunning, 1 );
|
||||
// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
|
||||
|
||||
while( MAX_COUNT > item )
|
||||
@@ -444,7 +446,6 @@ void ThreadPool_Init(void)
|
||||
// Check for manual override of multithreading code. We add this for better debuggability.
|
||||
if( getenv( "CL_TEST_SINGLE_THREADED" ) )
|
||||
{
|
||||
log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
@@ -458,7 +459,9 @@ void ThreadPool_Init(void)
|
||||
|
||||
GetLogicalProcessorInformation( NULL, &length );
|
||||
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
|
||||
if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
|
||||
if( buffer != NULL )
|
||||
{
|
||||
if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
|
||||
{
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
|
||||
while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
|
||||
@@ -475,6 +478,7 @@ void ThreadPool_Init(void)
|
||||
}
|
||||
++ptr;
|
||||
}
|
||||
}
|
||||
free(buffer);
|
||||
}
|
||||
#elif defined (__MINGW32__)
|
||||
@@ -484,6 +488,20 @@ void ThreadPool_Init(void)
|
||||
GetSystemInfo( &sysinfo );
|
||||
gThreadCount = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
#elif defined (__linux__) && !defined(__ANDROID__)
|
||||
cpu_set_t affinity;
|
||||
if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
|
||||
{
|
||||
#if !(defined(CPU_COUNT))
|
||||
gThreadCount = 1;
|
||||
#else
|
||||
gThreadCount = CPU_COUNT(&affinity);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||
}
|
||||
#else // !_WIN32
|
||||
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||
#endif // !_WIN32
|
||||
@@ -493,6 +511,18 @@ void ThreadPool_Init(void)
|
||||
gThreadCount = 2;
|
||||
}
|
||||
|
||||
// When working in 32 bit limit the thread number to 12
|
||||
// This fix was made due to memory issues in integer_ops test
|
||||
// When running integer_ops, the test opens as many threads as the
|
||||
// machine has and each thread allocates a fixed amount of memory
|
||||
// When running this test on dual socket machine in 32-bit, the
|
||||
// process memory is not sufficient and the test fails
|
||||
#if defined(_WIN32) && !defined(_M_X64)
|
||||
if (gThreadCount > 12) {
|
||||
gThreadCount = 12;
|
||||
}
|
||||
#endif
|
||||
|
||||
//Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
|
||||
if( gThreadCount < 2 )
|
||||
{
|
||||
@@ -532,6 +562,7 @@ void ThreadPool_Init(void)
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
gRunning = gThreadCount;
|
||||
// init threads
|
||||
for( i = 0; i < gThreadCount; i++ )
|
||||
{
|
||||
@@ -745,6 +776,7 @@ cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
gUserInfo = userInfo;
|
||||
|
||||
#if defined( _WIN32 )
|
||||
ResetEvent(caller_event);
|
||||
_WakeAllConditionVariable( cond_var );
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
|
||||
@@ -17,27 +17,51 @@
|
||||
#define _COMPAT_H_
|
||||
|
||||
#if defined(_WIN32) && defined (_MSC_VER)
|
||||
|
||||
#include <Windows.h>
|
||||
#include <Winbase.h>
|
||||
#include <CL/cl.h>
|
||||
#include <float.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
||||
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
||||
|
||||
#define isfinite(x) _finite(x)
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
typedef char bool;
|
||||
#define inline
|
||||
|
||||
#else
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define EXTERN_C extern "C"
|
||||
#else
|
||||
#define EXTERN_C
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// stdlib.h
|
||||
//
|
||||
|
||||
#include <stdlib.h> // On Windows, _MAX_PATH defined there.
|
||||
|
||||
// llabs appeared in MS C v16 (VS 10/2010).
|
||||
#if defined( _MSC_VER ) && _MSC_VER <= 1500
|
||||
EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// stdbool.h
|
||||
//
|
||||
|
||||
// stdbool.h appeared in MS C v18 (VS 12/2013).
|
||||
// The version macro is _MSC_VER (leading underscore). A misspelled,
// undefined macro evaluates to 0 in #if, which would select the fallback
// below on every MSVC version, including those that ship <stdbool.h>.
#if defined( _MSC_VER ) && _MSC_VER <= 1700
    #if !defined(__cplusplus)
        // C only: C++ already has bool/true/false as keywords.
        typedef char bool;
        #define true 1
        #define false 0
    #endif
#else
    #include <stdbool.h>
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// stdint.h
|
||||
//
|
||||
|
||||
// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
|
||||
#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
|
||||
typedef unsigned char uint8_t;
|
||||
typedef char int8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
@@ -46,25 +70,83 @@ typedef unsigned int uint32_t;
|
||||
typedef int int32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef long long int64_t;
|
||||
|
||||
#define MAXPATHLEN MAX_PATH
|
||||
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
#else
|
||||
#ifndef __STDC_LIMIT_MACROS
|
||||
#define __STDC_LIMIT_MACROS
|
||||
#endif
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define INFINITY (FLT_MAX + FLT_MAX)
|
||||
//#define NAN (INFINITY | 1)
|
||||
//const static int PINFBITPATT_SP32 = INFINITY;
|
||||
|
||||
//
|
||||
// float.h
|
||||
//
|
||||
|
||||
#include <float.h>
|
||||
|
||||
|
||||
|
||||
//
|
||||
// fenv.h
|
||||
//
|
||||
|
||||
// fenv.h appeared in MS C v18 (VS 12/2013).
|
||||
#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
|
||||
// reimplement fenv.h because windows doesn't have it
|
||||
#define FE_INEXACT 0x0020
|
||||
#define FE_UNDERFLOW 0x0010
|
||||
#define FE_OVERFLOW 0x0008
|
||||
#define FE_DIVBYZERO 0x0004
|
||||
#define FE_INVALID 0x0001
|
||||
#define FE_ALL_EXCEPT 0x003D
|
||||
int fetestexcept(int excepts);
|
||||
int feclearexcept(int excepts);
|
||||
#else
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// math.h
|
||||
//
|
||||
|
||||
#if defined( __INTEL_COMPILER )
|
||||
#include <mathimf.h>
|
||||
#else
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846264338327950288
|
||||
#endif
|
||||
|
||||
#if ! defined( __INTEL_COMPILER )
|
||||
|
||||
#ifndef NAN
|
||||
#define NAN (INFINITY - INFINITY)
|
||||
#endif
|
||||
#ifndef HUGE_VALF
|
||||
#define HUGE_VALF (float)HUGE_VAL
|
||||
#endif
|
||||
#ifndef INFINITY
|
||||
#define INFINITY (FLT_MAX + FLT_MAX)
|
||||
#endif
|
||||
#ifndef isfinite
|
||||
#define isfinite(x) _finite(x)
|
||||
#endif
|
||||
#ifndef isnan
|
||||
#define isnan( x ) ((x) != (x))
|
||||
#endif
|
||||
#ifndef isinf
|
||||
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
|
||||
#endif
|
||||
|
||||
double rint( double x);
|
||||
float rintf( float x);
|
||||
@@ -98,27 +180,6 @@ long double remquol( long double x, long double y, int *quo);
|
||||
|
||||
long double scalblnl(long double x, long n);
|
||||
|
||||
inline long long
|
||||
llabs(long long __x) { return __x >= 0 ? __x : -__x; }
|
||||
|
||||
|
||||
// end of math functions
|
||||
|
||||
uint64_t ReadTime( void );
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
|
||||
#define sleep(X) Sleep(1000*X)
|
||||
#define snprintf sprintf_s
|
||||
//#define hypotl _hypot
|
||||
|
||||
float make_nan();
|
||||
float nanf( const char* str);
|
||||
double nan( const char* str);
|
||||
long double nanl( const char* str);
|
||||
|
||||
//#if defined USE_BOOST
|
||||
//#include <boost/math/tr1.hpp>
|
||||
//double hypot(double x, double y);
|
||||
float hypotf(float x, float y);
|
||||
long double hypotl(long double x, long double y) ;
|
||||
double lgamma(double x);
|
||||
@@ -143,58 +204,190 @@ double round(double x);
|
||||
float roundf(float x);
|
||||
long double roundl(long double x);
|
||||
|
||||
int signbit(double x);
|
||||
int signbitf(float x);
|
||||
int cf_signbit(double x);
|
||||
int cf_signbitf(float x);
|
||||
|
||||
//bool signbitl(long double x) { return boost::math::tr1::signbit<long double>(x); }
|
||||
//#endif // USE_BOOST
|
||||
// Added in _MSC_VER == 1800 (Visual Studio 2013)
|
||||
#if _MSC_VER < 1800
|
||||
static int signbit(double x) { return cf_signbit(x); }
|
||||
#endif
|
||||
static int signbitf(float x) { return cf_signbitf(x); }
|
||||
|
||||
long int lrint (double flt);
|
||||
long int lrintf (float flt);
|
||||
|
||||
|
||||
float int2float (int32_t ix);
|
||||
int32_t float2int (float fx);
|
||||
|
||||
#endif
|
||||
|
||||
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
|
||||
// These functions appeared in Intel C v13.
|
||||
float nanf( const char* str);
|
||||
double nan( const char* str);
|
||||
long double nanl( const char* str);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined( __ANDROID__ )
|
||||
#define log2(X) (log(X)/log(2))
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// stdio.h
|
||||
//
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
|
||||
#if _MSC_VER < 1900
|
||||
#define snprintf sprintf_s
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// unistd.h
|
||||
//
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
EXTERN_C unsigned int sleep( unsigned int sec );
|
||||
EXTERN_C int usleep( int usec );
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//
|
||||
// syscall.h
|
||||
//
|
||||
|
||||
#if defined( __ANDROID__ )
|
||||
// Android bionic's isn't providing SYS_sysctl wrappers.
|
||||
#define SYS__sysctl __NR__sysctl
|
||||
#elif defined( __aarch64__ )
|
||||
// Enable deprecated syscalls on arm 64-bit.
|
||||
#define __ARCH_WANT_SYSCALL_DEPRECATED
|
||||
// And use the NR variant of syscall too.
|
||||
#define SYS__sysctl __NR__sysctl
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Some tests use _malloca, which is defined in malloc.h.
|
||||
#if !defined (__APPLE__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// ???
|
||||
//
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
|
||||
#define MAXPATHLEN _MAX_PATH
|
||||
|
||||
EXTERN_C uint64_t ReadTime( void );
|
||||
EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
|
||||
/** Returns the number of leading 0-bits in x,
|
||||
starting at the most significant bit position.
|
||||
If x is 0, the result is undefined.
|
||||
*/
|
||||
int __builtin_clz(unsigned int pattern);
|
||||
|
||||
|
||||
static const double zero= 0.00000000000000000000e+00;
|
||||
#define NAN (INFINITY - INFINITY)
|
||||
#define HUGE_VALF (float)HUGE_VAL
|
||||
|
||||
int usleep(int usec);
|
||||
|
||||
// reimplement fenv.h because windows doesn't have it
|
||||
#define FE_INEXACT 0x0020
|
||||
#define FE_UNDERFLOW 0x0010
|
||||
#define FE_OVERFLOW 0x0008
|
||||
#define FE_DIVBYZERO 0x0004
|
||||
#define FE_INVALID 0x0001
|
||||
#define FE_ALL_EXCEPT 0x003D
|
||||
|
||||
int fetestexcept(int excepts);
|
||||
int feclearexcept(int excepts);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#else // !((defined(_WIN32) && defined(_MSC_VER)
|
||||
#if defined(__MINGW32__)
|
||||
#include <windows.h>
|
||||
#define sleep(X) Sleep(1000*X)
|
||||
EXTERN_C int __builtin_clz(unsigned int pattern);
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef MIN
|
||||
#define MIN(x,y) (((x)<(y))?(x):(y))
|
||||
#endif
|
||||
#ifndef MAX
|
||||
#define MAX(x,y) (((x)>(y))?(x):(y))
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
------------------------------------------------------------------------------------------------
|
||||
WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
|
||||
|
||||
This is a typical usage of the macros:
|
||||
|
||||
double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
|
||||
|
||||
(taken from math_brute_force/reference_math.c). There are two problems:
|
||||
|
||||
1. There is an error here. On Windows it will produce an incorrect result
|
||||
`0x1.5555555555555p+50'. To have a correct result it should be written as
|
||||
`MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
|
||||
third argument is not obvious -- sometimes it should be the same as exponent of the
|
||||
first argument, but sometimes not.
|
||||
|
||||
2. Information is duplicated. It is easy to make a mistake.
|
||||
|
||||
Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
|
||||
------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
||||
|
||||
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
||||
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
||||
|
||||
#else
|
||||
|
||||
// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
|
||||
#define MAKE_HEX_FLOAT(x,y,z) x
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) x
|
||||
#define MAKE_HEX_LONG(x,y,z) x
|
||||
|
||||
#endif // !((defined(_WIN32) && defined(_MSC_VER)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
------------------------------------------------------------------------------------------------
|
||||
HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
|
||||
double respectively. Arguments:
|
||||
|
||||
sm -- sign of number,
|
||||
int -- integer part of mantissa (without `0x' prefix),
|
||||
fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
|
||||
se -- sign of exponent,
|
||||
exp -- absolute value of (binary) exponent.
|
||||
|
||||
Example:
|
||||
|
||||
double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
|
||||
|
||||
Note:
|
||||
|
||||
We have to pass signs as separate arguments because gcc passes negative integer values
|
||||
(e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
|
||||
`0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
|
||||
literal.
|
||||
------------------------------------------------------------------------------------------------
|
||||
*/
|
||||
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
||||
// If compiler does not support hex floating point literals:
|
||||
#define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) )
|
||||
#define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) )
|
||||
#define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
|
||||
#else
|
||||
// If compiler supports hex floating point literals: just concatenate all the parts into a literal.
|
||||
#define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
|
||||
#define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
|
||||
#define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
|
||||
#endif
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
#include <Windows.h>
|
||||
#define sleep(sec) Sleep((sec) * 1000)
|
||||
#endif
|
||||
|
||||
#endif // _COMPAT_H_
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -16,15 +16,14 @@
|
||||
#ifndef _conversions_h
|
||||
#define _conversions_h
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
#include "errorHelpers.h"
|
||||
#include "mt19937.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include "compat.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
@@ -34,41 +33,41 @@ extern "C" {
|
||||
|
||||
enum ExplicitTypes
|
||||
{
|
||||
kBool = 0,
|
||||
kChar,
|
||||
kUChar,
|
||||
kUnsignedChar,
|
||||
kShort,
|
||||
kUShort,
|
||||
kUnsignedShort,
|
||||
kInt,
|
||||
kUInt,
|
||||
kUnsignedInt,
|
||||
kLong,
|
||||
kULong,
|
||||
kUnsignedLong,
|
||||
kFloat,
|
||||
kHalf,
|
||||
kDouble,
|
||||
kNumExplicitTypes
|
||||
kBool = 0,
|
||||
kChar,
|
||||
kUChar,
|
||||
kUnsignedChar,
|
||||
kShort,
|
||||
kUShort,
|
||||
kUnsignedShort,
|
||||
kInt,
|
||||
kUInt,
|
||||
kUnsignedInt,
|
||||
kLong,
|
||||
kULong,
|
||||
kUnsignedLong,
|
||||
kFloat,
|
||||
kHalf,
|
||||
kDouble,
|
||||
kNumExplicitTypes
|
||||
};
|
||||
|
||||
typedef enum ExplicitTypes ExplicitType;
|
||||
typedef enum ExplicitTypes ExplicitType;
|
||||
|
||||
enum RoundingTypes
|
||||
{
|
||||
kRoundToEven = 0,
|
||||
kRoundToZero,
|
||||
kRoundToPosInf,
|
||||
kRoundToNegInf,
|
||||
kRoundToNearest,
|
||||
kRoundToEven = 0,
|
||||
kRoundToZero,
|
||||
kRoundToPosInf,
|
||||
kRoundToNegInf,
|
||||
kRoundToNearest,
|
||||
|
||||
kNumRoundingTypes,
|
||||
kNumRoundingTypes,
|
||||
|
||||
kDefaultRoundingType = kRoundToNearest
|
||||
kDefaultRoundingType = kRoundToNearest
|
||||
};
|
||||
|
||||
typedef enum RoundingTypes RoundingType;
|
||||
typedef enum RoundingTypes RoundingType;
|
||||
|
||||
extern void print_type_to_string(ExplicitType type, void *data, char* string);
|
||||
extern size_t get_explicit_type_size( ExplicitType type );
|
||||
@@ -76,7 +75,7 @@ extern const char * get_explicit_type_name( ExplicitType type );
|
||||
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
|
||||
|
||||
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
|
||||
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
|
||||
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
|
||||
|
||||
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
|
||||
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
|
||||
|
||||
@@ -27,21 +27,21 @@
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
const char *IGetErrorString( int clErrorCode )
|
||||
const char *IGetErrorString( int clErrorCode )
|
||||
{
|
||||
switch( clErrorCode )
|
||||
{
|
||||
case CL_SUCCESS: return "CL_SUCCESS";
|
||||
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
||||
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
||||
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
||||
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
||||
case CL_SUCCESS: return "CL_SUCCESS";
|
||||
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
||||
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
||||
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
||||
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
||||
case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
||||
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
||||
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
||||
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
|
||||
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
|
||||
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
||||
@@ -51,37 +51,37 @@ const char *IGetErrorString( int clErrorCode )
|
||||
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
|
||||
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
|
||||
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
||||
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
||||
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
||||
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
|
||||
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
||||
case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
|
||||
case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
|
||||
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
||||
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
||||
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
||||
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
||||
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
||||
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
||||
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
||||
case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
|
||||
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
||||
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
||||
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
||||
case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
|
||||
case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
|
||||
case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
|
||||
case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
|
||||
case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
|
||||
case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
|
||||
case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
|
||||
case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
|
||||
case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
|
||||
case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
|
||||
case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
|
||||
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
||||
case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
|
||||
case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
|
||||
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
||||
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
||||
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
||||
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
||||
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
||||
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
||||
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
||||
case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
|
||||
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
||||
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
||||
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
||||
case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
|
||||
case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
|
||||
case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
|
||||
case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
|
||||
case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
|
||||
case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
|
||||
case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
|
||||
case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
|
||||
case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
|
||||
case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
|
||||
case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
|
||||
case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
|
||||
@@ -105,9 +105,9 @@ const char *GetChannelOrderName( cl_channel_order order )
|
||||
case CL_RGx: return "CL_RGx";
|
||||
case CL_RGB: return "CL_RGB";
|
||||
case CL_RGBx: return "CL_RGBx";
|
||||
case CL_RGBA: return "CL_RGBA";
|
||||
case CL_ARGB: return "CL_ARGB";
|
||||
case CL_BGRA: return "CL_BGRA";
|
||||
case CL_RGBA: return "CL_RGBA";
|
||||
case CL_ARGB: return "CL_ARGB";
|
||||
case CL_BGRA: return "CL_BGRA";
|
||||
case CL_INTENSITY: return "CL_INTENSITY";
|
||||
case CL_LUMINANCE: return "CL_LUMINANCE";
|
||||
#if defined CL_1RGB_APPLE
|
||||
@@ -116,7 +116,7 @@ const char *GetChannelOrderName( cl_channel_order order )
|
||||
#if defined CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE: return "CL_BGR1_APPLE";
|
||||
#endif
|
||||
default: return NULL;
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -210,12 +210,12 @@ const char *GetAddressModeName( cl_addressing_mode mode )
|
||||
{
|
||||
switch( mode )
|
||||
{
|
||||
case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
|
||||
case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
|
||||
case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP";
|
||||
case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT";
|
||||
case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT";
|
||||
default: return NULL;
|
||||
case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
|
||||
case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
|
||||
case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP";
|
||||
case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT";
|
||||
case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -223,11 +223,11 @@ const char *GetDeviceTypeName( cl_device_type type )
|
||||
{
|
||||
switch( type )
|
||||
{
|
||||
case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
|
||||
case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: return "CL_DEVICE_TYPE_ACCELERATOR";
|
||||
case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL";
|
||||
default: return NULL;
|
||||
case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
|
||||
case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: return "CL_DEVICE_TYPE_ACCELERATOR";
|
||||
case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -510,7 +510,7 @@ float Ulp_Error_Double( double test, long double reference )
|
||||
if( sizeof(long double) == sizeof( double ) )
|
||||
result += copysignf( 0.5f, result);
|
||||
|
||||
return result;
|
||||
return result;
|
||||
}
|
||||
|
||||
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
|
||||
|
||||
@@ -47,7 +47,7 @@ extern "C" {
|
||||
#define test_start()
|
||||
#define log_info printf
|
||||
#define log_error printf
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
_higherBetter?"higher is better":"lower is better", _number )
|
||||
#define test_finish()
|
||||
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
@@ -73,9 +73,9 @@ extern "C" {
|
||||
#define ct_assert_i(b, line) ct_assert_ii(b, line)
|
||||
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
|
||||
|
||||
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
|
||||
#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
|
||||
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
|
||||
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
|
||||
#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
|
||||
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
|
||||
|
||||
// expected error code vs. what we got
|
||||
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
||||
@@ -85,7 +85,7 @@ extern "C" {
|
||||
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
|
||||
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
||||
|
||||
extern const char *IGetErrorString( int clErrorCode );
|
||||
extern const char *IGetErrorString( int clErrorCode );
|
||||
|
||||
extern float Ulp_Error_Half( cl_ushort test, float reference );
|
||||
extern float Ulp_Error( float test, double reference );
|
||||
|
||||
@@ -25,7 +25,7 @@
|
||||
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
|
||||
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
|
||||
typedef int FPU_mode_type;
|
||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
|
||||
#include <xmmintrin.h>
|
||||
#elif defined( __PPC__ )
|
||||
#include <fpu_control.h>
|
||||
@@ -45,6 +45,12 @@
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined (__aarch64__)
|
||||
unsigned fpcr;
|
||||
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
|
||||
*mode = fpcr;
|
||||
__asm__ volatile ("msr fpcr, %0" :: "r"(fpcr | (1U << 24)));
|
||||
#else
|
||||
#error ForceFTZ needs an implentation
|
||||
#endif
|
||||
@@ -64,6 +70,12 @@
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
|
||||
// Add 64 bit support
|
||||
#elif defined (__aarch64__)
|
||||
unsigned fpcr;
|
||||
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
|
||||
*mode = fpcr;
|
||||
__asm__ volatile ("msr fpcr, %0" :: "r"(fpcr & ~(1U << 24)));
|
||||
#else
|
||||
#error DisableFTZ needs an implentation
|
||||
#endif
|
||||
@@ -78,6 +90,9 @@
|
||||
fpu_control = *mode;
|
||||
#elif defined (__arm__)
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
|
||||
// Add 64 bit support
|
||||
#elif defined (__aarch64__)
|
||||
__asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
|
||||
#else
|
||||
#error RestoreFPState needs an implementation
|
||||
#endif
|
||||
|
||||
@@ -23,8 +23,8 @@
|
||||
|
||||
void * genericThread::IStaticReflector( void * data )
|
||||
{
|
||||
genericThread *t = (genericThread *)data;
|
||||
return t->IRun();
|
||||
genericThread *t = (genericThread *)data;
|
||||
return t->IRun();
|
||||
}
|
||||
|
||||
bool genericThread::Start( void )
|
||||
@@ -33,8 +33,8 @@ bool genericThread::Start( void )
|
||||
mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
|
||||
return ( mHandle != NULL );
|
||||
#else // !_WIN32
|
||||
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
|
||||
return ( error == 0 );
|
||||
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
|
||||
return ( error == 0 );
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
@@ -44,10 +44,10 @@ void * genericThread::Join( void )
|
||||
WaitForSingleObject( (HANDLE)mHandle, INFINITE );
|
||||
return NULL;
|
||||
#else // !_WIN32
|
||||
void * retVal;
|
||||
int error = pthread_join( (pthread_t)mHandle, &retVal );
|
||||
if( error != 0 )
|
||||
retVal = NULL;
|
||||
return retVal;
|
||||
void * retVal;
|
||||
int error = pthread_join( (pthread_t)mHandle, &retVal );
|
||||
if( error != 0 )
|
||||
retVal = NULL;
|
||||
return retVal;
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
@@ -20,22 +20,22 @@
|
||||
|
||||
class genericThread
|
||||
{
|
||||
public:
|
||||
public:
|
||||
|
||||
virtual ~genericThread() {}
|
||||
virtual ~genericThread() {}
|
||||
|
||||
bool Start( void );
|
||||
void * Join( void );
|
||||
bool Start( void );
|
||||
void * Join( void );
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
virtual void * IRun( void ) = 0;
|
||||
virtual void * IRun( void ) = 0;
|
||||
|
||||
private:
|
||||
private:
|
||||
|
||||
void* mHandle;
|
||||
void* mHandle;
|
||||
|
||||
static void * IStaticReflector( void * data );
|
||||
static void * IStaticReflector( void * data );
|
||||
};
|
||||
|
||||
#endif // _genericThread_h
|
||||
|
||||
@@ -188,40 +188,40 @@ size_t get_pixel_size( cl_image_format *format )
|
||||
|
||||
int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
|
||||
{
|
||||
cl_image_format formatList[ 128 ];
|
||||
unsigned int outFormatCount, i;
|
||||
int error;
|
||||
cl_image_format formatList[ 128 ];
|
||||
unsigned int outFormatCount, i;
|
||||
int error;
|
||||
|
||||
|
||||
/* Make sure each image format is supported */
|
||||
if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
|
||||
/* Make sure each image format is supported */
|
||||
if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
|
||||
return error;
|
||||
|
||||
|
||||
/* Look for one that is an 8-bit format */
|
||||
for( i = 0; i < outFormatCount; i++ )
|
||||
{
|
||||
if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 ||
|
||||
/* Look for one that is an 8-bit format */
|
||||
for( i = 0; i < outFormatCount; i++ )
|
||||
{
|
||||
if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNORM_INT8 ||
|
||||
formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 )
|
||||
{
|
||||
formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 )
|
||||
{
|
||||
if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
|
||||
{
|
||||
*outFormat = formatList[ i ];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
|
||||
{
|
||||
cl_image_format formatList[ 128 ];
|
||||
unsigned int outFormatCount, i;
|
||||
int error;
|
||||
cl_image_format formatList[ 128 ];
|
||||
unsigned int outFormatCount, i;
|
||||
int error;
|
||||
|
||||
|
||||
/* Make sure each image format is supported */
|
||||
@@ -231,10 +231,10 @@ int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_
|
||||
/* Look for one that is a 32-bit format */
|
||||
for( i = 0; i < outFormatCount; i++ )
|
||||
{
|
||||
if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
formatList[ i ].image_channel_data_type == CL_FLOAT ||
|
||||
formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 )
|
||||
if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
formatList[ i ].image_channel_data_type == CL_FLOAT ||
|
||||
formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 )
|
||||
{
|
||||
if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
|
||||
{
|
||||
@@ -242,8 +242,8 @@ int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
@@ -23,20 +23,20 @@
|
||||
|
||||
int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName )
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
/* Create the program object from source */
|
||||
*outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
|
||||
if( *outProgram == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "clCreateProgramWithSource failed" );
|
||||
return error;
|
||||
}
|
||||
/* Create the program object from source */
|
||||
*outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
|
||||
if( *outProgram == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "clCreateProgramWithSource failed" );
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Compile the program */
|
||||
/* Compile the program */
|
||||
int buildProgramFailed = 0;
|
||||
int printedSource = 0;
|
||||
error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL );
|
||||
error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL );
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
unsigned int i;
|
||||
@@ -53,7 +53,7 @@ int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_
|
||||
error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
|
||||
if (error != CL_SUCCESS) {
|
||||
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
|
||||
return error;
|
||||
return error;
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
@@ -111,9 +111,9 @@ int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_
|
||||
if (error != CL_SUCCESS || log[0]=='\0'){
|
||||
log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
|
||||
if (error) {
|
||||
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
||||
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
||||
free( devices );
|
||||
return error;
|
||||
return error;
|
||||
} else {
|
||||
log_error("clGetProgramBuildInfo returned an empty log.\n");
|
||||
free( devices );
|
||||
@@ -137,17 +137,17 @@ int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_
|
||||
}
|
||||
}
|
||||
|
||||
/* And create a kernel from it */
|
||||
*outKernel = clCreateKernel( *outProgram, kernelName, &error );
|
||||
if( *outKernel == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "Unable to create kernel" );
|
||||
/* And create a kernel from it */
|
||||
*outKernel = clCreateKernel( *outProgram, kernelName, &error );
|
||||
if( *outKernel == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "Unable to create kernel" );
|
||||
free( devices );
|
||||
return error;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
free( devices );
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_device_version( cl_device_id id, size_t* major, size_t* minor)
|
||||
@@ -172,40 +172,40 @@ int get_device_version( cl_device_id id, size_t* major, size_t* minor)
|
||||
|
||||
int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
|
||||
{
|
||||
cl_device_id *devices;
|
||||
size_t size, maxCommonSize = 0;
|
||||
int numDevices, i, j, error;
|
||||
cl_device_id *devices;
|
||||
size_t size, maxCommonSize = 0;
|
||||
int numDevices, i, j, error;
|
||||
cl_uint numDims;
|
||||
size_t outSize;
|
||||
size_t outSize;
|
||||
size_t sizeLimit[]={1,1,1};
|
||||
|
||||
|
||||
/* Query the byte size of the context's device list, then allocate and fetch it */
|
||||
/* Query the byte size of the context's device list, then allocate and fetch it */
|
||||
error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
|
||||
test_error( error, "Unable to obtain list of devices size for context" );
|
||||
devices = (cl_device_id *)malloc(outSize);
|
||||
error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
|
||||
test_error( error, "Unable to obtain list of devices for context" );
|
||||
|
||||
numDevices = (int)( outSize / sizeof( cl_device_id ) );
|
||||
numDevices = (int)( outSize / sizeof( cl_device_id ) );
|
||||
|
||||
for( i = 0; i < numDevices; i++ )
|
||||
{
|
||||
error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
|
||||
test_error( error, "Unable to obtain max work group size for device" );
|
||||
if( size < maxCommonSize || maxCommonSize == 0)
|
||||
maxCommonSize = size;
|
||||
for( i = 0; i < numDevices; i++ )
|
||||
{
|
||||
error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
|
||||
test_error( error, "Unable to obtain max work group size for device" );
|
||||
if( size < maxCommonSize || maxCommonSize == 0)
|
||||
maxCommonSize = size;
|
||||
|
||||
error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
|
||||
test_error( error, "Unable to obtain max work group size for device and kernel combo" );
|
||||
if( size < maxCommonSize || maxCommonSize == 0)
|
||||
maxCommonSize = size;
|
||||
error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
|
||||
test_error( error, "Unable to obtain max work group size for device and kernel combo" );
|
||||
if( size < maxCommonSize || maxCommonSize == 0)
|
||||
maxCommonSize = size;
|
||||
|
||||
error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
|
||||
test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
|
||||
sizeLimit[0] = 1;
|
||||
error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL);
|
||||
test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
if (outLimits != NULL)
|
||||
{
|
||||
@@ -219,51 +219,51 @@ int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(devices);
|
||||
}
|
||||
free(devices);
|
||||
|
||||
*outMaxSize = (unsigned int)maxCommonSize;
|
||||
return 0;
|
||||
*outMaxSize = (unsigned int)maxCommonSize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
|
||||
size_t globalThreadSize, size_t *outMaxSize )
|
||||
size_t globalThreadSize, size_t *outMaxSize )
|
||||
{
|
||||
size_t sizeLimit[3];
|
||||
int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
|
||||
/* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
|
||||
/* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
|
||||
the modulo test will succeed and break the loop anyway */
|
||||
for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- )
|
||||
;
|
||||
return 0;
|
||||
/* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
|
||||
/* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
|
||||
the modulo test will succeed and break the loop anyway */
|
||||
for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- )
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
|
||||
size_t *globalThreadSizes, size_t *outMaxSizes )
|
||||
size_t *globalThreadSizes, size_t *outMaxSizes )
|
||||
{
|
||||
size_t sizeLimit[3];
|
||||
size_t maxSize;
|
||||
int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
size_t maxSize;
|
||||
int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
|
||||
/* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
|
||||
sizes */
|
||||
/* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
|
||||
sizes */
|
||||
|
||||
/* Simple case */
|
||||
if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize )
|
||||
{
|
||||
/* Simple case */
|
||||
if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize )
|
||||
{
|
||||
if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1]) {
|
||||
outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
|
||||
outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t remainingSize, sizeForThisOne;
|
||||
remainingSize = maxSize;
|
||||
@@ -280,30 +280,30 @@ int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
|
||||
remainingSize /=outMaxSizes[j];
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
|
||||
size_t *globalThreadSizes, size_t *outMaxSizes )
|
||||
size_t *globalThreadSizes, size_t *outMaxSizes )
|
||||
{
|
||||
size_t sizeLimit[3];
|
||||
size_t maxSize;
|
||||
int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
/* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
|
||||
sizes */
|
||||
size_t maxSize;
|
||||
int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
/* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
|
||||
sizes */
|
||||
|
||||
/* Simple case */
|
||||
if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize )
|
||||
{
|
||||
/* Simple case */
|
||||
if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize )
|
||||
{
|
||||
if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) {
|
||||
outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
|
||||
outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
|
||||
outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t remainingSize, sizeForThisOne;
|
||||
remainingSize = maxSize;
|
||||
@@ -320,7 +320,7 @@ int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
|
||||
remainingSize /=outMaxSizes[j];
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
@@ -395,7 +395,7 @@ int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_ob
|
||||
}
|
||||
|
||||
free( list );
|
||||
return ( i < count ) ? true : false;
|
||||
return ( i < count ) ? 1 : 0;
|
||||
}
|
||||
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||
@@ -477,44 +477,44 @@ size_t get_pixel_bytes( const cl_image_format *fmt )
|
||||
|
||||
int verifyImageSupport( cl_device_id device )
|
||||
{
|
||||
if( checkForImageSupport( device ) )
|
||||
{
|
||||
log_error( "ERROR: Device does not supported images as required by this test!\n" );
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
return 0;
|
||||
if( checkForImageSupport( device ) )
|
||||
{
|
||||
log_error( "ERROR: Device does not supported images as required by this test!\n" );
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int checkForImageSupport( cl_device_id device )
|
||||
{
|
||||
cl_uint i;
|
||||
int error;
|
||||
cl_uint i;
|
||||
int error;
|
||||
|
||||
|
||||
/* Check the device props to see if images are supported at all first */
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
|
||||
test_error( error, "Unable to query device for image support" );
|
||||
if( i == 0 )
|
||||
{
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
/* Check the device props to see if images are supported at all first */
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
|
||||
test_error( error, "Unable to query device for image support" );
|
||||
if( i == 0 )
|
||||
{
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* So our support is good */
|
||||
return 0;
|
||||
/* So our support is good */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int checkFor3DImageSupport( cl_device_id device )
|
||||
{
|
||||
cl_uint i;
|
||||
int error;
|
||||
cl_uint i;
|
||||
int error;
|
||||
|
||||
/* Check the device props to see if images are supported at all first */
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
|
||||
test_error( error, "Unable to query device for image support" );
|
||||
if( i == 0 )
|
||||
{
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
/* Check the device props to see if images are supported at all first */
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
|
||||
test_error( error, "Unable to query device for image support" );
|
||||
if( i == 0 )
|
||||
{
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
char profile[128];
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
|
||||
@@ -535,8 +535,8 @@ int checkFor3DImageSupport( cl_device_id device )
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* So our support is good */
|
||||
return 0;
|
||||
/* So our support is good */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void * align_malloc(size_t size, size_t alignment)
|
||||
@@ -545,8 +545,19 @@ void * align_malloc(size_t size, size_t alignment)
|
||||
return _aligned_malloc(size, alignment);
|
||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||
void * ptr = NULL;
|
||||
// alignment must be a power of two and a multiple of sizeof(void *).
|
||||
if ( alignment < sizeof( void * ) )
|
||||
{
|
||||
alignment = sizeof( void * );
|
||||
}
|
||||
#if defined(__ANDROID__)
|
||||
ptr = memalign(alignment, size);
|
||||
if ( ptr )
|
||||
return ptr;
|
||||
#else
|
||||
if (0 == posix_memalign(&ptr, alignment, size))
|
||||
return ptr;
|
||||
#endif
|
||||
return NULL;
|
||||
#elif defined(__MINGW32__)
|
||||
return __mingw_aligned_malloc(size, alignment);
|
||||
@@ -555,6 +566,7 @@ void * align_malloc(size_t size, size_t alignment)
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void align_free(void * ptr)
|
||||
{
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
@@ -652,33 +664,33 @@ cl_device_fp_config get_default_rounding_mode( cl_device_id device )
|
||||
|
||||
int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
|
||||
{
|
||||
cl_command_queue_properties realProps;
|
||||
cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( realProps ), &realProps, NULL );
|
||||
test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );
|
||||
cl_command_queue_properties realProps;
|
||||
cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( realProps ), &realProps, NULL );
|
||||
test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );
|
||||
|
||||
return ( realProps & prop ) ? 1 : 0;
|
||||
return ( realProps & prop ) ? 1 : 0;
|
||||
}
|
||||
|
||||
int printDeviceHeader( cl_device_id device )
|
||||
{
|
||||
char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ];
|
||||
int error;
|
||||
int error;
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_NAME for device" );
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_NAME for device" );
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
|
||||
|
||||
log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
|
||||
deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "",
|
||||
( error == CL_SUCCESS ) ? cLangVersion : "" );
|
||||
log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
|
||||
deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "",
|
||||
( error == CL_SUCCESS ) ? cLangVersion : "" );
|
||||
|
||||
return CL_SUCCESS;
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -107,19 +107,19 @@ size_t get_min_alignment(cl_context context);
|
||||
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
|
||||
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
|
||||
|
||||
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
|
||||
if( checkForImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
|
||||
if( checkForImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
|
||||
if( checkFor3DImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
|
||||
if( checkFor3DImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
/* Prints out the standard device header for all tests given the device to print for */
|
||||
extern int printDeviceHeader( cl_device_id device );
|
||||
|
||||
@@ -13,15 +13,18 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#if defined(_WIN32) && defined (_MSC_VER)
|
||||
|
||||
#include "compat.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <assert.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
#if defined ( _MSC_VER )
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <CL/cl.h>
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#if ! defined( __INTEL_COMPILER )
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
@@ -387,86 +390,6 @@ long double log2l(long double x)
|
||||
return 1.44269504088896340735992468100189214L * log(x);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// misc functions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
// This function is commented out because the Windows implementation should never call munmap.
|
||||
// If it is calling it, we have a bug. Please file a bugzilla.
|
||||
int munmap(void *addr, size_t len)
|
||||
{
|
||||
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
||||
|
||||
return (int)VirtualAlloc( (LPVOID)addr, len,
|
||||
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
||||
}
|
||||
*/
|
||||
|
||||
uint64_t ReadTime( void )
|
||||
{
|
||||
LARGE_INTEGER current;
|
||||
QueryPerformanceCounter(¤t);
|
||||
return (uint64_t)current.QuadPart;
|
||||
}
|
||||
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
||||
{
|
||||
static double PerformanceFrequency = 0.0;
|
||||
|
||||
if (PerformanceFrequency == 0.0) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
PerformanceFrequency = (double) frequency.QuadPart;
|
||||
}
|
||||
|
||||
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
||||
}
|
||||
|
||||
float make_nan()
|
||||
{
|
||||
/* This is the IEEE 754 single-precision format:
|
||||
unsigned int mantissa: 22;
|
||||
unsigned int quiet_nan: 1;
|
||||
unsigned int exponent: 8;
|
||||
unsigned int negative: 1;
|
||||
*/
|
||||
//const static unsigned
|
||||
static const int32_t _nan = 0x7fc00000;
|
||||
return *(const float*)(&_nan);
|
||||
}
|
||||
|
||||
float nanf( const char* str)
|
||||
{
|
||||
cl_uint u = atoi( str );
|
||||
u |= 0x7fc00000U;
|
||||
return *( float*)(&u);
|
||||
}
|
||||
|
||||
|
||||
double nan( const char* str)
|
||||
{
|
||||
cl_ulong u = atoi( str );
|
||||
u |= 0x7ff8000000000000ULL;
|
||||
return *( double*)(&u);
|
||||
}
|
||||
|
||||
// double check this implementation
|
||||
long double nanl( const char* str)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct { cl_ulong m; cl_ushort sexp; }u;
|
||||
}u;
|
||||
u.u.sexp = 0x7fff;
|
||||
u.u.m = 0x8000000000000000ULL | atoi( str );
|
||||
|
||||
return u.f;
|
||||
}
|
||||
|
||||
double trunc(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
@@ -589,7 +512,167 @@ long double roundl(long double x)
|
||||
return x;
|
||||
}
|
||||
|
||||
int signbit(double x)
|
||||
float cbrtf( float x )
|
||||
{
|
||||
float z = pow( fabs((double) x), 1.0 / 3.0 );
|
||||
return copysignf( z, x );
|
||||
}
|
||||
|
||||
double cbrt( double x )
|
||||
{
|
||||
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
|
||||
}
|
||||
|
||||
long int lrint (double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( x >= (double) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
|
||||
{
|
||||
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
|
||||
double rounded = x + magic;
|
||||
rounded -= magic;
|
||||
return (long int) rounded;
|
||||
}
|
||||
|
||||
return (long int) x;
|
||||
}
|
||||
|
||||
long int lrintf (float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( x >= (float) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
||||
{
|
||||
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
|
||||
float rounded = x + magic;
|
||||
rounded -= magic;
|
||||
return (long int) rounded;
|
||||
}
|
||||
|
||||
return (long int) x;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// fenv functions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
#if _MSC_VER < 1900
|
||||
int fetestexcept(int excepts)
|
||||
{
|
||||
unsigned int status = _statusfp();
|
||||
return excepts & (
|
||||
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
||||
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
||||
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
||||
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
||||
((status & _SW_INVALID) ? FE_INVALID : 0)
|
||||
);
|
||||
}
|
||||
|
||||
int feclearexcept(int excepts)
|
||||
{
|
||||
_clearfp();
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // __INTEL_COMPILER
|
||||
|
||||
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1300)
|
||||
|
||||
float make_nan()
|
||||
{
|
||||
/* This is the IEEE 754 single-precision format:
|
||||
unsigned int mantissa: 22;
|
||||
unsigned int quiet_nan: 1;
|
||||
unsigned int exponent: 8;
|
||||
unsigned int negative: 1;
|
||||
*/
|
||||
//const static unsigned
|
||||
static const int32_t _nan = 0x7fc00000;
|
||||
return *(const float*)(&_nan);
|
||||
}
|
||||
|
||||
float nanf( const char* str)
|
||||
{
|
||||
cl_uint u = atoi( str );
|
||||
u |= 0x7fc00000U;
|
||||
return *( float*)(&u);
|
||||
}
|
||||
|
||||
|
||||
double nan( const char* str)
|
||||
{
|
||||
cl_ulong u = atoi( str );
|
||||
u |= 0x7ff8000000000000ULL;
|
||||
return *( double*)(&u);
|
||||
}
|
||||
|
||||
// double check this implementation
|
||||
long double nanl( const char* str)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct { cl_ulong m; cl_ushort sexp; }u;
|
||||
}u;
|
||||
u.u.sexp = 0x7fff;
|
||||
u.u.m = 0x8000000000000000ULL | atoi( str );
|
||||
|
||||
return u.f;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// misc functions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
// This function is commented out because the Windows implementation should never call munmap.
|
||||
// If it is calling it, we have a bug. Please file a bugzilla.
|
||||
int munmap(void *addr, size_t len)
|
||||
{
|
||||
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
||||
|
||||
return (int)VirtualAlloc( (LPVOID)addr, len,
|
||||
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
||||
}
|
||||
*/
|
||||
|
||||
uint64_t ReadTime( void )
|
||||
{
|
||||
LARGE_INTEGER current;
|
||||
QueryPerformanceCounter(¤t);
|
||||
return (uint64_t)current.QuadPart;
|
||||
}
|
||||
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
||||
{
|
||||
static double PerformanceFrequency = 0.0;
|
||||
|
||||
if (PerformanceFrequency == 0.0) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
PerformanceFrequency = (double) frequency.QuadPart;
|
||||
}
|
||||
|
||||
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
||||
}
|
||||
|
||||
int cf_signbit(double x)
|
||||
{
|
||||
union
|
||||
{
|
||||
@@ -600,7 +683,7 @@ int signbit(double x)
|
||||
return u.u >> 63;
|
||||
}
|
||||
|
||||
int signbitf(float x)
|
||||
int cf_signbitf(float x)
|
||||
{
|
||||
union
|
||||
{
|
||||
@@ -611,17 +694,6 @@ int signbitf(float x)
|
||||
return u.u >> 31;
|
||||
}
|
||||
|
||||
float cbrtf( float x )
|
||||
{
|
||||
float z = pow( fabs((double) x), 1.0 / 3.0 );
|
||||
return copysignf( z, x );
|
||||
}
|
||||
|
||||
double cbrt( double x )
|
||||
{
|
||||
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
|
||||
}
|
||||
|
||||
float int2float (int32_t ix)
|
||||
{
|
||||
union {
|
||||
@@ -642,7 +714,7 @@ int32_t float2int (float fx)
|
||||
return u.i;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER) && !defined(_WIN64)
|
||||
#if !defined(_WIN64)
|
||||
/** Returns the number of leading 0-bits in x,
|
||||
starting at the most significant bit position.
|
||||
If x is 0, the result is undefined.
|
||||
@@ -682,45 +754,10 @@ int __builtin_clz(unsigned int pattern)
|
||||
return count;
|
||||
}
|
||||
|
||||
#endif //defined(_MSC_VER) && !defined(_WIN64)
|
||||
#endif // !defined(_WIN64)
|
||||
|
||||
#include <intrin.h>
|
||||
#include <emmintrin.h>
|
||||
long int lrint (double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( x >= (double) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
|
||||
{
|
||||
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
|
||||
double rounded = x + magic;
|
||||
rounded -= magic;
|
||||
return (long int) rounded;
|
||||
}
|
||||
|
||||
return (long int) x;
|
||||
}
|
||||
|
||||
long int lrintf (float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( x >= (float) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
||||
{
|
||||
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
|
||||
float rounded = x + magic;
|
||||
rounded -= magic;
|
||||
return (long int) rounded;
|
||||
}
|
||||
|
||||
return (long int) x;
|
||||
}
|
||||
|
||||
int usleep(int usec)
|
||||
{
|
||||
@@ -728,22 +765,10 @@ int usleep(int usec)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fetestexcept(int excepts)
|
||||
unsigned int sleep( unsigned int sec )
|
||||
{
|
||||
unsigned int status = _statusfp();
|
||||
return excepts & (
|
||||
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
||||
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
||||
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
||||
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
||||
((status & _SW_INVALID) ? FE_INVALID : 0)
|
||||
);
|
||||
}
|
||||
|
||||
int feclearexcept(int excepts)
|
||||
{
|
||||
_clearfp();
|
||||
Sleep( sec * 1000 );
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif //defined(_WIN32)
|
||||
#endif // defined( _MSC_VER )
|
||||
|
||||
@@ -59,8 +59,14 @@ static void * align_malloc(size_t size, size_t alignment)
|
||||
return _aligned_malloc(size, alignment);
|
||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||
void * ptr = NULL;
|
||||
#if defined(__ANDROID__)
|
||||
ptr = memalign(alignment, size);
|
||||
if ( ptr )
|
||||
return ptr;
|
||||
#else
|
||||
if (0 == posix_memalign(&ptr, alignment, size))
|
||||
return ptr;
|
||||
#endif
|
||||
return NULL;
|
||||
#elif defined(__MINGW32__)
|
||||
return __mingw_aligned_malloc(size, alignment);
|
||||
|
||||
564
test_common/harness/os_helpers.cpp
Normal file
564
test_common/harness/os_helpers.cpp
Normal file
@@ -0,0 +1,564 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "os_helpers.h"
|
||||
#include "errorHelpers.h"
|
||||
|
||||
// =================================================================================================
|
||||
// C++ interface.
|
||||
// =================================================================================================
|
||||
|
||||
#include <cerrno> // errno, error constants
|
||||
#include <climits> // PATH_MAX
|
||||
#include <cstdlib> // abort, _splitpath, _makepath
|
||||
#include <cstring> // strdup, strerror_r
|
||||
#include <sstream>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#define CHECK_PTR( ptr ) \
|
||||
if ( (ptr) == NULL ) { \
|
||||
abort(); \
|
||||
}
|
||||
|
||||
typedef std::vector< char > buffer_t;
|
||||
|
||||
#if ! defined( PATH_MAX )
|
||||
#define PATH_MAX 1000
|
||||
#endif
|
||||
|
||||
int const _size = PATH_MAX + 1; // Initial buffer size for path.
|
||||
int const _count = 8; // How many times we will try to double buffer size.
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// MacOS X
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
|
||||
|
||||
#include <mach-o/dyld.h> // _NSGetExecutablePath
|
||||
#include <libgen.h> // dirname
|
||||
|
||||
|
||||
static
|
||||
std::string
|
||||
_err_msg(
|
||||
int err, // Error number (e. g. errno).
|
||||
int level // Nesting level, for avoiding infinite recursion.
|
||||
) {
|
||||
|
||||
/*
|
||||
There are 3 incompatible versions of strerror_r:
|
||||
|
||||
char * strerror_r( int, char *, size_t ); // GNU version
|
||||
int strerror_r( int, char *, size_t ); // BSD version
|
||||
int strerror_r( int, char *, size_t ); // XSI version
|
||||
|
||||
BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
|
||||
|
||||
*/
|
||||
|
||||
// BSD version of strerror_r.
|
||||
buffer_t buffer( 100 );
|
||||
int count = _count;
|
||||
for ( ; ; ) {
|
||||
int rc = strerror_r( err, & buffer.front(), buffer.size() );
|
||||
if ( rc == EINVAL ) {
|
||||
// Error code is not recognized, but anyway we got the message.
|
||||
return & buffer.front();
|
||||
} else if ( rc == ERANGE ) {
|
||||
// Buffer is not enough.
|
||||
if ( count > 0 ) {
|
||||
// Enlarge the buffer.
|
||||
-- count;
|
||||
buffer.resize( buffer.size() * 2 );
|
||||
} else {
|
||||
std::stringstream ostr;
|
||||
ostr
|
||||
<< "Error " << err << " "
|
||||
<< "(Getting error message failed: "
|
||||
<< "Buffer of " << buffer.size() << " bytes is still too small"
|
||||
<< ")";
|
||||
return ostr.str();
|
||||
}; // if
|
||||
} else if ( rc == 0 ) {
|
||||
// We got the message.
|
||||
return & buffer.front();
|
||||
} else {
|
||||
std::stringstream ostr;
|
||||
ostr
|
||||
<< "Error " << err << " "
|
||||
<< "(Getting error message failed: "
|
||||
<< ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
|
||||
<< ")";
|
||||
return ostr.str();
|
||||
}; // if
|
||||
}; // forever
|
||||
|
||||
} // _err_msg
|
||||
|
||||
|
||||
// Returns the directory separator used in paths on this platform.
std::string
dir_sep(
) {
    std::string const separator( "/" );
    return separator;
} // dir_sep
|
||||
|
||||
|
||||
std::string
|
||||
exe_path(
|
||||
) {
|
||||
buffer_t path( _size );
|
||||
int count = _count;
|
||||
for ( ; ; ) {
|
||||
uint32_t size = path.size();
|
||||
int rc = _NSGetExecutablePath( & path.front(), & size );
|
||||
if ( rc == 0 ) {
|
||||
break;
|
||||
}; // if
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
path.resize( size );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
|
||||
(unsigned long) path.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
}; // forever
|
||||
return & path.front();
|
||||
} // exe_path
|
||||
|
||||
|
||||
std::string
|
||||
exe_dir(
|
||||
) {
|
||||
std::string path = exe_path();
|
||||
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
||||
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
||||
return dirname( & buffer.front() );
|
||||
} // exe_dir
|
||||
|
||||
|
||||
#endif // __APPLE__
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// Linux
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined( __linux__ )
|
||||
|
||||
|
||||
#include <cerrno> // errno
|
||||
#include <libgen.h> // dirname
|
||||
#include <unistd.h> // readlink
|
||||
|
||||
|
||||
static
std::string
_err_msg(
    int err,        // Error number (e. g. errno).
    int level       // Nesting level, for avoiding infinite recursion.
) {

    /*
        strerror_r exists in three incompatible flavours:

            char * strerror_r( int, char *, size_t );  // GNU
            int    strerror_r( int, char *, size_t );  // BSD
            int    strerror_r( int, char *, size_t );  // XSI

        The BSD flavour returns an error code, whereas the XSI flavour returns 0 or -1
        and sets errno.
    */

    #if defined(__ANDROID__) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )

        // XSI flavour of strerror_r.
        #warning Not tested!
        buffer_t text( 200 );
        int retries = _count;

        while ( true ) {

            int const status = strerror_r( err, & text[ 0 ], text.size() );

            if ( status != -1 ) {
                // The message is in the buffer.
                return std::string( & text[ 0 ] );
            }

            // Capture errno immediately, before any other call can overwrite it.
            int const cause = errno;

            if ( cause != ERANGE ) {
                // Unexpected failure: describe it, recursing at most two levels deep.
                std::string const reason =
                    level < 2 ? _err_msg( cause, level + 1 ) : std::string( "Oops" );
                std::stringstream report;
                report
                    << "Error " << err << " "
                    << "(Getting error message failed: "
                    << reason
                    << ")";
                return report.str();
            }

            if ( retries <= 0 ) {
                std::stringstream report;
                report
                    << "Error " << err << " "
                    << "(Getting error message failed: "
                    << "Buffer of " << text.size() << " bytes is still too small"
                    << ")";
                return report.str();
            }

            // ERANGE: double the buffer and try again.
            -- retries;
            text.resize( text.size() * 2 );

        }

    #else

        // GNU flavour of strerror_r: the message is whatever the returned pointer refers to.
        char scratch[ 2000 ];
        char const * const text = strerror_r( err, scratch, sizeof( scratch ) );
        return text;

    #endif

} // _err_msg
|
||||
|
||||
|
||||
// Returns the directory separator used in paths on this platform.
std::string
dir_sep(
) {
    std::string const separator( "/" );
    return separator;
} // dir_sep
|
||||
|
||||
|
||||
std::string
|
||||
exe_path(
|
||||
) {
|
||||
|
||||
static std::string const exe = "/proc/self/exe";
|
||||
|
||||
buffer_t path( _size );
|
||||
int count = _count; // Max number of iterations.
|
||||
|
||||
for ( ; ; ) {
|
||||
|
||||
ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
|
||||
|
||||
if ( len < 0 ) {
|
||||
// Oops.
|
||||
int err = errno;
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Reading symlink `%s' failed: %s\n",
|
||||
exe.c_str(), err_msg( err ).c_str()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
if ( len < path.size() ) {
|
||||
// We got the path.
|
||||
path.resize( len );
|
||||
break;
|
||||
}; // if
|
||||
|
||||
// Oops, buffer is too small.
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
// Enlarge the buffer.
|
||||
path.resize( path.size() * 2 );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
|
||||
exe.c_str(),
|
||||
(unsigned long) path.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
}; // forever
|
||||
|
||||
return std::string( & path.front(), path.size() );
|
||||
|
||||
} // exe_path
|
||||
|
||||
|
||||
std::string
|
||||
exe_dir(
|
||||
) {
|
||||
std::string path = exe_path();
|
||||
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
||||
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
||||
return dirname( & buffer.front() );
|
||||
} // exe_dir
|
||||
|
||||
#endif // __linux__
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// MS Windows
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#if defined( _WIN32 )
|
||||
|
||||
|
||||
#include <windows.h>
|
||||
#if defined( max )
|
||||
#undef max
|
||||
#endif
|
||||
|
||||
#include <cctype>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
static
|
||||
std::string
|
||||
_err_msg(
|
||||
int err,
|
||||
int level
|
||||
) {
|
||||
|
||||
std::string msg;
|
||||
|
||||
LPSTR buffer = NULL;
|
||||
DWORD flags =
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS;
|
||||
|
||||
DWORD len =
|
||||
FormatMessageA(
|
||||
flags,
|
||||
NULL,
|
||||
err,
|
||||
LANG_USER_DEFAULT,
|
||||
reinterpret_cast< LPSTR >( & buffer ),
|
||||
0,
|
||||
NULL
|
||||
);
|
||||
|
||||
if ( buffer == NULL || len == 0 ) {
|
||||
|
||||
int _err = GetLastError();
|
||||
char str[1024] = { 0 };
|
||||
snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
|
||||
msg = std::string(str);
|
||||
|
||||
} else {
|
||||
|
||||
// Trim trailing whitespace (including `\r' and `\n').
|
||||
while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
|
||||
-- len;
|
||||
}; // while
|
||||
|
||||
// Drop trailing full stop.
|
||||
if ( len > 0 && buffer[ len - 1 ] == '.' ) {
|
||||
-- len;
|
||||
}; // if
|
||||
|
||||
msg.assign( buffer, len );
|
||||
|
||||
}; //if
|
||||
|
||||
if ( buffer != NULL ) {
|
||||
LocalFree( buffer );
|
||||
}; // if
|
||||
|
||||
return msg;
|
||||
|
||||
} // _get_err_msg
|
||||
|
||||
|
||||
// Returns the directory separator used in paths on this platform.
std::string
dir_sep(
) {
    std::string const separator( "\\" );
    return separator;
} // dir_sep
|
||||
|
||||
|
||||
std::string
|
||||
exe_path(
|
||||
) {
|
||||
|
||||
buffer_t path( _size );
|
||||
int count = _count;
|
||||
|
||||
for ( ; ; ) {
|
||||
|
||||
DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
|
||||
|
||||
if ( len == 0 ) {
|
||||
int err = GetLastError();
|
||||
log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
if ( len < path.size() ) {
|
||||
path.resize( len );
|
||||
break;
|
||||
}; // if
|
||||
|
||||
// Buffer too small.
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
path.resize( path.size() * 2 );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Buffer of %lu bytes is still too small\n",
|
||||
(unsigned long) path.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
|
||||
}; // forever
|
||||
|
||||
return std::string( & path.front(), path.size() );
|
||||
|
||||
} // exe_path
|
||||
|
||||
|
||||
std::string
|
||||
exe_dir(
|
||||
) {
|
||||
|
||||
std::string exe = exe_path();
|
||||
int count = 0;
|
||||
|
||||
// Splitting path into components.
|
||||
buffer_t drv( _MAX_DRIVE );
|
||||
buffer_t dir( _MAX_DIR );
|
||||
count = _count;
|
||||
#if defined(_MSC_VER)
|
||||
for ( ; ; ) {
|
||||
int rc =
|
||||
_splitpath_s(
|
||||
exe.c_str(),
|
||||
& drv.front(), drv.size(),
|
||||
& dir.front(), dir.size(),
|
||||
NULL, 0, // We need neither name
|
||||
NULL, 0 // nor extension
|
||||
);
|
||||
if ( rc == 0 ) {
|
||||
break;
|
||||
} else if ( rc == ERANGE ) {
|
||||
if ( count > 0 ) {
|
||||
-- count;
|
||||
// Buffer is too small, but it is not clear which one.
|
||||
// So we have to enlarge all.
|
||||
drv.resize( drv.size() * 2 );
|
||||
dir.resize( dir.size() * 2 );
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Splitting path `%s' to components failed: "
|
||||
"Buffers of %lu and %lu bytes are still too small\n",
|
||||
exe.c_str(),
|
||||
(unsigned long) drv.size(),
|
||||
(unsigned long) dir.size()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
} else {
|
||||
log_error(
|
||||
"ERROR: Getting executable path failed: "
|
||||
"Splitting path `%s' to components failed: %s\n",
|
||||
exe.c_str(),
|
||||
err_msg( rc ).c_str()
|
||||
);
|
||||
exit( 2 );
|
||||
}; // if
|
||||
}; // forever
|
||||
|
||||
#else // __MINGW32__
|
||||
|
||||
// MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
|
||||
_splitpath(
|
||||
exe.c_str(),
|
||||
& drv.front(),
|
||||
& dir.front(),
|
||||
NULL, // We need neither name
|
||||
NULL // nor extension
|
||||
);
|
||||
#endif // __MINGW32__
|
||||
|
||||
// Combining components back to path.
|
||||
// I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
|
||||
// ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
|
||||
// So let us try to guess the size of result and go with insecure `_makepath'.
|
||||
buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
|
||||
_makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
|
||||
|
||||
return & path.front();
|
||||
|
||||
} // exe_dir
|
||||
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
std::string
|
||||
err_msg(
|
||||
int err
|
||||
) {
|
||||
|
||||
return _err_msg( err, 0 );
|
||||
|
||||
} // err_msg
|
||||
|
||||
|
||||
// =================================================================================================
|
||||
// C interface.
|
||||
// =================================================================================================
|
||||
|
||||
|
||||
char *
|
||||
get_err_msg(
|
||||
int err
|
||||
) {
|
||||
char * msg = strdup( err_msg( err ).c_str() );
|
||||
CHECK_PTR( msg );
|
||||
return msg;
|
||||
} // get_err_msg
|
||||
|
||||
|
||||
char *
|
||||
get_dir_sep(
|
||||
) {
|
||||
char * sep = strdup( dir_sep().c_str() );
|
||||
CHECK_PTR( sep );
|
||||
return sep;
|
||||
} // get_dir_sep
|
||||
|
||||
|
||||
char *
|
||||
get_exe_path(
|
||||
) {
|
||||
char * path = strdup( exe_path().c_str() );
|
||||
CHECK_PTR( path );
|
||||
return path;
|
||||
} // get_exe_path
|
||||
|
||||
|
||||
char *
|
||||
get_exe_dir(
|
||||
) {
|
||||
char * dir = strdup( exe_dir().c_str() );
|
||||
CHECK_PTR( dir );
|
||||
return dir;
|
||||
} // get_exe_dir
|
||||
|
||||
|
||||
// end of file //
|
||||
53
test_common/harness/os_helpers.h
Normal file
53
test_common/harness/os_helpers.h
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __os_helpers_h__
|
||||
#define __os_helpers_h__
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// C++ interface.
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
#include <string>
|
||||
|
||||
std::string err_msg( int err );
|
||||
std::string dir_sep();
|
||||
std::string exe_path();
|
||||
std::string exe_dir();
|
||||
|
||||
#endif // __cplusplus
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// C interface.
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
char * get_err_msg( int err ); // Returns system error message. Subject to free.
|
||||
char * get_dir_sep(); // Returns dir separator. Subject to free.
|
||||
char * get_exe_path(); // Returns path of current executable. Subject to free.
|
||||
char * get_exe_dir(); // Returns dir of current executable. Subject to free.
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // __os_helpers_h__
|
||||
42
test_common/harness/parseParameters.cpp
Normal file
42
test_common/harness/parseParameters.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "parseParameters.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <string.h>
|
||||
|
||||
// Returns true when `number' is a positive power of two (1, 2, 4, ...).
// Zero and negative values are never powers of two. Rejecting them up front also avoids
// evaluating `number - 1' for INT_MIN, which is signed overflow (undefined behaviour) and,
// with wrap-around, would wrongly report INT_MIN as a power of two.
bool is_power_of_two(int number)
{
    if (number <= 0)
        return false;

    // A power of two has exactly one bit set, so clearing the lowest set bit leaves zero.
    return (number & (number - 1)) == 0;
}
|
||||
|
||||
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
|
||||
{
|
||||
const char *arg_temp = strchr(&arg[1], ']');
|
||||
if (arg_temp != 0)
|
||||
{
|
||||
int new_factor = atoi(&arg[1]);
|
||||
arg = arg_temp; // Advance until ']'
|
||||
if (is_power_of_two(new_factor))
|
||||
{
|
||||
log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor);
|
||||
wimpyReductionFactor = new_factor;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor);
|
||||
}
|
||||
}
|
||||
}
|
||||
24
test_common/harness/parseParameters.h
Normal file
24
test_common/harness/parseParameters.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _parseParameters_h
|
||||
#define _parseParameters_h
|
||||
|
||||
#include "compat.h"
|
||||
#include <string>
|
||||
|
||||
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
|
||||
|
||||
#endif // _parseParameters_h
|
||||
@@ -17,19 +17,19 @@
|
||||
#define _ref_counting_h
|
||||
|
||||
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
|
||||
cl_uint c##_refCount; \
|
||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
|
||||
test_error( error, "Unable to check reference count for " #type );
|
||||
cl_uint c##_refCount; \
|
||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
|
||||
test_error( error, "Unable to check reference count for " #type );
|
||||
|
||||
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
|
||||
cl_uint c##_refCount_new; \
|
||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
|
||||
test_error( error, "Unable to check reference count for " #type ); \
|
||||
if( c##_refCount != c##_refCount_new ) \
|
||||
{ \
|
||||
log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new ); \
|
||||
return -1; \
|
||||
}
|
||||
cl_uint c##_refCount_new; \
|
||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
|
||||
test_error( error, "Unable to check reference count for " #type ); \
|
||||
if( c##_refCount != c##_refCount_new ) \
|
||||
{ \
|
||||
log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new ); \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
|
||||
#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
|
||||
|
||||
@@ -15,7 +15,69 @@
|
||||
//
|
||||
#include "rounding_mode.h"
|
||||
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
#if (defined( __arm__ ) || defined(__aarch64__))
|
||||
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
|
||||
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
|
||||
|
||||
#define _ARM_FE_FTZ 0x1000000
|
||||
#define _ARM_FE_NFTZ 0x0
|
||||
#if defined(__aarch64__)
|
||||
#define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
|
||||
#define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
|
||||
#else
|
||||
#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
|
||||
#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
|
||||
#define _ARM_FE_TONEAREST 0x0
|
||||
#define _ARM_FE_UPWARD 0x400000
|
||||
#define _ARM_FE_DOWNWARD 0x800000
|
||||
#define _ARM_FE_TOWARDZERO 0xc00000
|
||||
/* ARM variant: installs rounding mode `r' in the FP control register and returns the mode
 * that was active before the call.
 * The register bits are taken from one of two tables, both indexed directly by `r' (no
 * range check): the floating-point table when `outType' is kfloat or kdouble, the integer
 * table otherwise. The two tables differ only in their first entry. */
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
    static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };

    const int *table = ( outType == kfloat || outType == kdouble ) ? flt_rounds : int_rounds;
    int fpscr = 0;

    /* Remember the current mode before touching the control register. */
    RoundingMode previous = get_round();

    /* Replace only the rounding-mode field, keeping all other control bits. */
    _FPU_GETCW(fpscr);
    _FPU_SETCW( table[r] | (fpscr & ~FPSCR_ROUND_MASK));

    return previous;
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
int fpscr;
|
||||
int oldRound;
|
||||
|
||||
_FPU_GETCW(fpscr);
|
||||
oldRound = (fpscr & FPSCR_ROUND_MASK);
|
||||
|
||||
switch( oldRound )
|
||||
{
|
||||
case _ARM_FE_TONEAREST:
|
||||
return kRoundToNearestEven;
|
||||
case _ARM_FE_UPWARD:
|
||||
return kRoundUp;
|
||||
case _ARM_FE_DOWNWARD:
|
||||
return kRoundDown;
|
||||
case _ARM_FE_TOWARDZERO:
|
||||
return kRoundTowardZero;
|
||||
}
|
||||
|
||||
return kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#elif !(defined(_WIN32) && defined(_MSC_VER))
|
||||
RoundingMode set_round( RoundingMode r, Type outType )
|
||||
{
|
||||
static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
|
||||
@@ -135,8 +197,10 @@ void *FlushToZero( void )
|
||||
union{ int i; void *p; }u = { _mm_getcsr() };
|
||||
_mm_setcsr( u.i | 0x8040 );
|
||||
return u.p;
|
||||
#elif defined( __arm__ )
|
||||
// processor is already in FTZ mode -- do nothing
|
||||
#elif defined( __arm__ ) || defined(__aarch64__)
|
||||
int fpscr;
|
||||
_FPU_GETCW(fpscr);
|
||||
_FPU_SETCW(fpscr | FPSCR_FZ);
|
||||
return NULL;
|
||||
#elif defined( __PPC__ )
|
||||
fpu_control_t flags = 0;
|
||||
@@ -159,8 +223,10 @@ void UnFlushToZero( void *p)
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
|
||||
union{ void *p; int i; }u = { p };
|
||||
_mm_setcsr( u.i );
|
||||
#elif defined( __arm__ )
|
||||
// processor is already in FTZ mode -- do nothing
|
||||
#elif defined( __arm__ ) || defined(__aarch64__)
|
||||
int fpscr;
|
||||
_FPU_GETCW(fpscr);
|
||||
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
||||
#elif defined( __PPC__)
|
||||
fpu_control_t flags = 0;
|
||||
_FPU_GETCW(flags);
|
||||
|
||||
@@ -16,15 +16,11 @@
|
||||
#ifndef __ROUNDING_MODE_H__
|
||||
#define __ROUNDING_MODE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "compat.h"
|
||||
|
||||
#if (defined(_WIN32) && defined (_MSC_VER))
|
||||
// needed for _controlfp_s and rounding modes in RoundingMode
|
||||
#include <float.h>
|
||||
#include "errorHelpers.h"
|
||||
#include "testHarness.h"
|
||||
#else
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
|
||||
@@ -58,14 +58,14 @@ int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
|
||||
{
|
||||
return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps,
|
||||
( imageSupportRequired ) ? verifyImageSupport : NULL );
|
||||
return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps,
|
||||
( imageSupportRequired ) ? verifyImageSupport : NULL );
|
||||
}
|
||||
|
||||
int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
|
||||
DeviceCheckFn deviceCheckFn )
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
|
||||
DeviceCheckFn deviceCheckFn )
|
||||
{
|
||||
test_start();
|
||||
|
||||
@@ -451,20 +451,20 @@ int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
|
||||
int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore the old FP mode before leaving.
|
||||
// Restore the old FP mode before leaving.
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
|
||||
@@ -29,21 +29,21 @@ extern cl_uint gRandomSeed;
|
||||
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
|
||||
// setup work, and then call each function in turn as dictated by the passed arguments.
|
||||
extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
|
||||
|
||||
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
|
||||
typedef int (*DeviceCheckFn)( cl_device_id device );
|
||||
|
||||
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
|
||||
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
|
||||
|
||||
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
|
||||
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
|
||||
basefn *fnList, const char *fnNames[],
|
||||
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
|
||||
basefn *fnList, const char *fnNames[],
|
||||
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
|
||||
|
||||
// Call this function if you need to do all the setup work yourself, and just need the function list called/
|
||||
// managed.
|
||||
@@ -57,15 +57,15 @@ extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device
|
||||
// deviceToUse, deviceGroupToUse and numElementsToUse are all just passed to each test function
|
||||
|
||||
extern int callTestFunctions( basefn functionList[], int numFunctions,
|
||||
const char *functionNames[],
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse,
|
||||
int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps );
|
||||
const char *functionNames[],
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse,
|
||||
int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps );
|
||||
|
||||
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
|
||||
extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps );
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps );
|
||||
|
||||
///// Miscellaneous steps
|
||||
|
||||
@@ -87,8 +87,8 @@ extern cl_device_id GetOpposingDevice( cl_device_id device );
|
||||
|
||||
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
|
||||
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
|
||||
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
|
||||
extern int gHasLong; // This is set to 1 if the device suppots long and ulong types in OpenCL C.
|
||||
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
|
||||
extern int gHasLong; // This is set to 1 if the device suppots long and ulong types in OpenCL C.
|
||||
extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
|
||||
@@ -13,16 +13,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "compat.h"
|
||||
#include "threadTesting.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
@@ -33,10 +27,10 @@
|
||||
|
||||
typedef struct
|
||||
{
|
||||
basefn mFunction;
|
||||
cl_device_id mDevice;
|
||||
cl_context mContext;
|
||||
int mNumElements;
|
||||
basefn mFunction;
|
||||
cl_device_id mDevice;
|
||||
cl_context mContext;
|
||||
int mNumElements;
|
||||
} TestFnArgs;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -47,59 +41,59 @@ typedef struct
|
||||
|
||||
void *test_thread_wrapper( void *data )
|
||||
{
|
||||
TestFnArgs *args;
|
||||
int retVal;
|
||||
cl_context context;
|
||||
TestFnArgs *args;
|
||||
int retVal;
|
||||
cl_context context;
|
||||
|
||||
args = (TestFnArgs *)data;
|
||||
args = (TestFnArgs *)data;
|
||||
|
||||
/* Create a new context to use (contexts can't cross threads) */
|
||||
context = clCreateContext(NULL, args->mDeviceGroup);
|
||||
if( context == NULL )
|
||||
{
|
||||
log_error("clCreateContext failed for new thread\n");
|
||||
return (void *)(-1);
|
||||
}
|
||||
/* Create a new context to use (contexts can't cross threads) */
|
||||
context = clCreateContext(NULL, args->mDeviceGroup);
|
||||
if( context == NULL )
|
||||
{
|
||||
log_error("clCreateContext failed for new thread\n");
|
||||
return (void *)(-1);
|
||||
}
|
||||
|
||||
/* Call function */
|
||||
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
|
||||
/* Call function */
|
||||
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
|
||||
|
||||
clReleaseContext( context );
|
||||
clReleaseContext( context );
|
||||
|
||||
return (void *)retVal;
|
||||
return (void *)retVal;
|
||||
}
|
||||
|
||||
int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
|
||||
{
|
||||
int error;
|
||||
pthread_t threadHdl;
|
||||
void *retVal;
|
||||
TestFnArgs args;
|
||||
int error;
|
||||
pthread_t threadHdl;
|
||||
void *retVal;
|
||||
TestFnArgs args;
|
||||
|
||||
|
||||
args.mFunction = fnToTest;
|
||||
args.mDeviceGroup = deviceGroup;
|
||||
args.mDevice = device;
|
||||
args.mContext = context;
|
||||
args.mNumElements = numElements;
|
||||
args.mFunction = fnToTest;
|
||||
args.mDeviceGroup = deviceGroup;
|
||||
args.mDevice = device;
|
||||
args.mContext = context;
|
||||
args.mNumElements = numElements;
|
||||
|
||||
|
||||
error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to create thread for testing!\n" );
|
||||
return -1;
|
||||
}
|
||||
error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to create thread for testing!\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Thread has been started, now just wait for it to complete (or crash) */
|
||||
error = pthread_join( threadHdl, &retVal );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to join testing thread!\n" );
|
||||
return -1;
|
||||
}
|
||||
/* Thread has been started, now just wait for it to complete (or crash) */
|
||||
error = pthread_join( threadHdl, &retVal );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to join testing thread!\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (int)((intptr_t)retVal);
|
||||
return (int)((intptr_t)retVal);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#define TEST_NOT_IMPLEMENTED -99
|
||||
#define TEST_NOT_IMPLEMENTED -99
|
||||
|
||||
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
|
||||
|
||||
@@ -32,14 +32,14 @@
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, mem_flags, fmt, width );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
cl_int err = Create( context, mem_flags, fmt, width );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
|
||||
{
|
||||
cl_int error;
|
||||
cl_int error;
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
@@ -106,20 +106,20 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, con
|
||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
||||
|
||||
#endif
|
||||
return error;
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, mem_flags, fmt, width, height );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
cl_int err = Create( context, mem_flags, fmt, width, height );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
|
||||
{
|
||||
cl_int error;
|
||||
cl_int error;
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
@@ -189,19 +189,19 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, con
|
||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
||||
|
||||
#endif
|
||||
return error;
|
||||
return error;
|
||||
}
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, mem_flags, fmt, width, height, depth );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
cl_int err = Create( context, mem_flags, fmt, width, height, depth );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
|
||||
{
|
||||
cl_int error;
|
||||
cl_int error;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
@@ -272,20 +272,20 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, con
|
||||
|
||||
#endif
|
||||
|
||||
return error;
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
|
||||
{
|
||||
cl_int error;
|
||||
cl_int error;
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
@@ -423,7 +423,7 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageTyp
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
return error;
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
@@ -433,21 +433,21 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageTyp
|
||||
*******/
|
||||
clProtectedArray::clProtectedArray()
|
||||
{
|
||||
mBuffer = mValidBuffer = NULL;
|
||||
mBuffer = mValidBuffer = NULL;
|
||||
}
|
||||
|
||||
clProtectedArray::clProtectedArray( size_t sizeInBytes )
|
||||
{
|
||||
mBuffer = mValidBuffer = NULL;
|
||||
Allocate( sizeInBytes );
|
||||
mBuffer = mValidBuffer = NULL;
|
||||
Allocate( sizeInBytes );
|
||||
}
|
||||
|
||||
clProtectedArray::~clProtectedArray()
|
||||
{
|
||||
if( mBuffer != NULL ) {
|
||||
if( mBuffer != NULL ) {
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap( mBuffer, mRealSize );
|
||||
if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
|
||||
int error = munmap( mBuffer, mRealSize );
|
||||
if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
|
||||
#else
|
||||
free( mBuffer );
|
||||
#endif
|
||||
@@ -459,19 +459,19 @@ void clProtectedArray::Allocate( size_t sizeInBytes )
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
|
||||
// Allocate enough space to: round up our actual allocation to an even number of pages
|
||||
// and allocate two pages on either side
|
||||
mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
|
||||
mRealSize = mRoundedSize + kPageSize * 2;
|
||||
// Allocate enough space to: round up our actual allocation to an even number of pages
|
||||
// and allocate two pages on either side
|
||||
mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
|
||||
mRealSize = mRoundedSize + kPageSize * 2;
|
||||
|
||||
// Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
|
||||
// Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
|
||||
mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
mValidBuffer = mBuffer + kPageSize;
|
||||
mValidBuffer = mBuffer + kPageSize;
|
||||
|
||||
// Protect guard area from access
|
||||
mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
|
||||
mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
|
||||
// Protect guard area from access
|
||||
mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
|
||||
mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
|
||||
#else
|
||||
mRoundedSize = mRealSize = sizeInBytes;
|
||||
mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
|
||||
|
||||
@@ -36,95 +36,95 @@ extern "C" cl_uint gRandomSeed;
|
||||
|
||||
class clContextWrapper
|
||||
{
|
||||
public:
|
||||
clContextWrapper() { mContext = NULL; }
|
||||
clContextWrapper( cl_context program ) { mContext = program; }
|
||||
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
|
||||
public:
|
||||
clContextWrapper() { mContext = NULL; }
|
||||
clContextWrapper( cl_context program ) { mContext = program; }
|
||||
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
|
||||
|
||||
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
|
||||
operator cl_context() { return mContext; }
|
||||
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
|
||||
operator cl_context() { return mContext; }
|
||||
|
||||
cl_context * operator&() { return &mContext; }
|
||||
cl_context * operator&() { return &mContext; }
|
||||
|
||||
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
|
||||
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
cl_context mContext;
|
||||
cl_context mContext;
|
||||
};
|
||||
|
||||
/* cl_program wrapper */
|
||||
|
||||
class clProgramWrapper
|
||||
{
|
||||
public:
|
||||
clProgramWrapper() { mProgram = NULL; }
|
||||
clProgramWrapper( cl_program program ) { mProgram = program; }
|
||||
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
|
||||
public:
|
||||
clProgramWrapper() { mProgram = NULL; }
|
||||
clProgramWrapper( cl_program program ) { mProgram = program; }
|
||||
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
|
||||
|
||||
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
|
||||
operator cl_program() { return mProgram; }
|
||||
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
|
||||
operator cl_program() { return mProgram; }
|
||||
|
||||
cl_program * operator&() { return &mProgram; }
|
||||
cl_program * operator&() { return &mProgram; }
|
||||
|
||||
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
|
||||
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
cl_program mProgram;
|
||||
cl_program mProgram;
|
||||
};
|
||||
|
||||
/* cl_kernel wrapper */
|
||||
|
||||
class clKernelWrapper
|
||||
{
|
||||
public:
|
||||
clKernelWrapper() { mKernel = NULL; }
|
||||
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
|
||||
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
|
||||
public:
|
||||
clKernelWrapper() { mKernel = NULL; }
|
||||
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
|
||||
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
|
||||
|
||||
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
|
||||
operator cl_kernel() { return mKernel; }
|
||||
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
|
||||
operator cl_kernel() { return mKernel; }
|
||||
|
||||
cl_kernel * operator&() { return &mKernel; }
|
||||
cl_kernel * operator&() { return &mKernel; }
|
||||
|
||||
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
|
||||
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
cl_kernel mKernel;
|
||||
cl_kernel mKernel;
|
||||
};
|
||||
|
||||
/* cl_mem (stream) wrapper */
|
||||
|
||||
class clMemWrapper
|
||||
{
|
||||
public:
|
||||
clMemWrapper() { mMem = NULL; }
|
||||
clMemWrapper( cl_mem mem ) { mMem = mem; }
|
||||
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
|
||||
public:
|
||||
clMemWrapper() { mMem = NULL; }
|
||||
clMemWrapper( cl_mem mem ) { mMem = mem; }
|
||||
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
|
||||
|
||||
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_mem() { return mMem; }
|
||||
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_mem() { return mMem; }
|
||||
|
||||
cl_mem * operator&() { return &mMem; }
|
||||
cl_mem * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
|
||||
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
cl_mem mMem;
|
||||
cl_mem mMem;
|
||||
};
|
||||
|
||||
class clProtectedImage
|
||||
{
|
||||
public:
|
||||
clProtectedImage() { image = NULL; backingStore = NULL; }
|
||||
public:
|
||||
clProtectedImage() { image = NULL; backingStore = NULL; }
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
|
||||
~clProtectedImage()
|
||||
~clProtectedImage()
|
||||
{
|
||||
if( image != NULL )
|
||||
clReleaseMemObject( image );
|
||||
@@ -136,102 +136,102 @@ class clProtectedImage
|
||||
}
|
||||
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
|
||||
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
|
||||
|
||||
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
|
||||
operator cl_mem() { return image; }
|
||||
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
|
||||
operator cl_mem() { return image; }
|
||||
|
||||
cl_mem * operator&() { return &image; }
|
||||
cl_mem * operator&() { return &image; }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return image == rhs; }
|
||||
bool operator==( const cl_mem &rhs ) { return image == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void *backingStore;
|
||||
size_t backingStoreSize;
|
||||
cl_mem image;
|
||||
cl_mem image;
|
||||
};
|
||||
|
||||
/* cl_command_queue wrapper */
|
||||
|
||||
class clCommandQueueWrapper
|
||||
{
|
||||
public:
|
||||
clCommandQueueWrapper() { mMem = NULL; }
|
||||
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
|
||||
public:
|
||||
clCommandQueueWrapper() { mMem = NULL; }
|
||||
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
|
||||
~clCommandQueueWrapper() { if( mMem != NULL ) {int error = clFinish(mMem); if (error) print_error(error, "clFinish failed"); clReleaseCommandQueue( mMem );} }
|
||||
|
||||
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_command_queue() { return mMem; }
|
||||
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_command_queue() { return mMem; }
|
||||
|
||||
cl_command_queue * operator&() { return &mMem; }
|
||||
cl_command_queue * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
|
||||
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
cl_command_queue mMem;
|
||||
cl_command_queue mMem;
|
||||
};
|
||||
|
||||
/* cl_sampler wrapper */
|
||||
class clSamplerWrapper
|
||||
{
|
||||
public:
|
||||
clSamplerWrapper() { mMem = NULL; }
|
||||
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
|
||||
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
|
||||
public:
|
||||
clSamplerWrapper() { mMem = NULL; }
|
||||
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
|
||||
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
|
||||
|
||||
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_sampler() { return mMem; }
|
||||
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_sampler() { return mMem; }
|
||||
|
||||
cl_sampler * operator&() { return &mMem; }
|
||||
cl_sampler * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
|
||||
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
cl_sampler mMem;
|
||||
cl_sampler mMem;
|
||||
};
|
||||
|
||||
/* cl_event wrapper */
|
||||
class clEventWrapper
|
||||
{
|
||||
public:
|
||||
clEventWrapper() { mMem = NULL; }
|
||||
clEventWrapper( cl_event mem ) { mMem = mem; }
|
||||
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
|
||||
public:
|
||||
clEventWrapper() { mMem = NULL; }
|
||||
clEventWrapper( cl_event mem ) { mMem = mem; }
|
||||
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
|
||||
|
||||
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_event() { return mMem; }
|
||||
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_event() { return mMem; }
|
||||
|
||||
cl_event * operator&() { return &mMem; }
|
||||
cl_event * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
|
||||
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
cl_event mMem;
|
||||
cl_event mMem;
|
||||
};
|
||||
|
||||
/* Generic protected memory buffer, for verifying access within bounds */
|
||||
class clProtectedArray
|
||||
{
|
||||
public:
|
||||
clProtectedArray();
|
||||
clProtectedArray( size_t sizeInBytes );
|
||||
virtual ~clProtectedArray();
|
||||
public:
|
||||
clProtectedArray();
|
||||
clProtectedArray( size_t sizeInBytes );
|
||||
virtual ~clProtectedArray();
|
||||
|
||||
void Allocate( size_t sizeInBytes );
|
||||
void Allocate( size_t sizeInBytes );
|
||||
|
||||
operator void *() { return (void *)mValidBuffer; }
|
||||
operator const void *() const { return (const void *)mValidBuffer; }
|
||||
operator void *() { return (void *)mValidBuffer; }
|
||||
operator const void *() const { return (const void *)mValidBuffer; }
|
||||
|
||||
protected:
|
||||
protected:
|
||||
|
||||
char * mBuffer;
|
||||
char * mValidBuffer;
|
||||
size_t mRealSize, mRoundedSize;
|
||||
char * mBuffer;
|
||||
char * mValidBuffer;
|
||||
size_t mRealSize, mRoundedSize;
|
||||
};
|
||||
|
||||
class RandomSeed
|
||||
@@ -281,14 +281,14 @@ template <typename T> class BufferOwningPtr
|
||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
|
||||
@@ -301,20 +301,20 @@ template <typename T> class BufferOwningPtr
|
||||
abort();
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
ptr = p;
|
||||
map = m;
|
||||
mapsize = mapsize_;
|
||||
allocsize = allocsize_;
|
||||
aligned = aligned_;
|
||||
aligned = aligned_;
|
||||
#if ! defined( __APPLE__ )
|
||||
if(m)
|
||||
{
|
||||
|
||||
8
test_common/miniz/CMakeLists.txt
Normal file
8
test_common/miniz/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
set(TARGET_NAME miniz)
|
||||
|
||||
add_library(
|
||||
${TARGET_NAME}
|
||||
STATIC
|
||||
miniz.c
|
||||
miniz.h
|
||||
)
|
||||
4153
test_common/miniz/miniz.c
Normal file
4153
test_common/miniz/miniz.c
Normal file
File diff suppressed because it is too large
Load Diff
749
test_common/miniz/miniz.h
Normal file
749
test_common/miniz/miniz.h
Normal file
@@ -0,0 +1,749 @@
|
||||
#ifndef MINIZ_HEADER_INCLUDED
|
||||
#define MINIZ_HEADER_INCLUDED
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
|
||||
// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux
|
||||
#define MINIZ_NO_TIME
|
||||
#endif
|
||||
|
||||
#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
|
||||
// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
|
||||
#define MINIZ_X86_OR_X64_CPU 1
|
||||
#endif
|
||||
|
||||
#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
|
||||
// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
|
||||
#define MINIZ_LITTLE_ENDIAN 1
|
||||
#endif
|
||||
|
||||
#if MINIZ_X86_OR_X64_CPU
|
||||
// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPUs that permit efficient integer loads and stores from unaligned addresses.
|
||||
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
|
||||
// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
|
||||
#define MINIZ_HAS_64BIT_REGISTERS 1
|
||||
#endif
|
||||
|
||||
// Return status codes. MZ_PARAM_ERROR is non-standard.
|
||||
enum {
|
||||
MZ_OK = 0,
|
||||
MZ_STREAM_END = 1,
|
||||
MZ_NEED_DICT = 2,
|
||||
MZ_ERRNO = -1,
|
||||
MZ_STREAM_ERROR = -2,
|
||||
MZ_DATA_ERROR = -3,
|
||||
MZ_MEM_ERROR = -4,
|
||||
MZ_BUF_ERROR = -5,
|
||||
MZ_VERSION_ERROR = -6,
|
||||
MZ_PARAM_ERROR = -10000
|
||||
};
|
||||
|
||||
typedef unsigned long mz_ulong;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// ------------------- zlib-style API Definitions.
|
||||
|
||||
// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
|
||||
void mz_free(void *p);
|
||||
|
||||
#define MZ_ADLER32_INIT (1)
|
||||
// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
|
||||
mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
|
||||
|
||||
#define MZ_CRC32_INIT (0)
|
||||
// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL.
|
||||
mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
|
||||
|
||||
// Compression strategies.
|
||||
enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 };
|
||||
|
||||
// Method
|
||||
#define MZ_DEFLATED 8
|
||||
|
||||
#ifndef MINIZ_NO_ZLIB_APIS
|
||||
|
||||
// Heap allocation callbacks.
|
||||
// Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long.
|
||||
typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
|
||||
typedef void (*mz_free_func)(void *opaque, void *address);
|
||||
typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);
|
||||
|
||||
#define MZ_VERSION "9.1.15"
|
||||
#define MZ_VERNUM 0x91F0
|
||||
#define MZ_VER_MAJOR 9
|
||||
#define MZ_VER_MINOR 1
|
||||
#define MZ_VER_REVISION 15
|
||||
#define MZ_VER_SUBREVISION 0
|
||||
|
||||
// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
|
||||
enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 };
|
||||
|
||||
// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
|
||||
enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };
|
||||
|
||||
// Window bits
|
||||
#define MZ_DEFAULT_WINDOW_BITS 15
|
||||
|
||||
struct mz_internal_state;
|
||||
|
||||
// Compression/decompression stream struct.
|
||||
typedef struct mz_stream_s
|
||||
{
|
||||
const unsigned char *next_in; // pointer to next byte to read
|
||||
unsigned int avail_in; // number of bytes available at next_in
|
||||
mz_ulong total_in; // total number of bytes consumed so far
|
||||
|
||||
unsigned char *next_out; // pointer to next byte to write
|
||||
unsigned int avail_out; // number of bytes that can be written to next_out
|
||||
mz_ulong total_out; // total number of bytes produced so far
|
||||
|
||||
char *msg; // error msg (unused)
|
||||
struct mz_internal_state *state; // internal state, allocated by zalloc/zfree
|
||||
|
||||
mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc)
|
||||
mz_free_func zfree; // optional heap free function (defaults to free)
|
||||
void *opaque; // heap alloc function user pointer
|
||||
|
||||
int data_type; // data_type (unused)
|
||||
mz_ulong adler; // adler32 of the source or uncompressed data
|
||||
mz_ulong reserved; // not used
|
||||
} mz_stream;
|
||||
|
||||
typedef mz_stream *mz_streamp;
|
||||
|
||||
// Returns the version string of miniz.c.
|
||||
const char *mz_version(void);
|
||||
|
||||
// mz_deflateInit() initializes a compressor with default options:
|
||||
// Parameters:
|
||||
// pStream must point to an initialized mz_stream struct.
|
||||
// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
|
||||
// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
|
||||
// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
|
||||
// Return values:
|
||||
// MZ_OK on success.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
// MZ_PARAM_ERROR if the input parameters are bogus.
|
||||
// MZ_MEM_ERROR on out of memory.
|
||||
int mz_deflateInit(mz_streamp pStream, int level);
|
||||
|
||||
// mz_deflateInit2() is like mz_deflateInit(), except with more control:
|
||||
// Additional parameters:
|
||||
// method must be MZ_DEFLATED
|
||||
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
|
||||
// mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
|
||||
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);
|
||||
|
||||
// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
|
||||
int mz_deflateReset(mz_streamp pStream);
|
||||
|
||||
// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
|
||||
// Parameters:
|
||||
// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
|
||||
// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
|
||||
// Return values:
|
||||
// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
|
||||
// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
// MZ_PARAM_ERROR if one of the parameters is invalid.
|
||||
// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
|
||||
int mz_deflate(mz_streamp pStream, int flush);
|
||||
|
||||
// mz_deflateEnd() deinitializes a compressor:
|
||||
// Return values:
|
||||
// MZ_OK on success.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
int mz_deflateEnd(mz_streamp pStream);
|
||||
|
||||
// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
|
||||
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
|
||||
|
||||
// Single-call compression functions mz_compress() and mz_compress2():
|
||||
// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
|
||||
int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
|
||||
int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);
|
||||
|
||||
// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
|
||||
mz_ulong mz_compressBound(mz_ulong source_len);
|
||||
|
||||
// Initializes a decompressor.
|
||||
int mz_inflateInit(mz_streamp pStream);
|
||||
|
||||
// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
|
||||
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
|
||||
int mz_inflateInit2(mz_streamp pStream, int window_bits);
|
||||
|
||||
// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
|
||||
// Parameters:
|
||||
// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
|
||||
// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
|
||||
// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
|
||||
// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
|
||||
// Return values:
|
||||
// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
|
||||
// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
|
||||
// MZ_STREAM_ERROR if the stream is bogus.
|
||||
// MZ_DATA_ERROR if the deflate stream is invalid.
|
||||
// MZ_PARAM_ERROR if one of the parameters is invalid.
|
||||
// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
|
||||
// with more input data, or with more room in the output buffer (except when using single call decompression, described above).
|
||||
int mz_inflate(mz_streamp pStream, int flush);
|
||||
|
||||
// Deinitializes a decompressor.
|
||||
int mz_inflateEnd(mz_streamp pStream);
|
||||
|
||||
// Single-call decompression.
|
||||
// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
|
||||
int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
|
||||
|
||||
// Returns a string description of the specified error code, or NULL if the error code is invalid.
|
||||
const char *mz_error(int err);
|
||||
|
||||
// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
|
||||
// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
|
||||
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
|
||||
typedef unsigned char Byte;
|
||||
typedef unsigned int uInt;
|
||||
typedef mz_ulong uLong;
|
||||
typedef Byte Bytef;
|
||||
typedef uInt uIntf;
|
||||
typedef char charf;
|
||||
typedef int intf;
|
||||
typedef void *voidpf;
|
||||
typedef uLong uLongf;
|
||||
typedef void *voidp;
|
||||
typedef void *const voidpc;
|
||||
#define Z_NULL 0
|
||||
#define Z_NO_FLUSH MZ_NO_FLUSH
|
||||
#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
|
||||
#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
|
||||
#define Z_FULL_FLUSH MZ_FULL_FLUSH
|
||||
#define Z_FINISH MZ_FINISH
|
||||
#define Z_BLOCK MZ_BLOCK
|
||||
#define Z_OK MZ_OK
|
||||
#define Z_STREAM_END MZ_STREAM_END
|
||||
#define Z_NEED_DICT MZ_NEED_DICT
|
||||
#define Z_ERRNO MZ_ERRNO
|
||||
#define Z_STREAM_ERROR MZ_STREAM_ERROR
|
||||
#define Z_DATA_ERROR MZ_DATA_ERROR
|
||||
#define Z_MEM_ERROR MZ_MEM_ERROR
|
||||
#define Z_BUF_ERROR MZ_BUF_ERROR
|
||||
#define Z_VERSION_ERROR MZ_VERSION_ERROR
|
||||
#define Z_PARAM_ERROR MZ_PARAM_ERROR
|
||||
#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
|
||||
#define Z_BEST_SPEED MZ_BEST_SPEED
|
||||
#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
|
||||
#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
|
||||
#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
|
||||
#define Z_FILTERED MZ_FILTERED
|
||||
#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
|
||||
#define Z_RLE MZ_RLE
|
||||
#define Z_FIXED MZ_FIXED
|
||||
#define Z_DEFLATED MZ_DEFLATED
|
||||
#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
|
||||
#define alloc_func mz_alloc_func
|
||||
#define free_func mz_free_func
|
||||
#define internal_state mz_internal_state
|
||||
#define z_stream mz_stream
|
||||
#define deflateInit mz_deflateInit
|
||||
#define deflateInit2 mz_deflateInit2
|
||||
#define deflateReset mz_deflateReset
|
||||
#define deflate mz_deflate
|
||||
#define deflateEnd mz_deflateEnd
|
||||
#define deflateBound mz_deflateBound
|
||||
#define compress mz_compress
|
||||
#define compress2 mz_compress2
|
||||
#define compressBound mz_compressBound
|
||||
#define inflateInit mz_inflateInit
|
||||
#define inflateInit2 mz_inflateInit2
|
||||
#define inflate mz_inflate
|
||||
#define inflateEnd mz_inflateEnd
|
||||
#define uncompress mz_uncompress
|
||||
#define crc32 mz_crc32
|
||||
#define adler32 mz_adler32
|
||||
#define MAX_WBITS 15
|
||||
#define MAX_MEM_LEVEL 9
|
||||
#define zError mz_error
|
||||
#define ZLIB_VERSION MZ_VERSION
|
||||
#define ZLIB_VERNUM MZ_VERNUM
|
||||
#define ZLIB_VER_MAJOR MZ_VER_MAJOR
|
||||
#define ZLIB_VER_MINOR MZ_VER_MINOR
|
||||
#define ZLIB_VER_REVISION MZ_VER_REVISION
|
||||
#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
|
||||
#define zlibVersion mz_version
|
||||
#define zlib_version mz_version()
|
||||
#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
|
||||
|
||||
#endif // MINIZ_NO_ZLIB_APIS
|
||||
|
||||
// ------------------- Types and macros
|
||||
|
||||
typedef unsigned char mz_uint8;
|
||||
typedef signed short mz_int16;
|
||||
typedef unsigned short mz_uint16;
|
||||
typedef unsigned int mz_uint32;
|
||||
typedef unsigned int mz_uint;
|
||||
typedef long long mz_int64;
|
||||
typedef unsigned long long mz_uint64;
|
||||
typedef int mz_bool;
|
||||
|
||||
#define MZ_FALSE (0)
|
||||
#define MZ_TRUE (1)
|
||||
|
||||
// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message.
|
||||
#ifdef _MSC_VER
|
||||
#define MZ_MACRO_END while (0, 0)
|
||||
#else
|
||||
#define MZ_MACRO_END while (0)
|
||||
#endif
|
||||
|
||||
// ------------------- ZIP archive reading/writing
|
||||
|
||||
#ifndef MINIZ_NO_ARCHIVE_APIS
|
||||
|
||||
enum
|
||||
{
|
||||
MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024,
|
||||
MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260,
|
||||
MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
mz_uint32 m_file_index;
|
||||
mz_uint32 m_central_dir_ofs;
|
||||
mz_uint16 m_version_made_by;
|
||||
mz_uint16 m_version_needed;
|
||||
mz_uint16 m_bit_flag;
|
||||
mz_uint16 m_method;
|
||||
#ifndef MINIZ_NO_TIME
|
||||
time_t m_time;
|
||||
#endif
|
||||
mz_uint32 m_crc32;
|
||||
mz_uint64 m_comp_size;
|
||||
mz_uint64 m_uncomp_size;
|
||||
mz_uint16 m_internal_attr;
|
||||
mz_uint32 m_external_attr;
|
||||
mz_uint64 m_local_header_ofs;
|
||||
mz_uint32 m_comment_size;
|
||||
char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];
|
||||
char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];
|
||||
} mz_zip_archive_file_stat;
|
||||
|
||||
typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n);
|
||||
typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n);
|
||||
|
||||
struct mz_zip_internal_state_tag;
|
||||
typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
MZ_ZIP_MODE_INVALID = 0,
|
||||
MZ_ZIP_MODE_READING = 1,
|
||||
MZ_ZIP_MODE_WRITING = 2,
|
||||
MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
|
||||
} mz_zip_mode;
|
||||
|
||||
typedef struct mz_zip_archive_tag
|
||||
{
|
||||
mz_uint64 m_archive_size;
|
||||
mz_uint64 m_central_directory_file_ofs;
|
||||
mz_uint m_total_files;
|
||||
mz_zip_mode m_zip_mode;
|
||||
|
||||
mz_uint m_file_offset_alignment;
|
||||
|
||||
mz_alloc_func m_pAlloc;
|
||||
mz_free_func m_pFree;
|
||||
mz_realloc_func m_pRealloc;
|
||||
void *m_pAlloc_opaque;
|
||||
|
||||
mz_file_read_func m_pRead;
|
||||
mz_file_write_func m_pWrite;
|
||||
void *m_pIO_opaque;
|
||||
|
||||
mz_zip_internal_state *m_pState;
|
||||
|
||||
} mz_zip_archive;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
|
||||
MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
|
||||
MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
|
||||
MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
|
||||
} mz_zip_flags;
|
||||
|
||||
// ZIP archive reading
|
||||
|
||||
// Inits a ZIP archive reader.
|
||||
// These functions read and validate the archive's central directory.
|
||||
mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags);
|
||||
mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags);
|
||||
#endif
|
||||
|
||||
// Returns the total number of files in the archive.
|
||||
mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);
|
||||
|
||||
// Returns detailed information about an archive file entry.
|
||||
mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat);
|
||||
|
||||
// Determines if an archive file entry is a directory entry.
|
||||
mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index);
|
||||
mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index);
|
||||
|
||||
// Retrieves the filename of an archive file entry.
|
||||
// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename.
|
||||
mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size);
|
||||
|
||||
// Attempts to locate a file in the archive's central directory.
|
||||
// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
|
||||
// Returns -1 if the file cannot be found.
|
||||
int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
|
||||
|
||||
// Extracts an archive file to a memory buffer using no memory allocation.
|
||||
mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
|
||||
mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
|
||||
|
||||
// Extracts an archive file to a memory buffer.
|
||||
mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags);
|
||||
mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags);
|
||||
|
||||
// Extracts an archive file to a dynamically allocated heap buffer.
|
||||
void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags);
|
||||
void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags);
|
||||
|
||||
// Extracts an archive file using a callback function to output the file's data.
|
||||
mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
|
||||
mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
// Extracts an archive file to a disk file and sets its last accessed and modified times.
|
||||
// This function only extracts files, not archive directory records.
|
||||
mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags);
|
||||
mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags);
|
||||
#endif
|
||||
|
||||
// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used.
|
||||
mz_bool mz_zip_reader_end(mz_zip_archive *pZip);
|
||||
|
||||
// ZIP archive writing
|
||||
|
||||
#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
|
||||
|
||||
// Inits a ZIP archive writer.
|
||||
mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
|
||||
mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning);
|
||||
#endif
|
||||
|
||||
// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive.
|
||||
// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called.
|
||||
// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it).
|
||||
// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL.
|
||||
// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before
|
||||
// the archive is finalized the file's central directory will be hosed.
|
||||
mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename);
|
||||
|
||||
// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive.
|
||||
// To add a directory entry, call this method with an archive name ending in a forward slash and an empty buffer.
|
||||
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||
mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags);
|
||||
mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32);
|
||||
|
||||
#ifndef MINIZ_NO_STDIO
|
||||
// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive.
|
||||
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||
mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
|
||||
#endif
|
||||
|
||||
// Adds a file to an archive by fully cloning the data from another archive.
|
||||
// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields.
|
||||
mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index);
|
||||
|
||||
// Finalizes the archive by writing the central directory records followed by the end of central directory record.
|
||||
// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end().
|
||||
// An archive must be manually finalized by calling this function for it to be valid.
|
||||
mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);
|
||||
mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize);
|
||||
|
||||
// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used.
|
||||
// Note for the archive to be valid, it must have been finalized before ending.
|
||||
mz_bool mz_zip_writer_end(mz_zip_archive *pZip);
|
||||
|
||||
// Misc. high-level helper functions:
|
||||
|
||||
// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive.
|
||||
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||
mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
|
||||
|
||||
// Reads a single file from an archive into a heap block.
|
||||
// Returns NULL on failure.
|
||||
void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags);
|
||||
|
||||
#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
|
||||
|
||||
#endif // #ifndef MINIZ_NO_ARCHIVE_APIS
|
||||
|
||||
// ------------------- Low-level Decompression API Definitions
|
||||
|
||||
// Decompression flags used by tinfl_decompress().
|
||||
// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
|
||||
// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
|
||||
// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
|
||||
// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
|
||||
// Decompression flag bits for tinfl_decompress(); each flag occupies its own bit (1, 2, 4, 8).
enum
|
||||
{
|
||||
TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
|
||||
TINFL_FLAG_HAS_MORE_INPUT = 2,
|
||||
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
|
||||
TINFL_FLAG_COMPUTE_ADLER32 = 8
|
||||
};
|
||||
|
||||
// High level decompression functions:
|
||||
// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
|
||||
// On entry:
|
||||
// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
|
||||
// On return:
|
||||
// Function returns a pointer to the decompressed data, or NULL on failure.
|
||||
// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
|
||||
// The caller must call mz_free() on the returned block when it's no longer needed.
|
||||
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
|
||||
|
||||
// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
|
||||
// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
|
||||
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
|
||||
size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
|
||||
|
||||
// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
|
||||
// Returns 1 on success or 0 on failure.
|
||||
typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
|
||||
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||
|
||||
struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;
|
||||
|
||||
// Max size of LZ dictionary.
|
||||
#define TINFL_LZ_DICT_SIZE 32768
|
||||
|
||||
// Return status.
|
||||
// Return type of tinfl_decompress(). Failure codes are negative, TINFL_STATUS_DONE is zero,
// and the "needs more input" / "has more output" codes are positive.
typedef enum
|
||||
{
|
||||
TINFL_STATUS_BAD_PARAM = -3,
|
||||
TINFL_STATUS_ADLER32_MISMATCH = -2,
|
||||
TINFL_STATUS_FAILED = -1,
|
||||
TINFL_STATUS_DONE = 0,
|
||||
TINFL_STATUS_NEEDS_MORE_INPUT = 1,
|
||||
TINFL_STATUS_HAS_MORE_OUTPUT = 2
|
||||
} tinfl_status;
|
||||
|
||||
// Initializes the decompressor to its initial state.
|
||||
#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
|
||||
#define tinfl_get_adler32(r) (r)->m_check_adler32
|
||||
|
||||
// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
|
||||
// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
|
||||
tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);
|
||||
|
||||
// Internal/private bits follow.
|
||||
// Sizing constants for the decompressor; several are used as array bounds in
// tinfl_huff_table and struct tinfl_decompressor_tag.
// TINFL_FAST_LOOKUP_SIZE is 2^TINFL_FAST_LOOKUP_BITS (1024 entries).
enum
|
||||
{
|
||||
TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19,
|
||||
TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
|
||||
};
|
||||
|
||||
// One Huffman table of the decompressor; struct tinfl_decompressor_tag embeds
// TINFL_MAX_HUFF_TABLES of these in m_tables.
// All arrays are sized for the largest alphabet (TINFL_MAX_HUFF_SYMBOLS_0).
typedef struct
|
||||
{
|
||||
mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
|
||||
mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
|
||||
} tinfl_huff_table;
|
||||
|
||||
#if MINIZ_HAS_64BIT_REGISTERS
|
||||
#define TINFL_USE_64BIT_BITBUF 1
|
||||
#endif
|
||||
|
||||
#if TINFL_USE_64BIT_BITBUF
|
||||
typedef mz_uint64 tinfl_bit_buf_t;
|
||||
#define TINFL_BITBUF_SIZE (64)
|
||||
#else
|
||||
typedef mz_uint32 tinfl_bit_buf_t;
|
||||
#define TINFL_BITBUF_SIZE (32)
|
||||
#endif
|
||||
|
||||
// Decompressor state, typedef'd as tinfl_decompressor and passed to tinfl_decompress().
// tinfl_init() resets it by setting m_state to 0; tinfl_get_adler32() reads m_check_adler32.
// The bit buffer width follows tinfl_bit_buf_t (64 or 32 bits, see TINFL_USE_64BIT_BITBUF).
struct tinfl_decompressor_tag
|
||||
{
|
||||
mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
|
||||
tinfl_bit_buf_t m_bit_buf;
|
||||
size_t m_dist_from_out_buf_start;
|
||||
tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
|
||||
mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
|
||||
};
|
||||
|
||||
// ------------------- Low-level Compression API Definitions
|
||||
|
||||
// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently).
|
||||
#define TDEFL_LESS_MEMORY 0
|
||||
|
||||
// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search):
|
||||
// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression).
|
||||
// Probe-count values for the low 12 bits of the tdefl_init() flags;
// TDEFL_MAX_PROBES_MASK (0xFFF) selects those bits.
enum
|
||||
{
|
||||
TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF
|
||||
};
|
||||
|
||||
// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
|
||||
// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers).
|
||||
// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing.
|
||||
// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
|
||||
// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
|
||||
// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
|
||||
// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
|
||||
// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
|
||||
// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
|
||||
// tdefl_init() flag bits. All values sit above the low 12 bits (0x01000 and up),
// which are reserved for the probe count.
enum
|
||||
{
|
||||
TDEFL_WRITE_ZLIB_HEADER = 0x01000,
|
||||
TDEFL_COMPUTE_ADLER32 = 0x02000,
|
||||
TDEFL_GREEDY_PARSING_FLAG = 0x04000,
|
||||
TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
|
||||
TDEFL_RLE_MATCHES = 0x10000,
|
||||
TDEFL_FILTER_MATCHES = 0x20000,
|
||||
TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
|
||||
TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
|
||||
};
|
||||
|
||||
// High level compression functions:
|
||||
// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
|
||||
// On entry:
|
||||
// pSrc_buf, src_buf_len: Pointer and size of source block to compress.
|
||||
// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
|
||||
// On return:
|
||||
// Function returns a pointer to the compressed data, or NULL on failure.
|
||||
// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
|
||||
// The caller must free() the returned block when it's no longer needed.
|
||||
void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
|
||||
|
||||
// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
|
||||
// Returns 0 on failure.
|
||||
size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
|
||||
|
||||
// Compresses an image to a compressed PNG file in memory.
|
||||
// On entry:
|
||||
// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4.
|
||||
// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
|
||||
// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
|
||||
// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
|
||||
// On return:
|
||||
// Function returns a pointer to the compressed data, or NULL on failure.
|
||||
// *pLen_out will be set to the size of the PNG image file.
|
||||
// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
|
||||
void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
|
||||
void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);
|
||||
|
||||
// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
|
||||
typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
|
||||
|
||||
// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
|
||||
mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||
|
||||
// Compressor limits: Huffman table count and symbol counts, LZ dictionary size (32768) and
// its mask (size - 1), and the minimum/maximum match lengths (3 and 258).
enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 };
|
||||
|
||||
// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
|
||||
// Buffer and hash sizing. The two variants differ only in the LZ code buffer size
// (24KB with TDEFL_LESS_MEMORY, 64KB otherwise) and the hash bit count (12 vs. 15);
// TDEFL_OUT_BUF_SIZE is derived as 13/10 of the LZ code buffer size in both.
#if TDEFL_LESS_MEMORY
|
||||
enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
|
||||
#else
|
||||
enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
|
||||
#endif
|
||||
|
||||
// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
|
||||
// Status code returned by tdefl_init(), tdefl_compress(), tdefl_compress_buffer() and
// tdefl_get_prev_return_status(). Error codes are negative; OKAY is 0 and DONE is 1.
// NOTE(review): the trailing comma after the last enumerator is accepted by C99 and later,
// but strict C89 compilers may reject it.
typedef enum
|
||||
{
|
||||
TDEFL_STATUS_BAD_PARAM = -2,
|
||||
TDEFL_STATUS_PUT_BUF_FAILED = -1,
|
||||
TDEFL_STATUS_OKAY = 0,
|
||||
TDEFL_STATUS_DONE = 1,
|
||||
} tdefl_status;
|
||||
|
||||
// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
|
||||
// Flush mode passed as the last argument of tdefl_compress() and tdefl_compress_buffer().
// The value 1 is not used by this enum.
typedef enum
|
||||
{
|
||||
TDEFL_NO_FLUSH = 0,
|
||||
TDEFL_SYNC_FLUSH = 2,
|
||||
TDEFL_FULL_FLUSH = 3,
|
||||
TDEFL_FINISH = 4
|
||||
} tdefl_flush;
|
||||
|
||||
// tdefl's compression state structure.
|
||||
// Compressor state passed as the first argument of tdefl_init(), tdefl_compress(),
// tdefl_compress_buffer(), tdefl_get_prev_return_status() and tdefl_get_adler32().
// All working storage (m_dict, m_huff_*, m_lz_code_buf, m_next, m_hash, m_output_buf) is
// embedded as fixed-size arrays, so the struct holds no separately allocated buffers of its own.
typedef struct
|
||||
{
|
||||
// Output callback and user pointer; these match the corresponding tdefl_init() parameters.
tdefl_put_buf_func_ptr m_pPut_buf_func;
|
||||
void *m_pPut_buf_user;
|
||||
mz_uint m_flags, m_max_probes[2];
|
||||
int m_greedy_parsing;
|
||||
mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
|
||||
mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
|
||||
mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
|
||||
mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
|
||||
tdefl_status m_prev_return_status;
|
||||
// The next fields have the same types as the tdefl_compress() buffer and flush arguments.
const void *m_pIn_buf;
|
||||
void *m_pOut_buf;
|
||||
size_t *m_pIn_buf_size, *m_pOut_buf_size;
|
||||
tdefl_flush m_flush;
|
||||
const mz_uint8 *m_pSrc;
|
||||
size_t m_src_buf_left, m_out_buf_ofs;
|
||||
// Dictionary is sized TDEFL_LZ_DICT_SIZE plus (TDEFL_MAX_MATCH_LEN - 1) extra bytes.
mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
|
||||
mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||
mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||
mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||
mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
|
||||
mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
|
||||
mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
|
||||
mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
|
||||
} tdefl_compressor;
|
||||
|
||||
// Initializes the compressor.
|
||||
// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory.
|
||||
// pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression.
|
||||
// If pPut_buf_func is NULL the user should always call the tdefl_compress() API.
|
||||
// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
|
||||
tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||
|
||||
// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
|
||||
tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);
|
||||
|
||||
// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
|
||||
// tdefl_compress_buffer() always consumes the entire input buffer.
|
||||
tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);
|
||||
|
||||
tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
|
||||
mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
|
||||
|
||||
// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros.
|
||||
#ifndef MINIZ_NO_ZLIB_APIS
|
||||
// Create tdefl_compress() flags given zlib-style compression parameters.
|
||||
// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
|
||||
// window_bits may be -15 (raw deflate) or 15 (zlib)
|
||||
// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
|
||||
mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
|
||||
#endif // #ifndef MINIZ_NO_ZLIB_APIS
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // MINIZ_HEADER_INCLUDED
|
||||
@@ -65,3 +65,6 @@ add_subdirectory(select)
|
||||
add_subdirectory(thread_dimensions)
|
||||
add_subdirectory(vec_align)
|
||||
add_subdirectory(vec_step)
|
||||
|
||||
# Add any extension folders
|
||||
add_subdirectory(spir)
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
int fill_buffer_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t size, MTdata d, cl_bool blocking_write) {
|
||||
size_t i, j;
|
||||
size_t i, j;
|
||||
cl_uint *data;
|
||||
int error, result;
|
||||
cl_uint checksum_delta = 0;
|
||||
@@ -157,7 +157,7 @@ int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_
|
||||
size_t image_lines_to_use;
|
||||
image_lines_to_use = IMAGE_LINES;
|
||||
if (image_lines_to_use > height)
|
||||
image_lines_to_use = height;
|
||||
image_lines_to_use = height;
|
||||
|
||||
data = (cl_uint*)malloc(width*4*sizeof(cl_uint)*IMAGE_LINES);
|
||||
if (data == NULL) {
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
#include "allocation_fill.h"
|
||||
|
||||
|
||||
static cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT32 };
|
||||
static cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT32 };
|
||||
|
||||
int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
|
||||
int error;
|
||||
@@ -27,7 +27,7 @@ int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id de
|
||||
}
|
||||
|
||||
|
||||
int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height) {
|
||||
int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height, size_t* max_size) {
|
||||
size_t max_width, max_height, num_pixels, found_width, found_height;
|
||||
int error;
|
||||
|
||||
@@ -48,34 +48,44 @@ int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t
|
||||
|
||||
num_pixels = size_to_allocate / (sizeof(cl_uint)*4);
|
||||
|
||||
if (num_pixels > (max_width*max_height))
|
||||
if (num_pixels > (max_width*max_height)) {
|
||||
if(NULL != max_size) {
|
||||
*max_size = max_width * max_height * sizeof(cl_uint) * 4;
|
||||
}
|
||||
return FAILED_TOO_BIG;
|
||||
}
|
||||
|
||||
// We want a close-to-square aspect ratio.
|
||||
// Note that this implicitly assumes that max width >= max height
|
||||
found_width = (int)sqrt( (double) num_pixels );
|
||||
if (found_width == 0)
|
||||
found_width = 1;
|
||||
if( found_width > max_width ) {
|
||||
found_width = max_width;
|
||||
}
|
||||
if (found_width == 0)
|
||||
found_width = 1;
|
||||
|
||||
found_height = (size_t)num_pixels/found_width;
|
||||
if (found_height > max_height) {
|
||||
found_height = max_height;
|
||||
}
|
||||
if (found_height == 0)
|
||||
found_height = 1;
|
||||
|
||||
*width = found_width;
|
||||
*height = found_height;
|
||||
|
||||
if(NULL != max_size) {
|
||||
*max_size = found_width * found_height * sizeof(cl_uint) * 4;
|
||||
}
|
||||
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
|
||||
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
|
||||
size_t width, height;
|
||||
int error;
|
||||
|
||||
error = find_good_image_size(device_id, size_to_allocate, &width, &height);
|
||||
error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
|
||||
if (error != SUCCEEDED)
|
||||
return error;
|
||||
|
||||
@@ -91,7 +101,7 @@ int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_devic
|
||||
size_t width, height;
|
||||
int error;
|
||||
|
||||
error = find_good_image_size(device_id, size_to_allocate, &width, &height);
|
||||
error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
|
||||
if (error != SUCCEEDED)
|
||||
return error;
|
||||
|
||||
@@ -109,19 +119,18 @@ int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id devi
|
||||
if (type == BUFFER_NON_BLOCKING) return allocate_buffer(context, queue, device_id, mem, size_to_allocate, false);
|
||||
if (type == IMAGE_READ_NON_BLOCKING) return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, false);
|
||||
if (type == IMAGE_WRITE_NON_BLOCKING) return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, false);
|
||||
log_error("Invalid allocation type: %d\n", type);
|
||||
log_error("Invalid allocation type: %d\n", type);
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
|
||||
|
||||
int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
|
||||
int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d) {
|
||||
int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d) {
|
||||
|
||||
cl_ulong max_individual_allocation_size, global_mem_size;
|
||||
cl_ulong max_individual_allocation_size, global_mem_size;
|
||||
int error, result;
|
||||
size_t amount_allocated;
|
||||
size_t reduction_amount;
|
||||
size_t min_allocation_allowed;
|
||||
int current_allocation;
|
||||
size_t allocation_this_time, actual_allocation;
|
||||
|
||||
@@ -129,13 +138,17 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
||||
*number_of_mems = 0;
|
||||
|
||||
error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
|
||||
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
|
||||
test_error_abort(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
|
||||
error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
|
||||
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
|
||||
test_error_abort(error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
|
||||
|
||||
// log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
|
||||
// max_individual_allocation_size, toMB(max_individual_allocation_size),
|
||||
// global_mem_size, toMB(global_mem_size));
|
||||
if (global_mem_size > (cl_ulong)SIZE_MAX) {
|
||||
global_mem_size = (cl_ulong)SIZE_MAX;
|
||||
}
|
||||
|
||||
// log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
|
||||
// max_individual_allocation_size, toMB(max_individual_allocation_size),
|
||||
// global_mem_size, toMB(global_mem_size));
|
||||
|
||||
if (size_to_allocate > global_mem_size) {
|
||||
log_error("Can not allocate more than the global memory size.\n");
|
||||
@@ -144,18 +157,32 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
||||
|
||||
amount_allocated = 0;
|
||||
current_allocation = 0;
|
||||
reduction_amount = (size_t)max_individual_allocation_size/16;
|
||||
min_allocation_allowed = (size_t)max_individual_allocation_size/4;
|
||||
if (min_allocation_allowed > size_to_allocate)
|
||||
min_allocation_allowed = size_to_allocate/4;
|
||||
|
||||
// If allocating for images, reduce the maximum allocation size to the maximum image size.
|
||||
// If we don't do this, then the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 can be higher
|
||||
// than the maximum image size on systems with 16GB of RAM or more. In this case, we
|
||||
// succeed in allocating an image but its size is less than CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4
|
||||
// (min_allocation_allowed) and thus we fail the allocation below.
|
||||
if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) {
|
||||
size_t width;
|
||||
size_t height;
|
||||
size_t max_size;
|
||||
error = find_good_image_size(device_id, size_to_allocate, &width, &height, &max_size);
|
||||
if (!(error == SUCCEEDED || error == FAILED_TOO_BIG))
|
||||
return error;
|
||||
if (max_size < max_individual_allocation_size)
|
||||
max_individual_allocation_size = max_size;
|
||||
}
|
||||
|
||||
reduction_amount = (size_t)max_individual_allocation_size / 16;
|
||||
|
||||
if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
|
||||
else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
|
||||
else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));
|
||||
|
||||
// log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n",
|
||||
// toMB(reduction_amount), toMB(min_allocation_allowed));
|
||||
// if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n");
|
||||
// log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n",
|
||||
// toMB(reduction_amount), toMB(min_allocation_allowed));
|
||||
// if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n");
|
||||
|
||||
// If we are only doing a single allocation, only allow 1
|
||||
int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;
|
||||
@@ -168,35 +195,43 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
||||
|
||||
cl_uint max_image_args;
|
||||
error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
|
||||
test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
|
||||
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
|
||||
|
||||
if ((int)max_image_args < max_to_allocate) {
|
||||
log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args);
|
||||
max_to_allocate = max_image_args;
|
||||
log_info("\t\tMaximum number of images per kernel limited to %d\n", (int)max_image_args);
|
||||
max_to_allocate = max_image_args;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Try to allocate the requested amount.
|
||||
while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {
|
||||
allocation_this_time = size_to_allocate - amount_allocated;
|
||||
if (allocation_this_time > max_individual_allocation_size)
|
||||
allocation_this_time = (size_t)max_individual_allocation_size;
|
||||
|
||||
// Try to allocate a chunk of memory
|
||||
// Determine how much more is needed
|
||||
allocation_this_time = size_to_allocate - amount_allocated;
|
||||
|
||||
// Bound by the individual allocation size
|
||||
if (allocation_this_time > max_individual_allocation_size)
|
||||
allocation_this_time = (size_t)max_individual_allocation_size;
|
||||
|
||||
// Allocate the largest object possible
|
||||
result = FAILED_TOO_BIG;
|
||||
//log_info("\t\tTrying sub-allocation %d at size %gMB.\n", current_allocation, toMB(allocation_this_time));
|
||||
while (result == FAILED_TOO_BIG && allocation_this_time != 0) {
|
||||
result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
|
||||
|
||||
// Create the object
|
||||
result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
|
||||
if (result == SUCCEEDED) {
|
||||
// Allocation succeeded, another memory object was added to the array
|
||||
*number_of_mems = (current_allocation+1);
|
||||
// Verify the size is correct to within 1MB.
|
||||
*number_of_mems = (current_allocation + 1);
|
||||
|
||||
// Verify the size is correct to within 1MB.
|
||||
actual_allocation = get_actual_allocation_size(mems[current_allocation]);
|
||||
if (fabs((double)(allocation_this_time - actual_allocation)) > 1024.0*1024.0) {
|
||||
log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation));
|
||||
if (fabs((double)allocation_this_time - (double)actual_allocation) > 1024.0*1024.0) {
|
||||
log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB(actual_allocation));
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
|
||||
// If we are filling the allocation for verification do so
|
||||
if (force_fill) {
|
||||
//log_info("\t\t\tWriting random values to object and calculating checksum.\n");
|
||||
@@ -207,10 +242,15 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
||||
result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
|
||||
}
|
||||
}
|
||||
|
||||
// If creation failed, try to create a smaller object
|
||||
if (result == FAILED_TOO_BIG) {
|
||||
//log_info("\t\t\tAllocation %d failed at size %gMB. Trying smaller.\n", current_allocation, toMB(allocation_this_time));
|
||||
if (allocation_this_time > reduction_amount)
|
||||
allocation_this_time -= reduction_amount;
|
||||
allocation_this_time -= reduction_amount;
|
||||
else if (reduction_amount > 1) {
|
||||
reduction_amount /= 2;
|
||||
}
|
||||
else {
|
||||
allocation_this_time = 0;
|
||||
}
|
||||
@@ -223,8 +263,8 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
|
||||
if (allocation_this_time < min_allocation_allowed && allocation_this_time < (size_to_allocate-amount_allocated)) {
|
||||
log_info("\t\tFailed to allocate an individual allocation of more than %gMB.\n", toMB(min_allocation_allowed));
|
||||
if (!allocation_this_time) {
|
||||
log_info("\t\tFailed to allocate %gMB across several objects.\n", toMB(size_to_allocate));
|
||||
return FAILED_TOO_BIG;
|
||||
}
|
||||
|
||||
|
||||
@@ -19,7 +19,7 @@ cl_command_queue reset_queue(cl_context context, cl_device_id device_id, cl_comm
|
||||
{
|
||||
log_info("Invalid command queue. Releasing and recreating the command queue.\n");
|
||||
clReleaseCommandQueue(*queue);
|
||||
*queue = clCreateCommandQueue(context, device_id, 0, error);
|
||||
*queue = clCreateCommandQueue(context, device_id, 0, error);
|
||||
return *queue;
|
||||
}
|
||||
|
||||
@@ -55,7 +55,7 @@ size_t get_actual_allocation_size(cl_mem mem) {
|
||||
|
||||
error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
|
||||
print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ int main(int argc, const char *argv[])
|
||||
int count;
|
||||
cl_mem mems[MAX_NUMBER_TO_ALLOCATE];
|
||||
cl_ulong max_individual_allocation_size, global_mem_size;
|
||||
char str[ 128 ], *endPtr;
|
||||
char str[ 128 ], *endPtr;
|
||||
int r;
|
||||
int number_of_mems_used;
|
||||
int failure_counts = 0;
|
||||
@@ -168,7 +168,7 @@ int main(int argc, const char *argv[])
|
||||
g_tests_to_run |= IMAGE_WRITE_NON_BLOCKING;
|
||||
}
|
||||
if( g_tests_to_run == 0 )
|
||||
break; // Argument is invalid; break to print usage
|
||||
break; // Argument is invalid; break to print usage
|
||||
}
|
||||
|
||||
else if( strcmp( str, "do_not_force_fill" ) == 0 )
|
||||
@@ -203,7 +203,7 @@ int main(int argc, const char *argv[])
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( printDeviceHeader( g_device_id ) != CL_SUCCESS )
|
||||
if( printDeviceHeader( g_device_id ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
|
||||
@@ -52,8 +52,8 @@
|
||||
#define IMAGE_READ_NON_BLOCKING 16
|
||||
#define IMAGE_WRITE_NON_BLOCKING 32
|
||||
|
||||
#define test_error_abort(errCode,msg) test_error_ret_abort(errCode,msg,errCode)
|
||||
#define test_error_ret_abort(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return FAILED_ABORT ; } }
|
||||
#define test_error_abort(errCode,msg) test_error_ret_abort(errCode,msg,errCode)
|
||||
#define test_error_ret_abort(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return FAILED_ABORT ; } }
|
||||
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
@@ -14,6 +14,7 @@ add_executable(conformance_test_api
|
||||
test_platform.cpp
|
||||
test_retain.cpp
|
||||
test_device_min_data_type_align_size_alignment.cpp
|
||||
test_queue_properties.cpp
|
||||
test_mem_objects.cpp
|
||||
test_bool.c
|
||||
test_null_buffer_arg.c
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
basefn basefn_list[] = {
|
||||
basefn basefn_list[] = {
|
||||
test_get_platform_info,
|
||||
test_get_sampler_info,
|
||||
test_get_command_queue_info,
|
||||
@@ -104,7 +104,7 @@ basefn basefn_list[] = {
|
||||
test_retain_mem_object_multiple,
|
||||
test_min_data_type_align_size_alignment,
|
||||
|
||||
test_mem_object_destructor_callback,
|
||||
test_mem_object_destructor_callback,
|
||||
test_null_buffer_arg,
|
||||
test_get_buffer_info,
|
||||
test_get_image2d_info,
|
||||
@@ -112,6 +112,7 @@ basefn basefn_list[] = {
|
||||
test_get_image1d_info,
|
||||
test_get_image1d_array_info,
|
||||
test_get_image2d_array_info,
|
||||
test_queue_properties,
|
||||
};
|
||||
|
||||
|
||||
@@ -199,17 +200,17 @@ const char *basefn_names[] = {
|
||||
"get_image1d_info",
|
||||
"get_image1d_array_info",
|
||||
"get_image2d_array_info",
|
||||
|
||||
"queue_properties",
|
||||
"all",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -18,84 +18,84 @@
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/clImageHelper.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
extern float calculate_ulperror(float a, float b);
|
||||
extern float calculate_ulperror(float a, float b);
|
||||
|
||||
extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
@@ -105,4 +105,5 @@ extern int test_get_image1d_info( cl_device_id deviceID, cl_context context
|
||||
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ const char *sample_single_param_write_kernel[] = {
|
||||
"__kernel void sample_test(__global int *src)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" src[tid] = tid;\n"
|
||||
" src[tid] = tid;\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
@@ -73,7 +73,7 @@ const char *sample_sampler_kernel_pattern[] = {
|
||||
"__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n",
|
||||
" dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n",
|
||||
" dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n",
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
@@ -245,9 +245,9 @@ int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_c
|
||||
char readArgLine[128], *programSrc;
|
||||
const char *readArgPattern = ", read_only image2d_t srcimg%d";
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper *streams, result;
|
||||
clMemWrapper *streams, result;
|
||||
size_t threads[2];
|
||||
cl_image_format image_format_desc;
|
||||
cl_image_format image_format_desc;
|
||||
size_t maxParameterSize;
|
||||
cl_event event;
|
||||
cl_int event_status;
|
||||
@@ -364,9 +364,9 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_
|
||||
char writeArgLine[128], *programSrc;
|
||||
const char *writeArgPattern = ", write_only image2d_t dstimg%d";
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper *streams;
|
||||
clMemWrapper *streams;
|
||||
size_t threads[2];
|
||||
cl_image_format image_format_desc;
|
||||
cl_image_format image_format_desc;
|
||||
size_t maxParameterSize;
|
||||
cl_event event;
|
||||
cl_int event_status;
|
||||
@@ -676,7 +676,7 @@ int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_co
|
||||
int error;
|
||||
size_t maxDimension;
|
||||
clMemWrapper streams[1];
|
||||
cl_image_format image_format_desc;
|
||||
cl_image_format image_format_desc;
|
||||
cl_ulong maxAllocSize;
|
||||
|
||||
|
||||
@@ -732,7 +732,7 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c
|
||||
int error;
|
||||
size_t maxDimension;
|
||||
clMemWrapper streams[1];
|
||||
cl_image_format image_format_desc;
|
||||
cl_image_format image_format_desc;
|
||||
cl_ulong maxAllocSize;
|
||||
|
||||
|
||||
@@ -789,7 +789,7 @@ int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_co
|
||||
int error;
|
||||
size_t maxDimension;
|
||||
clMemWrapper streams[1];
|
||||
cl_image_format image_format_desc;
|
||||
cl_image_format image_format_desc;
|
||||
cl_ulong maxAllocSize;
|
||||
|
||||
|
||||
@@ -845,7 +845,7 @@ int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_
|
||||
int error;
|
||||
size_t maxDimension;
|
||||
clMemWrapper streams[1];
|
||||
cl_image_format image_format_desc;
|
||||
cl_image_format image_format_desc;
|
||||
cl_ulong maxAllocSize;
|
||||
size_t minRequiredDimension = gIsEmbedded ? 256 : 2048;
|
||||
|
||||
@@ -1275,8 +1275,8 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context,
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[3];
|
||||
size_t threads[1], localThreads[1];
|
||||
clMemWrapper streams[3];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int *constantData, *resultData;
|
||||
cl_ulong maxSize, stepSize, currentSize;
|
||||
int i;
|
||||
@@ -1402,8 +1402,8 @@ int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_com
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper *streams;
|
||||
size_t threads[1], localThreads[1];
|
||||
clMemWrapper *streams;
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_uint i, maxArgs;
|
||||
cl_ulong maxSize;
|
||||
cl_ulong maxParameterSize;
|
||||
@@ -1616,8 +1616,8 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[3];
|
||||
size_t threads[1], localThreads[1];
|
||||
clMemWrapper streams[3];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int *localData, *resultData;
|
||||
cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size;
|
||||
cl_char buffer[ 4098 ];
|
||||
@@ -1738,7 +1738,7 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co
|
||||
|
||||
int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int err;
|
||||
int err;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
|
||||
@@ -27,137 +27,137 @@ static const char *sample_binary_kernel_source[] = {
|
||||
|
||||
int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
size_t binarySize;
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
size_t binarySize;
|
||||
|
||||
|
||||
program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
|
||||
test_error( error, "Unable to create program from source" );
|
||||
program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
|
||||
test_error( error, "Unable to create program from source" );
|
||||
|
||||
// Build so we have a binary to get
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build test program" );
|
||||
// Build so we have a binary to get
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build test program" );
|
||||
|
||||
// Get the size of the resulting binary (only one device)
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
|
||||
test_error( error, "Unable to get binary size" );
|
||||
// Get the size of the resulting binary (only one device)
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
|
||||
test_error( error, "Unable to get binary size" );
|
||||
|
||||
// Sanity check
|
||||
if( binarySize == 0 )
|
||||
{
|
||||
log_error( "ERROR: Binary size of program is zero\n" );
|
||||
return -1;
|
||||
}
|
||||
// Sanity check
|
||||
if( binarySize == 0 )
|
||||
{
|
||||
log_error( "ERROR: Binary size of program is zero\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Create a buffer and get the actual binary
|
||||
unsigned char *binary;
|
||||
// Create a buffer and get the actual binary
|
||||
unsigned char *binary;
|
||||
binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
|
||||
unsigned char *buffers[ 1 ] = { binary };
|
||||
unsigned char *buffers[ 1 ] = { binary };
|
||||
|
||||
// Do another sanity check here first
|
||||
size_t size;
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
|
||||
test_error( error, "Unable to get expected size of binaries array" );
|
||||
if( size != sizeof( buffers ) )
|
||||
{
|
||||
log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
|
||||
free(binary);
|
||||
// Do another sanity check here first
|
||||
size_t size;
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
|
||||
test_error( error, "Unable to get expected size of binaries array" );
|
||||
if( size != sizeof( buffers ) )
|
||||
{
|
||||
log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
|
||||
free(binary);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary" );
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary" );
|
||||
|
||||
// No way to verify the binary is correct, so just be good with that
|
||||
// No way to verify the binary is correct, so just be good with that
|
||||
free(binary);
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
/* To test this in a self-contained fashion, we have to create a program with
|
||||
/* To test this in a self-contained fashion, we have to create a program with
|
||||
source, then get the binary, then use that binary to reload the program, and then verify */
|
||||
|
||||
int error;
|
||||
clProgramWrapper program, program_from_binary;
|
||||
size_t binarySize;
|
||||
int error;
|
||||
clProgramWrapper program, program_from_binary;
|
||||
size_t binarySize;
|
||||
|
||||
|
||||
program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
|
||||
test_error( error, "Unable to create program from source" );
|
||||
program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
|
||||
test_error( error, "Unable to create program from source" );
|
||||
|
||||
// Build so we have a binary to get
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build test program" );
|
||||
// Build so we have a binary to get
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build test program" );
|
||||
|
||||
// Get the size of the resulting binary (only one device)
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
|
||||
test_error( error, "Unable to get binary size" );
|
||||
// Get the size of the resulting binary (only one device)
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
|
||||
test_error( error, "Unable to get binary size" );
|
||||
|
||||
// Sanity check
|
||||
if( binarySize == 0 )
|
||||
{
|
||||
log_error( "ERROR: Binary size of program is zero\n" );
|
||||
return -1;
|
||||
}
|
||||
// Sanity check
|
||||
if( binarySize == 0 )
|
||||
{
|
||||
log_error( "ERROR: Binary size of program is zero\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Create a buffer and get the actual binary
|
||||
unsigned char *binary;
|
||||
// Create a buffer and get the actual binary
|
||||
unsigned char *binary;
|
||||
binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
|
||||
const unsigned char *buffers[ 1 ] = { binary };
|
||||
const unsigned char *buffers[ 1 ] = { binary };
|
||||
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary" );
|
||||
error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary" );
|
||||
|
||||
cl_int loadErrors[ 1 ];
|
||||
program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
|
||||
test_error( error, "Unable to load valid program binary" );
|
||||
test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );
|
||||
cl_int loadErrors[ 1 ];
|
||||
program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
|
||||
test_error( error, "Unable to load valid program binary" );
|
||||
test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );
|
||||
|
||||
error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build binary program" );
|
||||
|
||||
// Now get the binary one more time and verify it loaded the right binary
|
||||
unsigned char *binary2;
|
||||
// Now get the binary one more time and verify it loaded the right binary
|
||||
unsigned char *binary2;
|
||||
binary2 = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
|
||||
buffers[ 0 ] = binary2;
|
||||
error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary second time" );
|
||||
buffers[ 0 ] = binary2;
|
||||
error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary second time" );
|
||||
|
||||
if( memcmp( binary, binary2, binarySize ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Program binary is different when loaded from binary!\n" );
|
||||
if( memcmp( binary, binary2, binarySize ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Program binary is different when loaded from binary!\n" );
|
||||
free(binary2);
|
||||
free(binary);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Try again, this time without passing the status ptr in, to make sure we still
|
||||
// get a valid binary
|
||||
clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, NULL, &error );
|
||||
test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );
|
||||
// Try again, this time without passing the status ptr in, to make sure we still
|
||||
// get a valid binary
|
||||
clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, NULL, &error );
|
||||
test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );
|
||||
|
||||
error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build binary program" );
|
||||
error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build binary program" );
|
||||
|
||||
// Now get the binary one more time and verify it loaded the right binary
|
||||
unsigned char *binary3;
|
||||
// Now get the binary one more time and verify it loaded the right binary
|
||||
unsigned char *binary3;
|
||||
binary3 = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
|
||||
buffers[ 0 ] = binary3;
|
||||
error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary second time" );
|
||||
buffers[ 0 ] = binary3;
|
||||
error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
|
||||
test_error( error, "Unable to get program binary second time" );
|
||||
|
||||
if( memcmp( binary, binary3, binarySize ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Program binary is different when status pointer is NULL!\n" );
|
||||
free(binary3);
|
||||
free(binary2);
|
||||
free(binary);
|
||||
return -1;
|
||||
}
|
||||
free(binary3);
|
||||
if( memcmp( binary, binary3, binarySize ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Program binary is different when status pointer is NULL!\n" );
|
||||
free(binary3);
|
||||
free(binary2);
|
||||
free(binary);
|
||||
return -1;
|
||||
}
|
||||
free(binary3);
|
||||
|
||||
// Now execute them both to see that they both do the same thing.
|
||||
clMemWrapper in, out, out_binary;
|
||||
@@ -216,21 +216,21 @@ int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_com
|
||||
test_error( error, "clEnqueueReadBuffer failed");
|
||||
|
||||
// Compare the results
|
||||
if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
|
||||
if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
|
||||
free(binary2);
|
||||
free(binary);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// All done!
|
||||
// All done!
|
||||
free(in_data);
|
||||
free(out_data);
|
||||
free(out_data_binary);
|
||||
free(binary2);
|
||||
free(binary);
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -36,17 +36,17 @@ const char *kernel_with_bool[] = {
|
||||
};
|
||||
|
||||
int test_for_bool_type(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
int err = create_single_kernel_helper(context,
|
||||
&program,
|
||||
&kernel,
|
||||
1, kernel_with_bool,
|
||||
"kernel_with_bool" );
|
||||
&program,
|
||||
&kernel,
|
||||
1, kernel_with_bool,
|
||||
"kernel_with_bool" );
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
@@ -31,16 +31,16 @@ void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info,
|
||||
|
||||
int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
clContextWrapper context_to_test;
|
||||
clCommandQueueWrapper queue_to_test;
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_float inputData[10];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_float inputData[10];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
RandomSeed seed( gRandomSeed );
|
||||
|
||||
const char *sample_single_test_kernel[] = {
|
||||
@@ -80,45 +80,45 @@ int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Write some test data */
|
||||
memset( outputData, 0, sizeof( outputData ) );
|
||||
/* Write some test data */
|
||||
memset( outputData, 0, sizeof( outputData ) );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
|
||||
for (i=0; i<10; i++)
|
||||
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
|
||||
|
||||
error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
|
||||
test_error( error, "Unable to set testing kernel data" );
|
||||
error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
|
||||
test_error( error, "Unable to set testing kernel data" );
|
||||
|
||||
/* Test setting the arguments by index manually */
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
/* Test setting the arguments by index manually */
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
|
||||
error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
{
|
||||
|
||||
@@ -483,7 +483,7 @@ static const char *single_task_kernel[] = {
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" for( int i = 0; i < count; i++ )\n"
|
||||
" dst[i] = tid + i;\n"
|
||||
" dst[i] = tid + i;\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
|
||||
@@ -25,117 +25,117 @@ const char *inspect_image_kernel_source[] = {
|
||||
"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0), i;\n"
|
||||
" for( i = 0; i < 100000; i++ ); \n"
|
||||
" for( i = 0; i < 100000; i++ ); \n"
|
||||
" outDimensions[tid * 2] = get_image_width(src) * tid;\n"
|
||||
" outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
#define NUM_TRIES 100
|
||||
#define NUM_TRIES 100
|
||||
#define NUM_THREADS 2048
|
||||
|
||||
int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
int error, i;
|
||||
clMemWrapper images[ NUM_TRIES ];
|
||||
size_t sizes[ NUM_TRIES ][ 2 ];
|
||||
clMemWrapper results[ NUM_TRIES ];
|
||||
cl_image_format imageFormat;
|
||||
size_t maxWidth, maxHeight;
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int resultArray[ NUM_THREADS * 2 ];
|
||||
char errStr[ 128 ];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
int error, i;
|
||||
clMemWrapper images[ NUM_TRIES ];
|
||||
size_t sizes[ NUM_TRIES ][ 2 ];
|
||||
clMemWrapper results[ NUM_TRIES ];
|
||||
cl_image_format imageFormat;
|
||||
size_t maxWidth, maxHeight;
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int resultArray[ NUM_THREADS * 2 ];
|
||||
char errStr[ 128 ];
|
||||
RandomSeed seed( gRandomSeed );
|
||||
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
// Just get any ol format to test with
|
||||
error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat );
|
||||
test_error( error, "Unable to obtain suitable image format to test with!" );
|
||||
// Just get any ol format to test with
|
||||
error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat );
|
||||
test_error( error, "Unable to obtain suitable image format to test with!" );
|
||||
|
||||
// Create our testing kernel
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
// Create our testing kernel
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
|
||||
// Get max dimensions for each of our images
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
|
||||
error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
|
||||
test_error( error, "Unable to get max image dimensions for device" );
|
||||
// Get max dimensions for each of our images
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
|
||||
error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
|
||||
test_error( error, "Unable to get max image dimensions for device" );
|
||||
|
||||
// Get the number of threads we'll be able to run
|
||||
threads[0] = NUM_THREADS;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size for kernel" );
|
||||
// Get the number of threads we'll be able to run
|
||||
threads[0] = NUM_THREADS;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size for kernel" );
|
||||
|
||||
// Create a variety of images and output arrays
|
||||
for( i = 0; i < NUM_TRIES; i++ )
|
||||
{
|
||||
sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
|
||||
sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;
|
||||
// Create a variety of images and output arrays
|
||||
for( i = 0; i < NUM_TRIES; i++ )
|
||||
{
|
||||
sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
|
||||
sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;
|
||||
|
||||
images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
|
||||
&imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
|
||||
if( images[i] == NULL )
|
||||
{
|
||||
log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
|
||||
return -1;
|
||||
}
|
||||
results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * threads[0] * 2, NULL, &error );
|
||||
if( results[i] == NULL)
|
||||
{
|
||||
log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
|
||||
&imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
|
||||
if( images[i] == NULL )
|
||||
{
|
||||
log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
|
||||
return -1;
|
||||
}
|
||||
results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * threads[0] * 2, NULL, &error );
|
||||
if( results[i] == NULL)
|
||||
{
|
||||
log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Start setting arguments and executing kernels
|
||||
for( i = 0; i < NUM_TRIES; i++ )
|
||||
{
|
||||
// Set the arguments for this try
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] );
|
||||
sprintf( errStr, "Unable to set argument 0 for kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
// Start setting arguments and executing kernels
|
||||
for( i = 0; i < NUM_TRIES; i++ )
|
||||
{
|
||||
// Set the arguments for this try
|
||||
error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] );
|
||||
sprintf( errStr, "Unable to set argument 0 for kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
|
||||
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] );
|
||||
sprintf( errStr, "Unable to set argument 1 for kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] );
|
||||
sprintf( errStr, "Unable to set argument 1 for kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
|
||||
// Queue up execution
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
sprintf( errStr, "Unable to execute kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
}
|
||||
// Queue up execution
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
sprintf( errStr, "Unable to execute kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
}
|
||||
|
||||
// Read the results back out, one at a time, and verify
|
||||
for( i = 0; i < NUM_TRIES; i++ )
|
||||
{
|
||||
error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL );
|
||||
sprintf( errStr, "Unable to read results for kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
// Read the results back out, one at a time, and verify
|
||||
for( i = 0; i < NUM_TRIES; i++ )
|
||||
{
|
||||
error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL );
|
||||
sprintf( errStr, "Unable to read results for kernel try %d", i );
|
||||
test_error( error, errStr );
|
||||
|
||||
// Verify. Each entry should be n * the (width/height) of image i
|
||||
for( int j = 0; j < NUM_THREADS; j++ )
|
||||
{
|
||||
if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j )
|
||||
{
|
||||
log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
|
||||
i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] );
|
||||
return -1;
|
||||
}
|
||||
if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j )
|
||||
{
|
||||
log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
|
||||
i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Verify. Each entry should be n * the (width/height) of image i
|
||||
for( int j = 0; j < NUM_THREADS; j++ )
|
||||
{
|
||||
if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j )
|
||||
{
|
||||
log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
|
||||
i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] );
|
||||
return -1;
|
||||
}
|
||||
if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j )
|
||||
{
|
||||
log_error( "ERROR: Verficiation for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
|
||||
i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we got here, everything verified successfully
|
||||
return 0;
|
||||
// If we got here, everything verified successfully
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -96,209 +96,209 @@ const char *sample_two_kernel_program[] = {
|
||||
|
||||
int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
cl_program program, testProgram;
|
||||
cl_context testContext;
|
||||
cl_kernel kernel;
|
||||
cl_char name[ 512 ];
|
||||
cl_uint numArgs, numInstances;
|
||||
size_t paramSize;
|
||||
int error;
|
||||
cl_program program, testProgram;
|
||||
cl_context testContext;
|
||||
cl_kernel kernel;
|
||||
cl_char name[ 512 ];
|
||||
cl_uint numArgs, numInstances;
|
||||
size_t paramSize;
|
||||
|
||||
|
||||
/* Create reference */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create reference */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, NULL, 0, &paramSize );
|
||||
test_error( error, "Unable to get kernel function name param size" );
|
||||
if( paramSize != strlen( "sample_test" ) + 1 )
|
||||
{
|
||||
log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, NULL, 0, &paramSize );
|
||||
test_error( error, "Unable to get kernel function name param size" );
|
||||
if( paramSize != strlen( "sample_test" ) + 1 )
|
||||
{
|
||||
log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
|
||||
test_error( error, "Unable to get kernel function name" );
|
||||
if( strcmp( (char *)name, "sample_test" ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
|
||||
test_error( error, "Unable to get kernel function name" );
|
||||
if( strcmp( (char *)name, "sample_test" ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
|
||||
test_error( error, "Unable to get kernel arg count param size" );
|
||||
if( paramSize != sizeof( numArgs ) )
|
||||
{
|
||||
log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
|
||||
test_error( error, "Unable to get kernel arg count param size" );
|
||||
if( paramSize != sizeof( numArgs ) )
|
||||
{
|
||||
log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
|
||||
test_error( error, "Unable to get kernel arg count" );
|
||||
if( numArgs != 2 )
|
||||
{
|
||||
log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
|
||||
test_error( error, "Unable to get kernel arg count" );
|
||||
if( numArgs != 2 )
|
||||
{
|
||||
log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
|
||||
test_error( error, "Unable to get kernel reference count param size" );
|
||||
if( paramSize != sizeof( numInstances ) )
|
||||
{
|
||||
log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
|
||||
test_error( error, "Unable to get kernel reference count param size" );
|
||||
if( paramSize != sizeof( numInstances ) )
|
||||
{
|
||||
log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
|
||||
test_error( error, "Unable to get kernel reference count" );
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
|
||||
test_error( error, "Unable to get kernel reference count" );
|
||||
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, NULL, 0, &paramSize );
|
||||
test_error( error, "Unable to get kernel program param size" );
|
||||
if( paramSize != sizeof( testProgram ) )
|
||||
{
|
||||
log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, NULL, 0, &paramSize );
|
||||
test_error( error, "Unable to get kernel program param size" );
|
||||
if( paramSize != sizeof( testProgram ) )
|
||||
{
|
||||
log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
|
||||
test_error( error, "Unable to get kernel program" );
|
||||
if( testProgram != program )
|
||||
{
|
||||
log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
|
||||
test_error( error, "Unable to get kernel program" );
|
||||
if( testProgram != program )
|
||||
{
|
||||
log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", program, testProgram );
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
|
||||
test_error( error, "Unable to get kernel context" );
|
||||
if( testContext != context )
|
||||
{
|
||||
log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext );
|
||||
return -1;
|
||||
}
|
||||
error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
|
||||
test_error( error, "Unable to get kernel context" );
|
||||
if( testContext != context )
|
||||
{
|
||||
log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Release memory */
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
return 0;
|
||||
/* Release memory */
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_float inputData[100];
|
||||
cl_int outputData[100];
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_float inputData[100];
|
||||
cl_int outputData[100];
|
||||
RandomSeed seed( gRandomSeed );
|
||||
int i;
|
||||
int i;
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Write some test data */
|
||||
memset( outputData, 0, sizeof( outputData ) );
|
||||
/* Write some test data */
|
||||
memset( outputData, 0, sizeof( outputData ) );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
|
||||
for (i=0; i<100; i++)
|
||||
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
|
||||
test_error( error, "Unable to set testing kernel data" );
|
||||
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
|
||||
test_error( error, "Unable to set testing kernel data" );
|
||||
|
||||
/* Set the arguments */
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)100;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)100;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try again */
|
||||
if( localThreads[0] > 1 )
|
||||
localThreads[0] /= 2;
|
||||
/* Try again */
|
||||
if( localThreads[0] > 1 )
|
||||
localThreads[0] /= 2;
|
||||
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
|
||||
localThreads[0]--;
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* And again */
|
||||
if( localThreads[0] > 1 )
|
||||
localThreads[0] /= 2;
|
||||
/* And again */
|
||||
if( localThreads[0] > 1 )
|
||||
localThreads[0] /= 2;
|
||||
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
|
||||
localThreads[0]--;
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* One more time */
|
||||
localThreads[0] = (unsigned int)1;
|
||||
/* One more time */
|
||||
localThreads[0] = (unsigned int)1;
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -308,61 +308,61 @@ int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, c
|
||||
|
||||
int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_float inputData[10];
|
||||
cl_int outputData[10];
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_float inputData[10];
|
||||
cl_int outputData[10];
|
||||
RandomSeed seed( gRandomSeed );
|
||||
int i;
|
||||
int i;
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Write some test data */
|
||||
memset( outputData, 0, sizeof( outputData ) );
|
||||
/* Write some test data */
|
||||
memset( outputData, 0, sizeof( outputData ) );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
|
||||
for (i=0; i<10; i++)
|
||||
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
|
||||
test_error( error, "Unable to set testing kernel data" );
|
||||
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
|
||||
test_error( error, "Unable to set testing kernel data" );
|
||||
|
||||
/* Test setting the arguments by index manually */
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
/* Test setting the arguments by index manually */
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -372,156 +372,156 @@ int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_c
|
||||
|
||||
int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
void *args[2];
|
||||
cl_mem outStream;
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
cl_int randomTestDataA[10], randomTestDataB[10];
|
||||
int error;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
void *args[2];
|
||||
cl_mem outStream;
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
cl_int randomTestDataA[10], randomTestDataB[10];
|
||||
MTdata d;
|
||||
|
||||
struct img_pair_t
|
||||
{
|
||||
cl_mem streamA;
|
||||
cl_mem streamB;
|
||||
} image_pair;
|
||||
struct img_pair_t
|
||||
{
|
||||
cl_mem streamA;
|
||||
cl_mem streamB;
|
||||
} image_pair;
|
||||
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
/* Create some I/O streams */
|
||||
d = init_genrand( gRandomSeed );
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
randomTestDataA[i] = (cl_int)genrand_int32(d);
|
||||
randomTestDataB[i] = (cl_int)genrand_int32(d);
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
randomTestDataA[i] = (cl_int)genrand_int32(d);
|
||||
randomTestDataB[i] = (cl_int)genrand_int32(d);
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
image_pair.streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
image_pair.streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
image_pair.streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
image_pair.streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Set the arguments */
|
||||
args[0] = &image_pair;
|
||||
args[1] = outStream;
|
||||
/* Set the arguments */
|
||||
args[0] = &image_pair;
|
||||
args[1] = outStream;
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &args[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &args[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
{
|
||||
if (outputData[i] != randomTestDataA[i] + randomTestDataB[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify!\n" );
|
||||
log_error( "ERROR: Data did not verify!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
clReleaseMemObject( image_pair.streamA );
|
||||
clReleaseMemObject( image_pair.streamB );
|
||||
clReleaseMemObject( outStream );
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
clReleaseMemObject( image_pair.streamA );
|
||||
clReleaseMemObject( image_pair.streamB );
|
||||
clReleaseMemObject( outStream );
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[3];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
cl_int randomTestDataA[10], randomTestDataB[10];
|
||||
cl_ulong maxSize;
|
||||
MTdata d;
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[3];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
cl_int randomTestDataA[10], randomTestDataB[10];
|
||||
cl_ulong maxSize;
|
||||
MTdata d;
|
||||
|
||||
/* Verify our test buffer won't be bigger than allowed */
|
||||
error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
|
||||
test_error( error, "Unable to get max constant buffer size" );
|
||||
if( maxSize < sizeof( cl_int ) * 10 )
|
||||
{
|
||||
log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxSize );
|
||||
return -1;
|
||||
}
|
||||
/* Verify our test buffer won't be bigger than allowed */
|
||||
error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
|
||||
test_error( error, "Unable to get max constant buffer size" );
|
||||
if( maxSize < sizeof( cl_int ) * 10 )
|
||||
{
|
||||
log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
/* Create some I/O streams */
|
||||
d = init_genrand( gRandomSeed );
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff; /* Make sure values are positive, just so we don't have to */
|
||||
randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff; /* deal with overflow on the verification */
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff; /* Make sure values are positive, just so we don't have to */
|
||||
randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff; /* deal with overflow on the verification */
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 2, sizeof( streams[2] ), &streams[2]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
{
|
||||
if (outputData[i] != randomTestDataA[i] + randomTestDataB[i])
|
||||
{
|
||||
log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", i, outputData[i], randomTestDataA[i], randomTestDataB[i], ( randomTestDataA[i] + randomTestDataB[i] ) );
|
||||
log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", i, outputData[i], randomTestDataA[i], randomTestDataB[i], ( randomTestDataA[i] + randomTestDataB[i] ) );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -531,67 +531,67 @@ int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_c
|
||||
|
||||
int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
MTdata d;
|
||||
|
||||
typedef struct img_pair_type
|
||||
{
|
||||
int A;
|
||||
int B;
|
||||
} image_pair_t;
|
||||
typedef struct img_pair_type
|
||||
{
|
||||
int A;
|
||||
int B;
|
||||
} image_pair_t;
|
||||
|
||||
image_pair_t image_pair[ 10 ];
|
||||
image_pair_t image_pair[ 10 ];
|
||||
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
/* Create some I/O streams */
|
||||
d = init_genrand( gRandomSeed );
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
image_pair[i].A = (cl_int)genrand_int32(d);
|
||||
image_pair[i].A = (cl_int)genrand_int32(d);
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
image_pair[i].A = (cl_int)genrand_int32(d);
|
||||
image_pair[i].A = (cl_int)genrand_int32(d);
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
{
|
||||
if (outputData[i] != image_pair[i].A + image_pair[i].B)
|
||||
{
|
||||
log_error( "ERROR: Data did not verify!\n" );
|
||||
log_error( "ERROR: Data did not verify!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -601,98 +601,98 @@ int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context,
|
||||
|
||||
int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
cl_program program;
|
||||
cl_kernel kernel[3];
|
||||
unsigned int kernelCount;
|
||||
int error;
|
||||
cl_program program;
|
||||
cl_kernel kernel[3];
|
||||
unsigned int kernelCount;
|
||||
|
||||
/* Create a test program */
|
||||
program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
|
||||
if( program == NULL || error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create test program!\n" );
|
||||
return -1;
|
||||
}
|
||||
/* Create a test program */
|
||||
program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
|
||||
if( program == NULL || error != CL_SUCCESS )
|
||||
{
|
||||
log_error( "ERROR: Unable to create test program!\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Build */
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build test program" );
|
||||
/* Build */
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build test program" );
|
||||
|
||||
/* Try getting the kernel count */
|
||||
error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
|
||||
test_error( error, "Unable to get kernel count for built program" );
|
||||
if( kernelCount != 2 )
|
||||
{
|
||||
log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", kernelCount );
|
||||
return -1;
|
||||
}
|
||||
/* Try getting the kernel count */
|
||||
error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
|
||||
test_error( error, "Unable to get kernel count for built program" );
|
||||
if( kernelCount != 2 )
|
||||
{
|
||||
log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", kernelCount );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Try actually getting the kernels */
|
||||
error = clCreateKernelsInProgram( program, 2, kernel, NULL );
|
||||
test_error( error, "Unable to get kernels for built program" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseKernel( kernel[1] );
|
||||
/* Try actually getting the kernels */
|
||||
error = clCreateKernelsInProgram( program, 2, kernel, NULL );
|
||||
test_error( error, "Unable to get kernels for built program" );
|
||||
clReleaseKernel( kernel[0] );
|
||||
clReleaseKernel( kernel[1] );
|
||||
|
||||
clReleaseProgram( program );
|
||||
return 0;
|
||||
clReleaseProgram( program );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
cl_int randomTestDataA[10];
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_int outputData[10];
|
||||
int i;
|
||||
cl_int randomTestDataA[10];
|
||||
MTdata d;
|
||||
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
/* Create some I/O streams */
|
||||
d = init_genrand( gRandomSeed );
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffff; /* Make sure values are positive and small, just so we don't have to */
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
for( i = 0; i < 10; i++ )
|
||||
{
|
||||
randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffff; /* Make sure values are positive and small, just so we don't have to */
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)10;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<10; i++)
|
||||
{
|
||||
if (outputData[i] != randomTestDataA[i] + 1024)
|
||||
{
|
||||
log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", i, outputData[i], randomTestDataA[i], ( randomTestDataA[i] + 1024 ) );
|
||||
log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", i, outputData[i], randomTestDataA[i], ( randomTestDataA[i] + 1024 ) );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ static volatile cl_int sDestructorIndex;
|
||||
|
||||
void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
|
||||
{
|
||||
int * userPtr = (int *)userData;
|
||||
int * userPtr = (int *)userData;
|
||||
|
||||
// ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
|
||||
*userPtr = ++sDestructorIndex;
|
||||
@@ -31,34 +31,34 @@ void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
|
||||
|
||||
int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
|
||||
{
|
||||
cl_int error;
|
||||
cl_int error;
|
||||
int i;
|
||||
|
||||
// Set up some variables to catch the order in which callbacks are called
|
||||
volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
|
||||
sDestructorIndex = 0;
|
||||
// Set up some variables to catch the order in which callbacks are called
|
||||
volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
|
||||
sDestructorIndex = 0;
|
||||
|
||||
// Set up the callbacks
|
||||
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] );
|
||||
test_error( error, "Unable to set destructor callback" );
|
||||
// Set up the callbacks
|
||||
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] );
|
||||
test_error( error, "Unable to set destructor callback" );
|
||||
|
||||
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] );
|
||||
test_error( error, "Unable to set destructor callback" );
|
||||
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] );
|
||||
test_error( error, "Unable to set destructor callback" );
|
||||
|
||||
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] );
|
||||
test_error( error, "Unable to set destructor callback" );
|
||||
error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] );
|
||||
test_error( error, "Unable to set destructor callback" );
|
||||
|
||||
// Now release the buffer, which SHOULD call the callbacks
|
||||
error = clReleaseMemObject( memObject );
|
||||
test_error( error, "Unable to release test buffer" );
|
||||
// Now release the buffer, which SHOULD call the callbacks
|
||||
error = clReleaseMemObject( memObject );
|
||||
test_error( error, "Unable to release test buffer" );
|
||||
|
||||
// Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
|
||||
memObject = NULL;
|
||||
// Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
|
||||
memObject = NULL;
|
||||
|
||||
// At this point, all three callbacks should have already been called
|
||||
int numErrors = 0;
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
// At this point, all three callbacks should have already been called
|
||||
int numErrors = 0;
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
// Spin waiting for the release to finish. If you don't call the mem_destructor_callback, you will not
|
||||
// pass the test. bugzilla 6316
|
||||
while( 0 == callbackOrders[i] )
|
||||
@@ -70,39 +70,39 @@ int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
|
||||
i+1, ABS( callbackOrders[ i ] ), i );
|
||||
numErrors++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ( numErrors > 0 ) ? -1 : 0;
|
||||
return ( numErrors > 0 ) ? -1 : 0;
|
||||
}
|
||||
|
||||
int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clMemWrapper testBuffer, testImage;
|
||||
cl_int error;
|
||||
clMemWrapper testBuffer, testImage;
|
||||
cl_int error;
|
||||
|
||||
|
||||
// Create a buffer and an image to test callbacks against
|
||||
testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
|
||||
test_error( error, "Unable to create testing buffer" );
|
||||
// Create a buffer and an image to test callbacks against
|
||||
testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
|
||||
test_error( error, "Unable to create testing buffer" );
|
||||
|
||||
if( test_mem_object_destructor_callback_single( testBuffer ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
|
||||
return -1;
|
||||
}
|
||||
if( test_mem_object_destructor_callback_single( testBuffer ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( checkForImageSupport( deviceID ) == 0 )
|
||||
{
|
||||
cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
|
||||
testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
|
||||
test_error( error, "Unable to create testing image" );
|
||||
if( checkForImageSupport( deviceID ) == 0 )
|
||||
{
|
||||
cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
|
||||
testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
|
||||
test_error( error, "Unable to create testing image" );
|
||||
|
||||
if( test_mem_object_destructor_callback_single( testImage ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if( test_mem_object_destructor_callback_single( testImage ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -25,20 +25,20 @@ extern cl_uint gRandomSeed;
|
||||
|
||||
static void CL_CALLBACK test_native_kernel_fn( void *userData )
|
||||
{
|
||||
struct arg_struct {
|
||||
cl_int * source;
|
||||
cl_int * dest;
|
||||
cl_int count;
|
||||
} *args = (arg_struct *)userData;
|
||||
struct arg_struct {
|
||||
cl_int * source;
|
||||
cl_int * dest;
|
||||
cl_int count;
|
||||
} *args = (arg_struct *)userData;
|
||||
|
||||
for( cl_int i = 0; i < args->count; i++ )
|
||||
args->dest[ i ] = args->source[ i ];
|
||||
for( cl_int i = 0; i < args->count; i++ )
|
||||
args->dest[ i ] = args->source[ i ];
|
||||
}
|
||||
|
||||
int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
|
||||
{
|
||||
int error;
|
||||
RandomSeed seed( gRandomSeed );
|
||||
int error;
|
||||
RandomSeed seed( gRandomSeed );
|
||||
// Check if we support native kernels
|
||||
cl_device_exec_capabilities capabilities;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL);
|
||||
@@ -47,72 +47,72 @@ int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue
|
||||
return 0;
|
||||
}
|
||||
|
||||
clMemWrapper streams[ 2 ];
|
||||
clMemWrapper streams[ 2 ];
|
||||
#if !(defined (_WIN32) && defined (_MSC_VER))
|
||||
cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
|
||||
cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
|
||||
#else
|
||||
cl_int* inBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
|
||||
cl_int* inBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
|
||||
cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
|
||||
#endif
|
||||
clEventWrapper finishEvent;
|
||||
clEventWrapper finishEvent;
|
||||
|
||||
struct arg_struct
|
||||
{
|
||||
cl_mem inputStream;
|
||||
cl_mem outputStream;
|
||||
cl_int count;
|
||||
} args;
|
||||
struct arg_struct
|
||||
{
|
||||
cl_mem inputStream;
|
||||
cl_mem outputStream;
|
||||
cl_int count;
|
||||
} args;
|
||||
|
||||
|
||||
// Create some input values
|
||||
generate_random_data( kInt, n_elems, seed, inBuffer );
|
||||
// Create some input values
|
||||
generate_random_data( kInt, n_elems, seed, inBuffer );
|
||||
|
||||
|
||||
// Create I/O streams
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
|
||||
test_error( error, "Unable to create I/O stream" );
|
||||
streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
|
||||
test_error( error, "Unable to create I/O stream" );
|
||||
// Create I/O streams
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
|
||||
test_error( error, "Unable to create I/O stream" );
|
||||
streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
|
||||
test_error( error, "Unable to create I/O stream" );
|
||||
|
||||
|
||||
// Set up the arrays to call with
|
||||
args.inputStream = streams[ 0 ];
|
||||
args.outputStream = streams[ 1 ];
|
||||
args.count = n_elems;
|
||||
// Set up the arrays to call with
|
||||
args.inputStream = streams[ 0 ];
|
||||
args.outputStream = streams[ 1 ];
|
||||
args.count = n_elems;
|
||||
|
||||
void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };
|
||||
void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };
|
||||
|
||||
|
||||
// Run the kernel
|
||||
error = clEnqueueNativeKernel( queue, test_native_kernel_fn,
|
||||
&args, sizeof( args ),
|
||||
2, &streams[ 0 ],
|
||||
(const void **)memLocs,
|
||||
0, NULL, &finishEvent );
|
||||
test_error( error, "Unable to queue native kernel" );
|
||||
// Run the kernel
|
||||
error = clEnqueueNativeKernel( queue, test_native_kernel_fn,
|
||||
&args, sizeof( args ),
|
||||
2, &streams[ 0 ],
|
||||
(const void **)memLocs,
|
||||
0, NULL, &finishEvent );
|
||||
test_error( error, "Unable to queue native kernel" );
|
||||
|
||||
// Finish and wait for the kernel to complete
|
||||
error = clFinish( queue );
|
||||
// Finish and wait for the kernel to complete
|
||||
error = clFinish( queue );
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
error = clWaitForEvents( 1, &finishEvent );
|
||||
error = clWaitForEvents( 1, &finishEvent );
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
// Now read the results and verify
|
||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
// Now read the results and verify
|
||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
|
||||
for( int i = 0; i < n_elems; i++ )
|
||||
{
|
||||
if( inBuffer[ i ] != outBuffer[ i ] )
|
||||
{
|
||||
log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
|
||||
i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
for( int i = 0; i < n_elems; i++ )
|
||||
{
|
||||
if( inBuffer[ i ] != outBuffer[ i ] )
|
||||
{
|
||||
log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
|
||||
i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -44,119 +44,119 @@ const char *kernel_string =
|
||||
* the value of 'test_type'
|
||||
*/
|
||||
static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
|
||||
cl_mem test_buf, cl_mem result_buf, test_type type)
|
||||
cl_mem test_buf, cl_mem result_buf, test_type type)
|
||||
{
|
||||
unsigned int test_success = 0;
|
||||
unsigned int test_success = 0;
|
||||
|
||||
unsigned int i;
|
||||
cl_int status;
|
||||
char *typestr;
|
||||
unsigned int i;
|
||||
cl_int status;
|
||||
char *typestr;
|
||||
|
||||
if (type == NON_NULL_PATH) {
|
||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
|
||||
typestr = "non-NULL";
|
||||
} else if (type == ADDROF_NULL_PATH) {
|
||||
test_buf = NULL;
|
||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
|
||||
typestr = "&NULL";
|
||||
} else if (type == NULL_PATH) {
|
||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
|
||||
typestr = "NULL";
|
||||
}
|
||||
if (type == NON_NULL_PATH) {
|
||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
|
||||
typestr = "non-NULL";
|
||||
} else if (type == ADDROF_NULL_PATH) {
|
||||
test_buf = NULL;
|
||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
|
||||
typestr = "&NULL";
|
||||
} else if (type == NULL_PATH) {
|
||||
status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
|
||||
typestr = "NULL";
|
||||
}
|
||||
|
||||
log_info("Testing setKernelArgs with %s buffer.\n", typestr);
|
||||
log_info("Testing setKernelArgs with %s buffer.\n", typestr);
|
||||
|
||||
if (status != CL_SUCCESS) {
|
||||
log_error("clSetKernelArg failed with status: %d\n", status);
|
||||
return FAILURE; // no point in continuing *this* test
|
||||
}
|
||||
if (status != CL_SUCCESS) {
|
||||
log_error("clSetKernelArg failed with status: %d\n", status);
|
||||
return FAILURE; // no point in continuing *this* test
|
||||
}
|
||||
|
||||
size_t global = NITEMS;
|
||||
status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
|
||||
NULL, 0, NULL, NULL);
|
||||
test_error(status, "NDRangeKernel failed.");
|
||||
size_t global = NITEMS;
|
||||
status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
|
||||
NULL, 0, NULL, NULL);
|
||||
test_error(status, "NDRangeKernel failed.");
|
||||
|
||||
cl_long* host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
|
||||
status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
|
||||
sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
|
||||
test_error(status, "ReadBuffer failed.");
|
||||
cl_long* host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
|
||||
status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
|
||||
sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
|
||||
test_error(status, "ReadBuffer failed.");
|
||||
|
||||
// in the non-null case, we expect NONZERO values:
|
||||
if (type == NON_NULL_PATH) {
|
||||
for (i=0; i<NITEMS; i++) {
|
||||
if (host_result[i] == 0) {
|
||||
log_error("failure: item %d in the result buffer was unexpectedly NULL.\n", i);
|
||||
test_success = FAILURE; break;
|
||||
}
|
||||
}
|
||||
// in the non-null case, we expect NONZERO values:
|
||||
if (type == NON_NULL_PATH) {
|
||||
for (i=0; i<NITEMS; i++) {
|
||||
if (host_result[i] == 0) {
|
||||
log_error("failure: item %d in the result buffer was unexpectedly NULL.\n", i);
|
||||
test_success = FAILURE; break;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (type == ADDROF_NULL_PATH || type == NULL_PATH) {
|
||||
for (i=0; i<NITEMS; i++) {
|
||||
if (host_result[i] != 0) {
|
||||
log_error("failure: item %d in the result buffer was unexpectedly non-NULL.\n", i);
|
||||
test_success = FAILURE; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (type == ADDROF_NULL_PATH || type == NULL_PATH) {
|
||||
for (i=0; i<NITEMS; i++) {
|
||||
if (host_result[i] != 0) {
|
||||
log_error("failure: item %d in the result buffer was unexpectedly non-NULL.\n", i);
|
||||
test_success = FAILURE; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(host_result);
|
||||
free(host_result);
|
||||
|
||||
if (test_success == SUCCESS) {
|
||||
log_info("\t%s ok.\n", typestr);
|
||||
}
|
||||
if (test_success == SUCCESS) {
|
||||
log_info("\t%s ok.\n", typestr);
|
||||
}
|
||||
|
||||
return test_success;
|
||||
return test_success;
|
||||
}
|
||||
|
||||
int test_null_buffer_arg(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
unsigned int test_success = 0;
|
||||
unsigned int i;
|
||||
cl_int status;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
unsigned int test_success = 0;
|
||||
unsigned int i;
|
||||
cl_int status;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
// prep kernel:
|
||||
program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
|
||||
test_error(status, "CreateProgramWithSource failed.");
|
||||
// prep kernel:
|
||||
program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
|
||||
test_error(status, "CreateProgramWithSource failed.");
|
||||
|
||||
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
test_error(status, "BuildProgram failed.");
|
||||
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
test_error(status, "BuildProgram failed.");
|
||||
|
||||
kernel = clCreateKernel(program, "test_kernel", &status);
|
||||
test_error(status, "CreateKernel failed.");
|
||||
kernel = clCreateKernel(program, "test_kernel", &status);
|
||||
test_error(status, "CreateKernel failed.");
|
||||
|
||||
cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
|
||||
NULL, NULL);
|
||||
cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
|
||||
NULL, NULL);
|
||||
|
||||
cl_mem dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long),
|
||||
NULL, NULL);
|
||||
cl_mem dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long),
|
||||
NULL, NULL);
|
||||
|
||||
// set the destination buffer normally:
|
||||
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
|
||||
test_error(status, "SetKernelArg failed.");
|
||||
// set the destination buffer normally:
|
||||
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
|
||||
test_error(status, "SetKernelArg failed.");
|
||||
|
||||
//
|
||||
// we test three cases:
|
||||
//
|
||||
// - typical case, used everyday: non-null buffer
|
||||
// - the case of src as &NULL (the spec-compliance test)
|
||||
// - the case of src as NULL (the backwards-compatibility test, Apple only)
|
||||
//
|
||||
//
|
||||
// we test three cases:
|
||||
//
|
||||
// - typical case, used everyday: non-null buffer
|
||||
// - the case of src as &NULL (the spec-compliance test)
|
||||
// - the case of src as NULL (the backwards-compatibility test, Apple only)
|
||||
//
|
||||
|
||||
test_success = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
|
||||
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
|
||||
test_success = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
|
||||
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
|
||||
|
||||
#ifdef __APPLE__
|
||||
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
|
||||
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
|
||||
#endif
|
||||
|
||||
// clean up:
|
||||
if (dev_src) clReleaseMemObject(dev_src);
|
||||
clReleaseMemObject(dev_dst);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
// clean up:
|
||||
if (dev_src) clReleaseMemObject(dev_src);
|
||||
clReleaseMemObject(dev_dst);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
|
||||
return test_success;
|
||||
return test_success;
|
||||
}
|
||||
|
||||
@@ -22,37 +22,37 @@
|
||||
#define PRINT_EXTENSION_INFO 0
|
||||
|
||||
int test_platform_extensions(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
const char * extensions[] = {
|
||||
"cl_khr_byte_addressable_store",
|
||||
// "cl_APPLE_SetMemObjectDestructor",
|
||||
"cl_khr_global_int32_base_atomics",
|
||||
"cl_khr_global_int32_extended_atomics",
|
||||
"cl_khr_local_int32_base_atomics",
|
||||
"cl_khr_local_int32_extended_atomics",
|
||||
"cl_khr_int64_base_atomics",
|
||||
"cl_khr_int64_extended_atomics",
|
||||
"cl_khr_byte_addressable_store",
|
||||
// "cl_APPLE_SetMemObjectDestructor",
|
||||
"cl_khr_global_int32_base_atomics",
|
||||
"cl_khr_global_int32_extended_atomics",
|
||||
"cl_khr_local_int32_base_atomics",
|
||||
"cl_khr_local_int32_extended_atomics",
|
||||
"cl_khr_int64_base_atomics",
|
||||
"cl_khr_int64_extended_atomics",
|
||||
// need to put in entires for various atomics
|
||||
"cl_khr_3d_image_writes",
|
||||
"cl_khr_fp16",
|
||||
"cl_khr_fp64",
|
||||
NULL
|
||||
"cl_khr_3d_image_writes",
|
||||
"cl_khr_fp16",
|
||||
"cl_khr_fp64",
|
||||
NULL
|
||||
};
|
||||
|
||||
bool extensionsSupported[] = {
|
||||
false, //"cl_khr_byte_addressable_store",
|
||||
false, // need to put in entires for various atomics
|
||||
false, // "cl_khr_global_int32_base_atomics",
|
||||
false, // "cl_khr_global_int32_extended_atomics",
|
||||
false, // "cl_khr_local_int32_base_atomics",
|
||||
false, // "cl_khr_local_int32_extended_atomics",
|
||||
false, // "cl_khr_int64_base_atomics",
|
||||
false, // "cl_khr_int64_extended_atomics",
|
||||
false, //"cl_khr_3d_image_writes",
|
||||
false, //"cl_khr_fp16",
|
||||
false, //"cl_khr_fp64",
|
||||
false //NULL
|
||||
false, //"cl_khr_byte_addressable_store",
|
||||
false, // need to put in entires for various atomics
|
||||
false, // "cl_khr_global_int32_base_atomics",
|
||||
false, // "cl_khr_global_int32_extended_atomics",
|
||||
false, // "cl_khr_local_int32_base_atomics",
|
||||
false, // "cl_khr_local_int32_extended_atomics",
|
||||
false, // "cl_khr_int64_base_atomics",
|
||||
false, // "cl_khr_int64_extended_atomics",
|
||||
false, //"cl_khr_3d_image_writes",
|
||||
false, //"cl_khr_fp16",
|
||||
false, //"cl_khr_fp64",
|
||||
false //NULL
|
||||
};
|
||||
|
||||
int extensionIndex;
|
||||
@@ -71,28 +71,28 @@ int test_platform_extensions(cl_device_id deviceID, cl_context context,
|
||||
// to get a result of type cl_platform_id
|
||||
|
||||
err = clGetDeviceInfo(deviceID,
|
||||
CL_DEVICE_PLATFORM,
|
||||
sizeof(cl_platform_id),
|
||||
(void *)(&platformID),
|
||||
NULL);
|
||||
CL_DEVICE_PLATFORM,
|
||||
sizeof(cl_platform_id),
|
||||
(void *)(&platformID),
|
||||
NULL);
|
||||
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
vlog_error("test_platform_extensions : could not get platformID from device\n");
|
||||
return -1;
|
||||
vlog_error("test_platform_extensions : could not get platformID from device\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
// now we grab the set of extensions specified by the platform
|
||||
err = clGetPlatformInfo(platformID,
|
||||
CL_PLATFORM_EXTENSIONS,
|
||||
sizeof(platform_extensions),
|
||||
(void *)(&platform_extensions[0]),
|
||||
NULL);
|
||||
CL_PLATFORM_EXTENSIONS,
|
||||
sizeof(platform_extensions),
|
||||
(void *)(&platform_extensions[0]),
|
||||
NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
vlog_error("test_platform_extensions : could not get extension string from platform\n");
|
||||
return -1;
|
||||
vlog_error("test_platform_extensions : could not get extension string from platform\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if PRINT_EXTENSION_INFO
|
||||
@@ -102,28 +102,28 @@ int test_platform_extensions(cl_device_id deviceID, cl_context context,
|
||||
// here we parse the platform extensions, to look for the "important" ones
|
||||
for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
|
||||
{
|
||||
if(strstr(platform_extensions, extensions[extensionIndex]) != NULL)
|
||||
{
|
||||
// we found it
|
||||
if(strstr(platform_extensions, extensions[extensionIndex]) != NULL)
|
||||
{
|
||||
// we found it
|
||||
#if PRINT_EXTENSION_INFO
|
||||
log_info("Found \"%s\" in platform extensions\n",
|
||||
extensions[extensionIndex]);
|
||||
log_info("Found \"%s\" in platform extensions\n",
|
||||
extensions[extensionIndex]);
|
||||
#endif
|
||||
extensionsSupported[extensionIndex] = true;
|
||||
}
|
||||
extensionsSupported[extensionIndex] = true;
|
||||
}
|
||||
}
|
||||
|
||||
// and then we grab the set of extensions specified by the device
|
||||
// (this can be turned into a "loop over all devices in this platform")
|
||||
err = clGetDeviceInfo(deviceID,
|
||||
CL_DEVICE_EXTENSIONS,
|
||||
sizeof(device_extensions),
|
||||
(void *)(&device_extensions[0]),
|
||||
NULL);
|
||||
CL_DEVICE_EXTENSIONS,
|
||||
sizeof(device_extensions),
|
||||
(void *)(&device_extensions[0]),
|
||||
NULL);
|
||||
if(err != CL_SUCCESS)
|
||||
{
|
||||
vlog_error("test_platform_extensions : could not get extension string from device\n");
|
||||
return -1;
|
||||
vlog_error("test_platform_extensions : could not get extension string from device\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
@@ -133,18 +133,18 @@ int test_platform_extensions(cl_device_id deviceID, cl_context context,
|
||||
|
||||
for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
|
||||
{
|
||||
if(extensionsSupported[extensionIndex] == false)
|
||||
{
|
||||
continue; // skip this one
|
||||
}
|
||||
if(extensionsSupported[extensionIndex] == false)
|
||||
{
|
||||
continue; // skip this one
|
||||
}
|
||||
|
||||
if(strstr(device_extensions, extensions[extensionIndex]) == NULL)
|
||||
{
|
||||
// device does not support it
|
||||
vlog_error("Platform supports extension \"%s\" but device does not\n",
|
||||
extensions[extensionIndex]);
|
||||
return -1;
|
||||
}
|
||||
if(strstr(device_extensions, extensions[extensionIndex]) == NULL)
|
||||
{
|
||||
// device does not support it
|
||||
vlog_error("Platform supports extension \"%s\" but device does not\n",
|
||||
extensions[extensionIndex]);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -207,18 +207,18 @@ int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \
|
||||
error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get command queue " name ); \
|
||||
if( val != expected ) \
|
||||
{ \
|
||||
log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \
|
||||
return -1; \
|
||||
} \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \
|
||||
error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get command queue " name ); \
|
||||
if( val != expected ) \
|
||||
{ \
|
||||
log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \
|
||||
return -1; \
|
||||
} \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
|
||||
@@ -300,18 +300,18 @@ int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_
|
||||
return -1;
|
||||
}
|
||||
|
||||
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
|
||||
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get mem object " name ); \
|
||||
if( val != expected ) \
|
||||
{ \
|
||||
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val ); \
|
||||
return -1; \
|
||||
} \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
|
||||
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get mem object " name ); \
|
||||
if( val != expected ) \
|
||||
{ \
|
||||
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val ); \
|
||||
return -1; \
|
||||
} \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
|
||||
@@ -339,24 +339,24 @@ static cl_mem_flags all_flags[16] = {
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
};
|
||||
|
||||
#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
|
||||
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get device " name ); \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
} \
|
||||
#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
|
||||
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get device " name ); \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
} \
|
||||
log_info( "\tReported device " name " : " type "\n", (cast)val );
|
||||
|
||||
#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \
|
||||
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get device " name ); \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
} \
|
||||
#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \
|
||||
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
|
||||
test_error( error, "Unable to get device " name ); \
|
||||
if( size != sizeof( val ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
|
||||
return -1; \
|
||||
} \
|
||||
log_info( "\tReported device " name " : " type "\n", (int)( val / div ) );
|
||||
|
||||
int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
|
||||
@@ -467,13 +467,13 @@ static const char *sample_compile_size[2] = {
|
||||
"__kernel void sample_test(__global int *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" dst[tid] = src[tid];\n"
|
||||
" dst[tid] = src[tid];\n"
|
||||
"\n"
|
||||
"}\n",
|
||||
"__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) void sample_test(__global int *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" dst[tid] = src[tid];\n"
|
||||
" dst[tid] = src[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
|
||||
174
test_conformance/api/test_queue_properties.cpp
Normal file
174
test_conformance/api/test_queue_properties.cpp
Normal file
@@ -0,0 +1,174 @@
|
||||
//
|
||||
// Copyright (c) 2018 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
using namespace std;
|
||||
/*
|
||||
Test for the cl_khr_create_command_queue extension. It validates that devices with OpenCL 1.X can use the clCreateCommandQueueWithPropertiesKHR function.
|
||||
Based on device capabilities, the test creates a queue with NULL properties, with the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property,
|
||||
with the CL_QUEUE_PROFILING_ENABLE property, and with both properties combined. A simple kernel is then executed on each such queue.
|
||||
*/
|
||||
|
||||
// OpenCL C source for the "vec_cpy" kernel used by this test: each work-item
// copies the int at its global id from src to dst.
const char *queue_test_kernel[] = {
|
||||
"__kernel void vec_cpy(__global int *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = src[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
/*
 * Creates a command queue with clCreateCommandQueueWithPropertiesKHR using the
 * supplied property list, runs the two-argument copy kernel on that queue and
 * checks that the destination buffer matches the source data.
 *
 * context        - context used to create the queue and the buffers
 * queue_prop_def - property list handed to the queue creation call (may be NULL)
 * deviceID       - device the queue is created on; also used to find the platform
 * kernel         - kernel whose arguments 0 and 1 are the source and destination buffers
 * num_elements   - number of ints to copy; also the global work size
 *
 * Returns 0 on success and -1 for the failures detected directly in this
 * function. OpenCL errors are routed through test_error (harness macro,
 * defined outside this file).
 */
int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop_def, cl_device_id deviceID, clKernelWrapper& kernel, size_t num_elements)
{
    clMemWrapper streams[2];
    int error;
    std::vector<int> buf(num_elements);
    // Separate read-back storage pre-filled with a value that never equals a
    // valid index, so stale host data cannot masquerade as a correct copy.
    std::vector<int> result(num_elements, -1);
    clCreateCommandQueueWithPropertiesKHR_fn clCreateCommandQueueWithPropertiesKHR = NULL;
    cl_platform_id platform;
    clEventWrapper event;

    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");

    // The KHR entry point is an extension function and must be looked up at run time.
    clCreateCommandQueueWithPropertiesKHR = (clCreateCommandQueueWithPropertiesKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR");
    if (clCreateCommandQueueWithPropertiesKHR == NULL)
    {
        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
        return -1;
    }

    clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error);
    test_error(error, "clCreateCommandQueueWithPropertiesKHR failed");

    // Source data: element i holds the value i.
    for (size_t i = 0; i < num_elements; ++i)
    {
        buf[i] = (int)i;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, num_elements * sizeof(int), buf.data(), &error);
    test_error( error, "clCreateBuffer failed." );
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, num_elements * sizeof(int), NULL, &error);
    test_error( error, "clCreateBuffer failed." );

    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    test_error( error, "clSetKernelArg failed." );

    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
    test_error( error, "clSetKernelArg failed." );

    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, &event);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // Wait explicitly: the queue may be out-of-order, so the read below must
    // not rely on implicit in-order execution.
    error = clWaitForEvents(1, &event);
    test_error(error, "clWaitForEvents failed.");

    // The size argument of clEnqueueReadBuffer is in bytes, not elements.
    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_elements * sizeof(int), result.data(), 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );

    for (size_t i = 0; i < num_elements; ++i)
    {
        if (result[i] != (int)i)
        {
            log_error("ERROR: Incorrect vector copy result.\n");
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
/*
 * Entry point of the cl_khr_create_command_queue test.
 *
 * Returns 0 without testing when the device does not report the extension.
 * Otherwise builds the vec_cpy kernel and runs enqueue_kernel with:
 *   - a NULL property list,
 *   - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (if the device reports it),
 *   - CL_QUEUE_PROFILING_ENABLE (if the device reports it),
 *   - both flags combined (if the device reports both).
 *
 * deviceID     - device under test
 * context      - context used for the program, buffers and queues
 * queue        - unused here; each sub-test creates its own queue
 * num_elements - number of ints to copy; values <= 0 are replaced by 128
 *
 * Returns 0 on success. OpenCL and sub-test errors are routed through
 * test_error (harness macro, defined outside this file).
 */
int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    if (num_elements <= 0)
    {
        num_elements = 128;
    }
    int error = 0;

    clProgramWrapper program;
    clKernelWrapper kernel;
    size_t strSize;
    std::string strExt;
    // CL_DEVICE_QUEUE_PROPERTIES reports a cl_command_queue_properties bitfield;
    // it is an integer type, so initialise it with 0 rather than NULL.
    cl_command_queue_properties device_props = 0;
    // { property name, property value, terminating 0 }; the value slot is
    // overwritten before each sub-test.
    cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 };

    // Query extension
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &strSize);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    strExt.resize(strSize);
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, strExt.size(), &strExt[0], NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    log_info("CL_DEVICE_EXTENSIONS:\n%s\n\n", strExt.c_str());

    if (strExt.find("cl_khr_create_command_queue") == std::string::npos)
    {
        log_info("extension cl_khr_create_command_queue is not supported.\n");
        return 0;
    }

    error = create_single_kernel_helper(context, &program, &kernel, 1, queue_test_kernel, "vec_cpy");
    test_error(error, "create_single_kernel_helper failed");

    log_info("Queue property NULL. Testing ... \n");
    error = enqueue_kernel(context, NULL,deviceID, kernel, (size_t)num_elements);
    test_error(error, "enqueue_kernel failed");

    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");

    if (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    } else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not supported \n");
    }

    if (device_props & CL_QUEUE_PROFILING_ENABLE)
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    } else
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
    }

    // Both flags together are only exercised when the device reports both.
    if ((device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) && (device_props & CL_QUEUE_PROFILING_ENABLE))
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE or CL_QUEUE_PROFILING_ENABLE not supported \n");
    }

    return 0;
}
|
||||
@@ -21,214 +21,214 @@
|
||||
// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
|
||||
// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
|
||||
// this define.
|
||||
//#define VERIFY_AFTER_RELEASE 1
|
||||
//#define VERIFY_AFTER_RELEASE 1
|
||||
|
||||
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
|
||||
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
|
||||
|
||||
#define VERIFY_INSTANCE_COUNT(c,rightValue) if( c != rightValue ) { \
|
||||
log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", rightValue, c ); \
|
||||
return -1; }
|
||||
return -1; }
|
||||
|
||||
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
|
||||
{
|
||||
cl_command_queue queue;
|
||||
cl_uint numInstances;
|
||||
int err;
|
||||
cl_command_queue queue;
|
||||
cl_uint numInstances;
|
||||
int err;
|
||||
|
||||
|
||||
/* Create a test queue */
|
||||
queue = clCreateCommandQueue( context, deviceID, 0, &err );
|
||||
test_error( err, "Unable to create command queue to test with" );
|
||||
/* Create a test queue */
|
||||
queue = clCreateCommandQueue( context, deviceID, 0, &err );
|
||||
test_error( err, "Unable to create command queue to test with" );
|
||||
|
||||
/* Test the instance count */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
/* Test the instance count */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
|
||||
/* Now release the program */
|
||||
clReleaseCommandQueue( queue );
|
||||
/* Now release the program */
|
||||
clReleaseCommandQueue( queue );
|
||||
#ifdef VERIFY_AFTER_RELEASE
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
if( err != CL_INVALID_COMMAND_QUEUE )
|
||||
{
|
||||
print_error( err, "Command queue was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
if( err != CL_INVALID_COMMAND_QUEUE )
|
||||
{
|
||||
print_error( err, "Command queue was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
|
||||
{
|
||||
cl_command_queue queue;
|
||||
unsigned int numInstances, i;
|
||||
int err;
|
||||
cl_command_queue queue;
|
||||
unsigned int numInstances, i;
|
||||
int err;
|
||||
|
||||
|
||||
/* Create a test program */
|
||||
queue = clCreateCommandQueue( context, deviceID, 0, &err );
|
||||
test_error( err, "Unable to create command queue to test with" );
|
||||
/* Create a test program */
|
||||
queue = clCreateCommandQueue( context, deviceID, 0, &err );
|
||||
test_error( err, "Unable to create command queue to test with" );
|
||||
|
||||
/* Increment 9 times, which should bring the count to 10 */
|
||||
for( i = 0; i < 9; i++ )
|
||||
{
|
||||
clRetainCommandQueue( queue );
|
||||
}
|
||||
/* Increment 9 times, which should bring the count to 10 */
|
||||
for( i = 0; i < 9; i++ )
|
||||
{
|
||||
clRetainCommandQueue( queue );
|
||||
}
|
||||
|
||||
/* Test the instance count */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 10 );
|
||||
/* Test the instance count */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 10 );
|
||||
|
||||
/* Now release 5 times, which should take us to 5 */
|
||||
for( i = 0; i < 5; i++ )
|
||||
{
|
||||
clReleaseCommandQueue( queue );
|
||||
}
|
||||
/* Now release 5 times, which should take us to 5 */
|
||||
for( i = 0; i < 5; i++ )
|
||||
{
|
||||
clReleaseCommandQueue( queue );
|
||||
}
|
||||
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 5 );
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 5 );
|
||||
|
||||
/* Retain again three times, which should take us to 8 */
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
clRetainCommandQueue( queue );
|
||||
}
|
||||
/* Retain again three times, which should take us to 8 */
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
clRetainCommandQueue( queue );
|
||||
}
|
||||
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 8 );
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 8 );
|
||||
|
||||
/* Release 7 times, which should take it to 1 */
|
||||
for( i = 0; i < 7; i++ )
|
||||
{
|
||||
clReleaseCommandQueue( queue );
|
||||
}
|
||||
/* Release 7 times, which should take it to 1 */
|
||||
for( i = 0; i < 7; i++ )
|
||||
{
|
||||
clReleaseCommandQueue( queue );
|
||||
}
|
||||
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
test_error( err, "Unable to get queue instance count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
|
||||
/* And one last one */
|
||||
clReleaseCommandQueue( queue );
|
||||
/* And one last one */
|
||||
clReleaseCommandQueue( queue );
|
||||
|
||||
#ifdef VERIFY_AFTER_RELEASE
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
if( err != CL_INVALID_COMMAND_QUEUE )
|
||||
{
|
||||
print_error( err, "Command queue was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_QUEUE_INSTANCE_COUNT( queue );
|
||||
if( err != CL_INVALID_COMMAND_QUEUE )
|
||||
{
|
||||
print_error( err, "Command queue was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem object;
|
||||
cl_uint numInstances;
|
||||
int err;
|
||||
cl_mem object;
|
||||
cl_uint numInstances;
|
||||
int err;
|
||||
|
||||
|
||||
/* Create a test object */
|
||||
object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
|
||||
test_error( err, "Unable to create buffer to test with" );
|
||||
/* Create a test object */
|
||||
object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
|
||||
test_error( err, "Unable to create buffer to test with" );
|
||||
|
||||
/* Test the instance count */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
/* Test the instance count */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
|
||||
/* Now release the program */
|
||||
clReleaseMemObject( object );
|
||||
/* Now release the program */
|
||||
clReleaseMemObject( object );
|
||||
#ifdef VERIFY_AFTER_RELEASE
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
if( err != CL_INVALID_MEM_OBJECT )
|
||||
{
|
||||
print_error( err, "Mem object was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
if( err != CL_INVALID_MEM_OBJECT )
|
||||
{
|
||||
print_error( err, "Mem object was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem object;
|
||||
unsigned int numInstances, i;
|
||||
int err;
|
||||
cl_mem object;
|
||||
unsigned int numInstances, i;
|
||||
int err;
|
||||
|
||||
|
||||
/* Create a test object */
|
||||
object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
|
||||
test_error( err, "Unable to create buffer to test with" );
|
||||
/* Create a test object */
|
||||
object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
|
||||
test_error( err, "Unable to create buffer to test with" );
|
||||
|
||||
/* Increment 9 times, which should bring the count to 10 */
|
||||
for( i = 0; i < 9; i++ )
|
||||
{
|
||||
clRetainMemObject( object );
|
||||
}
|
||||
/* Increment 9 times, which should bring the count to 10 */
|
||||
for( i = 0; i < 9; i++ )
|
||||
{
|
||||
clRetainMemObject( object );
|
||||
}
|
||||
|
||||
/* Test the instance count */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 10 );
|
||||
/* Test the instance count */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 10 );
|
||||
|
||||
/* Now release 5 times, which should take us to 5 */
|
||||
for( i = 0; i < 5; i++ )
|
||||
{
|
||||
clReleaseMemObject( object );
|
||||
}
|
||||
/* Now release 5 times, which should take us to 5 */
|
||||
for( i = 0; i < 5; i++ )
|
||||
{
|
||||
clReleaseMemObject( object );
|
||||
}
|
||||
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 5 );
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 5 );
|
||||
|
||||
/* Retain again three times, which should take us to 8 */
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
clRetainMemObject( object );
|
||||
}
|
||||
/* Retain again three times, which should take us to 8 */
|
||||
for( i = 0; i < 3; i++ )
|
||||
{
|
||||
clRetainMemObject( object );
|
||||
}
|
||||
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 8 );
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 8 );
|
||||
|
||||
/* Release 7 times, which should take it to 1 */
|
||||
for( i = 0; i < 7; i++ )
|
||||
{
|
||||
clReleaseMemObject( object );
|
||||
}
|
||||
/* Release 7 times, which should take it to 1 */
|
||||
for( i = 0; i < 7; i++ )
|
||||
{
|
||||
clReleaseMemObject( object );
|
||||
}
|
||||
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
test_error( err, "Unable to get mem object count" );
|
||||
VERIFY_INSTANCE_COUNT( numInstances, 1 );
|
||||
|
||||
/* And one last one */
|
||||
clReleaseMemObject( object );
|
||||
/* And one last one */
|
||||
clReleaseMemObject( object );
|
||||
|
||||
#ifdef VERIFY_AFTER_RELEASE
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
if( err != CL_INVALID_MEM_OBJECT )
|
||||
{
|
||||
print_error( err, "Mem object was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
/* We're not allowed to get the instance count after the object has been completely released. But that's
|
||||
exactly how we can tell the release worked--by making sure getting the instance count fails! */
|
||||
GET_MEM_INSTANCE_COUNT( object );
|
||||
if( err != CL_INVALID_MEM_OBJECT )
|
||||
{
|
||||
print_error( err, "Mem object was not properly released" );
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -23,29 +23,29 @@
|
||||
|
||||
int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
int error;
|
||||
const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
int error;
|
||||
const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };
|
||||
|
||||
/* Create a test program */
|
||||
program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
|
||||
test_error( error, "Unable to create program to test with" );
|
||||
/* Create a test program */
|
||||
program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
|
||||
test_error( error, "Unable to create program to test with" );
|
||||
|
||||
/* Compile the program */
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build sample program to test with" );
|
||||
/* Compile the program */
|
||||
error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
|
||||
test_error( error, "Unable to build sample program to test with" );
|
||||
|
||||
/* And create a kernel from it */
|
||||
kernel = clCreateKernel( program, "sample_test", &error );
|
||||
test_error( error, "Unable to create kernel" );
|
||||
/* And create a kernel from it */
|
||||
kernel = clCreateKernel( program, "sample_test", &error );
|
||||
test_error( error, "Unable to create kernel" );
|
||||
|
||||
/* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
|
||||
clReleaseProgram( program );
|
||||
clReleaseKernel( kernel );
|
||||
/* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
|
||||
clReleaseProgram( program );
|
||||
clReleaseKernel( kernel );
|
||||
|
||||
/* If we got here fine, we succeeded. If not, well, we won't be able to return an error :) */
|
||||
return 0;
|
||||
/* If we got here fine, we succeeded. If not, well, we won't be able to return an error :) */
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *sample_delay_kernel[] = {
|
||||
@@ -59,51 +59,51 @@ const char *sample_delay_kernel[] = {
|
||||
|
||||
int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_mem streams[2];
|
||||
size_t threads[1] = { 10 }, localThreadSize;
|
||||
int error;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
cl_mem streams[2];
|
||||
size_t threads[1] = { 10 }, localThreadSize;
|
||||
|
||||
|
||||
/* We now need an event to test. So we'll execute a kernel to get one */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
/* We now need an event to test. So we'll execute a kernel to get one */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
|
||||
test_error( error, "Unable to calc local thread size" );
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
|
||||
test_error( error, "Unable to calc local thread size" );
|
||||
|
||||
|
||||
/* Execute the kernel */
|
||||
/* Execute the kernel */
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
|
||||
/* The kernel should still be executing, but we should still be able to release it. It's not terribly
|
||||
useful, but we should be able to do it, if the internal refcounting is indeed correct. */
|
||||
/* The kernel should still be executing, but we should still be able to release it. It's not terribly
|
||||
useful, but we should be able to do it, if the internal refcounting is indeed correct. */
|
||||
|
||||
clReleaseMemObject( streams[ 1 ] );
|
||||
clReleaseMemObject( streams[ 0 ] );
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
clReleaseMemObject( streams[ 1 ] );
|
||||
clReleaseMemObject( streams[ 0 ] );
|
||||
clReleaseKernel( kernel );
|
||||
clReleaseProgram( program );
|
||||
|
||||
/* Now make sure we're really finished before we go on. */
|
||||
error = clFinish(queue);
|
||||
test_error( error, "Unable to finish context.");
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -30,49 +30,49 @@
|
||||
#endif
|
||||
|
||||
|
||||
basefn basefn_list[] = {
|
||||
basefn basefn_list[] = {
|
||||
test_atomic_add,
|
||||
test_atomic_sub,
|
||||
test_atomic_xchg,
|
||||
test_atomic_min,
|
||||
test_atomic_max,
|
||||
test_atomic_inc,
|
||||
test_atomic_dec,
|
||||
test_atomic_cmpxchg,
|
||||
test_atomic_and,
|
||||
test_atomic_or,
|
||||
test_atomic_xor,
|
||||
test_atomic_sub,
|
||||
test_atomic_xchg,
|
||||
test_atomic_min,
|
||||
test_atomic_max,
|
||||
test_atomic_inc,
|
||||
test_atomic_dec,
|
||||
test_atomic_cmpxchg,
|
||||
test_atomic_and,
|
||||
test_atomic_or,
|
||||
test_atomic_xor,
|
||||
|
||||
test_atomic_add_index,
|
||||
test_atomic_add_index_bin
|
||||
test_atomic_add_index,
|
||||
test_atomic_add_index_bin
|
||||
};
|
||||
|
||||
const char *basefn_names[] = {
|
||||
"atomic_add",
|
||||
"atomic_sub",
|
||||
"atomic_xchg",
|
||||
"atomic_min",
|
||||
"atomic_max",
|
||||
"atomic_inc",
|
||||
"atomic_dec",
|
||||
"atomic_cmpxchg",
|
||||
"atomic_and",
|
||||
"atomic_or",
|
||||
"atomic_xor",
|
||||
"atomic_sub",
|
||||
"atomic_xchg",
|
||||
"atomic_min",
|
||||
"atomic_max",
|
||||
"atomic_inc",
|
||||
"atomic_dec",
|
||||
"atomic_cmpxchg",
|
||||
"atomic_and",
|
||||
"atomic_or",
|
||||
"atomic_xor",
|
||||
|
||||
"atomic_add_index",
|
||||
"atomic_add_index_bin",
|
||||
"atomic_add_index",
|
||||
"atomic_add_index_bin",
|
||||
|
||||
"all",
|
||||
"all",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -20,20 +20,20 @@
|
||||
|
||||
extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
|
||||
|
||||
extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -38,18 +38,18 @@ const char *atomic_local_pattern[] = {
|
||||
"__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" int dstItemIdx;\n"
|
||||
" int dstItemIdx;\n"
|
||||
"\n"
|
||||
" // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
|
||||
" for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
|
||||
" destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
|
||||
" for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
|
||||
" destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
"\n"
|
||||
,
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
|
||||
" for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
|
||||
" finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
|
||||
" for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
|
||||
" finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
|
||||
"}\n" };
|
||||
|
||||
|
||||
@@ -58,25 +58,25 @@ const char *atomic_local_pattern[] = {
|
||||
|
||||
struct TestFns
|
||||
{
|
||||
cl_int mIntStartValue;
|
||||
cl_long mLongStartValue;
|
||||
cl_int mIntStartValue;
|
||||
cl_long mLongStartValue;
|
||||
|
||||
size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );
|
||||
size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );
|
||||
|
||||
// Integer versions
|
||||
cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
|
||||
void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
|
||||
bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );
|
||||
cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
|
||||
void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
|
||||
bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );
|
||||
|
||||
// Long versions
|
||||
cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
|
||||
void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
|
||||
bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );
|
||||
cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
|
||||
void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
|
||||
bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );
|
||||
|
||||
// Float versions
|
||||
cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
|
||||
void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
|
||||
bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
|
||||
cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
|
||||
void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
|
||||
bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
|
||||
};
|
||||
|
||||
bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
|
||||
@@ -443,14 +443,14 @@ int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_comma
|
||||
#pragma mark ---- add
|
||||
|
||||
const char atom_add_core[] =
|
||||
" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
|
||||
" atom_add( &destMemory[0], tid + 3 );\n"
|
||||
" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
|
||||
" atom_add( &destMemory[0], tid + 3 );\n"
|
||||
" atom_add( &destMemory[0], tid + 3 );\n"
|
||||
" atom_add( &destMemory[0], tid + 3 );\n";
|
||||
|
||||
const char atomic_add_core[] =
|
||||
" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
|
||||
" atomic_add( &destMemory[0], tid + 3 );\n"
|
||||
" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
|
||||
" atomic_add( &destMemory[0], tid + 3 );\n"
|
||||
" atomic_add( &destMemory[0], tid + 3 );\n"
|
||||
" atomic_add( &destMemory[0], tid + 3 );\n";
|
||||
|
||||
@@ -555,7 +555,7 @@ bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalVa
|
||||
return false;
|
||||
}
|
||||
else
|
||||
valids[ finalValue ] = 1; // So the following loop will be okay
|
||||
valids[ finalValue ] = 1; // So the following loop will be okay
|
||||
|
||||
/* Now check that every entry has one and only one count */
|
||||
if( originalValidCount != 1 )
|
||||
@@ -612,7 +612,7 @@ bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long fina
|
||||
return false;
|
||||
}
|
||||
else
|
||||
valids[ finalValue ] = 1; // So the following loop will be okay
|
||||
valids[ finalValue ] = 1; // So the following loop will be okay
|
||||
|
||||
/* Now check that every entry has one and only one count */
|
||||
if( originalValidCount != 1 )
|
||||
@@ -670,7 +670,7 @@ bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float f
|
||||
return false;
|
||||
}
|
||||
else
|
||||
valids[ (int)finalValue ] = 1; // So the following loop will be okay
|
||||
valids[ (int)finalValue ] = 1; // So the following loop will be okay
|
||||
|
||||
/* Now check that every entry has one and only one count */
|
||||
if( originalValidCount != 1 )
|
||||
|
||||
@@ -27,19 +27,19 @@ const char * atomic_index_source =
|
||||
"// will be missing some.\n"
|
||||
"\n"
|
||||
"__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" \n"
|
||||
" int counter_to_use = atom_add(counter, 1);\n"
|
||||
" counts[counter_to_use] = tid;\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" \n"
|
||||
" int counter_to_use = atom_add(counter, 1);\n"
|
||||
" counts[counter_to_use] = tid;\n"
|
||||
"}";
|
||||
|
||||
int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper counter, counters;
|
||||
size_t numGlobalThreads, numLocalThreads;
|
||||
int fail = 0, succeed = 0, err;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper counter, counters;
|
||||
size_t numGlobalThreads, numLocalThreads;
|
||||
int fail = 0, succeed = 0, err;
|
||||
|
||||
/* Check if atomics are supported. */
|
||||
if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
|
||||
@@ -47,87 +47,87 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_
|
||||
return 0;
|
||||
}
|
||||
|
||||
//===== add_index test
|
||||
// The index test replicates what particles does.
|
||||
// It uses one memory location to keep track of the current index and then each thread
|
||||
// does an atomic add to it to get its new location. The threads then write to their
|
||||
// assigned location. At the end we check to make sure that each thread's ID shows up
|
||||
// exactly once in the output.
|
||||
//===== add_index test
|
||||
// The index test replicates what particles does.
|
||||
// It uses one memory location to keep track of the current index and then each thread
|
||||
// does an atomic add to it to get its new location. The threads then write to their
|
||||
// assigned location. At the end we check to make sure that each thread's ID shows up
|
||||
// exactly once in the output.
|
||||
|
||||
numGlobalThreads = 2048;
|
||||
numGlobalThreads = 2048;
|
||||
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
|
||||
return -1;
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
|
||||
return -1;
|
||||
|
||||
if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
|
||||
return -1;
|
||||
if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
|
||||
return -1;
|
||||
|
||||
log_info("Execute global_threads:%d local_threads:%d\n",
|
||||
(int)numGlobalThreads, (int)numLocalThreads);
|
||||
log_info("Execute global_threads:%d local_threads:%d\n",
|
||||
(int)numGlobalThreads, (int)numLocalThreads);
|
||||
|
||||
// Create the counter that will keep track of where each thread writes.
|
||||
counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * 1, NULL, NULL);
|
||||
// Create the counters that will hold the results of each thread writing
|
||||
// its ID into a (hopefully) unique location.
|
||||
counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * numGlobalThreads, NULL, NULL);
|
||||
// Create the counter that will keep track of where each thread writes.
|
||||
counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * 1, NULL, NULL);
|
||||
// Create the counters that will hold the results of each thread writing
|
||||
// its ID into a (hopefully) unique location.
|
||||
counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * numGlobalThreads, NULL, NULL);
|
||||
|
||||
// Reset all those locations to -1 to indciate they have not been used.
|
||||
cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
|
||||
if (values == NULL) {
|
||||
log_error("add_index_test FAILED to allocate memory for initial values.\n");
|
||||
fail = 1; succeed = -1;
|
||||
} else {
|
||||
memset(values, -1, numLocalThreads);
|
||||
unsigned int i=0;
|
||||
for (i=0; i<numGlobalThreads; i++)
|
||||
values[i] = -1;
|
||||
int init=0;
|
||||
err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
|
||||
err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to execute kernel: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to read back results: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
} else {
|
||||
unsigned int looking_for, index;
|
||||
for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
|
||||
int instances_found=0;
|
||||
for (index=0; index<numGlobalThreads; index++) {
|
||||
if (values[index]==(int)looking_for)
|
||||
instances_found++;
|
||||
}
|
||||
if (instances_found != 1) {
|
||||
log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!fail) {
|
||||
log_info("add_index_test passed. Each thread used exactly one index.\n");
|
||||
}
|
||||
free(values);
|
||||
}
|
||||
return fail;
|
||||
// Reset all those locations to -1 to indciate they have not been used.
|
||||
cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
|
||||
if (values == NULL) {
|
||||
log_error("add_index_test FAILED to allocate memory for initial values.\n");
|
||||
fail = 1; succeed = -1;
|
||||
} else {
|
||||
memset(values, -1, numLocalThreads);
|
||||
unsigned int i=0;
|
||||
for (i=0; i<numGlobalThreads; i++)
|
||||
values[i] = -1;
|
||||
int init=0;
|
||||
err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
|
||||
err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to execute kernel: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to read back results: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
} else {
|
||||
unsigned int looking_for, index;
|
||||
for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
|
||||
int instances_found=0;
|
||||
for (index=0; index<numGlobalThreads; index++) {
|
||||
if (values[index]==(int)looking_for)
|
||||
instances_found++;
|
||||
}
|
||||
if (instances_found != 1) {
|
||||
log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!fail) {
|
||||
log_info("add_index_test passed. Each thread used exactly one index.\n");
|
||||
}
|
||||
free(values);
|
||||
}
|
||||
return fail;
|
||||
}
|
||||
|
||||
const char *add_index_bin_kernel[] = {
|
||||
@@ -137,11 +137,11 @@ const char *add_index_bin_kernel[] = {
|
||||
"// This is the same as the memory update for the particles demo.\n"
|
||||
"\n"
|
||||
"__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" int location = bin_assignments[tid];\n"
|
||||
" int counter = atom_add(&bin_counters[location], 1);\n"
|
||||
" bins[location*max_counts_per_bin + counter] = tid;\n"
|
||||
" int location = bin_assignments[tid];\n"
|
||||
" int counter = atom_add(&bin_counters[location], 1);\n"
|
||||
" bins[location*max_counts_per_bin + counter] = tid;\n"
|
||||
"}" };
|
||||
|
||||
// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
|
||||
@@ -149,212 +149,212 @@ const char *add_index_bin_kernel[] = {
|
||||
// This is the same as the memory update for the particles demo.
|
||||
int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
|
||||
{
|
||||
int number_of_items = (int)global_threads[0];
|
||||
size_t local_threads[1];
|
||||
int divisor = 12;
|
||||
int number_of_bins = number_of_items/divisor;
|
||||
int max_counts_per_bin = divisor*2;
|
||||
int number_of_items = (int)global_threads[0];
|
||||
size_t local_threads[1];
|
||||
int divisor = 12;
|
||||
int number_of_bins = number_of_items/divisor;
|
||||
int max_counts_per_bin = divisor*2;
|
||||
|
||||
int fail = 0;
|
||||
int succeed = 0;
|
||||
int err;
|
||||
int fail = 0;
|
||||
int succeed = 0;
|
||||
int err;
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
// log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
|
||||
// number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
|
||||
// log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
|
||||
// number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
|
||||
|
||||
//===== add_index_bin test
|
||||
// The index test replicates what particles does.
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
|
||||
test_error( err, "Unable to create testing kernel" );
|
||||
//===== add_index_bin test
|
||||
// The index test replicates what particles does.
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
|
||||
test_error( err, "Unable to create testing kernel" );
|
||||
|
||||
if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
|
||||
return -1;
|
||||
if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
|
||||
return -1;
|
||||
|
||||
log_info("Execute global_threads:%d local_threads:%d\n",
|
||||
(int)global_threads[0], (int)local_threads[0]);
|
||||
log_info("Execute global_threads:%d local_threads:%d\n",
|
||||
(int)global_threads[0], (int)local_threads[0]);
|
||||
|
||||
// Allocate our storage
|
||||
cl_mem bin_counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * number_of_bins, NULL, NULL);
|
||||
cl_mem bins = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * number_of_bins*max_counts_per_bin, NULL, NULL);
|
||||
cl_mem bin_assignments = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY),
|
||||
sizeof(cl_int) * number_of_items, NULL, NULL);
|
||||
// Allocate our storage
|
||||
cl_mem bin_counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * number_of_bins, NULL, NULL);
|
||||
cl_mem bins = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * number_of_bins*max_counts_per_bin, NULL, NULL);
|
||||
cl_mem bin_assignments = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY),
|
||||
sizeof(cl_int) * number_of_items, NULL, NULL);
|
||||
|
||||
if (bin_counters == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
|
||||
return -1;
|
||||
}
|
||||
if (bins == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bins.\n");
|
||||
return -1;
|
||||
}
|
||||
if (bin_assignments == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
if (bin_counters == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
|
||||
return -1;
|
||||
}
|
||||
if (bins == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bins.\n");
|
||||
return -1;
|
||||
}
|
||||
if (bin_assignments == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialize our storage
|
||||
cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
|
||||
if (!l_bin_counts) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
|
||||
return -1;
|
||||
}
|
||||
int i;
|
||||
for (i=0; i<number_of_bins; i++)
|
||||
l_bin_counts[i] = 0;
|
||||
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
// Initialize our storage
|
||||
cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
|
||||
if (!l_bin_counts) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
|
||||
return -1;
|
||||
}
|
||||
int i;
|
||||
for (i=0; i<number_of_bins; i++)
|
||||
l_bin_counts[i] = 0;
|
||||
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
|
||||
if (!values) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<number_of_bins*max_counts_per_bin; i++)
|
||||
values[i] = -1;
|
||||
err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
free(values);
|
||||
cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
|
||||
if (!values) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<number_of_bins*max_counts_per_bin; i++)
|
||||
values[i] = -1;
|
||||
err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
free(values);
|
||||
|
||||
cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
|
||||
if (!l_bin_assignments) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<number_of_items; i++) {
|
||||
int bin = random_in_range(0, number_of_bins-1, d);
|
||||
while (l_bin_counts[bin] >= max_counts_per_bin) {
|
||||
bin = random_in_range(0, number_of_bins-1, d);
|
||||
}
|
||||
if (bin >= number_of_bins)
|
||||
log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
|
||||
if (l_bin_counts[bin]+1 > max_counts_per_bin)
|
||||
log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
|
||||
l_bin_counts[bin]++;
|
||||
l_bin_assignments[i] = bin;
|
||||
// log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
// Setup the kernel
|
||||
err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
return -1;
|
||||
}
|
||||
cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
|
||||
if (!l_bin_assignments) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<number_of_items; i++) {
|
||||
int bin = random_in_range(0, number_of_bins-1, d);
|
||||
while (l_bin_counts[bin] >= max_counts_per_bin) {
|
||||
bin = random_in_range(0, number_of_bins-1, d);
|
||||
}
|
||||
if (bin >= number_of_bins)
|
||||
log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
|
||||
if (l_bin_counts[bin]+1 > max_counts_per_bin)
|
||||
log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
|
||||
l_bin_counts[bin]++;
|
||||
l_bin_assignments[i] = bin;
|
||||
// log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
// Setup the kernel
|
||||
err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
}
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
}
|
||||
|
||||
cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
|
||||
if (!final_bin_assignments) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
|
||||
if (!final_bin_assignments) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
|
||||
cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
|
||||
if (!final_bin_counts) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
|
||||
if (!final_bin_counts) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
|
||||
// Verification.
|
||||
int errors=0;
|
||||
int current_bin;
|
||||
int search;
|
||||
// Print out all the contents of the bins.
|
||||
// for (current_bin=0; current_bin<number_of_bins; current_bin++)
|
||||
// for (search=0; search<max_counts_per_bin; search++)
|
||||
// log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
|
||||
// Verification.
|
||||
int errors=0;
|
||||
int current_bin;
|
||||
int search;
|
||||
// Print out all the contents of the bins.
|
||||
// for (current_bin=0; current_bin<number_of_bins; current_bin++)
|
||||
// for (search=0; search<max_counts_per_bin; search++)
|
||||
// log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
|
||||
|
||||
// First verify that there are the correct number in each bin.
|
||||
for (current_bin=0; current_bin<number_of_bins; current_bin++) {
|
||||
int expected_number = l_bin_counts[current_bin];
|
||||
int actual_number = final_bin_counts[current_bin];
|
||||
if (expected_number != actual_number) {
|
||||
log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
|
||||
errors++;
|
||||
}
|
||||
for (search=0; search<expected_number; search++) {
|
||||
if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
|
||||
log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
for (search=expected_number; search<max_counts_per_bin; search++) {
|
||||
if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
|
||||
log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now verify that the correct ones are in each bin
|
||||
int index;
|
||||
for (index=0; index<number_of_items; index++) {
|
||||
int expected_bin = l_bin_assignments[index];
|
||||
int found_it = 0;
|
||||
for (search=0; search<l_bin_counts[expected_bin]; search++) {
|
||||
if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
|
||||
found_it = 1;
|
||||
}
|
||||
}
|
||||
if (found_it == 0) {
|
||||
log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
free(l_bin_counts);
|
||||
free(l_bin_assignments);
|
||||
free(final_bin_assignments);
|
||||
free(final_bin_counts);
|
||||
clReleaseMemObject(bin_counters);
|
||||
clReleaseMemObject(bins);
|
||||
clReleaseMemObject(bin_assignments);
|
||||
if (errors == 0) {
|
||||
log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
|
||||
return 0;
|
||||
} else {
|
||||
log_error("add_index_bin_test FAILED: %d errors.\n", errors);
|
||||
return -1;
|
||||
}
|
||||
// First verify that there are the correct number in each bin.
|
||||
for (current_bin=0; current_bin<number_of_bins; current_bin++) {
|
||||
int expected_number = l_bin_counts[current_bin];
|
||||
int actual_number = final_bin_counts[current_bin];
|
||||
if (expected_number != actual_number) {
|
||||
log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
|
||||
errors++;
|
||||
}
|
||||
for (search=0; search<expected_number; search++) {
|
||||
if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
|
||||
log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
for (search=expected_number; search<max_counts_per_bin; search++) {
|
||||
if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
|
||||
log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now verify that the correct ones are in each bin
|
||||
int index;
|
||||
for (index=0; index<number_of_items; index++) {
|
||||
int expected_bin = l_bin_assignments[index];
|
||||
int found_it = 0;
|
||||
for (search=0; search<l_bin_counts[expected_bin]; search++) {
|
||||
if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
|
||||
found_it = 1;
|
||||
}
|
||||
}
|
||||
if (found_it == 0) {
|
||||
log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
free(l_bin_counts);
|
||||
free(l_bin_assignments);
|
||||
free(final_bin_assignments);
|
||||
free(final_bin_counts);
|
||||
clReleaseMemObject(bin_counters);
|
||||
clReleaseMemObject(bins);
|
||||
clReleaseMemObject(bin_assignments);
|
||||
if (errors == 0) {
|
||||
log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
|
||||
return 0;
|
||||
} else {
|
||||
log_error("add_index_bin_test FAILED: %d errors.\n", errors);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
//===== add_index_bin test
|
||||
size_t numGlobalThreads = 2048;
|
||||
int iteration=0;
|
||||
int err, failed = 0;
|
||||
//===== add_index_bin test
|
||||
size_t numGlobalThreads = 2048;
|
||||
int iteration=0;
|
||||
int err, failed = 0;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
/* Check if atomics are supported. */
|
||||
@@ -364,17 +364,17 @@ int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_comm
|
||||
return 0;
|
||||
}
|
||||
|
||||
for(iteration=0; iteration<10; iteration++) {
|
||||
log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads);
|
||||
err = add_index_bin_test(&numGlobalThreads, queue, context, d);
|
||||
if (err) {
|
||||
failed++;
|
||||
break;
|
||||
}
|
||||
numGlobalThreads*=2;
|
||||
}
|
||||
for(iteration=0; iteration<10; iteration++) {
|
||||
log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads);
|
||||
err = add_index_bin_test(&numGlobalThreads, queue, context, d);
|
||||
if (err) {
|
||||
failed++;
|
||||
break;
|
||||
}
|
||||
numGlobalThreads*=2;
|
||||
}
|
||||
free_mtdata( d );
|
||||
return failed;
|
||||
return failed;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -33,15 +33,15 @@ extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_
|
||||
extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
0
test_conformance/basic/run_array
Normal file → Executable file
0
test_conformance/basic/run_array
Normal file → Executable file
0
test_conformance/basic/run_array_image_copy
Normal file → Executable file
0
test_conformance/basic/run_array_image_copy
Normal file → Executable file
0
test_conformance/basic/run_image
Normal file → Executable file
0
test_conformance/basic/run_image
Normal file → Executable file
0
test_conformance/basic/run_multi_read_image
Normal file → Executable file
0
test_conformance/basic/run_multi_read_image
Normal file → Executable file
@@ -39,118 +39,118 @@ const char *copy_kernel_code =
|
||||
int
|
||||
test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_uint *input_ptr, *output_ptr;
|
||||
cl_mem streams[4], results;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
unsigned num_elements = 128 * 1024;
|
||||
cl_uint num_copies = 1;
|
||||
size_t delta_offset;
|
||||
unsigned i;
|
||||
cl_uint *input_ptr, *output_ptr;
|
||||
cl_mem streams[4], results;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
unsigned num_elements = 128 * 1024;
|
||||
cl_uint num_copies = 1;
|
||||
size_t delta_offset;
|
||||
unsigned i;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
|
||||
int error_count = 0;
|
||||
|
||||
input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
|
||||
// results
|
||||
results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
// results
|
||||
results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
/*****************************************************************************************************************************************/
|
||||
#pragma mark client backing
|
||||
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
// client backing
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
|
||||
// client backing
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
delta_offset = num_elements * sizeof(cl_uint) / num_copies;
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
delta_offset = num_elements * sizeof(cl_uint) / num_copies;
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyBuffer failed");
|
||||
}
|
||||
}
|
||||
|
||||
// Try upload from client backing
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
// Try upload from client backing
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");
|
||||
|
||||
|
||||
|
||||
#pragma mark framework backing (no client data)
|
||||
|
||||
log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
// no backing
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
// no backing
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
|
||||
// Copy the array up from host ptr
|
||||
// Copy the array up from host ptr
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyBuffer failed");
|
||||
}
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, results, true, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");
|
||||
if (err)
|
||||
log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");
|
||||
|
||||
/*****************************************************************************************************************************************/
|
||||
#pragma mark kernel copy test
|
||||
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
free_mtdata(d); d= NULL;
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
free_mtdata(d); d= NULL;
|
||||
|
||||
// client backing
|
||||
// client backing
|
||||
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
@@ -163,30 +163,30 @@ test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
|
||||
size_t threads[3] = {num_elements, 0, 0};
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Keep track of multiple errors.
|
||||
if (error_count != 0)
|
||||
err = error_count;
|
||||
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");
|
||||
|
||||
|
||||
clReleaseProgram(program);
|
||||
@@ -199,7 +199,7 @@ test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -32,67 +32,67 @@
|
||||
int
|
||||
test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_uint *inptr, *outptr;
|
||||
cl_mem streams[1];
|
||||
int num_tries = 400;
|
||||
num_elements = 1024 * 1024 * 4;
|
||||
int i, j, err;
|
||||
cl_uint *inptr, *outptr;
|
||||
cl_mem streams[1];
|
||||
int num_tries = 400;
|
||||
num_elements = 1024 * 1024 * 4;
|
||||
int i, j, err;
|
||||
MTdata d;
|
||||
|
||||
inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
|
||||
outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
|
||||
inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
|
||||
outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
|
||||
|
||||
// randomize data
|
||||
// randomize data
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
for (i=0; i<num_elements; i++)
|
||||
inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
for (i=0; i<num_tries; i++)
|
||||
{
|
||||
int offset;
|
||||
int cb;
|
||||
for (i=0; i<num_tries; i++)
|
||||
{
|
||||
int offset;
|
||||
int cb;
|
||||
|
||||
do {
|
||||
offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
if (offset > 0 && offset < num_elements)
|
||||
break;
|
||||
} while (1);
|
||||
cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
if (cb > (num_elements - offset))
|
||||
cb = num_elements - offset;
|
||||
do {
|
||||
offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
if (offset > 0 && offset < num_elements)
|
||||
break;
|
||||
} while (1);
|
||||
cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
if (cb > (num_elements - offset))
|
||||
cb = num_elements - offset;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (j=offset; j<offset+cb; j++)
|
||||
{
|
||||
if (inptr[j] != outptr[j])
|
||||
{
|
||||
log_error("ARRAY read, write test failed\n");
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (j=offset; j<offset+cb; j++)
|
||||
{
|
||||
if (inptr[j] != outptr[j])
|
||||
{
|
||||
log_error("ARRAY read, write test failed\n");
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
clReleaseMemObject(streams[0]);
|
||||
free(inptr);
|
||||
free(outptr);
|
||||
free_mtdata(d);
|
||||
clReleaseMemObject(streams[0]);
|
||||
free(inptr);
|
||||
free(outptr);
|
||||
|
||||
if (!err)
|
||||
log_info("ARRAY read, write test passed\n");
|
||||
if (!err)
|
||||
log_info("ARRAY read, write test passed\n");
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -41,8 +41,8 @@ static const char *astype_kernel_pattern =
|
||||
"%s\n"
|
||||
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%s tmp = as_%s%s( src[ tid ] );\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%s tmp = as_%s%s( src[ tid ] );\n"
|
||||
" dst[ tid ] = tmp;\n"
|
||||
"}\n";
|
||||
|
||||
@@ -50,8 +50,8 @@ static const char *astype_kernel_pattern_V3srcV3dst =
|
||||
"%s\n"
|
||||
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
|
||||
" vstore3(tmp,tid,dst);\n"
|
||||
"}\n";
|
||||
// in the printf, remove the third and fifth argument, each of which
|
||||
@@ -61,8 +61,8 @@ static const char *astype_kernel_pattern_V3dst =
|
||||
"%s\n"
|
||||
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s3 tmp = as_%s3( src[ tid ] );\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s3 tmp = as_%s3( src[ tid ] );\n"
|
||||
" vstore3(tmp,tid,dst);\n"
|
||||
"}\n";
|
||||
// in the printf, remove the fifth argument, which
|
||||
@@ -73,8 +73,8 @@ static const char *astype_kernel_pattern_V3src =
|
||||
"%s\n"
|
||||
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
|
||||
" int tid = get_global_id( 0 );\n"
|
||||
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
|
||||
" dst[ tid ] = tmp;\n"
|
||||
"}\n";
|
||||
// in the printf, remove the third argument, which
|
||||
@@ -256,8 +256,8 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong )
|
||||
continue;
|
||||
if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong )
|
||||
continue;
|
||||
|
||||
// change this check
|
||||
if( inTypeIdx == outTypeIdx ) {
|
||||
|
||||
@@ -37,13 +37,13 @@ static const char *async_global_to_local_kernel =
|
||||
" int i;\n"
|
||||
// Zero the local storage first
|
||||
" for(i=0; i<copiesPerWorkItem; i++)\n"
|
||||
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
|
||||
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
|
||||
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" event_t event;\n"
|
||||
" event = async_work_group_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 );\n"
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" event_t event;\n"
|
||||
" event = async_work_group_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, (event_t)0 );\n"
|
||||
// Wait for the copy to complete, then verify by manually copying to the dest
|
||||
" wait_group_events( 1, &event );\n"
|
||||
" wait_group_events( 1, &event );\n"
|
||||
" for(i=0; i<copiesPerWorkItem; i++)\n"
|
||||
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
|
||||
"}\n" ;
|
||||
@@ -57,14 +57,14 @@ static const char *async_local_to_global_kernel =
|
||||
" for(i=0; i<copiesPerWorkItem; i++)\n"
|
||||
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
|
||||
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" for(i=0; i<copiesPerWorkItem; i++)\n"
|
||||
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
|
||||
// Do this to verify all kernels are done copying to the local buffer before we try the copy
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" event_t event;\n"
|
||||
" event = async_work_group_copy((__global %s*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 );\n"
|
||||
" wait_group_events( 1, &event );\n"
|
||||
" barrier( CLK_LOCAL_MEM_FENCE );\n"
|
||||
" event_t event;\n"
|
||||
" event = async_work_group_copy((__global %s*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, (event_t)0 );\n"
|
||||
" wait_group_events( 1, &event );\n"
|
||||
"}\n" ;
|
||||
|
||||
|
||||
|
||||
@@ -70,12 +70,12 @@ static const char *async_strided_local_to_global_kernel =
|
||||
|
||||
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 2 ];
|
||||
size_t threads[ 1 ], localThreads[ 1 ];
|
||||
void *inBuffer, *outBuffer;
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 2 ];
|
||||
size_t threads[ 1 ], localThreads[ 1 ];
|
||||
void *inBuffer, *outBuffer;
|
||||
MTdata d;
|
||||
char vecNameString[64]; vecNameString[0] = 0;
|
||||
|
||||
@@ -100,14 +100,14 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
||||
char *programPtr;
|
||||
|
||||
sprintf(programSource, kernelCode,
|
||||
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
|
||||
"",
|
||||
vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
|
||||
vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
|
||||
//log_info("program: %s\n", programSource);
|
||||
programPtr = programSource;
|
||||
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
|
||||
size_t max_workgroup_size;
|
||||
error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
|
||||
@@ -165,36 +165,36 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
||||
localThreads[0] = localWorkgroupSize;
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
|
||||
generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
|
||||
test_error( error, "Unable to create output buffer" );
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
|
||||
test_error( error, "Unable to create input buffer" );
|
||||
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
|
||||
test_error( error, "Unable to create output buffer" );
|
||||
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 5, sizeof(stride), &stride );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
error = clSetKernelArg( kernel, 5, sizeof(stride), &stride );
|
||||
test_error( error, "Unable to set kernel argument" );
|
||||
|
||||
// Enqueue
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to queue kernel" );
|
||||
// Enqueue
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to queue kernel" );
|
||||
|
||||
// Read
|
||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
// Read
|
||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
|
||||
// Verify
|
||||
// Verify
|
||||
for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
|
||||
{
|
||||
if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, elementSize) != 0 )
|
||||
@@ -207,34 +207,33 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
||||
log_error( "ERROR: Results of copy did not validate!\n" );
|
||||
sprintf(values + strlen( values), "%d -> [", i);
|
||||
for (int j=0; j<(int)elementSize; j++)
|
||||
sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]);
|
||||
sprintf(values + strlen( values), "%2x ", inchar[j]);
|
||||
sprintf(values + strlen(values), "] != [");
|
||||
for (int j=0; j<(int)elementSize; j++)
|
||||
sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]);
|
||||
sprintf(values + strlen( values), "%2x ", outchar[j]);
|
||||
sprintf(values + strlen(values), "]");
|
||||
log_error("%s\n", values);
|
||||
|
||||
return -1;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(inBuffer);
|
||||
free(outBuffer);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
|
||||
{
|
||||
ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
|
||||
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
|
||||
unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
|
||||
unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
|
||||
unsigned int size, typeIndex, stride;
|
||||
unsigned int size, typeIndex, stride;
|
||||
|
||||
int errors = 0;
|
||||
|
||||
for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
|
||||
{
|
||||
{
|
||||
if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
|
||||
continue;
|
||||
|
||||
@@ -262,11 +261,11 @@ int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_co
|
||||
|
||||
// Test entry point for the strided async copy in the global -> local direction.
// Delegates to test_strided_copy_all_types() with the global-to-local kernel
// source; num_elements is accepted to match the test-function signature but is
// not used here.
// Returns whatever test_strided_copy_all_types() returns.
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );
}
|
||||
|
||||
// Test entry point for the strided async copy in the local -> global direction.
// Delegates to test_strided_copy_all_types() with the local-to-global kernel
// source; num_elements is accepted to match the test-function signature but is
// not used here.
// Returns whatever test_strided_copy_all_types() returns.
int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    return test_strided_copy_all_types( deviceID, context, queue, async_strided_local_to_global_kernel );
}
|
||||
|
||||
|
||||
@@ -38,17 +38,17 @@ const char *barrier_kernel_code =
|
||||
" tmp_sum[tid] = 0;\n"
|
||||
" for (i=tid; i<n; i+=lsize)\n"
|
||||
" tmp_sum[tid] += a[i];\n"
|
||||
" \n"
|
||||
" // updated to work for any workgroup size \n"
|
||||
" \n"
|
||||
" // updated to work for any workgroup size \n"
|
||||
" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
|
||||
" {\n"
|
||||
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
|
||||
" if (tid + i < lsize)\n"
|
||||
" tmp_sum[tid] += tmp_sum[tid + i];\n"
|
||||
" lsize = i; \n"
|
||||
" lsize = i; \n"
|
||||
" }\n"
|
||||
"\n"
|
||||
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
|
||||
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
|
||||
" if (tid == 0)\n"
|
||||
" *sum = tmp_sum[0];\n"
|
||||
"}\n";
|
||||
@@ -57,19 +57,19 @@ const char *barrier_kernel_code =
|
||||
static int
|
||||
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
|
||||
{
|
||||
int r = 0;
|
||||
int r = 0;
|
||||
int i;
|
||||
|
||||
for (i=0; i<n; i++)
|
||||
{
|
||||
r += inptr[i];
|
||||
r += inptr[i];
|
||||
}
|
||||
|
||||
if (r != outptr[0])
|
||||
{
|
||||
log_error("BARRIER test failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (r != outptr[0])
|
||||
{
|
||||
log_error("BARRIER test failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
log_info("BARRIER test passed\n");
|
||||
return 0;
|
||||
@@ -79,14 +79,14 @@ verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
|
||||
int
|
||||
test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr =NULL;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
size_t local_threads[3];
|
||||
int err;
|
||||
int i;
|
||||
cl_mem streams[3];
|
||||
cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr =NULL;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
size_t local_threads[3];
|
||||
int err;
|
||||
int i;
|
||||
size_t max_local_workgroup_size[3];
|
||||
size_t max_threadgroup_size = 0;
|
||||
MTdata d;
|
||||
@@ -110,18 +110,18 @@ test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, in
|
||||
max_threadgroup_size--;
|
||||
|
||||
input_ptr = (int*)malloc(sizeof(int) * num_elements);
|
||||
output_ptr = (int*)malloc(sizeof(int));
|
||||
output_ptr = (int*)malloc(sizeof(int));
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed.");
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
@@ -133,28 +133,28 @@ test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, in
|
||||
err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
|
||||
test_error(err, "clSetKernelArg failed.");
|
||||
|
||||
global_threads[0] = max_threadgroup_size;
|
||||
local_threads[0] = max_threadgroup_size;
|
||||
global_threads[0] = max_threadgroup_size;
|
||||
local_threads[0] = max_threadgroup_size;
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed.");
|
||||
test_error(err, "clEnqueueNDRangeKernel failed.");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed.");
|
||||
test_error(err, "clEnqueueReadBuffer failed.");
|
||||
|
||||
err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ const char *kernel_code_long =
|
||||
int
|
||||
test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clMemWrapper results;
|
||||
clMemWrapper results;
|
||||
int error;
|
||||
size_t global[3] = {1, 1, 1};
|
||||
float results_back[2*16];
|
||||
@@ -82,7 +82,7 @@ test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_comm
|
||||
|
||||
// Get the maximum parameter size allowed
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
|
||||
test_error( error, "Unable to get max parameter size from device" );
|
||||
test_error( error, "Unable to get max parameter size from device" );
|
||||
|
||||
// Create the results buffer
|
||||
results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
|
||||
@@ -165,7 +165,7 @@ test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_comm
|
||||
int
|
||||
test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clMemWrapper results;
|
||||
clMemWrapper results;
|
||||
int error;
|
||||
size_t global[3] = {1, 1, 1};
|
||||
float results_back[7*16];
|
||||
@@ -202,7 +202,7 @@ test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_q
|
||||
|
||||
// Get the maximum parameter size allowed
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
|
||||
test_error( error, "Unable to get max parameter size from device" );
|
||||
test_error( error, "Unable to get max parameter size from device" );
|
||||
|
||||
// Create the results buffer
|
||||
results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
|
||||
|
||||
@@ -27,14 +27,14 @@
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
/*
 * CL_EXIT_ERROR(cmd, format, ...)
 *
 * Evaluates `cmd` once and, if the result is not CL_SUCCESS, logs the source
 * file and line followed by the caller-supplied printf-style message and a
 * trailing newline.
 *
 * NOTE: despite the name, the macro does not exit or return -- the abort()
 * call is commented out -- so execution continues after the error is logged.
 */
#define CL_EXIT_ERROR(cmd,format,...) \
{ \
    if ((cmd) != CL_SUCCESS) { \
        log_error("CL ERROR: %s %u: ", __FILE__,__LINE__); \
        log_error(format,## __VA_ARGS__ ); \
        log_error("\n"); \
        /*abort();*/ \
    } \
}
|
||||
|
||||
typedef unsigned char BufferType;
|
||||
|
||||
@@ -59,11 +59,11 @@ verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
|
||||
for (i=0; i < n; i++)
|
||||
{
|
||||
float f = tmpF[i] * tmpI[i];
|
||||
if( out[i] != f )
|
||||
{
|
||||
log_error("CONSTANT test failed\n");
|
||||
return -1;
|
||||
}
|
||||
if( out[i] != f )
|
||||
{
|
||||
log_error("CONSTANT test failed\n");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
log_info("CONSTANT test passed\n");
|
||||
@@ -96,23 +96,23 @@ verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
|
||||
int
|
||||
test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[3];
|
||||
cl_int *tmpI;
|
||||
cl_float *tmpF, *out;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
int err;
|
||||
unsigned int i;
|
||||
cl_mem streams[3];
|
||||
cl_int *tmpI;
|
||||
cl_float *tmpF, *out;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3];
|
||||
int err;
|
||||
unsigned int i;
|
||||
cl_ulong maxSize;
|
||||
size_t num_floats, num_ints, constant_values;
|
||||
MTdata d;
|
||||
RoundingMode oldRoundMode;
|
||||
RoundingMode oldRoundMode;
|
||||
int isRTZ = 0;
|
||||
|
||||
/* Verify our test buffer won't be bigger than allowed */
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
|
||||
test_error( err, "Unable to get max constant buffer size" );
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
|
||||
test_error( err, "Unable to get max constant buffer size" );
|
||||
|
||||
log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
|
||||
maxSize/=4;
|
||||
@@ -127,50 +127,50 @@ test_constant(cl_device_id device, cl_context context, cl_command_queue queue, i
|
||||
log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
|
||||
constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));
|
||||
|
||||
tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
|
||||
tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
|
||||
out = (cl_float*)malloc(sizeof(cl_float) * constant_values);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
|
||||
tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
|
||||
out = (cl_float*)malloc(sizeof(cl_float) * constant_values);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL);
|
||||
if (!streams[2])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<constant_values; i++) {
|
||||
tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
|
||||
tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
|
||||
}
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<constant_values; i++) {
|
||||
tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
|
||||
tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
|
||||
if (err) {
|
||||
if (err) {
|
||||
log_error("Failed to create kernel and program: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
@@ -179,13 +179,13 @@ test_constant(cl_device_id device, cl_context context, cl_command_queue queue, i
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
global_threads[0] = constant_values;
|
||||
global_threads[0] = constant_values;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
@@ -201,10 +201,10 @@ test_constant(cl_device_id device, cl_context context, cl_command_queue queue, i
|
||||
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
|
||||
{
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
{
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
isRTZ = 1;
|
||||
}
|
||||
}
|
||||
|
||||
err = verify(tmpF, tmpI, out, (int)constant_values);
|
||||
|
||||
@@ -245,19 +245,19 @@ test_constant(cl_device_id device, cl_context context, cl_command_queue queue, i
|
||||
|
||||
err = verify_loop_constant(tmpF, out, limit, (int)constant_values);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(loop_kernel);
|
||||
clReleaseProgram(loop_program);
|
||||
free(tmpI);
|
||||
free(tmpF);
|
||||
free(out);
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(loop_kernel);
|
||||
clReleaseProgram(loop_program);
|
||||
free(tmpI);
|
||||
free(tmpF);
|
||||
free(out);
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -37,66 +37,66 @@ const char *constant_source_kernel_code[] = {
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" if( tid == 0 )\n"
|
||||
" {\n"
|
||||
" out[ 0 ] = outVal;\n"
|
||||
" out[ 1 ] = outValues[ outIndex ];\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" out[ tid + 1 ] = outValues[ tid ];\n"
|
||||
" }\n"
|
||||
" if( tid == 0 )\n"
|
||||
" {\n"
|
||||
" out[ 0 ] = outVal;\n"
|
||||
" out[ 1 ] = outValues[ outIndex ];\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" out[ tid + 1 ] = outValues[ tid ];\n"
|
||||
" }\n"
|
||||
"}\n" };
|
||||
|
||||
// Runs the "constant_kernel" built from constant_source_kernel_code and checks
// the values it writes against a fixed table of expected results.
//
// The kernel is launched with 16 work-items: work-item 0 writes out[0] (a
// program-scope constant) and out[1] (a constant array indexed by a constant),
// every other work-item writes out[tid + 1] (the constant array indexed by a
// variable), giving 17 output values in total.
//
// Returns 0 on success and -1 on the first mismatching value. Failures of the
// OpenCL calls are reported through test_error (presumably returning early --
// see the harness definition). device and num_elements are unused.
int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;

    clMemWrapper outStream;
    cl_int outValues[ 17 ];
    cl_int expectedValues[ 17 ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };

    // Derive the element count from the table so the verify loop cannot drift
    // out of sync with the array size.
    const int numValues = (int)( sizeof( expectedValues ) / sizeof( expectedValues[ 0 ] ) );

    cl_int error;


    // Create a kernel to test with
    error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
    test_error( error, "Unable to create testing kernel" );

    // Create our output buffer
    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    // Set the argument
    error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    // Run test kernel: 16 work-items produce 17 values (work-item 0 writes two)
    size_t threads[ 1 ] = { 16 };
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to enqueue kernel" );

    // Read results (blocking read, so outValues is valid once this returns)
    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Verify results; the message names which kind of constant access failed
    for( int i = 0; i < numValues; i++ )
    {
        if( expectedValues[ i ] != outValues[ i ] )
        {
            if( i == 0 )
                log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
            else if( i == 1 )
                log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
            else
                log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", i, expectedValues[ i ], outValues[ i ] );
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
|
||||
@@ -57,67 +57,67 @@ const char *sample_double_kernel = {
|
||||
int
|
||||
test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_program program;
|
||||
cl_kernel kernel[2];
|
||||
unsigned int num_kernels;
|
||||
size_t lengths[2];
|
||||
int err;
|
||||
cl_program program;
|
||||
cl_kernel kernel[2];
|
||||
unsigned int num_kernels;
|
||||
size_t lengths[2];
|
||||
int err;
|
||||
|
||||
lengths[0] = strlen(sample_single_kernel);
|
||||
lengths[0] = strlen(sample_single_kernel);
|
||||
program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL);
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
|
||||
if ( (err != CL_SUCCESS) || (num_kernels != 1) )
|
||||
{
|
||||
log_error("clCreateKernelsInProgram test failed for a single kernel\n");
|
||||
return -1;
|
||||
}
|
||||
err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
|
||||
if ( (err != CL_SUCCESS) || (num_kernels != 1) )
|
||||
{
|
||||
log_error("clCreateKernelsInProgram test failed for a single kernel\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseProgram(program);
|
||||
|
||||
lengths[0] = strlen(sample_double_kernel);
|
||||
program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL);
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
|
||||
if ( (err != CL_SUCCESS) || (num_kernels != 2) )
|
||||
{
|
||||
log_error("clCreateKernelsInProgram test failed for two kernels\n");
|
||||
return -1;
|
||||
}
|
||||
err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
|
||||
if ( (err != CL_SUCCESS) || (num_kernels != 2) )
|
||||
{
|
||||
log_error("clCreateKernelsInProgram test failed for two kernels\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
log_info("clCreateKernelsInProgram test passed\n");
|
||||
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(kernel[0]);
|
||||
clReleaseKernel(kernel[1]);
|
||||
clReleaseProgram(program);
|
||||
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -47,16 +47,16 @@ const char* flag_set_names[] = {
|
||||
|
||||
int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
const size_t bufferSize = 256*256;
|
||||
int error;
|
||||
const size_t bufferSize = 256*256;
|
||||
int src_flag_id;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
cl_char *initialData = (cl_char*)malloc(bufferSize);
|
||||
cl_char *finalData = (cl_char*)malloc(bufferSize);
|
||||
cl_char *initialData = (cl_char*)malloc(bufferSize);
|
||||
cl_char *finalData = (cl_char*)malloc(bufferSize);
|
||||
|
||||
for (src_flag_id=0; src_flag_id < 5; src_flag_id++)
|
||||
{
|
||||
clMemWrapper memObject;
|
||||
clMemWrapper memObject;
|
||||
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
|
||||
|
||||
generate_random_data( kChar, (unsigned int)bufferSize, d, initialData );
|
||||
@@ -85,8 +85,8 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_comman
|
||||
{
|
||||
print_error( error, "clEnqueueMapBuffer call failed" );
|
||||
log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length );
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
@@ -121,47 +121,47 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_comman
|
||||
if( initialData[ q ] != finalData[ q ] )
|
||||
{
|
||||
log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] );
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
} // cl_mem flags
|
||||
} // cl_mem flags
|
||||
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free( initialData );
|
||||
free( finalData );
|
||||
free_mtdata(d);
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
|
||||
const size_t imageSize = 256;
|
||||
int error;
|
||||
cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
|
||||
const size_t imageSize = 256;
|
||||
int src_flag_id;
|
||||
cl_uint *initialData;
|
||||
cl_uint *finalData;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
|
||||
|
||||
initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
|
||||
finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
|
||||
|
||||
if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
|
||||
{
|
||||
log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
|
||||
if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
|
||||
{
|
||||
log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
|
||||
free(initialData);
|
||||
free(finalData);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (src_flag_id=0; src_flag_id < 5; src_flag_id++) {
|
||||
clMemWrapper memObject;
|
||||
clMemWrapper memObject;
|
||||
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
|
||||
|
||||
generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData );
|
||||
@@ -240,7 +240,7 @@ int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command
|
||||
if( initialData[ q ] != finalData[ q ] )
|
||||
{
|
||||
log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
|
||||
(int)finalData[ q ], (int)initialData[ q ] );
|
||||
(int)finalData[ q ], (int)initialData[ q ] );
|
||||
free(initialData);
|
||||
free(finalData);
|
||||
free_mtdata(d);
|
||||
@@ -249,10 +249,10 @@ int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command
|
||||
}
|
||||
} // cl_mem_flags
|
||||
|
||||
free(initialData);
|
||||
free(finalData);
|
||||
free(initialData);
|
||||
free(finalData);
|
||||
free_mtdata(d);
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -31,17 +31,17 @@
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
|
||||
#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \
|
||||
"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \
|
||||
"{\n" \
|
||||
" int tid = get_global_id(0);\n" \
|
||||
" " srctype " src = sourceValues[tid];\n" \
|
||||
"\n" \
|
||||
" destValues[tid] = (" dsttype #size ")src;\n" \
|
||||
"\n" \
|
||||
"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \
|
||||
"{\n" \
|
||||
" int tid = get_global_id(0);\n" \
|
||||
" " srctype " src = sourceValues[tid];\n" \
|
||||
"\n" \
|
||||
" destValues[tid] = (" dsttype #size ")src;\n" \
|
||||
"\n" \
|
||||
"}\n"
|
||||
|
||||
#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \
|
||||
{ \
|
||||
{ \
|
||||
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \
|
||||
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \
|
||||
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \
|
||||
@@ -52,27 +52,27 @@ DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \
|
||||
|
||||
/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */
|
||||
|
||||
#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \
|
||||
{ \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,char), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,short), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,int), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,long), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,float), \
|
||||
DECLARE_EMPTY \
|
||||
#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \
|
||||
{ \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,char), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,short), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,int), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,long), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \
|
||||
DECLARE_S2V_IDENT_KERNELS(#srctype,float), \
|
||||
DECLARE_EMPTY \
|
||||
}
|
||||
|
||||
#define DECLARE_EMPTY_SET \
|
||||
{ \
|
||||
#define DECLARE_EMPTY_SET \
|
||||
{ \
|
||||
DECLARE_EMPTY, \
|
||||
DECLARE_EMPTY, \
|
||||
DECLARE_EMPTY, \
|
||||
@@ -87,7 +87,7 @@ DECLARE_EMPTY, \
|
||||
DECLARE_EMPTY, \
|
||||
DECLARE_EMPTY, \
|
||||
DECLARE_EMPTY, \
|
||||
DECLARE_EMPTY \
|
||||
DECLARE_EMPTY \
|
||||
}
|
||||
|
||||
|
||||
@@ -113,265 +113,265 @@ const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] =
|
||||
int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
|
||||
ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
int error;
|
||||
clMemWrapper streams[2];
|
||||
void *outData;
|
||||
unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */
|
||||
size_t threadSize[3], groupSize[3];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
int error;
|
||||
clMemWrapper streams[2];
|
||||
void *outData;
|
||||
unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */
|
||||
size_t threadSize[3], groupSize[3];
|
||||
unsigned int i, s;
|
||||
unsigned char *inPtr, *outPtr;
|
||||
size_t paramSize, destTypeSize;
|
||||
unsigned char *inPtr, *outPtr;
|
||||
size_t paramSize, destTypeSize;
|
||||
|
||||
const char* finalProgramSrc[2] = {
|
||||
"", // optional pragma
|
||||
programSrc
|
||||
};
|
||||
|
||||
if (srcType == kDouble || destType == kDouble) {
|
||||
if (srcType == kDouble || destType == kDouble) {
|
||||
finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
|
||||
}
|
||||
|
||||
|
||||
if( programSrc == NULL )
|
||||
return 0;
|
||||
if( programSrc == NULL )
|
||||
return 0;
|
||||
|
||||
paramSize = get_explicit_type_size( srcType );
|
||||
destTypeSize = get_explicit_type_size( destType );
|
||||
paramSize = get_explicit_type_size( srcType );
|
||||
destTypeSize = get_explicit_type_size( destType );
|
||||
|
||||
size_t destStride = destTypeSize * vecSize;
|
||||
size_t destStride = destTypeSize * vecSize;
|
||||
|
||||
outData = malloc( destStride * count );
|
||||
outData = malloc( destStride * count );
|
||||
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
|
||||
{
|
||||
log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
|
||||
return -1;
|
||||
}
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
|
||||
{
|
||||
log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error);
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
/* Run the kernel */
|
||||
threadSize[0] = count;
|
||||
/* Run the kernel */
|
||||
threadSize[0] = count;
|
||||
|
||||
error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
test_error( error, "Unable to execute test kernel" );
|
||||
|
||||
/* Now verify the results. Each value should have been duplicated four times, and we should be able to just
|
||||
/* Now verify the results. Each value should have been duplicated four times, and we should be able to just
|
||||
do a memcpy instead of relying on the actual type of data */
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output values!" );
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read output values!" );
|
||||
|
||||
inPtr = (unsigned char *)inputData;
|
||||
outPtr = (unsigned char *)outData;
|
||||
inPtr = (unsigned char *)inputData;
|
||||
outPtr = (unsigned char *)outData;
|
||||
|
||||
for( i = 0; i < count; i++ )
|
||||
{
|
||||
/* Convert the input data element to our output data type to compare against */
|
||||
convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
|
||||
for( i = 0; i < count; i++ )
|
||||
{
|
||||
/* Convert the input data element to our output data type to compare against */
|
||||
convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );
|
||||
|
||||
/* Now compare every element of the vector */
|
||||
for( s = 0; s < vecSize; s++ )
|
||||
{
|
||||
if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
|
||||
{
|
||||
unsigned int *p = (unsigned int *)outPtr;
|
||||
log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
|
||||
log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
|
||||
log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
inPtr += paramSize;
|
||||
outPtr += destStride;
|
||||
}
|
||||
/* Now compare every element of the vector */
|
||||
for( s = 0; s < vecSize; s++ )
|
||||
{
|
||||
if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
|
||||
{
|
||||
unsigned int *p = (unsigned int *)outPtr;
|
||||
log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
|
||||
log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), *(unsigned int *)inPtr & ( 0xffffffff >> ( 32 - paramSize * 8 ) ) );
|
||||
log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", p[ 0 ], p[ 1 ], p[ 2 ], p[ 3 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
inPtr += paramSize;
|
||||
outPtr += destStride;
|
||||
}
|
||||
|
||||
free( outData );
|
||||
free( outData );
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
|
||||
unsigned int count, void *inputData )
|
||||
{
|
||||
unsigned int sizes[] = { 2, 4, 8, 16, 0 };
|
||||
int i, dstType, failed = 0;
|
||||
unsigned int sizes[] = { 2, 4, 8, 16, 0 };
|
||||
int i, dstType, failed = 0;
|
||||
|
||||
|
||||
for( dstType = kBool; dstType < kNumExplicitTypes; dstType++ )
|
||||
{
|
||||
for( dstType = kBool; dstType < kNumExplicitTypes; dstType++ )
|
||||
{
|
||||
if( dstType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
|
||||
continue;
|
||||
|
||||
if (( dstType == kLong || dstType == kULong ) && !gHasLong )
|
||||
continue;
|
||||
|
||||
for( i = 0; sizes[i] != 0; i++ )
|
||||
{
|
||||
if( dstType != srcType )
|
||||
continue;
|
||||
if( strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL ||
|
||||
for( i = 0; sizes[i] != 0; i++ )
|
||||
{
|
||||
if( dstType != srcType )
|
||||
continue;
|
||||
if( strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL ||
|
||||
strchr( get_explicit_type_name( (ExplicitType)dstType ), ' ' ) != NULL )
|
||||
continue;
|
||||
continue;
|
||||
|
||||
if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dstType ][ i ],
|
||||
if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dstType ][ i ],
|
||||
srcType, count, (ExplicitType)dstType, sizes[ i ], inputData ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
|
||||
{
|
||||
log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
|
||||
get_explicit_type_name(srcType), get_explicit_type_name((ExplicitType)dstType), sizes[i], get_explicit_type_name((ExplicitType)dstType) );
|
||||
failed = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
failed = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return failed;
|
||||
return failed;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
|
||||
return 0;
|
||||
log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
|
||||
return 0;
|
||||
#if 0
|
||||
bool data[128];
|
||||
bool data[128];
|
||||
|
||||
generate_random_data( kBool, 128, data );
|
||||
generate_random_data( kBool, 128, data );
|
||||
|
||||
return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data );
|
||||
return test_explicit_s2v_function_set( deviceID, context, queue, kBool, 128, data );
|
||||
#endif
|
||||
}
|
||||
|
||||
int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
char data[128];
|
||||
char data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kChar, 128, seed, data );
|
||||
generate_random_data( kChar, 128, seed, data );
|
||||
|
||||
return test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, data );
|
||||
return test_explicit_s2v_function_set( deviceID, context, queue, kChar, 128, data );
|
||||
}
|
||||
|
||||
int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
unsigned char data[128];
|
||||
unsigned char data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kUChar, 128, seed, data );
|
||||
generate_random_data( kUChar, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUChar, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUChar, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedChar, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
short data[128];
|
||||
short data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kShort, 128, seed, data );
|
||||
generate_random_data( kShort, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kShort, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
unsigned short data[128];
|
||||
unsigned short data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kUShort, 128, seed, data );
|
||||
generate_random_data( kUShort, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUShort, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUShort, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedShort, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int data[128];
|
||||
int data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kInt, 128, seed, data );
|
||||
generate_random_data( kInt, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kInt, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
unsigned int data[128];
|
||||
unsigned int data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kUInt, 128, seed, data );
|
||||
generate_random_data( kUInt, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_long data[128];
|
||||
cl_long data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kLong, 128, seed, data );
|
||||
generate_random_data( kLong, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kLong, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kLong, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_ulong data[128];
|
||||
cl_ulong data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kULong, 128, seed, data );
|
||||
generate_random_data( kULong, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kULong, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kULong, 128, data ) != 0 )
|
||||
return -1;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
float data[128];
|
||||
float data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
generate_random_data( kFloat, 128, seed, data );
|
||||
generate_random_data( kFloat, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kFloat, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
double data[128];
|
||||
double data[128];
|
||||
RandomSeed seed(gRandomSeed);
|
||||
|
||||
if( !is_extension_available( deviceID, "cl_khr_fp64" ) ) {
|
||||
@@ -379,11 +379,11 @@ int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_comma
|
||||
return 0;
|
||||
}
|
||||
|
||||
generate_random_data( kDouble, 128, seed, data );
|
||||
generate_random_data( kDouble, 128, seed, data );
|
||||
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
if( test_explicit_s2v_function_set( deviceID, context, queue, kDouble, 128, data ) != 0 )
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -60,78 +60,78 @@ verify_float2int(cl_float *inptr, cl_int *outptr, int n)
|
||||
int
|
||||
test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_float *input_ptr;
|
||||
cl_mem streams[2];
|
||||
cl_float *input_ptr;
|
||||
cl_int *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
void *values[2];
|
||||
size_t lengths[1];
|
||||
size_t threads[1];
|
||||
int err;
|
||||
int i;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
void *values[2];
|
||||
size_t lengths[1];
|
||||
size_t threads[1];
|
||||
int err;
|
||||
int i;
|
||||
MTdata d;
|
||||
|
||||
input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
|
||||
output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
|
||||
output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteArray failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
lengths[0] = strlen(float2int_kernel_code);
|
||||
lengths[0] = strlen(float2int_kernel_code);
|
||||
program = clCreateProgramWithSource(context, 1, &float2int_kernel_code, lengths, NULL);
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (!program)
|
||||
{
|
||||
log_error("clCreateProgramWithSource failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clBuildProgramExecutable failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kernel = clCreateKernel(program, "test_float2int", NULL);
|
||||
if (!kernel)
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
kernel = clCreateKernel(program, "test_float2int", NULL);
|
||||
if (!kernel)
|
||||
{
|
||||
log_error("clCreateKernel failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
values[0] = streams[0];
|
||||
values[1] = streams[1];
|
||||
values[0] = streams[0];
|
||||
values[1] = streams[1];
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err = clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)num_elements;
|
||||
threads[0] = (size_t)num_elements;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
@@ -148,15 +148,15 @@ test_float2int(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
|
||||
err = verify_float2int(input_ptr, output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ const char *fpmul_kernel_code =
|
||||
"}\n";
|
||||
|
||||
|
||||
static const float MAX_ERR = 1e-5f;
|
||||
static const float MAX_ERR = 1e-5f;
|
||||
|
||||
int
|
||||
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
|
||||
@@ -119,13 +119,13 @@ verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
|
||||
int
|
||||
test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
|
||||
float *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
size_t length = sizeof(cl_float) * num_elements;
|
||||
int isRTZ = 0;
|
||||
@@ -163,10 +163,10 @@ test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queu
|
||||
}
|
||||
|
||||
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
@@ -118,13 +118,13 @@ verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
|
||||
int
|
||||
test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
|
||||
cl_float *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_float) * 2 * num_elements;
|
||||
@@ -162,10 +162,10 @@ test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue que
|
||||
oldMode = get_round();
|
||||
}
|
||||
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
@@ -177,20 +177,20 @@ test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue que
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
for (i=0; i<num_elements*2; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
|
||||
@@ -250,24 +250,24 @@ test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue que
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -118,13 +118,13 @@ verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
|
||||
int
|
||||
test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
cl_mem streams[4];
|
||||
cl_program program[3];
|
||||
cl_kernel kernel[3];
|
||||
|
||||
cl_float *input_ptr[3], *output_ptr, *p;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_float) * 4 * num_elements;
|
||||
@@ -162,10 +162,10 @@ test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue que
|
||||
oldMode = get_round();
|
||||
}
|
||||
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
input_ptr[0] = (cl_float*)malloc(length);
|
||||
input_ptr[1] = (cl_float*)malloc(length);
|
||||
input_ptr[2] = (cl_float*)malloc(length);
|
||||
output_ptr = (cl_float*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
@@ -177,22 +177,22 @@ test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue que
|
||||
test_error( err, "clCreateBuffer failed.");
|
||||
|
||||
p = input_ptr[0];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[1];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
p = input_ptr[2];
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
for (i=0; i<num_elements*4; i++)
|
||||
p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
|
||||
@@ -251,24 +251,24 @@ test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue que
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
clReleaseKernel(kernel[i]);
|
||||
clReleaseProgram(program[i]);
|
||||
}
|
||||
free(input_ptr[0]);
|
||||
free(input_ptr[1]);
|
||||
free(input_ptr[2]);
|
||||
free(output_ptr);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -18,267 +18,267 @@
|
||||
|
||||
|
||||
const char *work_offset_test[] = {
|
||||
"__kernel void test( __global int * outputID_A, \n"
|
||||
" __global int * outputID_B, __global int * outputID_C )\n"
|
||||
"{\n"
|
||||
" size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
|
||||
" size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
|
||||
" size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
|
||||
" size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
|
||||
"\n"
|
||||
" outputID_A[ id ] = get_global_id( 0 );\n"
|
||||
" outputID_B[ id ] = get_global_id( 1 );\n"
|
||||
" outputID_C[ id ] = get_global_id( 2 );\n"
|
||||
"}\n"
|
||||
};
|
||||
"__kernel void test( __global int * outputID_A, \n"
|
||||
" __global int * outputID_B, __global int * outputID_C )\n"
|
||||
"{\n"
|
||||
" size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
|
||||
" size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
|
||||
" size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
|
||||
" size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
|
||||
"\n"
|
||||
" outputID_A[ id ] = get_global_id( 0 );\n"
|
||||
" outputID_B[ id ] = get_global_id( 1 );\n"
|
||||
" outputID_C[ id ] = get_global_id( 2 );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
#define MAX_TEST_ITEMS 16 * 16 * 16
|
||||
#define NUM_TESTS 16
|
||||
#define MAX_OFFSET 256
|
||||
|
||||
#define CHECK_RANGE( v, m, c ) \
|
||||
if( ( v >= (cl_int)m ) || ( v < 0 ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", c, i, v, m ); \
|
||||
return -1; \
|
||||
}
|
||||
if( ( v >= (cl_int)m ) || ( v < 0 ) ) \
|
||||
{ \
|
||||
log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", c, i, v, m ); \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] )
|
||||
{
|
||||
size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
|
||||
size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
|
||||
size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
|
||||
size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
|
||||
|
||||
static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ];
|
||||
memset( counts, 0, sizeof( counts ) );
|
||||
static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ];
|
||||
memset( counts, 0, sizeof( counts ) );
|
||||
|
||||
for( size_t i = 0; i < limit; i++ )
|
||||
{
|
||||
// Check ranges first
|
||||
CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
|
||||
CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
|
||||
CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
|
||||
for( size_t i = 0; i < limit; i++ )
|
||||
{
|
||||
// Check ranges first
|
||||
CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
|
||||
CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
|
||||
CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
|
||||
|
||||
// Now set the value in the map
|
||||
counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++;
|
||||
}
|
||||
// Now set the value in the map
|
||||
counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++;
|
||||
}
|
||||
|
||||
// Now check the map
|
||||
int missed = 0, multiple = 0, errored = 0, corrected = 0;
|
||||
for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ )
|
||||
{
|
||||
for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ )
|
||||
{
|
||||
for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ )
|
||||
{
|
||||
const char * limitMsg = " (further errors of this type suppressed)";
|
||||
if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) )
|
||||
{
|
||||
if( counts[ x ][ y ][ z ] < 1 )
|
||||
{
|
||||
if( missed < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" );
|
||||
missed++;
|
||||
}
|
||||
else if( counts[ x ][ y ][ z ] > 1 )
|
||||
{
|
||||
if( multiple < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" );
|
||||
multiple++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( counts[ x ][ y ][ z ] > 0 )
|
||||
{
|
||||
if( errored < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" );
|
||||
errored++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now check the map
|
||||
int missed = 0, multiple = 0, errored = 0, corrected = 0;
|
||||
for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ )
|
||||
{
|
||||
for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ )
|
||||
{
|
||||
for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ )
|
||||
{
|
||||
const char * limitMsg = " (further errors of this type suppressed)";
|
||||
if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) )
|
||||
{
|
||||
if( counts[ x ][ y ][ z ] < 1 )
|
||||
{
|
||||
if( missed < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" );
|
||||
missed++;
|
||||
}
|
||||
else if( counts[ x ][ y ][ z ] > 1 )
|
||||
{
|
||||
if( multiple < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" );
|
||||
multiple++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( counts[ x ][ y ][ z ] > 0 )
|
||||
{
|
||||
if( errored < 3 )
|
||||
log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" );
|
||||
errored++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( missed || multiple || errored )
|
||||
{
|
||||
size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
|
||||
( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
|
||||
( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
|
||||
int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) );
|
||||
if( missed || multiple || errored )
|
||||
{
|
||||
size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
|
||||
( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
|
||||
( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
|
||||
int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) );
|
||||
|
||||
if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
|
||||
log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
|
||||
else
|
||||
log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
|
||||
missed, multiple, errored );
|
||||
}
|
||||
return ( missed | multiple | errored | corrected );
|
||||
if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
|
||||
log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
|
||||
else
|
||||
log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
|
||||
missed, multiple, errored );
|
||||
}
|
||||
return ( missed | multiple | errored | corrected );
|
||||
}
|
||||
|
||||
int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 7 ];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 7 ];
|
||||
|
||||
int error;
|
||||
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
|
||||
cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];
|
||||
int error;
|
||||
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
|
||||
cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];
|
||||
|
||||
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
//// Create some output streams
|
||||
//// Create some output streams
|
||||
|
||||
// Use just one output array to init them all (no need to init every single stack storage here)
|
||||
memset( outputA, 0xff, sizeof( outputA ) );
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
|
||||
test_error( error, "Unable to create output array" );
|
||||
}
|
||||
// Use just one output array to init them all (no need to init every single stack storage here)
|
||||
memset( outputA, 0xff, sizeof( outputA ) );
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
|
||||
test_error( error, "Unable to create output array" );
|
||||
}
|
||||
|
||||
// Run a few different times
|
||||
MTdata seed = init_genrand( gRandomSeed );
|
||||
for( int test = 0; test < NUM_TESTS; test++ )
|
||||
{
|
||||
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
|
||||
threads[ 0 ] = random_in_range( 1, 32, seed );
|
||||
threads[ 1 ] = random_in_range( 1, 16, seed );
|
||||
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
|
||||
// Run a few different times
|
||||
MTdata seed = init_genrand( gRandomSeed );
|
||||
for( int test = 0; test < NUM_TESTS; test++ )
|
||||
{
|
||||
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
|
||||
threads[ 0 ] = random_in_range( 1, 32, seed );
|
||||
threads[ 1 ] = random_in_range( 1, 16, seed );
|
||||
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
|
||||
|
||||
// Make sure we get the local thread count right
|
||||
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
|
||||
test_error( error, "Unable to determine local work group sizes" );
|
||||
// Make sure we get the local thread count right
|
||||
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
|
||||
test_error( error, "Unable to determine local work group sizes" );
|
||||
|
||||
// Randomize some offsets
|
||||
for( int j = 0; j < 3; j++ )
|
||||
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
|
||||
// Randomize some offsets
|
||||
for( int j = 0; j < 3; j++ )
|
||||
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
|
||||
|
||||
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
|
||||
// Now set up and run
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
}
|
||||
// Now set up and run
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
}
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
// Read our results back now
|
||||
cl_int * resultBuffers[] = { outputA, outputB, outputC };
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
}
|
||||
// Read our results back now
|
||||
cl_int * resultBuffers[] = { outputA, outputB, outputC };
|
||||
for( int i = 0; i < 3; i++ )
|
||||
{
|
||||
error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
}
|
||||
|
||||
// Now we need to check the results. The outputs should have one entry for each possible ID,
|
||||
// but they won't be in order, so we need to construct a count map to determine what we got
|
||||
if( check_results( threads, offsets, outputA, outputB, outputC ) )
|
||||
{
|
||||
log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// Now we need to check the results. The outputs should have one entry for each possible ID,
|
||||
// but they won't be in order, so we need to construct a count map to determine what we got
|
||||
if( check_results( threads, offsets, outputA, outputB, outputC ) )
|
||||
{
|
||||
log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
free_mtdata(seed);
|
||||
|
||||
// All done!
|
||||
return 0;
|
||||
// All done!
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *get_offset_test[] = {
|
||||
"__kernel void test( __global int * outOffsets )\n"
|
||||
"{\n"
|
||||
" // We use local ID here so we don't have to worry about offsets\n"
|
||||
" // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
|
||||
" outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
|
||||
" outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
|
||||
" outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
|
||||
"}\n"
|
||||
"__kernel void test( __global int * outOffsets )\n"
|
||||
"{\n"
|
||||
" // We use local ID here so we don't have to worry about offsets\n"
|
||||
" // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
|
||||
" outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
|
||||
" outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
|
||||
" outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 1 ];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ 1 ];
|
||||
|
||||
int error;
|
||||
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
|
||||
cl_int outOffsets[ 3 ];
|
||||
int error;
|
||||
size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
|
||||
cl_int outOffsets[ 3 ];
|
||||
|
||||
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
// Create the kernel
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Create some output streams, and storage for a single control ID
|
||||
memset( outOffsets, 0xff, sizeof( outOffsets ) );
|
||||
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
|
||||
test_error( error, "Unable to create control ID buffer" );
|
||||
// Create some output streams, and storage for a single control ID
|
||||
memset( outOffsets, 0xff, sizeof( outOffsets ) );
|
||||
streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
|
||||
test_error( error, "Unable to create control ID buffer" );
|
||||
|
||||
// Run a few different times
|
||||
MTdata seed = init_genrand( gRandomSeed );
|
||||
for( int test = 0; test < NUM_TESTS; test++ )
|
||||
{
|
||||
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
|
||||
threads[ 0 ] = random_in_range( 1, 32, seed );
|
||||
threads[ 1 ] = random_in_range( 1, 16, seed );
|
||||
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
|
||||
// Run a few different times
|
||||
MTdata seed = init_genrand( gRandomSeed );
|
||||
for( int test = 0; test < NUM_TESTS; test++ )
|
||||
{
|
||||
// Choose a random combination of thread size, but in total less than MAX_TEST_ITEMS
|
||||
threads[ 0 ] = random_in_range( 1, 32, seed );
|
||||
threads[ 1 ] = random_in_range( 1, 16, seed );
|
||||
threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );
|
||||
|
||||
// Make sure we get the local thread count right
|
||||
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
|
||||
test_error( error, "Unable to determine local work group sizes" );
|
||||
// Make sure we get the local thread count right
|
||||
error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
|
||||
test_error( error, "Unable to determine local work group sizes" );
|
||||
|
||||
// Randomize some offsets
|
||||
for( int j = 0; j < 3; j++ )
|
||||
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
|
||||
// Randomize some offsets
|
||||
for( int j = 0; j < 3; j++ )
|
||||
offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );
|
||||
|
||||
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
|
||||
threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
|
||||
offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
|
||||
|
||||
// Now set up and run
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
// Now set up and run
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set indexed kernel arguments" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
// Read our results back now
|
||||
error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
// Read our results back now
|
||||
error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
// And check!
|
||||
int errors = 0;
|
||||
for( int j = 0; j < 3; j++ )
|
||||
{
|
||||
if( outOffsets[ j ] != (cl_int)offsets[ j ] )
|
||||
{
|
||||
log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if( errors > 0 )
|
||||
return errors;
|
||||
}
|
||||
// And check!
|
||||
int errors = 0;
|
||||
for( int j = 0; j < 3; j++ )
|
||||
{
|
||||
if( outOffsets[ j ] != (cl_int)offsets[ j ] )
|
||||
{
|
||||
log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if( errors > 0 )
|
||||
return errors;
|
||||
}
|
||||
free_mtdata(seed);
|
||||
|
||||
// All done!
|
||||
return 0;
|
||||
// All done!
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -95,8 +95,8 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
continue;
|
||||
|
||||
if( !gHasLong &&
|
||||
( 0 == strcmp( test_str_names[type], "long" )) &&
|
||||
( 0 == strcmp( test_str_names[type], "ulong" )))
|
||||
( 0 == strcmp( test_str_names[type], "long" )) &&
|
||||
( 0 == strcmp( test_str_names[type], "ulong" )))
|
||||
continue;
|
||||
|
||||
log_info( "%s", test_str_names[type] );
|
||||
@@ -239,7 +239,7 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
char *outP = (char *)output_ptr;
|
||||
outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
|
||||
( vector_sizes[ out_vector_idx[vectorSize] ] ) );
|
||||
// was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
|
||||
// was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
|
||||
for( size_t e = 0; e < size; e++ )
|
||||
{
|
||||
if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
|
||||
@@ -369,22 +369,22 @@ static int CheckResults( void *in, void *out, size_t elementCount, int type, int
|
||||
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
|
||||
|
||||
for( j = 0; j < cmpVectorSize; j++ )
|
||||
{
|
||||
/* Allow float nans to be binary different */
|
||||
if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
|
||||
{
|
||||
log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
|
||||
for( j = 1; j < halfVectorSize * 2; j++ )
|
||||
log_info( ", 0x%8.8x", i[j] );
|
||||
log_info( " } --> { 0x%8.8x", o[0] );
|
||||
for( j = 1; j < halfVectorSize; j++ )
|
||||
log_info( ", 0x%8.8x", o[j] );
|
||||
log_info( " }\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
i += 2 * halfVectorSize;
|
||||
o += halfVectorSize;
|
||||
{
|
||||
/* Allow float nans to be binary different */
|
||||
if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
|
||||
{
|
||||
log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
|
||||
for( j = 1; j < halfVectorSize * 2; j++ )
|
||||
log_info( ", 0x%8.8x", i[j] );
|
||||
log_info( " } --> { 0x%8.8x", o[0] );
|
||||
for( j = 1; j < halfVectorSize; j++ )
|
||||
log_info( ", 0x%8.8x", o[j] );
|
||||
log_info( " }\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
i += 2 * halfVectorSize;
|
||||
o += halfVectorSize;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -36,7 +36,7 @@ const char *hostptr_kernel_code =
|
||||
" dst[tid] = srcA[tid] + srcB[tid];\n"
|
||||
"}\n";
|
||||
|
||||
static const float MAX_ERR = 1e-5f;
|
||||
static const float MAX_ERR = 1e-5f;
|
||||
|
||||
static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
|
||||
{
|
||||
@@ -56,7 +56,7 @@ static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr,
|
||||
|
||||
static void make_random_data(unsigned count, float *ptr, MTdata d)
|
||||
{
|
||||
cl_uint i;
|
||||
cl_uint i;
|
||||
for (i=0; i<count; i++)
|
||||
ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), d);
|
||||
}
|
||||
@@ -102,7 +102,7 @@ int
|
||||
test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_float *input_ptr[2], *output_ptr;
|
||||
cl_program program;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[3]={0,0,0};
|
||||
cl_image_format img_format;
|
||||
@@ -112,8 +112,8 @@ test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, in
|
||||
int img_height = 512;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
RoundingMode oldRoundMode;
|
||||
int isRTZ = 0;
|
||||
RoundingMode oldRoundMode;
|
||||
int isRTZ = 0;
|
||||
|
||||
// Block to mark deletion of streams before deletion of host_ptr
|
||||
{
|
||||
@@ -186,15 +186,15 @@ test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, in
|
||||
cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
|
||||
test_error( err, "clEnqueueMapBuffer failed" );
|
||||
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
|
||||
{
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
isRTZ = 1;
|
||||
}
|
||||
//If we only support rtz mode
|
||||
if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
|
||||
{
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
isRTZ = 1;
|
||||
}
|
||||
|
||||
if (isRTZ)
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
if (isRTZ)
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
|
||||
// Verify that we got the expected results back on the host side
|
||||
err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
|
||||
@@ -207,8 +207,8 @@ test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, in
|
||||
"and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
|
||||
}
|
||||
|
||||
if (isRTZ)
|
||||
set_round(oldRoundMode, kfloat);
|
||||
if (isRTZ)
|
||||
set_round(oldRoundMode, kfloat);
|
||||
|
||||
err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
|
||||
test_error( err, "clEnqueueUnmapMemObject failed" );
|
||||
|
||||
@@ -90,17 +90,17 @@ verify_if(int *inptr, int *outptr, int n)
|
||||
|
||||
int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_int *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
cl_mem streams[2];
|
||||
cl_int *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[1];
|
||||
int err, i;
|
||||
MTdata d = init_genrand( gRandomSeed );
|
||||
|
||||
size_t length = sizeof(cl_int) * num_elements;
|
||||
input_ptr = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
input_ptr = (cl_int*)malloc(length);
|
||||
output_ptr = (cl_int*)malloc(length);
|
||||
|
||||
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
|
||||
if (!streams[0])
|
||||
@@ -115,8 +115,8 @@ int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(0, 32, d);
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (int)get_random_float(0, 32, d);
|
||||
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
@@ -133,13 +133,13 @@ int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
threads[0] = (unsigned int)num_elements;
|
||||
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
@@ -156,15 +156,15 @@ int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int
|
||||
|
||||
err = verify_if(input_ptr, output_ptr, num_elements);
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -156,7 +156,7 @@ test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_c
|
||||
|
||||
int num_input_streams = 8;
|
||||
cl_mem *input_streams;
|
||||
cl_mem accum_streams[2];
|
||||
cl_mem accum_streams[2];
|
||||
unsigned char *expected_output;
|
||||
unsigned char *output_ptr;
|
||||
cl_kernel kernel[2];
|
||||
@@ -172,7 +172,7 @@ test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_c
|
||||
|
||||
// Create the accum images with initial data.
|
||||
{
|
||||
unsigned char *initial_data;
|
||||
unsigned char *initial_data;
|
||||
cl_mem_flags flags;
|
||||
|
||||
initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
|
||||
@@ -410,7 +410,7 @@ test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_com
|
||||
|
||||
int num_input_streams = 8;
|
||||
cl_mem *input_streams;
|
||||
cl_mem accum_streams[2];
|
||||
cl_mem accum_streams[2];
|
||||
unsigned char *expected_output;
|
||||
unsigned char *output_ptr;
|
||||
cl_kernel kernel[2];
|
||||
@@ -425,7 +425,7 @@ test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_com
|
||||
|
||||
// Create the accum images with initial data.
|
||||
{
|
||||
unsigned char *initial_data;
|
||||
unsigned char *initial_data;
|
||||
cl_mem_flags flags;
|
||||
|
||||
initial_data = generate_initial_byte_image(img_width, img_height, 4, 0xF0);
|
||||
|
||||
@@ -43,214 +43,214 @@ static const char *param_kernel[] = {
|
||||
|
||||
int validate_results( size_t width, size_t height, cl_image_format &format, char *inputData, cl_float *actualResults )
|
||||
{
|
||||
for( size_t i = 0; i < width * height; i++ )
|
||||
{
|
||||
cl_float expected[ 4 ], tolerance;
|
||||
for( size_t i = 0; i < width * height; i++ )
|
||||
{
|
||||
cl_float expected[ 4 ], tolerance;
|
||||
|
||||
switch( format.image_channel_data_type )
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
{
|
||||
cl_uchar *p = (cl_uchar *)inputData;
|
||||
expected[ 0 ] = p[ 0 ] / 255.f;
|
||||
expected[ 1 ] = p[ 1 ] / 255.f;
|
||||
expected[ 2 ] = p[ 2 ] / 255.f;
|
||||
expected[ 3 ] = p[ 3 ] / 255.f;
|
||||
tolerance = 1.f / 255.f;
|
||||
break;
|
||||
}
|
||||
case CL_SNORM_INT8:
|
||||
{
|
||||
cl_char *p = (cl_char *)inputData;
|
||||
expected[ 0 ] = fmaxf( p[ 0 ] / 127.f, -1.f );
|
||||
expected[ 1 ] = fmaxf( p[ 1 ] / 127.f, -1.f );
|
||||
expected[ 2 ] = fmaxf( p[ 2 ] / 127.f, -1.f );
|
||||
expected[ 3 ] = fmaxf( p[ 3 ] / 127.f, -1.f );
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNSIGNED_INT8:
|
||||
{
|
||||
cl_uchar *p = (cl_uchar *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_SIGNED_INT8:
|
||||
{
|
||||
cl_short *p = (cl_short *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNORM_INT16:
|
||||
{
|
||||
cl_ushort *p = (cl_ushort *)inputData;
|
||||
expected[ 0 ] = p[ 0 ] / 65535.f;
|
||||
expected[ 1 ] = p[ 1 ] / 65535.f;
|
||||
expected[ 2 ] = p[ 2 ] / 65535.f;
|
||||
expected[ 3 ] = p[ 3 ] / 65535.f;
|
||||
tolerance = 1.f / 65535.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNSIGNED_INT32:
|
||||
{
|
||||
cl_uint *p = (cl_uint *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 0.0001f;
|
||||
break;
|
||||
}
|
||||
case CL_FLOAT:
|
||||
{
|
||||
cl_float *p = (cl_float *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 0.0001f;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Should never get here
|
||||
break;
|
||||
}
|
||||
switch( format.image_channel_data_type )
|
||||
{
|
||||
case CL_UNORM_INT8:
|
||||
{
|
||||
cl_uchar *p = (cl_uchar *)inputData;
|
||||
expected[ 0 ] = p[ 0 ] / 255.f;
|
||||
expected[ 1 ] = p[ 1 ] / 255.f;
|
||||
expected[ 2 ] = p[ 2 ] / 255.f;
|
||||
expected[ 3 ] = p[ 3 ] / 255.f;
|
||||
tolerance = 1.f / 255.f;
|
||||
break;
|
||||
}
|
||||
case CL_SNORM_INT8:
|
||||
{
|
||||
cl_char *p = (cl_char *)inputData;
|
||||
expected[ 0 ] = fmaxf( p[ 0 ] / 127.f, -1.f );
|
||||
expected[ 1 ] = fmaxf( p[ 1 ] / 127.f, -1.f );
|
||||
expected[ 2 ] = fmaxf( p[ 2 ] / 127.f, -1.f );
|
||||
expected[ 3 ] = fmaxf( p[ 3 ] / 127.f, -1.f );
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNSIGNED_INT8:
|
||||
{
|
||||
cl_uchar *p = (cl_uchar *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_SIGNED_INT8:
|
||||
{
|
||||
cl_short *p = (cl_short *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 1.f / 127.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNORM_INT16:
|
||||
{
|
||||
cl_ushort *p = (cl_ushort *)inputData;
|
||||
expected[ 0 ] = p[ 0 ] / 65535.f;
|
||||
expected[ 1 ] = p[ 1 ] / 65535.f;
|
||||
expected[ 2 ] = p[ 2 ] / 65535.f;
|
||||
expected[ 3 ] = p[ 3 ] / 65535.f;
|
||||
tolerance = 1.f / 65535.f;
|
||||
break;
|
||||
}
|
||||
case CL_UNSIGNED_INT32:
|
||||
{
|
||||
cl_uint *p = (cl_uint *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 0.0001f;
|
||||
break;
|
||||
}
|
||||
case CL_FLOAT:
|
||||
{
|
||||
cl_float *p = (cl_float *)inputData;
|
||||
expected[ 0 ] = p[ 0 ];
|
||||
expected[ 1 ] = p[ 1 ];
|
||||
expected[ 2 ] = p[ 2 ];
|
||||
expected[ 3 ] = p[ 3 ];
|
||||
tolerance = 0.0001f;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
// Should never get here
|
||||
break;
|
||||
}
|
||||
|
||||
if( format.image_channel_order == CL_BGRA )
|
||||
{
|
||||
cl_float tmp = expected[ 0 ];
|
||||
expected[ 0 ] = expected[ 2 ];
|
||||
expected[ 2 ] = tmp;
|
||||
}
|
||||
if( format.image_channel_order == CL_BGRA )
|
||||
{
|
||||
cl_float tmp = expected[ 0 ];
|
||||
expected[ 0 ] = expected[ 2 ];
|
||||
expected[ 2 ] = tmp;
|
||||
}
|
||||
|
||||
// Within an error tolerance, make sure the results match
|
||||
cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] );
|
||||
cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] );
|
||||
cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] );
|
||||
cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] );
|
||||
// Within an error tolerance, make sure the results match
|
||||
cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] );
|
||||
cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] );
|
||||
cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] );
|
||||
cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] );
|
||||
|
||||
if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance )
|
||||
{
|
||||
log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height,
|
||||
GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) );
|
||||
log_error( " Expected: %f %f %f %f\n", (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] );
|
||||
log_error( " Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] );
|
||||
if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance )
|
||||
{
|
||||
log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height,
|
||||
GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) );
|
||||
log_error( " Expected: %f %f %f %f\n", (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] );
|
||||
log_error( " Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] );
|
||||
|
||||
// Check real quick a special case error here
|
||||
cl_float error1 = fabsf( expected[ 3 ] - actualResults[ 0 ] );
|
||||
cl_float error2 = fabsf( expected[ 2 ] - actualResults[ 1 ] );
|
||||
cl_float error3 = fabsf( expected[ 1 ] - actualResults[ 2 ] );
|
||||
cl_float error4 = fabsf( expected[ 0 ] - actualResults[ 3 ] );
|
||||
if( error1 <= tolerance && error2 <= tolerance && error3 <= tolerance && error4 <= tolerance )
|
||||
{
|
||||
log_error( "\t(Kernel did not respect change in channel order)\n" );
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
// Check real quick a special case error here
|
||||
cl_float error1 = fabsf( expected[ 3 ] - actualResults[ 0 ] );
|
||||
cl_float error2 = fabsf( expected[ 2 ] - actualResults[ 1 ] );
|
||||
cl_float error3 = fabsf( expected[ 1 ] - actualResults[ 2 ] );
|
||||
cl_float error4 = fabsf( expected[ 0 ] - actualResults[ 3 ] );
|
||||
if( error1 <= tolerance && error2 <= tolerance && error3 <= tolerance && error4 <= tolerance )
|
||||
{
|
||||
log_error( "\t(Kernel did not respect change in channel order)\n" );
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Increment and go
|
||||
actualResults += 4;
|
||||
inputData += get_format_type_size( &format ) * 4;
|
||||
}
|
||||
// Increment and go
|
||||
actualResults += 4;
|
||||
inputData += get_format_type_size( &format ) * 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int test_image_param(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
size_t sizes[] = { 64, 100, 128, 250, 512 };
|
||||
cl_image_format formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } };
|
||||
ExplicitType types[] = { kUChar, kUShort, kFloat, kUChar };
|
||||
int error;
|
||||
size_t i, j, idx;
|
||||
size_t threads[ 2 ];
|
||||
MTdata d;
|
||||
size_t sizes[] = { 64, 100, 128, 250, 512 };
|
||||
cl_image_format formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } };
|
||||
ExplicitType types[] = { kUChar, kUShort, kFloat, kUChar };
|
||||
int error;
|
||||
size_t i, j, idx;
|
||||
size_t threads[ 2 ];
|
||||
MTdata d;
|
||||
|
||||
const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] );
|
||||
const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] );
|
||||
const size_t numAttempts = numSizes * numFormats;
|
||||
const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] );
|
||||
const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] );
|
||||
const size_t numAttempts = numSizes * numFormats;
|
||||
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ numAttempts ][ 2 ];
|
||||
BufferOwningPtr<char> inputs[ numAttempts ];
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[ numAttempts ][ 2 ];
|
||||
BufferOwningPtr<char> inputs[ numAttempts ];
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for( i = 0, idx = 0; i < numSizes; i++ )
|
||||
{
|
||||
for( j = 0; j < numFormats; j++, idx++ )
|
||||
{
|
||||
// For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer
|
||||
// that we can read values from. The output buffer will remain consistent to ensure that any changes we
|
||||
// witness are due to the image changes
|
||||
inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 ));
|
||||
for( i = 0, idx = 0; i < numSizes; i++ )
|
||||
{
|
||||
for( j = 0; j < numFormats; j++, idx++ )
|
||||
{
|
||||
// For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer
|
||||
// that we can read values from. The output buffer will remain consistent to ensure that any changes we
|
||||
// witness are due to the image changes
|
||||
inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 ));
|
||||
|
||||
streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error );
|
||||
{
|
||||
char err_str[256];
|
||||
sprintf(err_str, "Unable to create input image for format %s order %s" ,
|
||||
GetChannelOrderName( formats[j].image_channel_order ),
|
||||
GetChannelTypeName( formats[j].image_channel_data_type ));
|
||||
test_error( error, err_str);
|
||||
}
|
||||
streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error );
|
||||
{
|
||||
char err_str[256];
|
||||
sprintf(err_str, "Unable to create input image for format %s order %s" ,
|
||||
GetChannelOrderName( formats[j].image_channel_order ),
|
||||
GetChannelTypeName( formats[j].image_channel_data_type ));
|
||||
test_error( error, err_str);
|
||||
}
|
||||
|
||||
streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error );
|
||||
test_error( error, "Unable to create output buffer" );
|
||||
}
|
||||
}
|
||||
streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error );
|
||||
test_error( error, "Unable to create output buffer" );
|
||||
}
|
||||
}
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
// Create a single kernel to use for all the tests
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
// Create a single kernel to use for all the tests
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
|
||||
// Also create a sampler to use for all the runs
|
||||
clSamplerWrapper sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &error );
|
||||
test_error( error, "clCreateSampler failed" );
|
||||
// Also create a sampler to use for all the runs
|
||||
clSamplerWrapper sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &error );
|
||||
test_error( error, "clCreateSampler failed" );
|
||||
|
||||
// Set up the arguments for each and queue
|
||||
for( i = 0, idx = 0; i < numSizes; i++ )
|
||||
{
|
||||
for( j = 0; j < numFormats; j++, idx++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] );
|
||||
error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler );
|
||||
error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]);
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
// Set up the arguments for each and queue
|
||||
for( i = 0, idx = 0; i < numSizes; i++ )
|
||||
{
|
||||
for( j = 0; j < numFormats; j++, idx++ )
|
||||
{
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] );
|
||||
error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler );
|
||||
error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]);
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ];
|
||||
threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ];
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueNDRangeKernel failed" );
|
||||
}
|
||||
}
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error( error, "clEnqueueNDRangeKernel failed" );
|
||||
}
|
||||
}
|
||||
|
||||
// Now go through each combo and validate the results
|
||||
for( i = 0, idx = 0; i < numSizes; i++ )
|
||||
{
|
||||
for( j = 0; j < numFormats; j++, idx++ )
|
||||
{
|
||||
BufferOwningPtr<cl_float> output(malloc(sizeof(cl_float) * sizes[ i ] * sizes[ i ] * 4 ));
|
||||
// Now go through each combo and validate the results
|
||||
for( i = 0, idx = 0; i < numSizes; i++ )
|
||||
{
|
||||
for( j = 0; j < numFormats; j++, idx++ )
|
||||
{
|
||||
BufferOwningPtr<cl_float> output(malloc(sizeof(cl_float) * sizes[ i ] * sizes[ i ] * 4 ));
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), output, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), output, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
|
||||
error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output );
|
||||
if( error )
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output );
|
||||
if( error )
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ static const char *r_uint8_kernel_code =
|
||||
" int tid_x = get_global_id(0);\n"
|
||||
" int tid_y = get_global_id(1);\n"
|
||||
" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
|
||||
" uint4 color;\n"
|
||||
" uint4 color;\n"
|
||||
"\n"
|
||||
" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" dst[indx] = (unsigned char)(color.x);\n"
|
||||
@@ -45,7 +45,7 @@ static const char *r_uint8_kernel_code =
|
||||
static unsigned char *
|
||||
generate_8bit_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char));
|
||||
unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h; i++)
|
||||
@@ -75,21 +75,21 @@ verify_8bit_image(unsigned char *image, unsigned char *outptr, int w, int h)
|
||||
int
|
||||
test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
cl_uchar *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[3];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int err;
|
||||
MTdata d;
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
cl_uchar *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[3];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
img_format.image_channel_order = CL_R;
|
||||
img_format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
img_format.image_channel_order = CL_R;
|
||||
img_format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
|
||||
// early out if this image type is not supported
|
||||
if( ! is_image_format_supported( context, (cl_mem_flags)(CL_MEM_READ_ONLY), CL_MEM_OBJECT_IMAGE2D, &img_format ) ) {
|
||||
@@ -98,37 +98,37 @@ test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, i
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
input_ptr = generate_8bit_image(img_width, img_height, d);
|
||||
input_ptr = generate_8bit_image(img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height);
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_ONLY), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height);
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_ONLY), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uchar) * img_width*img_height, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uchar) * img_width*img_height, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("clCreateBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0}, region[3]={img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
input_ptr,
|
||||
0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &r_uint8_kernel_code, "test_r_uint8" );
|
||||
if (err) {
|
||||
if (err) {
|
||||
log_error("Failed to create kernel and program: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
@@ -139,14 +139,14 @@ test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, i
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
@@ -164,15 +164,15 @@ test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, i
|
||||
err = verify_8bit_image(input_ptr, output_ptr, img_width, img_height);
|
||||
|
||||
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
// cleanup
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
|
||||
static unsigned short *
|
||||
generate_rgba16_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short *)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
unsigned short *ptr = (unsigned short *)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
@@ -112,82 +112,82 @@ verify_rgbafp_image(float *image, float *outptr, int w, int h)
|
||||
int
|
||||
test_imagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, err;
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
int x, y, delta_w = img_width/8, delta_h = img_height/16;
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
int x, y, delta_w = img_width/8, delta_h = img_height/16;
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
log_info("Testing CL_RGBA CL_UNORM_INT8\n");
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
log_info("Testing CL_RGBA CL_UNORM_INT16\n");
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
log_info("Testing CL_RGBA CL_FLOAT\n");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
int copy_number = 0;
|
||||
for (y=0; y<img_height; y+=delta_h)
|
||||
{
|
||||
for (x=0; x<img_width; x+=delta_w)
|
||||
{
|
||||
for (y=0; y<img_height; y+=delta_h)
|
||||
{
|
||||
for (x=0; x<img_width; x+=delta_w)
|
||||
{
|
||||
copy_number++;
|
||||
size_t copy_origin[3] = {x,y,0}, copy_region[3]={delta_w, delta_h, 1};
|
||||
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1],
|
||||
@@ -197,28 +197,28 @@ test_imagecopy(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
log_error("Copy %d (origin [%d, %d], size [%d, %d], image size [%d x %d]) Failed\n", copy_number, x, y, delta_w, delta_h, img_width, img_height);
|
||||
}
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
|
||||
break;
|
||||
}
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, img_width, img_height);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, img_width, img_height);
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free(rgba8_inptr);
|
||||
free(rgba16_inptr);
|
||||
@@ -227,12 +227,12 @@ test_imagecopy(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (err)
|
||||
log_error("IMAGE copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE copy test passed\n");
|
||||
if (err)
|
||||
log_error("IMAGE copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE copy test passed\n");
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -31,197 +31,197 @@
|
||||
static unsigned char *
|
||||
generate_uint8_image(unsigned num_elements, MTdata d)
|
||||
{
|
||||
unsigned char *ptr = (unsigned char*)malloc(num_elements);
|
||||
unsigned i;
|
||||
unsigned char *ptr = (unsigned char*)malloc(num_elements);
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = (unsigned char)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static int
|
||||
verify_uint8_image(unsigned char *image, unsigned char *outptr, unsigned num_elements)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_uint16_image(unsigned num_elements, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short *)malloc(num_elements * sizeof(unsigned short));
|
||||
unsigned i;
|
||||
unsigned short *ptr = (unsigned short *)malloc(num_elements * sizeof(unsigned short));
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(d);
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = (unsigned short)genrand_int32(d);
|
||||
|
||||
return ptr;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static int
|
||||
verify_uint16_image(unsigned short *image, unsigned short *outptr, unsigned num_elements)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static float *
|
||||
generate_float_image(unsigned num_elements, MTdata d)
|
||||
{
|
||||
float *ptr = (float*)malloc(num_elements * sizeof(float));
|
||||
unsigned i;
|
||||
float *ptr = (float*)malloc(num_elements * sizeof(float));
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
for (i=0; i<num_elements; i++)
|
||||
ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
|
||||
|
||||
return ptr;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static int
|
||||
verify_float_image(float *image, float *outptr, unsigned num_elements)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned i;
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (outptr[i] != image[i])
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
test_imagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements_ignored)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 128;
|
||||
int img_height = 128;
|
||||
int img_depth = 64;
|
||||
int i;
|
||||
cl_int err;
|
||||
unsigned num_elements = img_width * img_height * img_depth * 4;
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 128;
|
||||
int img_height = 128;
|
||||
int img_depth = 64;
|
||||
int i;
|
||||
cl_int err;
|
||||
unsigned num_elements = img_width * img_height * img_depth * 4;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_uint8_image(num_elements, d);
|
||||
rgba16_inptr = (unsigned short *)generate_uint16_image(num_elements, d);
|
||||
rgbafp_inptr = (float *)generate_float_image(num_elements, d);
|
||||
rgba8_inptr = (unsigned char *)generate_uint8_image(num_elements, d);
|
||||
rgba16_inptr = (unsigned short *)generate_uint16_image(num_elements, d);
|
||||
rgbafp_inptr = (float *)generate_float_image(num_elements, d);
|
||||
free_mtdata(d); d = NULL;
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * num_elements);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * num_elements);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * num_elements);
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * num_elements);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * num_elements);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * num_elements);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[1] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[3] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[3] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[5] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
streams[5] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
|
||||
test_error(err, "create_image_3d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
int x, y, z, delta_w = img_width/8, delta_h = img_height/16, delta_d = img_depth/4;
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
int x, y, z, delta_w = img_width/8, delta_h = img_height/16, delta_d = img_depth/4;
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
|
||||
size_t origin[3]={0,0,0}, region[3]={img_width, img_height, img_depth};
|
||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
for (z=0; z<img_depth; z+=delta_d)
|
||||
{
|
||||
for (y=0; y<img_height; y+=delta_h)
|
||||
{
|
||||
for (x=0; x<img_width; x+=delta_w)
|
||||
{
|
||||
for (z=0; z<img_depth; z+=delta_d)
|
||||
{
|
||||
for (y=0; y<img_height; y+=delta_h)
|
||||
{
|
||||
for (x=0; x<img_width; x+=delta_w)
|
||||
{
|
||||
origin[0] = x; origin[1] = y; origin[2] = z;
|
||||
region[0] = delta_w; region[1] = delta_h; region[2] = delta_d;
|
||||
|
||||
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
origin[0] = 0; origin[1] = 0; origin[2] = 0;
|
||||
region[0] = img_width; region[1] = img_height; region[2] = img_depth;
|
||||
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_uint8_image(rgba8_inptr, rgba8_outptr, num_elements);
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_uint8_image(rgba8_inptr, rgba8_outptr, num_elements);
|
||||
if (err) log_error("Failed uint8\n");
|
||||
break;
|
||||
case 1:
|
||||
err = verify_uint16_image(rgba16_inptr, rgba16_outptr, num_elements);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_uint16_image(rgba16_inptr, rgba16_outptr, num_elements);
|
||||
if (err) log_error("Failed uint16\n");
|
||||
break;
|
||||
case 2:
|
||||
err = verify_float_image(rgbafp_inptr, rgbafp_outptr, num_elements);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_float_image(rgbafp_inptr, rgbafp_outptr, num_elements);
|
||||
if (err) log_error("Failed float\n");
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free(rgba8_inptr);
|
||||
free(rgba16_inptr);
|
||||
@@ -230,12 +230,12 @@ test_imagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (err)
|
||||
log_error("IMAGE3D copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE3D copy test passed\n");
|
||||
if (err)
|
||||
log_error("IMAGE3D copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE3D copy test passed\n");
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ static const char *image_dim_kernel_code =
|
||||
" float4 color;\n"
|
||||
"\n"
|
||||
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
|
||||
"\n"
|
||||
"}\n";
|
||||
|
||||
@@ -72,154 +72,154 @@ verify_8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
|
||||
int
|
||||
test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2];
|
||||
cl_ulong max_mem_size;
|
||||
int img_width, max_img_width;
|
||||
int img_height, max_img_height;
|
||||
int max_img_dim;
|
||||
int i, j, i2, j2, err=0;
|
||||
size_t max_image2d_width, max_image2d_height;
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2];
|
||||
cl_ulong max_mem_size;
|
||||
int img_width, max_img_width;
|
||||
int img_height, max_img_height;
|
||||
int max_img_dim;
|
||||
int i, j, i2, j2, err=0;
|
||||
size_t max_image2d_width, max_image2d_height;
|
||||
int total_errors = 0;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
|
||||
if (err)
|
||||
{
|
||||
log_error("create_program_and_kernel_with_sources failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (err)
|
||||
{
|
||||
log_error("create_program_and_kernel_with_sources failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
|
||||
max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
|
||||
|
||||
cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
|
||||
test_error(err, "clCreateSampler failed");
|
||||
|
||||
max_img_width = (int)max_image2d_width;
|
||||
max_img_height = (int)max_image2d_height;
|
||||
max_img_width = (int)max_image2d_width;
|
||||
max_img_height = (int)max_image2d_height;
|
||||
|
||||
// determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
|
||||
// determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
|
||||
// and we want to consume 1/4 of global memory (this is the minimum required to be
|
||||
// supported by the spec)
|
||||
max_mem_size /= 4; // use 1/4
|
||||
max_mem_size /= 4; // 4 bytes per pixel
|
||||
max_img_dim = (int)sqrt((double)max_mem_size);
|
||||
// convert to a power of 2
|
||||
{
|
||||
unsigned int n = (unsigned int)max_img_dim;
|
||||
unsigned int m = 0x80000000;
|
||||
max_mem_size /= 4; // 4 bytes per pixel
|
||||
max_img_dim = (int)sqrt((double)max_mem_size);
|
||||
// convert to a power of 2
|
||||
{
|
||||
unsigned int n = (unsigned int)max_img_dim;
|
||||
unsigned int m = 0x80000000;
|
||||
|
||||
// round-down to the nearest power of 2
|
||||
while (m > n)
|
||||
m >>= 1;
|
||||
// round-down to the nearest power of 2
|
||||
while (m > n)
|
||||
m >>= 1;
|
||||
|
||||
max_img_dim = (int)m;
|
||||
}
|
||||
max_img_dim = (int)m;
|
||||
}
|
||||
|
||||
if (max_img_width > max_img_dim)
|
||||
max_img_width = max_img_dim;
|
||||
if (max_img_height > max_img_dim)
|
||||
max_img_height = max_img_dim;
|
||||
if (max_img_width > max_img_dim)
|
||||
max_img_width = max_img_dim;
|
||||
if (max_img_height > max_img_dim)
|
||||
max_img_height = max_img_dim;
|
||||
|
||||
log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
|
||||
max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
|
||||
// test power of 2 width, height starting at 1 to 4K
|
||||
for (i=1,i2=0; i<=max_img_height; i<<=1,i2++)
|
||||
{
|
||||
img_height = (1 << i2);
|
||||
for (j=1,j2=0; j<=max_img_width; j<<=1,j2++)
|
||||
{
|
||||
img_width = (1 << j2);
|
||||
input_ptr = generate_8888_image(img_width, img_height, d);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
// test power of 2 width, height starting at 1 to 4K
|
||||
for (i=1,i2=0; i<=max_img_height; i<<=1,i2++)
|
||||
{
|
||||
img_height = (1 << i2);
|
||||
for (j=1,j2=0; j<=max_img_width; j<<=1,j2++)
|
||||
{
|
||||
img_width = (1 << j2);
|
||||
input_ptr = generate_8888_image(img_width, img_height, d);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed. width = %d, height = %d\n", img_width, img_height);
|
||||
clReleaseMemObject(streams[0]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0};
|
||||
size_t region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
threads[0] = (size_t)img_width;
|
||||
threads[1] = (size_t)img_height;
|
||||
log_info("Testing image dimensions %d x %d with local threads NULL.\n", img_width, img_height);
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueNDRangeKernel failed\n");
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n",
|
||||
img_width, img_height);
|
||||
clReleaseMemObject(streams[0]);
|
||||
@@ -227,12 +227,12 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadImage failed\n");
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clReadImage failed\n");
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d, local NULL\n",
|
||||
img_width, img_height);
|
||||
clReleaseMemObject(streams[0]);
|
||||
@@ -240,29 +240,29 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
err = verify_8888_image(input_ptr, output_ptr, img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
err = verify_8888_image(input_ptr, output_ptr, img_width, img_height);
|
||||
if (err)
|
||||
{
|
||||
total_errors++;
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d\n", img_width, img_height);
|
||||
}
|
||||
log_error("Image Dimension test failed. image width = %d, image height = %d\n", img_width, img_height);
|
||||
}
|
||||
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
}
|
||||
}
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
// cleanup
|
||||
// cleanup
|
||||
free_mtdata(d);
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
|
||||
return total_errors;
|
||||
return total_errors;
|
||||
}
|
||||
|
||||
|
||||
@@ -270,30 +270,30 @@ test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue que
|
||||
int
|
||||
test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2], local_threads[2];
|
||||
cl_ulong max_mem_size;
|
||||
int img_width, max_img_width;
|
||||
int img_height, max_img_height;
|
||||
int max_img_dim;
|
||||
int i, j, i2, j2, err=0;
|
||||
size_t max_image2d_width, max_image2d_height;
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t threads[2], local_threads[2];
|
||||
cl_ulong max_mem_size;
|
||||
int img_width, max_img_width;
|
||||
int img_height, max_img_height;
|
||||
int max_img_dim;
|
||||
int i, j, i2, j2, err=0;
|
||||
size_t max_image2d_width, max_image2d_height;
|
||||
int total_errors = 0;
|
||||
size_t max_local_workgroup_size[3];
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
|
||||
if (err)
|
||||
{
|
||||
log_error("create_program_and_kernel_with_sources failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (err)
|
||||
{
|
||||
log_error("create_program_and_kernel_with_sources failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t work_group_size = 0;
|
||||
err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL);
|
||||
@@ -302,55 +302,55 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
|
||||
if (err)
|
||||
{
|
||||
log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
|
||||
return -1;
|
||||
}
|
||||
log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
|
||||
max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
|
||||
|
||||
cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
|
||||
test_error(err, "clCreateSampler failed");
|
||||
|
||||
max_img_width = (int)max_image2d_width;
|
||||
max_img_height = (int)max_image2d_height;
|
||||
max_img_width = (int)max_image2d_width;
|
||||
max_img_height = (int)max_image2d_height;
|
||||
|
||||
// determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
|
||||
// determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
|
||||
// and we want to consume 1/4 of global memory (this is the minimum required to be
|
||||
// supported by the spec)
|
||||
max_mem_size /= 4; // use 1/4
|
||||
max_mem_size /= 4; // 4 bytes per pixel
|
||||
max_img_dim = (int)sqrt((double)max_mem_size);
|
||||
// convert to a power of 2
|
||||
{
|
||||
unsigned int n = (unsigned int)max_img_dim;
|
||||
unsigned int m = 0x80000000;
|
||||
max_mem_size /= 4; // 4 bytes per pixel
|
||||
max_img_dim = (int)sqrt((double)max_mem_size);
|
||||
// convert to a power of 2
|
||||
{
|
||||
unsigned int n = (unsigned int)max_img_dim;
|
||||
unsigned int m = 0x80000000;
|
||||
|
||||
// round-down to the nearest power of 2
|
||||
while (m > n)
|
||||
m >>= 1;
|
||||
// round-down to the nearest power of 2
|
||||
while (m > n)
|
||||
m >>= 1;
|
||||
|
||||
max_img_dim = (int)m;
|
||||
}
|
||||
max_img_dim = (int)m;
|
||||
}
|
||||
|
||||
if (max_img_width > max_img_dim)
|
||||
max_img_width = max_img_dim;
|
||||
if (max_img_height > max_img_dim)
|
||||
max_img_height = max_img_dim;
|
||||
if (max_img_width > max_img_dim)
|
||||
max_img_width = max_img_dim;
|
||||
if (max_img_height > max_img_dim)
|
||||
max_img_height = max_img_dim;
|
||||
|
||||
log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
|
||||
max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
|
||||
@@ -360,7 +360,7 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
|
||||
for (plus_minus=0; plus_minus < 3; plus_minus++)
|
||||
{
|
||||
|
||||
// test power of 2 width, height starting at 1 to 4K
|
||||
// test power of 2 width, height starting at 1 to 4K
|
||||
for (i=2,i2=1; i<=max_img_height; i<<=1,i2++)
|
||||
{
|
||||
img_height = (1 << i2);
|
||||
@@ -496,13 +496,13 @@ test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue
|
||||
|
||||
}
|
||||
|
||||
// cleanup
|
||||
// cleanup
|
||||
free_mtdata(d);
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
|
||||
return total_errors;
|
||||
return total_errors;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -75,27 +75,27 @@ verify_rgba8888_image(unsigned char *src, unsigned char *dst, int w, int h)
|
||||
}
|
||||
|
||||
|
||||
int img_width_selection[] = { 97, 111, 322, 479 };
|
||||
int img_height_selection[] = { 149, 222, 754, 385 };
|
||||
int img_width_selection[] = { 97, 111, 322, 479 };
|
||||
int img_height_selection[] = { 149, 222, 754, 385 };
|
||||
|
||||
int
|
||||
test_imagenpot(cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3], local_threads[3];
|
||||
size_t local_workgroup_size;
|
||||
int img_width;
|
||||
int img_height;
|
||||
int err;
|
||||
cl_uint m;
|
||||
cl_mem streams[2];
|
||||
cl_image_format img_format;
|
||||
unsigned char *input_ptr, *output_ptr;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
size_t global_threads[3], local_threads[3];
|
||||
size_t local_workgroup_size;
|
||||
int img_width;
|
||||
int img_height;
|
||||
int err;
|
||||
cl_uint m;
|
||||
size_t max_local_workgroup_size[3];
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device_id )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device_id )
|
||||
|
||||
cl_device_type device_type;
|
||||
err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
|
||||
@@ -105,67 +105,67 @@ test_imagenpot(cl_device_id device_id, cl_context context, cl_command_queue queu
|
||||
}
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (m=0; m<sizeof(img_width_selection)/sizeof(int); m++)
|
||||
{
|
||||
img_width = img_width_selection[m];
|
||||
img_height = img_height_selection[m];
|
||||
input_ptr = generate_8888_image(img_width, img_height, d);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
for (m=0; m<sizeof(img_width_selection)/sizeof(int); m++)
|
||||
{
|
||||
img_width = img_width_selection[m];
|
||||
img_height = img_height_selection[m];
|
||||
input_ptr = generate_8888_image(img_width, img_height, d);
|
||||
output_ptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format,
|
||||
img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
if (!streams[0])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
return -1;
|
||||
}
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format,
|
||||
img_width, img_height, 0, NULL, NULL);
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
if (!streams[1])
|
||||
{
|
||||
log_error("create_image_2d failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t origin[3] = {0,0,0}, region[3] = {img_width, img_height, 1};
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
|
||||
err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
input_ptr,
|
||||
0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
{
|
||||
log_error("clWriteImage failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba8888_kernel_code, "test_rgba8888" );
|
||||
if (err)
|
||||
{
|
||||
log_error("Failed to create kernel and program: %d\n", err);
|
||||
if (err)
|
||||
{
|
||||
log_error("Failed to create kernel and program: %d\n", err);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
|
||||
test_error(err, "clCreateSampler failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clSetKernelArgs failed\n");
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local_workgroup_size), &local_workgroup_size, NULL);
|
||||
test_error(err, "clGetKernelWorkGroupInfo for CL_KERNEL_WORK_GROUP_SIZE failed");
|
||||
@@ -177,46 +177,46 @@ test_imagenpot(cl_device_id device_id, cl_context context, cl_command_queue queu
|
||||
if (local_workgroup_size > max_local_workgroup_size[0])
|
||||
local_workgroup_size = max_local_workgroup_size[0];
|
||||
|
||||
global_threads[0] = ((img_width + local_workgroup_size - 1) / local_workgroup_size) * local_workgroup_size;
|
||||
global_threads[1] = img_height;
|
||||
global_threads[0] = ((img_width + local_workgroup_size - 1) / local_workgroup_size) * local_workgroup_size;
|
||||
global_threads[1] = img_height;
|
||||
local_threads[0] = local_workgroup_size;
|
||||
local_threads[1] = 1;
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
|
||||
free_mtdata(d);
|
||||
return -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadImage(queue, streams[1], CL_TRUE,
|
||||
origin, region, 0, 0,
|
||||
(void *)output_ptr,
|
||||
0, NULL, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
if (err != CL_SUCCESS)
|
||||
{
|
||||
log_error("clEnqueueReadBuffer failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = verify_rgba8888_image(input_ptr, output_ptr, img_width, img_height);
|
||||
err = verify_rgba8888_image(input_ptr, output_ptr, img_width, img_height);
|
||||
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
// cleanup
|
||||
clReleaseSampler(sampler);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[1]);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -45,23 +45,23 @@ verify_rgba8_image(unsigned char *image, unsigned char *outptr, int x, int y, in
|
||||
{
|
||||
int i, j, indx;
|
||||
|
||||
for (j=y; j<(y+h); j++)
|
||||
{
|
||||
indx = j*img_width*4;
|
||||
for (i=x*4; i<(x+w)*4; i++)
|
||||
{
|
||||
if (outptr[indx+i] != image[indx+i])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
for (j=y; j<(y+h); j++)
|
||||
{
|
||||
indx = j*img_width*4;
|
||||
for (i=x*4; i<(x+w)*4; i++)
|
||||
{
|
||||
if (outptr[indx+i] != image[indx+i])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static unsigned short *
|
||||
generate_rgba16_image(int w, int h, MTdata d)
|
||||
{
|
||||
unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
unsigned short *ptr = (unsigned short*)malloc(w * h * 4 * sizeof(unsigned short));
|
||||
int i;
|
||||
|
||||
for (i=0; i<w*h*4; i++)
|
||||
@@ -75,16 +75,16 @@ verify_rgba16_image(unsigned short *image, unsigned short *outptr, int x, int y,
|
||||
{
|
||||
int i, j, indx;
|
||||
|
||||
for (j=y; j<(y+h); j++)
|
||||
{
|
||||
indx = j*img_width*4;
|
||||
for (i=x*4; i<(x+w)*4; i++)
|
||||
{
|
||||
if (outptr[indx+i] != image[indx+i])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
for (j=y; j<(y+h); j++)
|
||||
{
|
||||
indx = j*img_width*4;
|
||||
for (i=x*4; i<(x+w)*4; i++)
|
||||
{
|
||||
if (outptr[indx+i] != image[indx+i])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -105,114 +105,114 @@ verify_rgbafp_image(float *image, float *outptr, int x, int y, int w, int h, int
|
||||
{
|
||||
int i, j, indx;
|
||||
|
||||
for (j=y; j<(y+h); j++)
|
||||
{
|
||||
indx = j*img_width*4;
|
||||
for (i=x*4; i<(x+w)*4; i++)
|
||||
{
|
||||
if (outptr[indx+i] != image[indx+i])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
for (j=y; j<(y+h); j++)
|
||||
{
|
||||
indx = j*img_width*4;
|
||||
for (i=x*4; i<(x+w)*4; i++)
|
||||
{
|
||||
if (outptr[indx+i] != image[indx+i])
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
#define NUM_COPIES 10
|
||||
#define NUM_COPIES 10
|
||||
static const char *test_str_names[] = { "CL_RGBA CL_UNORM_INT8", "CL_RGBA CL_UNORM_INT16", "CL_RGBA CL_FLOAT" };
|
||||
|
||||
int
|
||||
test_imagerandomcopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, j;
|
||||
cl_image_format img_format;
|
||||
unsigned char *rgba8_inptr, *rgba8_outptr;
|
||||
unsigned short *rgba16_inptr, *rgba16_outptr;
|
||||
float *rgbafp_inptr, *rgbafp_outptr;
|
||||
clMemWrapper streams[6];
|
||||
int img_width = 512;
|
||||
int img_height = 512;
|
||||
int i, j;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
|
||||
|
||||
log_info("Testing with image %d x %d.\n", img_width, img_height);
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
|
||||
rgba8_inptr = (unsigned char *)generate_rgba8_image(img_width, img_height, d);
|
||||
rgba16_inptr = (unsigned short *)generate_rgba16_image(img_width, img_height, d);
|
||||
rgbafp_inptr = (float *)generate_rgbafp_image(img_width, img_height, d);
|
||||
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
rgba8_outptr = (unsigned char*)malloc(sizeof(unsigned char) * 4 * img_width * img_height);
|
||||
rgba16_outptr = (unsigned short*)malloc(sizeof(unsigned short) * 4 * img_width * img_height);
|
||||
rgbafp_outptr = (float*)malloc(sizeof(float) * 4 * img_width * img_height);
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT8;
|
||||
streams[0] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
streams[1] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_UNORM_INT16;
|
||||
streams[2] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
img_format.image_channel_order = CL_RGBA;
|
||||
img_format.image_channel_data_type = CL_FLOAT;
|
||||
streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), &img_format, img_width, img_height, 0, NULL, &err);
|
||||
test_error(err, "create_image_2d failed");
|
||||
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
unsigned int x[2], y[2], delta_w, delta_h ;
|
||||
for (i=0; i<3; i++)
|
||||
{
|
||||
void *p, *outp;
|
||||
unsigned int x[2], y[2], delta_w, delta_h ;
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
p = (void *)rgba8_inptr;
|
||||
outp = (void *)rgba8_outptr;
|
||||
break;
|
||||
case 1:
|
||||
p = (void *)rgba16_inptr;
|
||||
outp = (void *)rgba16_outptr;
|
||||
break;
|
||||
case 2:
|
||||
p = (void *)rgbafp_inptr;
|
||||
outp = (void *)rgbafp_outptr;
|
||||
break;
|
||||
}
|
||||
|
||||
size_t origin[3]={0,0,0}, region[3]={img_width, img_height,1};
|
||||
err = clEnqueueWriteImage(queue, streams[i*2], CL_TRUE, origin, region, 0, 0, p, 0, NULL, NULL);
|
||||
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
|
||||
// err = clWriteImage(context, streams[i*2], false, 0, 0, 0, img_width, img_height, 0, NULL, 0, 0, p, NULL);
|
||||
test_error(err, "clEnqueueWriteImage failed");
|
||||
|
||||
for (j=0; j<NUM_COPIES; j++)
|
||||
{
|
||||
x[0] = (int)get_random_float(0, img_width, d);
|
||||
do
|
||||
{
|
||||
x[1] = (int)get_random_float(0, img_width, d);
|
||||
} while (x[1] <= x[0]);
|
||||
for (j=0; j<NUM_COPIES; j++)
|
||||
{
|
||||
x[0] = (int)get_random_float(0, img_width, d);
|
||||
do
|
||||
{
|
||||
x[1] = (int)get_random_float(0, img_width, d);
|
||||
} while (x[1] <= x[0]);
|
||||
|
||||
y[0] = (int)get_random_float(0, img_height, d);
|
||||
do
|
||||
{
|
||||
y[1] = (int)get_random_float(0, img_height, d);
|
||||
} while (y[1] <= y[0]);
|
||||
y[0] = (int)get_random_float(0, img_height, d);
|
||||
do
|
||||
{
|
||||
y[1] = (int)get_random_float(0, img_height, d);
|
||||
} while (y[1] <= y[0]);
|
||||
|
||||
delta_w = x[1] - x[0];
|
||||
delta_h = y[1] - y[0];
|
||||
log_info("Testing clCopyImage for %s: x = %d, y = %d, w = %d, h = %d\n", test_str_names[i], x[0], y[0], delta_w, delta_h);
|
||||
origin[0] = x[0];
|
||||
delta_w = x[1] - x[0];
|
||||
delta_h = y[1] - y[0];
|
||||
log_info("Testing clCopyImage for %s: x = %d, y = %d, w = %d, h = %d\n", test_str_names[i], x[0], y[0], delta_w, delta_h);
|
||||
origin[0] = x[0];
|
||||
origin[1] = y[0];
|
||||
origin[2] = 0;
|
||||
region[0] = delta_w;
|
||||
@@ -220,39 +220,39 @@ test_imagerandomcopy(cl_device_id device, cl_context context, cl_command_queue q
|
||||
region[2] = 1;
|
||||
err = clEnqueueCopyImage(queue, streams[i*2], streams[i*2+1], origin, origin, region, 0, NULL, NULL);
|
||||
// err = clCopyImage(context, streams[i*2], streams[i*2+1],
|
||||
// x[0], y[0], 0, x[0], y[0], 0, delta_w, delta_h, 0, NULL);
|
||||
// x[0], y[0], 0, x[0], y[0], 0, delta_w, delta_h, 0, NULL);
|
||||
test_error(err, "clEnqueueCopyImage failed");
|
||||
|
||||
origin[0] = 0;
|
||||
origin[0] = 0;
|
||||
origin[1] = 0;
|
||||
origin[2] = 0;
|
||||
region[0] = img_width;
|
||||
region[1] = img_height;
|
||||
region[2] = 1;
|
||||
err = clEnqueueReadImage(queue, streams[i*2+1], CL_TRUE, origin, region, 0, 0, outp, 0, NULL, NULL);
|
||||
// err = clReadImage(context, streams[i*2+1], false, 0, 0, 0, img_width, img_height, 0, 0, 0, outp, NULL);
|
||||
// err = clReadImage(context, streams[i*2+1], false, 0, 0, 0, img_width, img_height, 0, 0, 0, outp, NULL);
|
||||
test_error(err, "clEnqueueReadImage failed");
|
||||
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
}
|
||||
switch (i)
|
||||
{
|
||||
case 0:
|
||||
err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
case 1:
|
||||
err = verify_rgba16_image(rgba16_inptr, rgba16_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
case 2:
|
||||
err = verify_rgbafp_image(rgbafp_inptr, rgbafp_outptr, x[0], y[0], delta_w, delta_h, img_width);
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
free_mtdata(d); d = NULL;
|
||||
free(rgba8_inptr);
|
||||
@@ -262,12 +262,12 @@ test_imagerandomcopy(cl_device_id device, cl_context context, cl_command_queue q
|
||||
free(rgba16_outptr);
|
||||
free(rgbafp_outptr);
|
||||
|
||||
if (err)
|
||||
log_error("IMAGE random copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE random copy test passed\n");
|
||||
if (err)
|
||||
log_error("IMAGE random copy test failed\n");
|
||||
else
|
||||
log_info("IMAGE random copy test passed\n");
|
||||
|
||||
return err;
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user