mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-23 15:39:03 +00:00
Initial open source release of OpenCL 1.2 CTS.
This commit is contained in:
26
test_common/Makefile
Normal file
26
test_common/Makefile
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
PRODUCTS = harness/\
|
||||
|
||||
# utils/
|
||||
|
||||
TOP=$(shell pwd)
|
||||
|
||||
all: $(PRODUCTS)
|
||||
|
||||
clean:
|
||||
@for testdir in $(dir $(PRODUCTS)) ; \
|
||||
do ( \
|
||||
echo "==================================================================================" ; \
|
||||
echo "Cleaning $$testdir" ; \
|
||||
echo "==================================================================================" ; \
|
||||
cd $$testdir && make clean \
|
||||
); \
|
||||
done \
|
||||
|
||||
$(PRODUCTS):
|
||||
@echo "==================================================================================" ;
|
||||
@echo "(`date "+%H:%M:%S"`) Make $@" ;
|
||||
@echo "==================================================================================" ;
|
||||
cd $(dir $@) && make
|
||||
|
||||
.PHONY: clean $(PRODUCTS) all
|
||||
52
test_common/gl/gl_headers.h
Normal file
52
test_common/gl/gl_headers.h
Normal file
@@ -0,0 +1,52 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _gl_headers_h
|
||||
#define _gl_headers_h
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenGL/OpenGL.h>
|
||||
#if defined(CGL_VERSION_1_3)
|
||||
#include <OpenGL/gl3.h>
|
||||
#include <OpenGL/gl3ext.h>
|
||||
#else
|
||||
#include <OpenGL/gl.h>
|
||||
#include <OpenGL/glext.h>
|
||||
#endif
|
||||
#include <GLUT/glut.h>
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <GL/glew.h>
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glext.h>
|
||||
#ifdef _WIN32
|
||||
#include <GL/glut.h>
|
||||
#else
|
||||
#include <GL/freeglut.h>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString);
|
||||
// No glutGetProcAddress in the standard glut v3.7.
|
||||
#define glutGetProcAddress(procName) wglGetProcAddress(procName)
|
||||
#endif
|
||||
|
||||
|
||||
#endif // __gl_headers_h
|
||||
|
||||
1622
test_common/gl/helpers.cpp
Normal file
1622
test_common/gl/helpers.cpp
Normal file
File diff suppressed because it is too large
Load Diff
288
test_common/gl/helpers.h
Normal file
288
test_common/gl/helpers.h
Normal file
@@ -0,0 +1,288 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _helpers_h
|
||||
#define _helpers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl.h>
|
||||
#include "gl_headers.h"
|
||||
#include <CL/cl_gl.h>
|
||||
#else
|
||||
#include "gl_headers.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/threadTesting.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
GLuint bufobj,
|
||||
int * errcode_ret);
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLuint renderbuffer ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_object_type * gl_object_type ,
|
||||
GLuint * gl_object_name) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_texture_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value ,
|
||||
size_t * param_value_size_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
|
||||
extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
|
||||
extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
|
||||
extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr;
|
||||
extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr;
|
||||
extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
|
||||
extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
|
||||
extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
|
||||
extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
|
||||
extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
|
||||
|
||||
|
||||
class glBufferWrapper
|
||||
{
|
||||
public:
|
||||
glBufferWrapper() { mBuffer = 0; }
|
||||
glBufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
|
||||
|
||||
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glTextureWrapper
|
||||
{
|
||||
public:
|
||||
glTextureWrapper() { mHandle = 0; }
|
||||
glTextureWrapper( GLuint b ) { mHandle = b; }
|
||||
~glTextureWrapper() {
|
||||
if( mHandle != 0 ) glDeleteTextures( 1, &mHandle );
|
||||
}
|
||||
|
||||
glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; }
|
||||
operator GLuint() { return mHandle; }
|
||||
operator GLuint *() { return &mHandle; }
|
||||
|
||||
GLuint * operator&() { return &mHandle; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mHandle == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
// The texture handle.
|
||||
GLuint mHandle;
|
||||
};
|
||||
|
||||
class glRenderbufferWrapper
|
||||
{
|
||||
public:
|
||||
glRenderbufferWrapper() { mBuffer = 0; }
|
||||
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glFramebufferWrapper
|
||||
{
|
||||
public:
|
||||
glFramebufferWrapper() { mBuffer = 0; }
|
||||
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
|
||||
// Helper functions (defined in helpers.cpp)
|
||||
|
||||
extern void * CreateGLTexture1DArray( size_t width, size_t length,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTextureBuffer( size_t width,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTexture1D(size_t width,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
extern void * CreateGLTexture2D( size_t width, size_t height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
|
||||
extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, MTdata d, bool allocateMem = true );
|
||||
|
||||
extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width,
|
||||
GLenum glFormat, GLenum glInternalFormat,
|
||||
GLenum glType, ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer );
|
||||
|
||||
extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer,
|
||||
int *outError, MTdata d, bool allocateMem );
|
||||
|
||||
extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
|
||||
GLenum attachment, GLenum glFormat,
|
||||
GLenum glInternalFormat, GLenum glType,
|
||||
ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer);
|
||||
extern const char *GetGLTypeName( GLenum type );
|
||||
extern const char *GetGLAttachmentName( GLenum att );
|
||||
extern const char *GetGLTargetName( GLenum tgt );
|
||||
extern const char *GetGLBaseFormatName( GLenum baseformat );
|
||||
extern const char *GetGLFormatName( GLenum format );
|
||||
|
||||
extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d );
|
||||
|
||||
extern GLenum GetGLFormat(GLenum internalFormat);
|
||||
extern GLenum GetGLTypeForExplicitType(ExplicitType type);
|
||||
extern size_t GetGLTypeSize(GLenum type);
|
||||
extern ExplicitType GetExplicitTypeForGLType(GLenum type);
|
||||
|
||||
extern GLenum get_base_gl_target( GLenum target );
|
||||
|
||||
extern int init_clgl_ext( void );
|
||||
|
||||
#endif // _helpers_h
|
||||
|
||||
|
||||
|
||||
48
test_common/gl/setup.h
Normal file
48
test_common/gl/setup.h
Normal file
@@ -0,0 +1,48 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _setup_h
|
||||
#define _setup_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "gl_headers.h"
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
|
||||
// Note: the idea here is to have every platform define their own setup.cpp file that implements a GLEnvironment
|
||||
// subclass internally, then return it as a definition for GLEnvironment::Create
|
||||
|
||||
class GLEnvironment
|
||||
{
|
||||
public:
|
||||
GLEnvironment() {}
|
||||
virtual ~GLEnvironment() {}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
|
||||
virtual cl_context CreateCLContext( void ) = 0;
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
|
||||
|
||||
static GLEnvironment * Instance( void );
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif // _setup_h
|
||||
156
test_common/gl/setup_osx.cpp
Normal file
156
test_common/gl/setup_osx.cpp
Normal file
@@ -0,0 +1,156 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "setup.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include <OpenGL/CGLDevice.h>
|
||||
|
||||
class OSXGLEnvironment : public GLEnvironment
|
||||
{
|
||||
public:
|
||||
OSXGLEnvironment()
|
||||
{
|
||||
mCGLContext = NULL;
|
||||
}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 )
|
||||
{
|
||||
if (!use_opengl_32) {
|
||||
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
}
|
||||
|
||||
else {
|
||||
|
||||
CGLPixelFormatAttribute attribs[] = {
|
||||
kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core,
|
||||
kCGLPFAAllowOfflineRenderers,
|
||||
kCGLPFANoRecovery,
|
||||
kCGLPFAAccelerated,
|
||||
kCGLPFADoubleBuffer,
|
||||
(CGLPixelFormatAttribute)0
|
||||
};
|
||||
|
||||
CGLError err;
|
||||
CGLPixelFormatObj pix;
|
||||
GLint npix;
|
||||
err = CGLChoosePixelFormat (attribs, &pix, &npix);
|
||||
if(err != kCGLNoError)
|
||||
{
|
||||
log_error("Failed to choose pixel format\n");
|
||||
return -1;
|
||||
}
|
||||
err = CGLCreateContext(pix, NULL, &mCGLContext);
|
||||
if(err != kCGLNoError)
|
||||
{
|
||||
log_error("Failed to create GL context\n");
|
||||
return -1;
|
||||
}
|
||||
CGLSetCurrentContext(mCGLContext);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
int error;
|
||||
|
||||
if( mCGLContext == NULL )
|
||||
mCGLContext = CGLGetCurrentContext();
|
||||
|
||||
CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext);
|
||||
cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 };
|
||||
cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error);
|
||||
if (error) {
|
||||
print_error(error, "clCreateContext failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Verify that all devices in the context support the required extension
|
||||
cl_device_id devices[64];
|
||||
size_t size_out;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out);
|
||||
if (error) {
|
||||
print_error(error, "clGetContextInfo failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)(size_out/sizeof(cl_device_id)); i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
|
||||
log_error("Device %d does not supporte required extension cl_APPLE_gl_sharing.\n", i);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return context;
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
int found_valid_device = 0;
|
||||
cl_device_id devices[64];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
|
||||
found_valid_device = 1;
|
||||
}
|
||||
}
|
||||
return found_valid_device;
|
||||
}
|
||||
|
||||
virtual ~OSXGLEnvironment()
|
||||
{
|
||||
CGLDestroyContext( mCGLContext );
|
||||
}
|
||||
|
||||
CGLContextObj mCGLContext;
|
||||
|
||||
};
|
||||
|
||||
GLEnvironment * GLEnvironment::Instance( void )
|
||||
{
|
||||
static OSXGLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new OSXGLEnvironment();
|
||||
return env;
|
||||
}
|
||||
204
test_common/gl/setup_win32.cpp
Normal file
204
test_common/gl/setup_win32.cpp
Normal file
@@ -0,0 +1,204 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
|
||||
#include "setup.h"
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glut.h>
|
||||
#include <GL/glext.h>
|
||||
#include <GL/glut.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties *properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
// Rename references to this dynamically linked function to avoid
|
||||
// collision with static link version
|
||||
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc
|
||||
static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR;
|
||||
|
||||
#define MAX_DEVICES 32
|
||||
|
||||
class WGLEnvironment : public GLEnvironment
|
||||
{
|
||||
private:
|
||||
cl_device_id m_devices[MAX_DEVICES];
|
||||
int m_device_count;
|
||||
cl_platform_id m_platform;
|
||||
|
||||
public:
|
||||
WGLEnvironment()
|
||||
{
|
||||
m_device_count = 0;
|
||||
m_platform = 0;
|
||||
|
||||
}
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 )
|
||||
{
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
glewInit();
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
HGLRC hGLRC = wglGetCurrentContext();
|
||||
HDC hDC = wglGetCurrentDC();
|
||||
cl_context_properties properties[] = {
|
||||
CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform,
|
||||
CL_GL_CONTEXT_KHR, (cl_context_properties) hGLRC,
|
||||
CL_WGL_HDC_KHR, (cl_context_properties) hDC,
|
||||
0
|
||||
};
|
||||
cl_device_id devices[MAX_DEVICES];
|
||||
size_t dev_size;
|
||||
cl_int status;
|
||||
|
||||
if (!hGLRC || !hDC) {
|
||||
print_error(CL_INVALID_CONTEXT, "No GL context bound");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!clGetGLContextInfoKHR) {
|
||||
// As OpenCL for the platforms. Warn if more than one platform found,
|
||||
// since this might not be the platform we want. By default, we simply
|
||||
// use the first returned platform.
|
||||
|
||||
cl_uint nplatforms;
|
||||
cl_platform_id platform;
|
||||
clGetPlatformIDs(0, NULL, &nplatforms);
|
||||
clGetPlatformIDs(1, &platform, NULL);
|
||||
|
||||
if (nplatforms > 1) {
|
||||
log_info("clGetPlatformIDs returned multiple values. This is not "
|
||||
"an error, but might result in obtaining incorrect function "
|
||||
"pointers if you do not want the first returned platform.\n");
|
||||
|
||||
// Show them the platform name, in case it is a problem.
|
||||
|
||||
size_t size;
|
||||
char *name;
|
||||
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
|
||||
name = (char*)malloc(size);
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
|
||||
|
||||
log_info("Using platform with name: %s \n", name);
|
||||
free(name);
|
||||
}
|
||||
|
||||
clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR");
|
||||
if (!clGetGLContextInfoKHR) {
|
||||
print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR");
|
||||
}
|
||||
}
|
||||
|
||||
status = clGetGLContextInfoKHR(properties,
|
||||
CL_DEVICES_FOR_GL_CONTEXT_KHR,
|
||||
sizeof(devices),
|
||||
devices,
|
||||
&dev_size);
|
||||
if (status != CL_SUCCESS) {
|
||||
print_error(status, "clGetGLContextInfoKHR failed");
|
||||
return 0;
|
||||
}
|
||||
dev_size /= sizeof(cl_device_id);
|
||||
log_info("GL context supports %d compute devices\n", dev_size);
|
||||
|
||||
status = clGetGLContextInfoKHR(properties,
|
||||
CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
|
||||
sizeof(devices),
|
||||
devices,
|
||||
&dev_size);
|
||||
if (status != CL_SUCCESS) {
|
||||
print_error(status, "clGetGLContextInfoKHR failed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cl_device_id ctxDevice = m_devices[0];
|
||||
if (dev_size > 0) {
|
||||
log_info("GL context current device: 0x%x\n", devices[0]);
|
||||
for (int i = 0; i < m_device_count; i++) {
|
||||
if (m_devices[i] == devices[0]) {
|
||||
ctxDevice = devices[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log_info("GL context current device is not a CL device, using device %d.\n", ctxDevice);
|
||||
}
|
||||
|
||||
return clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status);
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
cl_device_id devices[MAX_DEVICES];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetPlatformIDs(1, &m_platform, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetPlatformIDs failed");
|
||||
return -1;
|
||||
}
|
||||
error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check all devices, search for one that supports cl_khr_gl_sharing
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_khr_gl_sharing") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
m_devices[m_device_count++] = devices[i];
|
||||
}
|
||||
}
|
||||
return m_device_count > 0;
|
||||
}
|
||||
|
||||
virtual ~WGLEnvironment()
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
GLEnvironment * GLEnvironment::Instance( void )
|
||||
{
|
||||
static WGLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new WGLEnvironment();
|
||||
return env;
|
||||
}
|
||||
122
test_common/gl/setup_x11.cpp
Normal file
122
test_common/gl/setup_x11.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
|
||||
#include "setup.h"
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glut.h>
|
||||
#include <GL/glext.h>
|
||||
#include <GL/freeglut.h>
|
||||
#include <GL/glx.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
class X11GLEnvironment : public GLEnvironment
|
||||
{
|
||||
private:
|
||||
cl_device_id m_devices[64];
|
||||
cl_uint m_device_count;
|
||||
|
||||
public:
|
||||
X11GLEnvironment()
|
||||
{
|
||||
m_device_count = 0;
|
||||
}
|
||||
virtual int Init( int *argc, char **argv, int use_opencl_32 )
|
||||
{
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
glewInit();
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
GLXContext context = glXGetCurrentContext();
|
||||
Display *dpy = glXGetCurrentDisplay();
|
||||
|
||||
cl_context_properties properties[] = {
|
||||
CL_GL_CONTEXT_KHR, (cl_context_properties) context,
|
||||
CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy,
|
||||
0
|
||||
};
|
||||
cl_int status;
|
||||
|
||||
if (!context || !dpy) {
|
||||
print_error(CL_INVALID_CONTEXT, "No GL context bound");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return clCreateContext(properties, 1, m_devices, NULL, NULL, &status);
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
int found_valid_device = 0;
|
||||
cl_platform_id platform;
|
||||
cl_device_id devices[64];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetPlatformIDs(1, &platform, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetPlatformIDs failed");
|
||||
return -1;
|
||||
}
|
||||
error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices);
|
||||
// If this platform doesn't have any of the requested device_type (namely GPUs) then return 0
|
||||
if (error == CL_DEVICE_NOT_FOUND)
|
||||
return 0;
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
found_valid_device = 1;
|
||||
m_devices[m_device_count++] = devices[i];
|
||||
}
|
||||
}
|
||||
return found_valid_device;
|
||||
}
|
||||
|
||||
virtual ~X11GLEnvironment()
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
GLEnvironment * GLEnvironment::Instance( void )
|
||||
{
|
||||
static X11GLEnvironment * env = NULL;
|
||||
if( env == NULL )
|
||||
env = new X11GLEnvironment();
|
||||
return env;
|
||||
}
|
||||
18
test_common/harness/Jamfile
Normal file
18
test_common/harness/Jamfile
Normal file
@@ -0,0 +1,18 @@
|
||||
project
|
||||
: requirements <include>.
|
||||
<toolset>gcc:<cflags>"-xc++"
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
<warnings-as-errors>off
|
||||
: usage-requirements <include>.
|
||||
;
|
||||
|
||||
local harness.objs ;
|
||||
for source in [ glob *.c *.cpp ]
|
||||
{
|
||||
harness.objs += [ obj $(source:B).obj : $(source) ] ;
|
||||
}
|
||||
|
||||
alias harness : $(harness.objs)
|
||||
: <use>/Runtime//OpenCL.lib :
|
||||
: <library>/Runtime//OpenCL.lib
|
||||
;
|
||||
41
test_common/harness/Makefile
Normal file
41
test_common/harness/Makefile
Normal file
@@ -0,0 +1,41 @@
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
SRCS = conversions.c \
|
||||
errorHelpers.c \
|
||||
genericThread.cpp \
|
||||
imageHelpers.cpp \
|
||||
kernelHelpers.c \
|
||||
mt19937.c \
|
||||
rounding_mode.c \
|
||||
testHarness.c \
|
||||
testHarness.cpp \
|
||||
ThreadPool.c \
|
||||
threadTesting.c \
|
||||
typeWrappers.cpp
|
||||
|
||||
DEFINES = DONT_TEST_GARBAGE_POINTERS
|
||||
|
||||
SOURCES = $(abspath $(SRCS))
|
||||
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||
LIBPATH += -L.
|
||||
HEADERS =
|
||||
INCLUDE =
|
||||
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
|
||||
CC = c++
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||
|
||||
OBJECTS := ${SOURCES:.c=.o}
|
||||
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||
|
||||
all: $(OBJECTS)
|
||||
|
||||
clean:
|
||||
rm -f $(OBJECTS)
|
||||
|
||||
.DEFAULT:
|
||||
@echo The target \"$@\" does not exist in Makefile.
|
||||
899
test_common/harness/ThreadPool.c
Normal file
899
test_common/harness/ThreadPool.c
Normal file
@@ -0,0 +1,899 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "ThreadPool.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "fpcontrol.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system
|
||||
|
||||
#if defined( _WIN32 )
|
||||
#include <windows.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include "mingw_compat.h"
|
||||
#include <process.h>
|
||||
#else // !_WIN32
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/errno.h>
|
||||
#endif // !_WIN32
|
||||
|
||||
// declarations
|
||||
#ifdef _WIN32
|
||||
void ThreadPool_WorkerFunc( void *p );
|
||||
#else
|
||||
void *ThreadPool_WorkerFunc( void *p );
|
||||
#endif
|
||||
void ThreadPool_Init(void);
|
||||
void ThreadPool_Exit(void);
|
||||
|
||||
#if defined (__MINGW32__)
|
||||
// Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
|
||||
CRITICAL_SECTION gAtomicLock;
|
||||
#elif defined( __GNUC__ ) || defined( _MSC_VER)
|
||||
#else
|
||||
pthread_mutex_t gAtomicLock;
|
||||
#endif
|
||||
|
||||
// Atomic add operator with mem barrier. Mem barrier needed to protect state modified by the worker functions.
|
||||
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
|
||||
{
|
||||
#if defined (__MINGW32__)
|
||||
// No atomics on Mingw32
|
||||
EnterCriticalSection(&gAtomicLock);
|
||||
cl_int old = *a;
|
||||
*a = old + b;
|
||||
LeaveCriticalSection(&gAtomicLock);
|
||||
return old;
|
||||
#elif defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
return __sync_fetch_and_add( a, b );
|
||||
// do we need __sync_synchronize() here, too? GCC docs are unclear whether __sync_fetch_and_add does a synchronize
|
||||
#elif defined( _MSC_VER )
|
||||
return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
|
||||
#else
|
||||
#warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow.
|
||||
if( pthread_mutex_lock(&gAtomicLock) )
|
||||
log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
|
||||
cl_int old = *a;
|
||||
*a = old + b;
|
||||
if( pthread_mutex_unlock(&gAtomicLock) )
|
||||
log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");
|
||||
return old;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined( _WIN32 )
|
||||
// Uncomment the following line if Windows XP support is not required.
|
||||
// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
|
||||
|
||||
#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
|
||||
#define _INIT_ONCE INIT_ONCE
|
||||
#define _PINIT_ONCE PINIT_ONCE
|
||||
#define _InitOnceExecuteOnce InitOnceExecuteOnce
|
||||
#else // !HAS_INIT_ONCE_EXECUTE_ONCE
|
||||
|
||||
typedef volatile LONG _INIT_ONCE;
|
||||
typedef _INIT_ONCE *_PINIT_ONCE;
|
||||
typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
|
||||
|
||||
#define _INIT_ONCE_UNINITIALIZED 0
|
||||
#define _INIT_ONCE_IN_PROGRESS 1
|
||||
#define _INIT_ONCE_DONE 2
|
||||
|
||||
static BOOL _InitOnceExecuteOnce(
|
||||
_PINIT_ONCE InitOnce,
|
||||
_PINIT_ONCE_FN InitFn,
|
||||
PVOID Parameter,
|
||||
LPVOID *Context
|
||||
)
|
||||
{
|
||||
while ( *InitOnce != _INIT_ONCE_DONE )
|
||||
{
|
||||
if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
|
||||
{
|
||||
InitFn( InitOnce, Parameter, Context );
|
||||
*InitOnce = _INIT_ONCE_DONE;
|
||||
return TRUE;
|
||||
}
|
||||
Sleep( 1 );
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
|
||||
|
||||
// Uncomment the following line if Windows XP support is not required.
|
||||
// #define HAS_CONDITION_VARIABLE 1
|
||||
|
||||
#if defined(HAS_CONDITION_VARIABLE)
|
||||
#define _CONDITION_VARIABLE CONDITION_VARIABLE
|
||||
#define _InitializeConditionVariable InitializeConditionVariable
|
||||
#define _SleepConditionVariableCS SleepConditionVariableCS
|
||||
#define _WakeAllConditionVariable WakeAllConditionVariable
|
||||
#else // !HAS_CONDITION_VARIABLE
|
||||
typedef struct
|
||||
{
|
||||
HANDLE mEvent; // Used to park the thread.
|
||||
CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
|
||||
volatile cl_int mWaiters; // Number of threads waiting on this cond var.
|
||||
volatile cl_int mGeneration; // Wait generation count.
|
||||
volatile cl_int mReleaseCount; // Number of releases to execute before reseting the event.
|
||||
} _CONDITION_VARIABLE;
|
||||
|
||||
typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
|
||||
|
||||
static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
|
||||
{
|
||||
cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
|
||||
InitializeCriticalSection( cond_var->mLock );
|
||||
cond_var->mWaiters = 0;
|
||||
cond_var->mGeneration = 0;
|
||||
#if !defined ( NDEBUG )
|
||||
cond_var->mReleaseCount = 0;
|
||||
#endif // !NDEBUG
|
||||
}
|
||||
|
||||
static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
|
||||
{
|
||||
EnterCriticalSection( cond_var->mLock );
|
||||
cl_int generation = cond_var->mGeneration;
|
||||
++cond_var->mWaiters;
|
||||
LeaveCriticalSection( cond_var->mLock );
|
||||
LeaveCriticalSection( cond_lock );
|
||||
|
||||
while ( TRUE )
|
||||
{
|
||||
WaitForSingleObject( cond_var->mEvent, INFINITE );
|
||||
EnterCriticalSection( cond_var->mLock );
|
||||
BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
|
||||
LeaveCriticalSection( cond_var->mLock );
|
||||
if ( done )
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
EnterCriticalSection( cond_lock );
|
||||
EnterCriticalSection( cond_var->mLock );
|
||||
if ( --cond_var->mReleaseCount == 0 )
|
||||
{
|
||||
ResetEvent( cond_var->mEvent );
|
||||
}
|
||||
--cond_var->mWaiters;
|
||||
LeaveCriticalSection( cond_var->mLock );
|
||||
}
|
||||
|
||||
static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
|
||||
{
|
||||
EnterCriticalSection( cond_var->mLock );
|
||||
if (cond_var->mWaiters > 0 )
|
||||
{
|
||||
++cond_var->mGeneration;
|
||||
cond_var->mReleaseCount = cond_var->mWaiters;
|
||||
SetEvent( cond_var->mEvent );
|
||||
}
|
||||
LeaveCriticalSection( cond_var->mLock );
|
||||
}
|
||||
#endif // !HAS_CONDITION_VARIABLE
|
||||
#endif // _WIN32
|
||||
|
||||
#define MAX_COUNT (1<<29)
|
||||
|
||||
// Global state to coordinate whether the threads have been launched successfully or not
|
||||
#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
|
||||
static _INIT_ONCE threadpool_init_control;
|
||||
#elif defined (_WIN32) // MingW of XP
|
||||
static int threadpool_init_control;
|
||||
#else // Posix platforms
|
||||
pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
|
||||
#endif
|
||||
cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
|
||||
|
||||
// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time,
|
||||
// because we are too lazy to set up a queue here, and don't expect to need one.
|
||||
#if defined( _WIN32 )
|
||||
CRITICAL_SECTION gThreadPoolLock[1];
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t gThreadPoolLock;
|
||||
#endif // !_WIN32
|
||||
|
||||
// Condition variable to park ThreadPool threads when not working
|
||||
#if defined( _WIN32 )
|
||||
CRITICAL_SECTION cond_lock[1];
|
||||
_CONDITION_VARIABLE cond_var[1];
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t cond_lock;
|
||||
pthread_cond_t cond_var;
|
||||
#endif // !_WIN32
|
||||
volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run.
|
||||
// set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
|
||||
|
||||
// State that only changes when the threadpool is not working.
|
||||
volatile TPFuncPtr gFunc_ptr = NULL;
|
||||
volatile void *gUserInfo = NULL;
|
||||
volatile cl_int gJobCount = 0;
|
||||
|
||||
// State that may change while the thread pool is working
|
||||
volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
|
||||
|
||||
// Condition variable to park caller while waiting
|
||||
#if defined( _WIN32 )
|
||||
HANDLE caller_event;
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t caller_cond_lock;
|
||||
pthread_cond_t caller_cond_var;
|
||||
#endif // !_WIN32
|
||||
volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
|
||||
|
||||
// The total number of threads launched.
|
||||
volatile cl_int gThreadCount = 0;
|
||||
#ifdef _WIN32
|
||||
void ThreadPool_WorkerFunc( void *p )
|
||||
#else
|
||||
void *ThreadPool_WorkerFunc( void *p )
|
||||
#endif
|
||||
{
|
||||
cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
|
||||
cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
ThreadPool_AtomicAdd( &gRunning, 1 );
|
||||
// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
|
||||
|
||||
while( MAX_COUNT > item )
|
||||
{
|
||||
cl_int err;
|
||||
|
||||
// check for more work to do
|
||||
if( 0 >= item )
|
||||
{
|
||||
// log_info( "Thread %d has run out of work.\n", threadID );
|
||||
|
||||
// No work to do. Attempt to block waiting for work
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
|
||||
// log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
|
||||
if( 1 == remaining )
|
||||
{ // last thread out signal the main thread to wake up
|
||||
#if defined( _WIN32 )
|
||||
SetEvent( caller_event );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if( (err = pthread_cond_broadcast( &caller_cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
// loop in case we are woken only to discover that some other thread already did all the work
|
||||
while( 0 >= item )
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
_SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
|
||||
pthread_mutex_unlock( &cond_lock);
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// try again to get a valid item id
|
||||
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
if( MAX_COUNT <= item ) // exit if we are done
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
pthread_mutex_unlock( &cond_lock);
|
||||
#endif // !_WIN32
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
ThreadPool_AtomicAdd( &gRunning, 1 );
|
||||
// log_info( "Thread %d has found work.\n", threadID);
|
||||
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_unlock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
}
|
||||
|
||||
// we have a valid item, so do the work
|
||||
if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error
|
||||
{
|
||||
// log_info( "Thread %d doing job %d\n", threadID, item - 1);
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
|
||||
// Call the user's function with this item ID
|
||||
err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
if( err )
|
||||
{
|
||||
#if (__MINGW32__)
|
||||
EnterCriticalSection(&gAtomicLock);
|
||||
if( jobError == CL_SUCCESS );
|
||||
jobError = err;
|
||||
gRunCount = 0;
|
||||
LeaveCriticalSection(&gAtomicLock);
|
||||
#elif defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
// set the new error if we are the first one there.
|
||||
__sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
__sync_synchronize();
|
||||
#elif defined( _MSC_VER )
|
||||
// set the new error if we are the first one there.
|
||||
_InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
_mm_mfence();
|
||||
#else
|
||||
if( pthread_mutex_lock(&gAtomicLock) )
|
||||
log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
|
||||
if( jobError == CL_SUCCESS );
|
||||
jobError = err;
|
||||
gRunCount = 0;
|
||||
if( pthread_mutex_unlock(&gAtomicLock) )
|
||||
log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// get the next item
|
||||
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
}
|
||||
|
||||
exit:
|
||||
log_info( "ThreadPool: thread %d exiting.\n", threadID );
|
||||
ThreadPool_AtomicAdd( &gThreadCount, -1 );
|
||||
#if !defined(_WIN32)
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// SetThreadCount() may be used to artifically set the number of worker threads
|
||||
// If the value is 0 (the default) the number of threads will be determined based on
|
||||
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
|
||||
// that we still get some testing for thread safety.
|
||||
//
|
||||
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
|
||||
// code will run single threaded, but will report an error to indicate that the test
|
||||
// is invalid. This option is intended for debugging purposes only. It is suggested
|
||||
// as a convention that test apps set the thread count to 1 in response to the -m flag.
|
||||
//
|
||||
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
|
||||
// otherwise the behavior is indefined.
|
||||
void SetThreadCount( int count )
|
||||
{
|
||||
if( threadPoolInitErr == CL_SUCCESS )
|
||||
{
|
||||
log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
|
||||
abort();
|
||||
}
|
||||
|
||||
gThreadCount = count;
|
||||
}
|
||||
|
||||
void ThreadPool_Init(void)
|
||||
{
|
||||
cl_int i;
|
||||
int err;
|
||||
volatile cl_uint threadID = 0;
|
||||
|
||||
// Check for manual override of multithreading code. We add this for better debuggability.
|
||||
if( getenv( "CL_TEST_SINGLE_THREADED" ) )
|
||||
{
|
||||
log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
// Figure out how many threads to run -- check first for non-zero to give the implementation the chance
|
||||
if( 0 == gThreadCount )
|
||||
{
|
||||
#if defined(_MSC_VER) || defined (__MINGW64__)
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
|
||||
DWORD length = 0;
|
||||
|
||||
GetLogicalProcessorInformation( NULL, &length );
|
||||
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
|
||||
if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
|
||||
{
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
|
||||
while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
|
||||
{
|
||||
if( ptr->Relationship == RelationProcessorCore )
|
||||
{
|
||||
// Count the number of bits in ProcessorMask (number of logical cores)
|
||||
ULONG mask = ptr->ProcessorMask;
|
||||
while( mask )
|
||||
{
|
||||
++gThreadCount;
|
||||
mask &= mask - 1; // Remove 1 bit at a time
|
||||
}
|
||||
}
|
||||
++ptr;
|
||||
}
|
||||
free(buffer);
|
||||
}
|
||||
#elif defined (__MINGW32__)
|
||||
{
|
||||
#warning How about this, instead of hard coding it to 2?
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo( &sysinfo );
|
||||
gThreadCount = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
#else // !_WIN32
|
||||
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||
#endif // !_WIN32
|
||||
|
||||
// Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
|
||||
if( 1 == gThreadCount )
|
||||
gThreadCount = 2;
|
||||
}
|
||||
|
||||
//Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
|
||||
if( gThreadCount < 2 )
|
||||
{
|
||||
log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined( _WIN32 )
|
||||
InitializeCriticalSection( gThreadPoolLock );
|
||||
InitializeCriticalSection( cond_lock );
|
||||
_InitializeConditionVariable( cond_var );
|
||||
caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
|
||||
#elif defined (__GNUC__)
|
||||
// Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
|
||||
// with some flavors of gcc compilers.
|
||||
pthread_cond_init(&cond_var, NULL);
|
||||
pthread_mutex_init(&cond_lock ,NULL);
|
||||
pthread_cond_init(&caller_cond_var, NULL);
|
||||
pthread_mutex_init(&caller_cond_lock, NULL);
|
||||
pthread_mutex_init(&gThreadPoolLock, NULL);
|
||||
#endif
|
||||
|
||||
#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
|
||||
pthread_mutex_initialize(gAtomicLock);
|
||||
#elif defined (__MINGW32__)
|
||||
InitializeCriticalSection(&gAtomicLock);
|
||||
#endif
|
||||
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
|
||||
// That would cause a deadlock.
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// init threads
|
||||
for( i = 0; i < gThreadCount; i++ )
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
|
||||
err = ( handle == 0 );
|
||||
#else // !_WIN32
|
||||
pthread_t tid = 0;
|
||||
err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
|
||||
#endif // !_WIN32
|
||||
if( err )
|
||||
{
|
||||
log_error( "Error %d launching thread %d\n", err, i );
|
||||
threadPoolInitErr = err;
|
||||
gThreadCount = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
atexit( ThreadPool_Exit );
|
||||
|
||||
// block until they are done launching.
|
||||
do
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
WaitForSingleObject( caller_event, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
pthread_mutex_unlock( &caller_cond_lock);
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
while( gRunCount != -gThreadCount );
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
threadPoolInitErr = CL_SUCCESS;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
|
||||
{
|
||||
ThreadPool_Init();
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
void ThreadPool_Exit(void)
|
||||
{
|
||||
int err, count;
|
||||
gRunCount = CL_INT_MAX;
|
||||
|
||||
#if defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
__sync_synchronize();
|
||||
#elif defined( _MSC_VER )
|
||||
_mm_mfence();
|
||||
#else
|
||||
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
|
||||
#endif
|
||||
|
||||
// spin waiting for threads to die
|
||||
for (count = 0; 0 != gThreadCount && count < 1000; count++)
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
_WakeAllConditionVariable( cond_var );
|
||||
Sleep(1);
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_cond_broadcast( &cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
|
||||
break;
|
||||
}
|
||||
usleep(1000);
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
if( gThreadCount )
|
||||
log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
|
||||
else
|
||||
log_info( "Thread pool exited in a orderly fashion.\n" );
|
||||
}
|
||||
|
||||
|
||||
// Blocking API that farms out count jobs to a thread pool.
|
||||
// It may return with some work undone if func_ptr() returns a non-zero
|
||||
// result.
|
||||
//
|
||||
// This function obviously has its shortcommings. Only one call to ThreadPool_Do
|
||||
// can be running at a time. It is not intended for general purpose use.
|
||||
// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
|
||||
// all available then it would make more sense to use those features.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo )
|
||||
{
|
||||
cl_int newErr;
|
||||
cl_int err = 0;
|
||||
// Lazily set up our threads
|
||||
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
|
||||
err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
|
||||
#elif defined (_WIN32)
|
||||
if (threadpool_init_control == 0) {
|
||||
#warning This is buggy and race prone. Find a better way.
|
||||
ThreadPool_Init();
|
||||
threadpool_init_control = 1;
|
||||
}
|
||||
#else //posix platform
|
||||
err = pthread_once( &threadpool_init_control, ThreadPool_Init );
|
||||
if( err )
|
||||
{
|
||||
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
// Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
|
||||
if( threadPoolInitErr )
|
||||
{
|
||||
cl_uint currentJob = 0;
|
||||
cl_int result = CL_SUCCESS;
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
for( currentJob = 0; currentJob < count; currentJob++ )
|
||||
if((result = func_ptr( currentJob, 0, userInfo )))
|
||||
{
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state before leaving
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state before leaving
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
if( count >= MAX_COUNT )
|
||||
{
|
||||
log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Enter critical region
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( gThreadPoolLock );
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_mutex_lock( &gThreadPoolLock )))
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case EDEADLK:
|
||||
log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
|
||||
break;
|
||||
case EINVAL:
|
||||
log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Start modifying the job state observable by worker threads
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
|
||||
// That would cause a deadlock.
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Prime the worker threads to get going
|
||||
jobError = CL_SUCCESS;
|
||||
gRunCount = gJobCount = count;
|
||||
gFunc_ptr = func_ptr;
|
||||
gUserInfo = userInfo;
|
||||
|
||||
#if defined( _WIN32 )
|
||||
_WakeAllConditionVariable( cond_var );
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_cond_broadcast( &cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if((err = pthread_mutex_unlock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// block until they are done. It would be slightly more efficient to do some of the work here though.
|
||||
do
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
WaitForSingleObject( caller_event, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
pthread_mutex_unlock( &caller_cond_lock);
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
while( gRunning );
|
||||
#if !defined(_WIN32)
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
err = jobError;
|
||||
|
||||
exit:
|
||||
// exit critical region
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( gThreadPoolLock );
|
||||
#else // !_WIN32
|
||||
newErr = pthread_mutex_unlock( &gThreadPoolLock );
|
||||
if( newErr)
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
cl_uint GetThreadCount( void )
|
||||
{
|
||||
// Lazily set up our threads
|
||||
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
|
||||
cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
|
||||
#elif defined (_WIN32)
|
||||
if (threadpool_init_control == 0) {
|
||||
#warning This is buggy and race prone. Find a better way.
|
||||
ThreadPool_Init();
|
||||
threadpool_init_control = 1;
|
||||
}
|
||||
#else
|
||||
cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
|
||||
if( err )
|
||||
{
|
||||
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
if( gThreadCount < 1 )
|
||||
return 1;
|
||||
|
||||
return gThreadCount;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
|
||||
#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
|
||||
#endif
|
||||
//
|
||||
// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
|
||||
// of OpenCL API, while also checking
|
||||
//
|
||||
// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
|
||||
// It is not okay to use this for conformance testing!!!
|
||||
//
|
||||
// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code.
|
||||
//
|
||||
|
||||
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
|
||||
{
|
||||
cl_uint r = *a;
|
||||
|
||||
// since this fallback code path is not multithreaded, we just do a regular add here
|
||||
// If your operating system supports memory-barrier-atomics, use those here
|
||||
*a = r + b;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
// Blocking API that farms out count jobs to a thread pool.
|
||||
// It may return with some work undone if func_ptr() returns a non-zero
|
||||
// result.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo )
|
||||
{
|
||||
cl_uint currentJob = 0;
|
||||
cl_int result = CL_SUCCESS;
|
||||
|
||||
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
|
||||
// THIS FUNCTION IS NOT INTENDED FOR USE!!
|
||||
log_error( "ERROR: Test must be multithreaded!\n" );
|
||||
exit(-1);
|
||||
#else
|
||||
static int spewCount = 0;
|
||||
|
||||
if( 0 == spewCount )
|
||||
{
|
||||
log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
|
||||
spewCount = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// The multithreaded code should mimic this behavior:
|
||||
for( currentJob = 0; currentJob < count; currentJob++ )
|
||||
if((result = func_ptr( currentJob, 0, userInfo )))
|
||||
return result;
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_uint GetThreadCount( void )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
void SetThreadCount( int count )
|
||||
{
|
||||
if( count > 1 )
|
||||
log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
|
||||
}
|
||||
|
||||
#endif
|
||||
76
test_common/harness/ThreadPool.h
Normal file
76
test_common/harness/ThreadPool.h
Normal file
@@ -0,0 +1,76 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef THREAD_POOL_H
|
||||
#define THREAD_POOL_H
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// An atomic add operator
|
||||
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value
|
||||
|
||||
// Your function prototype
|
||||
//
|
||||
// A function pointer to the function you want to execute in a multithreaded context. No
|
||||
// synchronization primitives are provided, other than the atomic add above. You may not
|
||||
// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
|
||||
// work, however.
|
||||
//
|
||||
// job ids and thread ids are 0 based. If number of jobs or threads was 8, they will numbered be 0 through 7.
|
||||
// Note that while every job will be run, it is not guaranteed that every thread will wake up before
|
||||
// the work is done.
|
||||
typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
|
||||
|
||||
// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
|
||||
// Some workitems may not run if a non-zero result is returned from func_ptr().
|
||||
// This function may not be called from a TPFuncPtr.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo );
|
||||
|
||||
// Returns the number of worker threads that underlie the threadpool. The value passed
|
||||
// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
|
||||
// This is safe to call from a TPFuncPtr.
|
||||
cl_uint GetThreadCount( void );
|
||||
|
||||
// SetThreadCount() may be used to artifically set the number of worker threads
|
||||
// If the value is 0 (the default) the number of threads will be determined based on
|
||||
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
|
||||
// that we still get some testing for thread safety.
|
||||
//
|
||||
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
|
||||
// code will run single threaded, but will report an error to indicate that the test
|
||||
// is invalid. This option is intended for debugging purposes only. It is suggested
|
||||
// as a convention that test apps set the thread count to 1 in response to the -m flag.
|
||||
//
|
||||
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
|
||||
// otherwise the behavior is indefined. It may not be called from a TPFuncPtr.
|
||||
void SetThreadCount( int count );
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* THREAD_POOL_H */
|
||||
253
test_common/harness/clImageHelper.h
Normal file
253
test_common/harness/clImageHelper.h
Normal file
@@ -0,0 +1,253 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef test_conformance_clImageHelper_h
|
||||
#define test_conformance_clImageHelper_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include "errorHelpers.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
// helper function to replace clCreateImage2D , to make the existing code use
|
||||
// the functions of version 1.2 and veriosn 1.1 respectively
|
||||
|
||||
inline cl_mem create_image_2d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_row_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage = NULL;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
cl_image_desc image_desc_dest;
|
||||
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
|
||||
image_desc_dest.image_width = image_width;
|
||||
image_desc_dest.image_height = image_height;
|
||||
image_desc_dest.image_depth= 0;// not usedfor 2d
|
||||
image_desc_dest.image_array_size = 0;// not used for 2d
|
||||
image_desc_dest.image_row_pitch = image_row_pitch;
|
||||
image_desc_dest.image_slice_pitch = 0;
|
||||
image_desc_dest.num_mip_levels = 0;
|
||||
image_desc_dest.num_samples = 0;
|
||||
image_desc_dest.buffer = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
||||
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
#else
|
||||
mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_3d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_depth,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = image_height;
|
||||
image_desc.image_depth = image_depth;
|
||||
image_desc.image_array_size = 0;// not used for one image
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
#else
|
||||
mImage = clCreateImage3D( context,
|
||||
flags, image_format,
|
||||
image_width,
|
||||
image_height,
|
||||
image_depth,
|
||||
image_row_pitch,
|
||||
image_slice_pitch,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_2d_array (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_array_size,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = image_height;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_array_size = image_array_size;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = NULL;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_1d_array (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_array_size,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = 1;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_array_size = image_array_size;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = NULL;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_1d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_row_pitch,
|
||||
void *host_ptr,
|
||||
cl_mem buffer,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = 1;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = 0;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = buffer;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
200
test_common/harness/compat.h
Normal file
200
test_common/harness/compat.h
Normal file
@@ -0,0 +1,200 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _COMPAT_H_
|
||||
#define _COMPAT_H_
|
||||
|
||||
#if defined(_WIN32) && defined (_MSC_VER)
|
||||
|
||||
#include <Windows.h>
|
||||
#include <Winbase.h>
|
||||
#include <CL/cl.h>
|
||||
#include <float.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
||||
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
||||
|
||||
#define isfinite(x) _finite(x)
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
typedef char bool;
|
||||
#define inline
|
||||
|
||||
#else
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef char int8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef short int16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef int int32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef long long int64_t;
|
||||
|
||||
#define MAXPATHLEN MAX_PATH
|
||||
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
|
||||
|
||||
#define INFINITY (FLT_MAX + FLT_MAX)
|
||||
//#define NAN (INFINITY | 1)
|
||||
//const static int PINFBITPATT_SP32 = INFINITY;
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846264338327950288
|
||||
#endif
|
||||
|
||||
|
||||
#define isnan( x ) ((x) != (x))
|
||||
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
|
||||
|
||||
double rint( double x);
|
||||
float rintf( float x);
|
||||
long double rintl( long double x);
|
||||
|
||||
float cbrtf( float );
|
||||
double cbrt( double );
|
||||
|
||||
int ilogb( double x);
|
||||
int ilogbf (float x);
|
||||
int ilogbl(long double x);
|
||||
|
||||
double fmax(double x, double y);
|
||||
double fmin(double x, double y);
|
||||
float fmaxf( float x, float y );
|
||||
float fminf(float x, float y);
|
||||
|
||||
double log2(double x);
|
||||
long double log2l(long double x);
|
||||
|
||||
double exp2(double x);
|
||||
long double exp2l(long double x);
|
||||
|
||||
double fdim(double x, double y);
|
||||
float fdimf(float x, float y);
|
||||
long double fdiml(long double x, long double y);
|
||||
|
||||
double remquo( double x, double y, int *quo);
|
||||
float remquof( float x, float y, int *quo);
|
||||
long double remquol( long double x, long double y, int *quo);
|
||||
|
||||
long double scalblnl(long double x, long n);
|
||||
|
||||
inline long long
|
||||
llabs(long long __x) { return __x >= 0 ? __x : -__x; }
|
||||
|
||||
|
||||
// end of math functions
|
||||
|
||||
uint64_t ReadTime( void );
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
|
||||
#define sleep(X) Sleep(1000*X)
|
||||
#define snprintf sprintf_s
|
||||
//#define hypotl _hypot
|
||||
|
||||
float make_nan();
|
||||
float nanf( const char* str);
|
||||
double nan( const char* str);
|
||||
long double nanl( const char* str);
|
||||
|
||||
//#if defined USE_BOOST
|
||||
//#include <boost/math/tr1.hpp>
|
||||
//double hypot(double x, double y);
|
||||
float hypotf(float x, float y);
|
||||
long double hypotl(long double x, long double y) ;
|
||||
double lgamma(double x);
|
||||
float lgammaf(float x);
|
||||
|
||||
double trunc(double x);
|
||||
float truncf(float x);
|
||||
|
||||
double log1p(double x);
|
||||
float log1pf(float x);
|
||||
long double log1pl(long double x);
|
||||
|
||||
double copysign(double x, double y);
|
||||
float copysignf(float x, float y);
|
||||
long double copysignl(long double x, long double y);
|
||||
|
||||
long lround(double x);
|
||||
long lroundf(float x);
|
||||
//long lroundl(long double x)
|
||||
|
||||
double round(double x);
|
||||
float roundf(float x);
|
||||
long double roundl(long double x);
|
||||
|
||||
int signbit(double x);
|
||||
int signbitf(float x);
|
||||
|
||||
//bool signbitl(long double x) { return boost::math::tr1::signbit<long double>(x); }
|
||||
//#endif // USE_BOOST
|
||||
|
||||
long int lrint (double flt);
|
||||
long int lrintf (float flt);
|
||||
|
||||
|
||||
float int2float (int32_t ix);
|
||||
int32_t float2int (float fx);
|
||||
|
||||
/** Returns the number of leading 0-bits in x,
|
||||
starting at the most significant bit position.
|
||||
If x is 0, the result is undefined.
|
||||
*/
|
||||
int __builtin_clz(unsigned int pattern);
|
||||
|
||||
|
||||
static const double zero= 0.00000000000000000000e+00;
|
||||
#define NAN (INFINITY - INFINITY)
|
||||
#define HUGE_VALF (float)HUGE_VAL
|
||||
|
||||
int usleep(int usec);
|
||||
|
||||
// reimplement fenv.h because windows doesn't have it
|
||||
#define FE_INEXACT 0x0020
|
||||
#define FE_UNDERFLOW 0x0010
|
||||
#define FE_OVERFLOW 0x0008
|
||||
#define FE_DIVBYZERO 0x0004
|
||||
#define FE_INVALID 0x0001
|
||||
#define FE_ALL_EXCEPT 0x003D
|
||||
|
||||
int fetestexcept(int excepts);
|
||||
int feclearexcept(int excepts);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#else // !((defined(_WIN32) && defined(_MSC_VER)
|
||||
#if defined(__MINGW32__)
|
||||
#include <windows.h>
|
||||
#define sleep(X) Sleep(1000*X)
|
||||
|
||||
#endif
|
||||
#define MAKE_HEX_FLOAT(x,y,z) x
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) x
|
||||
#define MAKE_HEX_LONG(x,y,z) x
|
||||
|
||||
#endif // !((defined(_WIN32) && defined(_MSC_VER)
|
||||
|
||||
|
||||
#endif // _COMPAT_H_
|
||||
1198
test_common/harness/conversions.c
Normal file
1198
test_common/harness/conversions.c
Normal file
File diff suppressed because it is too large
Load Diff
127
test_common/harness/conversions.h
Normal file
127
test_common/harness/conversions.h
Normal file
@@ -0,0 +1,127 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _conversions_h
|
||||
#define _conversions_h
|
||||
|
||||
#include "errorHelpers.h"
|
||||
#include "mt19937.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include "compat.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Note: the next three all have to match in size and order!! */
|
||||
|
||||
enum ExplicitTypes
|
||||
{
|
||||
kBool = 0,
|
||||
kChar,
|
||||
kUChar,
|
||||
kUnsignedChar,
|
||||
kShort,
|
||||
kUShort,
|
||||
kUnsignedShort,
|
||||
kInt,
|
||||
kUInt,
|
||||
kUnsignedInt,
|
||||
kLong,
|
||||
kULong,
|
||||
kUnsignedLong,
|
||||
kFloat,
|
||||
kHalf,
|
||||
kDouble,
|
||||
kNumExplicitTypes
|
||||
};
|
||||
|
||||
typedef enum ExplicitTypes ExplicitType;
|
||||
|
||||
enum RoundingTypes
|
||||
{
|
||||
kRoundToEven = 0,
|
||||
kRoundToZero,
|
||||
kRoundToPosInf,
|
||||
kRoundToNegInf,
|
||||
kRoundToNearest,
|
||||
|
||||
kNumRoundingTypes,
|
||||
|
||||
kDefaultRoundingType = kRoundToNearest
|
||||
};
|
||||
|
||||
typedef enum RoundingTypes RoundingType;
|
||||
|
||||
extern void print_type_to_string(ExplicitType type, void *data, char* string);
|
||||
extern size_t get_explicit_type_size( ExplicitType type );
|
||||
extern const char * get_explicit_type_name( ExplicitType type );
|
||||
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
|
||||
|
||||
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
|
||||
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
|
||||
|
||||
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
|
||||
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
|
||||
extern float read_as_float( void *inRaw, ExplicitType inType );
|
||||
|
||||
extern float get_random_float(float low, float high, MTdata d);
|
||||
extern double get_random_double(double low, double high, MTdata d);
|
||||
extern float any_float( MTdata d );
|
||||
extern double any_double( MTdata d );
|
||||
|
||||
extern int random_in_range( int minV, int maxV, MTdata d );
|
||||
|
||||
size_t get_random_size_t(size_t low, size_t high, MTdata d);
|
||||
|
||||
// Note: though this takes a double, this is for use with single precision tests
|
||||
static inline int IsFloatSubnormal( float x )
|
||||
{
|
||||
#if 2 == FLT_RADIX
|
||||
// Do this in integer to avoid problems with FTZ behavior
|
||||
union{ float d; uint32_t u;}u;
|
||||
u.d = fabsf(x);
|
||||
return (u.u-1) < 0x007fffffU;
|
||||
#else
|
||||
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
|
||||
return fabs(x) < (double) FLT_MIN && x != 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline int IsDoubleSubnormal( double x )
|
||||
{
|
||||
#if 2 == FLT_RADIX
|
||||
// Do this in integer to avoid problems with FTZ behavior
|
||||
union{ double d; uint64_t u;}u;
|
||||
u.d = fabs( x);
|
||||
return (u.u-1) < 0x000fffffffffffffULL;
|
||||
#else
|
||||
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
|
||||
return fabs(x) < (double) DBL_MIN && x != 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _conversions_h
|
||||
|
||||
|
||||
585
test_common/harness/errorHelpers.c
Normal file
585
test_common/harness/errorHelpers.c
Normal file
@@ -0,0 +1,585 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "errorHelpers.h"
|
||||
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
const char *IGetErrorString( int clErrorCode )
|
||||
{
|
||||
switch( clErrorCode )
|
||||
{
|
||||
case CL_SUCCESS: return "CL_SUCCESS";
|
||||
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
||||
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
||||
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
||||
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
||||
case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
||||
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
||||
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
|
||||
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
|
||||
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
||||
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
||||
case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
|
||||
case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
|
||||
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
|
||||
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
|
||||
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
||||
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
||||
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
|
||||
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
||||
case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
|
||||
case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
|
||||
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
||||
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
||||
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
||||
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
||||
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
||||
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
||||
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
||||
case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
|
||||
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
||||
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
||||
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
||||
case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
|
||||
case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
|
||||
case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
|
||||
case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
|
||||
case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
|
||||
case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
|
||||
case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
|
||||
case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
|
||||
case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
|
||||
case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
|
||||
case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
|
||||
case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
|
||||
case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
|
||||
case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
|
||||
case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
|
||||
case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
|
||||
default: return "(unknown)";
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetChannelOrderName( cl_channel_order order )
|
||||
{
|
||||
switch( order )
|
||||
{
|
||||
case CL_R: return "CL_R";
|
||||
case CL_A: return "CL_A";
|
||||
case CL_Rx: return "CL_Rx";
|
||||
case CL_RG: return "CL_RG";
|
||||
case CL_RA: return "CL_RA";
|
||||
case CL_RGx: return "CL_RGx";
|
||||
case CL_RGB: return "CL_RGB";
|
||||
case CL_RGBx: return "CL_RGBx";
|
||||
case CL_RGBA: return "CL_RGBA";
|
||||
case CL_ARGB: return "CL_ARGB";
|
||||
case CL_BGRA: return "CL_BGRA";
|
||||
case CL_INTENSITY: return "CL_INTENSITY";
|
||||
case CL_LUMINANCE: return "CL_LUMINANCE";
|
||||
#if defined CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE: return "CL_1RGB_APPLE";
|
||||
#endif
|
||||
#if defined CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE: return "CL_BGR1_APPLE";
|
||||
#endif
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int IsChannelOrderSupported( cl_channel_order order )
|
||||
{
|
||||
switch( order )
|
||||
{
|
||||
case CL_R:
|
||||
case CL_A:
|
||||
case CL_Rx:
|
||||
case CL_RG:
|
||||
case CL_RA:
|
||||
case CL_RGx:
|
||||
case CL_RGB:
|
||||
case CL_RGBx:
|
||||
case CL_RGBA:
|
||||
case CL_ARGB:
|
||||
case CL_BGRA:
|
||||
case CL_INTENSITY:
|
||||
case CL_LUMINANCE:
|
||||
return 1;
|
||||
#if defined CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE:
|
||||
return 1;
|
||||
#endif
|
||||
#if defined CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE:
|
||||
return 1;
|
||||
#endif
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetChannelTypeName( cl_channel_type type )
|
||||
{
|
||||
switch( type )
|
||||
{
|
||||
case CL_SNORM_INT8: return "CL_SNORM_INT8";
|
||||
case CL_SNORM_INT16: return "CL_SNORM_INT16";
|
||||
case CL_UNORM_INT8: return "CL_UNORM_INT8";
|
||||
case CL_UNORM_INT16: return "CL_UNORM_INT16";
|
||||
case CL_UNORM_SHORT_565: return "CL_UNORM_SHORT_565";
|
||||
case CL_UNORM_SHORT_555: return "CL_UNORM_SHORT_555";
|
||||
case CL_UNORM_INT_101010: return "CL_UNORM_INT_101010";
|
||||
case CL_SIGNED_INT8: return "CL_SIGNED_INT8";
|
||||
case CL_SIGNED_INT16: return "CL_SIGNED_INT16";
|
||||
case CL_SIGNED_INT32: return "CL_SIGNED_INT32";
|
||||
case CL_UNSIGNED_INT8: return "CL_UNSIGNED_INT8";
|
||||
case CL_UNSIGNED_INT16: return "CL_UNSIGNED_INT16";
|
||||
case CL_UNSIGNED_INT32: return "CL_UNSIGNED_INT32";
|
||||
case CL_HALF_FLOAT: return "CL_HALF_FLOAT";
|
||||
case CL_FLOAT: return "CL_FLOAT";
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE";
|
||||
#endif
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int IsChannelTypeSupported( cl_channel_type type )
|
||||
{
|
||||
switch( type )
|
||||
{
|
||||
case CL_SNORM_INT8:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNORM_INT8:
|
||||
case CL_UNORM_INT16:
|
||||
case CL_UNORM_SHORT_565:
|
||||
case CL_UNORM_SHORT_555:
|
||||
case CL_UNORM_INT_101010:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT8:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_FLOAT:
|
||||
return 1;
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
return 1;
|
||||
#endif
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetAddressModeName( cl_addressing_mode mode )
|
||||
{
|
||||
switch( mode )
|
||||
{
|
||||
case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
|
||||
case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
|
||||
case CL_ADDRESS_CLAMP: return "CL_ADDRESS_CLAMP";
|
||||
case CL_ADDRESS_REPEAT: return "CL_ADDRESS_REPEAT";
|
||||
case CL_ADDRESS_MIRRORED_REPEAT: return "CL_ADDRESS_MIRRORED_REPEAT";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetDeviceTypeName( cl_device_type type )
|
||||
{
|
||||
switch( type )
|
||||
{
|
||||
case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
|
||||
case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: return "CL_DEVICE_TYPE_ACCELERATOR";
|
||||
case CL_DEVICE_TYPE_ALL: return "CL_DEVICE_TYPE_ALL";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
|
||||
{
|
||||
static char scratch[ 1024 ];
|
||||
size_t i, j;
|
||||
|
||||
if( buffer == NULL )
|
||||
buffer = scratch;
|
||||
|
||||
unsigned char *p = (unsigned char *)dataBuffer;
|
||||
char *bPtr;
|
||||
|
||||
buffer[ 0 ] = 0;
|
||||
bPtr = buffer;
|
||||
for( i = 0; i < vecSize; i++ )
|
||||
{
|
||||
if( i > 0 )
|
||||
{
|
||||
bPtr[ 0 ] = ' ';
|
||||
bPtr++;
|
||||
}
|
||||
for( j = 0; j < typeSize; j++ )
|
||||
{
|
||||
sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] );
|
||||
bPtr += 2;
|
||||
}
|
||||
p += typeSize;
|
||||
}
|
||||
bPtr[ 0 ] = 0;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
|
||||
#endif
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
#define scalbnf(_a, _i ) ldexpf( _a, _i )
|
||||
#define scalbn(_a, _i ) ldexp( _a, _i )
|
||||
#define scalbnl(_a, _i ) ldexpl( _a, _i )
|
||||
#endif
|
||||
|
||||
static float Ulp_Error_Half_Float( float test, double reference );
|
||||
static inline float half2float( cl_ushort half );
|
||||
|
||||
// taken from math tests
|
||||
#define HALF_MIN_EXP -13
|
||||
#define HALF_MANT_DIG 11
|
||||
static float Ulp_Error_Half_Float( float test, double reference )
|
||||
{
|
||||
union{ double d; uint64_t u; }u; u.d = reference;
|
||||
|
||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
||||
// rounded before calling this function. That test:
|
||||
//
|
||||
// if( (float) reference == test )
|
||||
// return 0.0f;
|
||||
//
|
||||
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
|
||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
||||
// results.
|
||||
|
||||
double testVal = test;
|
||||
if( u.u & 0x000fffffffffffffULL )
|
||||
{ // Non-power of two and NaN
|
||||
if( isnan( reference ) && isnan( test ) )
|
||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
if( isinf( reference ) )
|
||||
{
|
||||
if( (double) test == reference )
|
||||
return 0.0f;
|
||||
|
||||
return (float) (testVal - reference );
|
||||
}
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
// Taken from vLoadHalf test
|
||||
static inline float half2float( cl_ushort us )
|
||||
{
|
||||
uint32_t u = us;
|
||||
uint32_t sign = (u << 16) & 0x80000000;
|
||||
int32_t exponent = (u & 0x7c00) >> 10;
|
||||
uint32_t mantissa = (u & 0x03ff) << 13;
|
||||
union{ unsigned int u; float f;}uu;
|
||||
|
||||
if( exponent == 0 )
|
||||
{
|
||||
if( mantissa == 0 )
|
||||
return sign ? -0.0f : 0.0f;
|
||||
|
||||
int shift = __builtin_clz( mantissa ) - 8;
|
||||
exponent -= shift-1;
|
||||
mantissa <<= shift;
|
||||
mantissa &= 0x007fffff;
|
||||
}
|
||||
else
|
||||
if( exponent == 31)
|
||||
{
|
||||
uu.u = mantissa | sign;
|
||||
if( mantissa )
|
||||
uu.u |= 0x7fc00000;
|
||||
else
|
||||
uu.u |= 0x7f800000;
|
||||
|
||||
return uu.f;
|
||||
}
|
||||
|
||||
exponent += 127 - 15;
|
||||
exponent <<= 23;
|
||||
|
||||
exponent |= mantissa;
|
||||
uu.u = exponent | sign;
|
||||
|
||||
return uu.f;
|
||||
}
|
||||
|
||||
float Ulp_Error_Half( cl_ushort test, float reference )
|
||||
{
|
||||
return Ulp_Error_Half_Float( half2float(test), reference );
|
||||
}
|
||||
|
||||
|
||||
float Ulp_Error( float test, double reference )
|
||||
{
|
||||
union{ double d; uint64_t u; }u; u.d = reference;
|
||||
double testVal = test;
|
||||
|
||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
||||
// rounded before calling this function. That test:
|
||||
//
|
||||
// if( (float) reference == test )
|
||||
// return 0.0f;
|
||||
//
|
||||
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
|
||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
||||
// results.
|
||||
|
||||
|
||||
if( isinf( reference ) )
|
||||
{
|
||||
if( testVal == reference )
|
||||
return 0.0f;
|
||||
|
||||
return (float) (testVal - reference );
|
||||
}
|
||||
|
||||
if( isinf( testVal) )
|
||||
{ // infinite test value, but finite (but possibly overflowing in float) reference.
|
||||
//
|
||||
// The function probably overflowed prematurely here. Formally, the spec says this is
|
||||
// an infinite ulp error and should not be tolerated. Unfortunately, this would mean
|
||||
// that the internal precision of some half_pow implementations would have to be 29+ bits
|
||||
// at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
|
||||
// is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
|
||||
// after rounding to single is 4*32 = 128, which will ultimately result in premature
|
||||
// overflow, even though a good faith representation would be correct to within 2**-29
|
||||
// interally.
|
||||
|
||||
// In the interest of not requiring the implementation go to extraordinary lengths to
|
||||
// deliver a half precision function, we allow premature overflow within the limit
|
||||
// of the allowed ulp error. Towards, that end, we "pretend" the test value is actually
|
||||
// 2**128, the next value that would appear in the number line if float had sufficient range.
|
||||
testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
|
||||
|
||||
// Note that the same hack may not work in long double, which is not guaranteed to have
|
||||
// more range than double. It is not clear that premature overflow should be tolerated for
|
||||
// double.
|
||||
}
|
||||
|
||||
if( u.u & 0x000fffffffffffffULL )
|
||||
{ // Non-power of two and NaN
|
||||
if( isnan( reference ) && isnan( test ) )
|
||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
float Ulp_Error_Double( double test, long double reference )
|
||||
{
|
||||
// Deal with long double = double
|
||||
// On most systems long double is a higher precision type than double. They provide either
|
||||
// a 80-bit or greater floating point type, or they provide a head-tail double double format.
|
||||
// That is sufficient to represent the accuracy of a floating point result to many more bits
|
||||
// than double and we can calculate sub-ulp errors. This is the standard system for which this
|
||||
// test suite is designed.
|
||||
//
|
||||
// On some systems double and long double are the same thing. Then we run into a problem,
|
||||
// because our representation of the infinitely precise result (passed in as reference above)
|
||||
// can be off by as much as a half double precision ulp itself. In this case, we inflate the
|
||||
// reported error by half an ulp to take this into account. A more correct and permanent fix
|
||||
// would be to undertake refactoring the reference code to return results in this format:
|
||||
//
|
||||
// typedef struct DoubleReference
|
||||
// { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise)
|
||||
// double correctlyRoundedResult; // as best we can
|
||||
// double ulps; // plus a fractional amount to account for the difference
|
||||
// }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps.
|
||||
//
|
||||
// This would provide a useful higher-than-double precision format for everyone that we can use,
|
||||
// and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
|
||||
// that use a head to tail double double for long double.
|
||||
|
||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
||||
// rounded before calling this function. That test:
|
||||
//
|
||||
// if( (float) reference == test )
|
||||
// return 0.0f;
|
||||
//
|
||||
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
|
||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
||||
// results.
|
||||
|
||||
|
||||
int x;
|
||||
long double testVal = test;
|
||||
if( 0.5L != frexpl( reference, &x) )
|
||||
{ // Non-power of two and NaN
|
||||
if( isinf( reference ) )
|
||||
{
|
||||
if( testVal == reference )
|
||||
return 0.0f;
|
||||
|
||||
return (float) ( testVal - reference );
|
||||
}
|
||||
|
||||
if( isnan( reference ) && isnan( test ) )
|
||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
float result = (float) scalbnl( testVal - reference, ulp_exp );
|
||||
|
||||
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
|
||||
if( sizeof(long double) == sizeof( double ) )
|
||||
result += copysignf( 0.5f, result);
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
float result = (float) scalbnl( testVal - reference, ulp_exp );
|
||||
|
||||
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
|
||||
if( sizeof(long double) == sizeof( double ) )
|
||||
result += copysignf( 0.5f, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
|
||||
{
|
||||
int error;
|
||||
size_t size_ret;
|
||||
|
||||
// Does the program object exist?
|
||||
if (program != NULL) {
|
||||
|
||||
// Was the number of devices given
|
||||
if (num_devices == 0) {
|
||||
|
||||
// If zero devices were specified then allocate and query the device list from the context
|
||||
cl_context context;
|
||||
error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
|
||||
test_error( error, "Unable to query program's context" );
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
|
||||
test_error( error, "Unable to query context's device size" );
|
||||
num_devices = size_ret / sizeof(cl_device_id);
|
||||
device_list = (cl_device_id *) malloc(size_ret);
|
||||
if (device_list == NULL) {
|
||||
print_error( error, "malloc failed" );
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, device_list, NULL);
|
||||
test_error( error, "Unable to query context's devices" );
|
||||
|
||||
}
|
||||
|
||||
// For each device in the device_list
|
||||
unsigned int i;
|
||||
for (i = 0; i < num_devices; i++) {
|
||||
|
||||
// Get the build status
|
||||
cl_build_status build_status;
|
||||
error = clGetProgramBuildInfo(program,
|
||||
device_list[i],
|
||||
CL_PROGRAM_BUILD_STATUS,
|
||||
sizeof(build_status),
|
||||
&build_status,
|
||||
&size_ret);
|
||||
test_error( error, "Unable to query build status" );
|
||||
|
||||
// If the build failed then log the status, and allocate the build log, log it and free it
|
||||
if (build_status != CL_BUILD_SUCCESS) {
|
||||
|
||||
log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
|
||||
error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
|
||||
test_error( error, "Unable to query build log size" );
|
||||
char *build_log = (char *) malloc(size_ret);
|
||||
error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, &size_ret);
|
||||
test_error( error, "Unable to query build log" );
|
||||
log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
|
||||
free(build_log);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Was the number of devices given
|
||||
if (num_devices == 0) {
|
||||
|
||||
// If zero devices were specified then free the device list
|
||||
free(device_list);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
149
test_common/harness/errorHelpers.h
Normal file
149
test_common/harness/errorHelpers.h
Normal file
@@ -0,0 +1,149 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _errorHelpers_h
|
||||
#define _errorHelpers_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define LOWER_IS_BETTER 0
|
||||
#define HIGHER_IS_BETTER 1
|
||||
|
||||
// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
|
||||
// functions as described below. This is helpful for integration into an automated testing
|
||||
// system.
|
||||
#if USE_ATF
|
||||
// export BUILD_WITH_ATF=1
|
||||
#include <ATF/ATF.h>
|
||||
#define test_start() ATFTestStart()
|
||||
#define log_info ATFLogInfo
|
||||
#define log_error ATFLogError
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
|
||||
#define test_finish() ATFTestFinish()
|
||||
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
|
||||
#define vlog ATFLogInfo
|
||||
#define vlog_error ATFLogError
|
||||
#else
|
||||
#define test_start()
|
||||
#define log_info printf
|
||||
#define log_error printf
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
_higherBetter?"higher is better":"lower is better", _number )
|
||||
#define test_finish()
|
||||
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
_higherBetter?"higher is better":"lower is better" , _number)
|
||||
#ifdef _WIN32
|
||||
#ifdef __MINGW32__
|
||||
// Use __mingw_printf since it supports "%a" format specifier
|
||||
#define vlog __mingw_printf
|
||||
#define vlog_error __mingw_printf
|
||||
#else
|
||||
// Use home-baked function that treats "%a" as "%f"
|
||||
static int vlog_win32(const char *format, ...);
|
||||
#define vlog vlog_win32
|
||||
#define vlog_error vlog_win32
|
||||
#endif
|
||||
#else
|
||||
#define vlog_error printf
|
||||
#define vlog printf
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define ct_assert(b) ct_assert_i(b, __LINE__)
|
||||
#define ct_assert_i(b, line) ct_assert_ii(b, line)
|
||||
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
|
||||
|
||||
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
|
||||
#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
|
||||
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
|
||||
|
||||
// expected error code vs. what we got
|
||||
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
||||
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
|
||||
#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
||||
#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
||||
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
|
||||
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
||||
|
||||
extern const char *IGetErrorString( int clErrorCode );
|
||||
|
||||
extern float Ulp_Error_Half( cl_ushort test, float reference );
|
||||
extern float Ulp_Error( float test, double reference );
|
||||
extern float Ulp_Error_Double( double test, long double reference );
|
||||
|
||||
extern const char *GetChannelTypeName( cl_channel_type type );
|
||||
extern int IsChannelTypeSupported( cl_channel_type type );
|
||||
extern const char *GetChannelOrderName( cl_channel_order order );
|
||||
extern int IsChannelOrderSupported( cl_channel_order order );
|
||||
extern const char *GetAddressModeName( cl_addressing_mode mode );
|
||||
|
||||
extern const char *GetDeviceTypeName( cl_device_type type );
|
||||
|
||||
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
|
||||
extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
|
||||
|
||||
#if defined (_WIN32) && !defined(__MINGW32__)
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
static int vlog_win32(const char *format, ...)
|
||||
{
|
||||
const char *new_format = format;
|
||||
|
||||
if (strstr(format, "%a")) {
|
||||
char *temp;
|
||||
if ((temp = strdup(format)) == NULL) {
|
||||
printf("vlog_win32: Failed to allocate memory for strdup\n");
|
||||
return -1;
|
||||
}
|
||||
new_format = temp;
|
||||
while (*temp) {
|
||||
// replace %a with %f
|
||||
if ((*temp == '%') && (*(temp+1) == 'a')) {
|
||||
*(temp+1) = 'f';
|
||||
}
|
||||
temp++;
|
||||
}
|
||||
}
|
||||
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
vprintf(new_format, args);
|
||||
va_end(args);
|
||||
|
||||
if (new_format != format) {
|
||||
free((void*)new_format);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _errorHelpers_h
|
||||
|
||||
|
||||
89
test_common/harness/fpcontrol.h
Normal file
89
test_common/harness/fpcontrol.h
Normal file
@@ -0,0 +1,89 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _fpcontrol_h
|
||||
#define _fpcontrol_h
|
||||
|
||||
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
|
||||
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
|
||||
// in integer code, and have found this is the only way to correctly verify operation.
|
||||
//
|
||||
// Non-Apple implementations will need to provide their own implentation for these features. If the reference hardware and device are both
|
||||
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
|
||||
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
|
||||
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
|
||||
typedef int FPU_mode_type;
|
||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
||||
#include <xmmintrin.h>
|
||||
#elif defined( __PPC__ )
|
||||
#include <fpu_control.h>
|
||||
extern __thread fpu_control_t fpu_control;
|
||||
#endif
|
||||
// Set the reference hardware floating point unit to FTZ mode
|
||||
static inline void ForceFTZ( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr( *mode | 0x8040);
|
||||
#elif defined( __PPC__ )
|
||||
*mode = fpu_control;
|
||||
fpu_control |= _FPU_MASK_NI;
|
||||
#elif defined ( __arm__ )
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
|
||||
#else
|
||||
#error ForceFTZ needs an implentation
|
||||
#endif
|
||||
}
|
||||
|
||||
// Disable the denorm flush to zero
|
||||
static inline void DisableFTZ( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr( *mode & ~0x8040);
|
||||
#elif defined( __PPC__ )
|
||||
*mode = fpu_control;
|
||||
fpu_control &= ~_FPU_MASK_NI;
|
||||
#elif defined ( __arm__ )
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
|
||||
#else
|
||||
#error DisableFTZ needs an implentation
|
||||
#endif
|
||||
}
|
||||
|
||||
// Restore the reference hardware to floating point state indicated by *mode
|
||||
static inline void RestoreFPState( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
_mm_setcsr( *mode );
|
||||
#elif defined( __PPC__)
|
||||
fpu_control = *mode;
|
||||
#elif defined (__arm__)
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
|
||||
#else
|
||||
#error RestoreFPState needs an implementation
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#error ForceFTZ and RestoreFPState need implentations
|
||||
#endif
|
||||
|
||||
#endif
|
||||
53
test_common/harness/genericThread.cpp
Normal file
53
test_common/harness/genericThread.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "genericThread.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#else // !_WIN32
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
void * genericThread::IStaticReflector( void * data )
|
||||
{
|
||||
genericThread *t = (genericThread *)data;
|
||||
return t->IRun();
|
||||
}
|
||||
|
||||
bool genericThread::Start( void )
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
|
||||
return ( mHandle != NULL );
|
||||
#else // !_WIN32
|
||||
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
|
||||
return ( error == 0 );
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
void * genericThread::Join( void )
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
WaitForSingleObject( (HANDLE)mHandle, INFINITE );
|
||||
return NULL;
|
||||
#else // !_WIN32
|
||||
void * retVal;
|
||||
int error = pthread_join( (pthread_t)mHandle, &retVal );
|
||||
if( error != 0 )
|
||||
retVal = NULL;
|
||||
return retVal;
|
||||
#endif // !_WIN32
|
||||
}
|
||||
42
test_common/harness/genericThread.h
Normal file
42
test_common/harness/genericThread.h
Normal file
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _genericThread_h
|
||||
#define _genericThread_h
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
class genericThread
|
||||
{
|
||||
public:
|
||||
|
||||
virtual ~genericThread() {}
|
||||
|
||||
bool Start( void );
|
||||
void * Join( void );
|
||||
|
||||
protected:
|
||||
|
||||
virtual void * IRun( void ) = 0;
|
||||
|
||||
private:
|
||||
|
||||
void* mHandle;
|
||||
|
||||
static void * IStaticReflector( void * data );
|
||||
};
|
||||
|
||||
#endif // _genericThread_h
|
||||
|
||||
249
test_common/harness/imageHelpers.cpp
Normal file
249
test_common/harness/imageHelpers.cpp
Normal file
@@ -0,0 +1,249 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "imageHelpers.h"
|
||||
|
||||
size_t get_format_type_size( const cl_image_format *format )
|
||||
{
|
||||
return get_channel_data_type_size( format->image_channel_data_type );
|
||||
}
|
||||
|
||||
size_t get_channel_data_type_size( cl_channel_type channelType )
|
||||
{
|
||||
switch( channelType )
|
||||
{
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
return 1;
|
||||
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNORM_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_HALF_FLOAT:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
return sizeof( cl_short );
|
||||
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT32:
|
||||
return sizeof( cl_int );
|
||||
|
||||
case CL_UNORM_SHORT_565:
|
||||
case CL_UNORM_SHORT_555:
|
||||
#ifdef OBSOLETE_FORAMT
|
||||
case CL_UNORM_SHORT_565_REV:
|
||||
case CL_UNORM_SHORT_555_REV:
|
||||
#endif
|
||||
return 2;
|
||||
|
||||
#ifdef OBSOLETE_FORAMT
|
||||
case CL_UNORM_INT_8888:
|
||||
case CL_UNORM_INT_8888_REV:
|
||||
return 4;
|
||||
#endif
|
||||
|
||||
case CL_UNORM_INT_101010:
|
||||
#ifdef OBSOLETE_FORAMT
|
||||
case CL_UNORM_INT_101010_REV:
|
||||
#endif
|
||||
return 4;
|
||||
|
||||
case CL_FLOAT:
|
||||
return sizeof( cl_float );
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t get_format_channel_count( const cl_image_format *format )
|
||||
{
|
||||
return get_channel_order_channel_count( format->image_channel_order );
|
||||
}
|
||||
|
||||
size_t get_channel_order_channel_count( cl_channel_order order )
|
||||
{
|
||||
switch( order )
|
||||
{
|
||||
case CL_R:
|
||||
case CL_A:
|
||||
case CL_Rx:
|
||||
case CL_INTENSITY:
|
||||
case CL_LUMINANCE:
|
||||
return 1;
|
||||
|
||||
case CL_RG:
|
||||
case CL_RA:
|
||||
case CL_RGx:
|
||||
return 2;
|
||||
|
||||
case CL_RGB:
|
||||
case CL_RGBx:
|
||||
return 3;
|
||||
|
||||
case CL_RGBA:
|
||||
case CL_ARGB:
|
||||
case CL_BGRA:
|
||||
#ifdef CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE:
|
||||
#endif
|
||||
#ifdef CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE:
|
||||
#endif
|
||||
return 4;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int is_format_signed( const cl_image_format *format )
|
||||
{
|
||||
switch( format->image_channel_data_type )
|
||||
{
|
||||
case CL_SNORM_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_FLOAT:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
return 1;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t get_pixel_size( cl_image_format *format )
|
||||
{
|
||||
switch( format->image_channel_data_type )
|
||||
{
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
return get_format_channel_count( format );
|
||||
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNORM_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
case CL_HALF_FLOAT:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
return get_format_channel_count( format ) * sizeof( cl_ushort );
|
||||
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT32:
|
||||
return get_format_channel_count( format ) * sizeof( cl_int );
|
||||
|
||||
case CL_UNORM_SHORT_565:
|
||||
case CL_UNORM_SHORT_555:
|
||||
#ifdef OBSOLETE_FORAMT
|
||||
case CL_UNORM_SHORT_565_REV:
|
||||
case CL_UNORM_SHORT_555_REV:
|
||||
#endif
|
||||
return 2;
|
||||
|
||||
#ifdef OBSOLETE_FORAMT
|
||||
case CL_UNORM_INT_8888:
|
||||
case CL_UNORM_INT_8888_REV:
|
||||
return 4;
|
||||
#endif
|
||||
|
||||
case CL_UNORM_INT_101010:
|
||||
#ifdef OBSOLETE_FORAMT
|
||||
case CL_UNORM_INT_101010_REV:
|
||||
#endif
|
||||
return 4;
|
||||
|
||||
case CL_FLOAT:
|
||||
return get_format_channel_count( format ) * sizeof( cl_float );
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
|
||||
{
|
||||
cl_image_format formatList[ 128 ];
|
||||
unsigned int outFormatCount, i;
|
||||
int error;
|
||||
|
||||
|
||||
/* Make sure each image format is supported */
|
||||
if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
|
||||
return error;
|
||||
|
||||
|
||||
/* Look for one that is an 8-bit format */
|
||||
for( i = 0; i < outFormatCount; i++ )
|
||||
{
|
||||
if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNORM_INT8 ||
|
||||
formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 )
|
||||
{
|
||||
if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
|
||||
{
|
||||
*outFormat = formatList[ i ];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
|
||||
{
|
||||
cl_image_format formatList[ 128 ];
|
||||
unsigned int outFormatCount, i;
|
||||
int error;
|
||||
|
||||
|
||||
/* Make sure each image format is supported */
|
||||
if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
|
||||
return error;
|
||||
|
||||
/* Look for one that is an 8-bit format */
|
||||
for( i = 0; i < outFormatCount; i++ )
|
||||
{
|
||||
if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ||
|
||||
formatList[ i ].image_channel_data_type == CL_FLOAT ||
|
||||
formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 ||
|
||||
formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 )
|
||||
{
|
||||
if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
|
||||
{
|
||||
*outFormat = formatList[ i ];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
37
test_common/harness/imageHelpers.h
Normal file
37
test_common/harness/imageHelpers.h
Normal file
@@ -0,0 +1,37 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _imageHelpers_h
|
||||
#define _imageHelpers_h
|
||||
|
||||
#include "errorHelpers.h"
|
||||
|
||||
|
||||
extern size_t get_format_type_size( const cl_image_format *format );
|
||||
extern size_t get_channel_data_type_size( cl_channel_type channelType );
|
||||
extern size_t get_format_channel_count( const cl_image_format *format );
|
||||
extern size_t get_channel_order_channel_count( cl_channel_order order );
|
||||
extern int is_format_signed( const cl_image_format *format );
|
||||
extern size_t get_pixel_size( cl_image_format *format );
|
||||
|
||||
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
|
||||
extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
||||
|
||||
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
|
||||
extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
||||
|
||||
|
||||
#endif // _imageHelpers_h
|
||||
|
||||
684
test_common/harness/kernelHelpers.c
Normal file
684
test_common/harness/kernelHelpers.c
Normal file
@@ -0,0 +1,684 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "kernelHelpers.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "imageHelpers.h"
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
#include "mingw_compat.h"
|
||||
#endif
|
||||
|
||||
int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName )
|
||||
{
|
||||
int error = CL_SUCCESS;
|
||||
|
||||
/* Create the program object from source */
|
||||
*outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
|
||||
if( *outProgram == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "clCreateProgramWithSource failed" );
|
||||
return error;
|
||||
}
|
||||
|
||||
/* Compile the program */
|
||||
int buildProgramFailed = 0;
|
||||
int printedSource = 0;
|
||||
error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL );
|
||||
if (error != CL_SUCCESS)
|
||||
{
|
||||
unsigned int i;
|
||||
print_error(error, "clBuildProgram failed");
|
||||
buildProgramFailed = 1;
|
||||
printedSource = 1;
|
||||
log_error( "Original source is: ------------\n" );
|
||||
for( i = 0; i < numKernelLines; i++ )
|
||||
log_error( "%s", kernelProgram[ i ] );
|
||||
}
|
||||
|
||||
// Verify the build status on all devices
|
||||
cl_uint deviceCount = 0;
|
||||
error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
|
||||
if (error != CL_SUCCESS) {
|
||||
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
|
||||
return error;
|
||||
}
|
||||
|
||||
if (deviceCount == 0) {
|
||||
log_error("No devices found for program.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
|
||||
if( NULL == devices )
|
||||
return -1;
|
||||
memset( devices, 0, deviceCount * sizeof( cl_device_id ));
|
||||
error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
|
||||
if (error != CL_SUCCESS) {
|
||||
print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
|
||||
free( devices );
|
||||
return error;
|
||||
}
|
||||
|
||||
cl_uint z;
|
||||
for( z = 0; z < deviceCount; z++ )
|
||||
{
|
||||
char deviceName[4096] = "";
|
||||
error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
|
||||
if (error != CL_SUCCESS || deviceName[0] == '\0') {
|
||||
log_error("Device \"%d\" failed to return a name\n", z);
|
||||
print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
|
||||
}
|
||||
|
||||
cl_build_status buildStatus;
|
||||
error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
|
||||
if (error != CL_SUCCESS) {
|
||||
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
|
||||
free( devices );
|
||||
return error;
|
||||
}
|
||||
|
||||
if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
|
||||
char log[10240] = "";
|
||||
if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
|
||||
|
||||
char statusString[64] = "";
|
||||
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
|
||||
sprintf(statusString, "CL_BUILD_SUCCESS");
|
||||
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
|
||||
sprintf(statusString, "CL_BUILD_NONE");
|
||||
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
|
||||
sprintf(statusString, "CL_BUILD_ERROR");
|
||||
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
|
||||
sprintf(statusString, "CL_BUILD_IN_PROGRESS");
|
||||
else
|
||||
sprintf(statusString, "UNKNOWN (%d)", buildStatus);
|
||||
|
||||
if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
|
||||
error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
|
||||
if (error != CL_SUCCESS || log[0]=='\0'){
|
||||
log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
|
||||
if (error) {
|
||||
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
||||
free( devices );
|
||||
return error;
|
||||
} else {
|
||||
log_error("clGetProgramBuildInfo returned an empty log.\n");
|
||||
free( devices );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// In this case we've already printed out the code above.
|
||||
if (!printedSource)
|
||||
{
|
||||
unsigned int i;
|
||||
log_error( "Original source is: ------------\n" );
|
||||
for( i = 0; i < numKernelLines; i++ )
|
||||
log_error( "%s", kernelProgram[ i ] );
|
||||
printedSource = 1;
|
||||
}
|
||||
log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
|
||||
log_error( "%s\n", log );
|
||||
log_error( "\n----------\n" );
|
||||
free( devices );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* And create a kernel from it */
|
||||
*outKernel = clCreateKernel( *outProgram, kernelName, &error );
|
||||
if( *outKernel == NULL || error != CL_SUCCESS)
|
||||
{
|
||||
print_error( error, "Unable to create kernel" );
|
||||
free( devices );
|
||||
return error;
|
||||
}
|
||||
|
||||
free( devices );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_device_version( cl_device_id id, size_t* major, size_t* minor)
|
||||
{
|
||||
cl_char buffer[ 4098 ];
|
||||
size_t length;
|
||||
|
||||
// Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
|
||||
cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, &length );
|
||||
test_error( error, "Unable to get device version string" );
|
||||
|
||||
char *p1 = (char *)buffer + strlen( "OpenCL " );
|
||||
char *p2;
|
||||
while( *p1 == ' ' )
|
||||
p1++;
|
||||
*major = strtol( p1, &p2, 10 );
|
||||
error = *p2 != '.';
|
||||
test_error(error, "ERROR: Version number must contain a decimal point!");
|
||||
*minor = strtol( ++p2, NULL, 10 );
|
||||
return error;
|
||||
}
|
||||
|
||||
int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
|
||||
{
|
||||
cl_device_id *devices;
|
||||
size_t size, maxCommonSize = 0;
|
||||
int numDevices, i, j, error;
|
||||
cl_uint numDims;
|
||||
size_t outSize;
|
||||
size_t sizeLimit[]={1,1,1};
|
||||
|
||||
|
||||
/* Assume fewer than 16 devices will be returned */
|
||||
error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
|
||||
test_error( error, "Unable to obtain list of devices size for context" );
|
||||
devices = (cl_device_id *)malloc(outSize);
|
||||
error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
|
||||
test_error( error, "Unable to obtain list of devices for context" );
|
||||
|
||||
numDevices = (int)( outSize / sizeof( cl_device_id ) );
|
||||
|
||||
for( i = 0; i < numDevices; i++ )
|
||||
{
|
||||
error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
|
||||
test_error( error, "Unable to obtain max work group size for device" );
|
||||
if( size < maxCommonSize || maxCommonSize == 0)
|
||||
maxCommonSize = size;
|
||||
|
||||
error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
|
||||
test_error( error, "Unable to obtain max work group size for device and kernel combo" );
|
||||
if( size < maxCommonSize || maxCommonSize == 0)
|
||||
maxCommonSize = size;
|
||||
|
||||
error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
|
||||
test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
|
||||
sizeLimit[0] = 1;
|
||||
error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL);
|
||||
test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
|
||||
|
||||
if (outLimits != NULL)
|
||||
{
|
||||
if (i == 0) {
|
||||
for (j=0; j<3; j++)
|
||||
outLimits[j] = sizeLimit[j];
|
||||
} else {
|
||||
for (j=0; j<(int)numDims; j++) {
|
||||
if (sizeLimit[j] < outLimits[j])
|
||||
outLimits[j] = sizeLimit[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(devices);
|
||||
|
||||
*outMaxSize = (unsigned int)maxCommonSize;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
|
||||
size_t globalThreadSize, size_t *outMaxSize )
|
||||
{
|
||||
size_t sizeLimit[3];
|
||||
int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
|
||||
/* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
|
||||
/* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
|
||||
the modulo test will succeed and break the loop anyway */
|
||||
for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- )
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
|
||||
size_t *globalThreadSizes, size_t *outMaxSizes )
|
||||
{
|
||||
size_t sizeLimit[3];
|
||||
size_t maxSize;
|
||||
int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
|
||||
/* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
|
||||
sizes */
|
||||
|
||||
/* Simple case */
|
||||
if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize )
|
||||
{
|
||||
if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1]) {
|
||||
outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
|
||||
outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t remainingSize, sizeForThisOne;
|
||||
remainingSize = maxSize;
|
||||
int i, j;
|
||||
for (i=0 ; i<2; i++) {
|
||||
if (globalThreadSizes[i] > remainingSize)
|
||||
sizeForThisOne = remainingSize;
|
||||
else
|
||||
sizeForThisOne = globalThreadSizes[i];
|
||||
for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
|
||||
outMaxSizes[i] = sizeForThisOne;
|
||||
remainingSize = maxSize;
|
||||
for (j=0; j<=i; j++)
|
||||
remainingSize /=outMaxSizes[j];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
|
||||
size_t *globalThreadSizes, size_t *outMaxSizes )
|
||||
{
|
||||
size_t sizeLimit[3];
|
||||
size_t maxSize;
|
||||
int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
/* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
|
||||
sizes */
|
||||
|
||||
/* Simple case */
|
||||
if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize )
|
||||
{
|
||||
if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) {
|
||||
outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
|
||||
outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
|
||||
outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
size_t remainingSize, sizeForThisOne;
|
||||
remainingSize = maxSize;
|
||||
int i, j;
|
||||
for (i=0 ; i<3; i++) {
|
||||
if (globalThreadSizes[i] > remainingSize)
|
||||
sizeForThisOne = remainingSize;
|
||||
else
|
||||
sizeForThisOne = globalThreadSizes[i];
|
||||
for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
|
||||
outMaxSizes[i] = sizeForThisOne;
|
||||
remainingSize = maxSize;
|
||||
for (j=0; j<=i; j++)
|
||||
remainingSize /=outMaxSizes[j];
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
int is_extension_available( cl_device_id device, const char *extensionName )
|
||||
{
|
||||
char *extString;
|
||||
size_t size = 0;
|
||||
int err;
|
||||
int result = 0;
|
||||
|
||||
if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) ))
|
||||
{
|
||||
log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( 0 == size )
|
||||
return 0;
|
||||
|
||||
extString = (char*) malloc( size );
|
||||
if( NULL == extString )
|
||||
{
|
||||
log_error( "Error: unable to allocate %ld byte buffer for extension string at %s:%d (err = %d)\n", size, __FILE__, __LINE__, err );
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) ))
|
||||
{
|
||||
log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
|
||||
free( extString );
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( strstr( extString, extensionName ) )
|
||||
result = 1;
|
||||
|
||||
free( extString );
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Helper to determine if a device supports an image format */
|
||||
int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
|
||||
{
|
||||
cl_image_format *list;
|
||||
cl_uint count = 0;
|
||||
cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count );
|
||||
if( count == 0 )
|
||||
return 0;
|
||||
|
||||
list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
|
||||
if( NULL == list )
|
||||
{
|
||||
log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__, err );
|
||||
return 0;
|
||||
}
|
||||
|
||||
cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
|
||||
if( error )
|
||||
{
|
||||
log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
|
||||
free( list );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// iterate looking for a match.
|
||||
cl_uint i;
|
||||
for( i = 0; i < count; i++ )
|
||||
{
|
||||
if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
|
||||
fmt->image_channel_order == list[ i ].image_channel_order )
|
||||
break;
|
||||
}
|
||||
|
||||
free( list );
|
||||
return ( i < count ) ? true : false;
|
||||
}
|
||||
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt )
|
||||
{
|
||||
size_t chanCount;
|
||||
switch( fmt->image_channel_order )
|
||||
{
|
||||
case CL_R:
|
||||
case CL_A:
|
||||
case CL_Rx:
|
||||
case CL_INTENSITY:
|
||||
case CL_LUMINANCE:
|
||||
chanCount = 1;
|
||||
break;
|
||||
case CL_RG:
|
||||
case CL_RA:
|
||||
case CL_RGx:
|
||||
chanCount = 2;
|
||||
break;
|
||||
case CL_RGB:
|
||||
case CL_RGBx:
|
||||
chanCount = 3;
|
||||
break;
|
||||
case CL_RGBA:
|
||||
case CL_ARGB:
|
||||
case CL_BGRA:
|
||||
#ifdef CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE:
|
||||
#endif
|
||||
#ifdef CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE:
|
||||
#endif
|
||||
chanCount = 4;
|
||||
break;
|
||||
default:
|
||||
log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
|
||||
switch( fmt->image_channel_data_type )
|
||||
{
|
||||
case CL_UNORM_SHORT_565:
|
||||
case CL_UNORM_SHORT_555:
|
||||
return 2;
|
||||
|
||||
case CL_UNORM_INT_101010:
|
||||
return 4;
|
||||
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
return chanCount;
|
||||
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNORM_INT16:
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
return chanCount * 2;
|
||||
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_FLOAT:
|
||||
return chanCount * 4;
|
||||
|
||||
default:
|
||||
log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int verifyImageSupport( cl_device_id device )
|
||||
{
|
||||
if( checkForImageSupport( device ) )
|
||||
{
|
||||
log_error( "ERROR: Device does not supported images as required by this test!\n" );
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int checkForImageSupport( cl_device_id device )
|
||||
{
|
||||
cl_uint i;
|
||||
int error;
|
||||
|
||||
|
||||
/* Check the device props to see if images are supported at all first */
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
|
||||
test_error( error, "Unable to query device for image support" );
|
||||
if( i == 0 )
|
||||
{
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* So our support is good */
|
||||
return 0;
|
||||
}
|
||||
|
||||
int checkFor3DImageSupport( cl_device_id device )
|
||||
{
|
||||
cl_uint i;
|
||||
int error;
|
||||
|
||||
/* Check the device props to see if images are supported at all first */
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
|
||||
test_error( error, "Unable to query device for image support" );
|
||||
if( i == 0 )
|
||||
{
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
char profile[128];
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
|
||||
test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );
|
||||
if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ) )
|
||||
{
|
||||
size_t width = -1L;
|
||||
size_t height = -1L;
|
||||
size_t depth = -1L;
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );
|
||||
|
||||
if( 0 == (height | width | depth ))
|
||||
return CL_IMAGE_FORMAT_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* So our support is good */
|
||||
return 0;
|
||||
}
|
||||
|
||||
void * align_malloc(size_t size, size_t alignment)
|
||||
{
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
return _aligned_malloc(size, alignment);
|
||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||
void * ptr = NULL;
|
||||
if (0 == posix_memalign(&ptr, alignment, size))
|
||||
return ptr;
|
||||
return NULL;
|
||||
#elif defined(__MINGW32__)
|
||||
return __mingw_aligned_malloc(size, alignment);
|
||||
#else
|
||||
#error "Please add support OS for aligned malloc"
|
||||
#endif
|
||||
}
|
||||
|
||||
void align_free(void * ptr)
|
||||
{
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
_aligned_free(ptr);
|
||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||
return free(ptr);
|
||||
#elif defined(__MINGW32__)
|
||||
return __mingw_aligned_free(ptr);
|
||||
#else
|
||||
#error "Please add support OS for aligned free"
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t get_min_alignment(cl_context context)
|
||||
{
|
||||
static cl_uint align_size = 0;
|
||||
|
||||
if( 0 == align_size )
|
||||
{
|
||||
cl_device_id * devices;
|
||||
size_t devices_size = 0;
|
||||
cl_uint result = 0;
|
||||
cl_int error;
|
||||
int i;
|
||||
|
||||
error = clGetContextInfo (context,
|
||||
CL_CONTEXT_DEVICES,
|
||||
0,
|
||||
NULL,
|
||||
&devices_size);
|
||||
test_error_ret(error, "clGetContextInfo failed", 0);
|
||||
|
||||
devices = (cl_device_id*)malloc(devices_size);
|
||||
if (devices == NULL) {
|
||||
print_error( error, "malloc failed" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
error = clGetContextInfo (context,
|
||||
CL_CONTEXT_DEVICES,
|
||||
devices_size,
|
||||
(void*)devices,
|
||||
NULL);
|
||||
test_error_ret(error, "clGetContextInfo failed", 0);
|
||||
|
||||
for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++)
|
||||
{
|
||||
cl_uint alignment = 0;
|
||||
|
||||
error = clGetDeviceInfo (devices[i],
|
||||
CL_DEVICE_MEM_BASE_ADDR_ALIGN,
|
||||
sizeof(cl_uint),
|
||||
(void*)&alignment,
|
||||
NULL);
|
||||
|
||||
if (error == CL_SUCCESS)
|
||||
{
|
||||
alignment >>= 3; // convert bits to bytes
|
||||
result = (alignment > result) ? alignment : result;
|
||||
}
|
||||
else
|
||||
print_error( error, "clGetDeviceInfo failed" );
|
||||
}
|
||||
|
||||
align_size = result;
|
||||
free(devices);
|
||||
}
|
||||
|
||||
return align_size;
|
||||
}
|
||||
|
||||
cl_device_fp_config get_default_rounding_mode( cl_device_id device )
|
||||
{
|
||||
char profileStr[128] = "";
|
||||
cl_device_fp_config single = 0;
|
||||
int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
|
||||
if( error )
|
||||
test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );
|
||||
|
||||
if( single & CL_FP_ROUND_TO_NEAREST )
|
||||
return CL_FP_ROUND_TO_NEAREST;
|
||||
|
||||
if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
|
||||
test_error_ret( -1, "FAILURE: device must support either CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0 );
|
||||
|
||||
// Make sure we are an embedded device before allowing a pass
|
||||
if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
|
||||
test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );
|
||||
|
||||
if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
|
||||
test_error_ret( error, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );
|
||||
|
||||
return CL_FP_ROUND_TO_ZERO;
|
||||
}
|
||||
|
||||
int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
|
||||
{
|
||||
cl_command_queue_properties realProps;
|
||||
cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( realProps ), &realProps, NULL );
|
||||
test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );
|
||||
|
||||
return ( realProps & prop ) ? 1 : 0;
|
||||
}
|
||||
|
||||
int printDeviceHeader( cl_device_id device )
|
||||
{
|
||||
char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ];
|
||||
int error;
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_NAME for device" );
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
|
||||
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
|
||||
test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
|
||||
|
||||
log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
|
||||
deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "",
|
||||
( error == CL_SUCCESS ) ? cLangVersion : "" );
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
131
test_common/harness/kernelHelpers.h
Normal file
131
test_common/harness/kernelHelpers.h
Normal file
@@ -0,0 +1,131 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _kernelHelpers_h
|
||||
#define _kernelHelpers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined (__MINGW32__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/*
|
||||
* The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
|
||||
*
|
||||
* const char *source = {
|
||||
* INIT_OPENCL_DEBUG_INFO
|
||||
* "__kernel void foo( int x )\n"
|
||||
* "{\n"
|
||||
* " ...\n"
|
||||
* "}\n"
|
||||
* };
|
||||
*/
|
||||
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
|
||||
#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
|
||||
#ifndef STRINGIFY_VALUE
|
||||
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
|
||||
#endif
|
||||
#ifndef STRINGIFY
|
||||
#define STRINGIFY(_x) #_x
|
||||
#endif
|
||||
|
||||
/* Helper that creates a single program and kernel from a single-kernel program source */
|
||||
extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName );
|
||||
|
||||
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
|
||||
extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
|
||||
|
||||
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
|
||||
extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
||||
|
||||
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
|
||||
extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
||||
|
||||
/* Helper to get major/minor number for a device */
|
||||
extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
|
||||
|
||||
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
|
||||
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
extern int is_extension_available( cl_device_id device, const char *extensionName );
|
||||
|
||||
/* Helper to determine if a device supports an image format */
|
||||
extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
|
||||
|
||||
/* Helper to get pixel size for a pixel format */
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||
|
||||
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
|
||||
extern int verifyImageSupport( cl_device_id device );
|
||||
|
||||
/* Checks that the given device supports images. Same as verify, but doesn't print an error */
|
||||
extern int checkForImageSupport( cl_device_id device );
|
||||
extern int checkFor3DImageSupport( cl_device_id device );
|
||||
|
||||
/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
|
||||
extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
|
||||
|
||||
/* Helper for aligned memory allocation */
|
||||
void * align_malloc(size_t size, size_t alignment);
|
||||
void align_free(void *);
|
||||
|
||||
/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
|
||||
size_t get_min_alignment(cl_context context);
|
||||
|
||||
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
|
||||
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
|
||||
|
||||
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
|
||||
if( checkForImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
|
||||
if( checkFor3DImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
/* Prints out the standard device header for all tests given the device to print for */
|
||||
extern int printDeviceHeader( cl_device_id device );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // _kernelHelpers_h
|
||||
59
test_common/harness/mingw_compat.c
Normal file
59
test_common/harness/mingw_compat.c
Normal file
@@ -0,0 +1,59 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#if defined(__MINGW32__)
|
||||
|
||||
#include "mingw_compat.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
//This function is unavailable on various mingw compilers,
|
||||
//especially 64 bit so implementing it here
|
||||
const char *basename_dot=".";
|
||||
char*
|
||||
basename(char *path)
|
||||
{
|
||||
char *p = path, *b = NULL;
|
||||
int len = strlen(path);
|
||||
|
||||
if (path == NULL) {
|
||||
return (char*)basename_dot;
|
||||
}
|
||||
|
||||
// Not absolute path on windows
|
||||
if (path[1] != ':') {
|
||||
return path;
|
||||
}
|
||||
|
||||
// Trim trailing path seperators
|
||||
if (path[len - 1] == '\\' ||
|
||||
path[len - 1] == '/' ) {
|
||||
len--;
|
||||
path[len] = '\0';
|
||||
}
|
||||
|
||||
while (len) {
|
||||
while((*p != '\\' || *p != '/') && len) {
|
||||
p++;
|
||||
len--;
|
||||
}
|
||||
p++;
|
||||
b = p;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
#endif
|
||||
31
test_common/harness/mingw_compat.h
Normal file
31
test_common/harness/mingw_compat.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef MINGW_COMPAT_H
|
||||
#define MINGW_COMPAT_H
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
char *basename(char *path);
|
||||
#include <malloc.h>
|
||||
|
||||
#if defined(__MINGW64__)
|
||||
//mingw-w64 doesnot have __mingw_aligned_malloc, instead it has _aligned_malloc
|
||||
#define __mingw_aligned_malloc _aligned_malloc
|
||||
#define __mingw_aligned_free _aligned_free
|
||||
#include <stddef.h>
|
||||
#endif //(__MINGW64__)
|
||||
|
||||
#endif //(__MINGW32__)
|
||||
#endif // MINGW_COMPAT_H
|
||||
749
test_common/harness/msvc9.c
Normal file
749
test_common/harness/msvc9.c
Normal file
@@ -0,0 +1,749 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#if defined(_WIN32) && defined (_MSC_VER)
|
||||
|
||||
#include "compat.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <assert.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// rint, rintf
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
float copysignf( float x, float y )
|
||||
{
|
||||
union{ cl_uint u; float f; }ux, uy;
|
||||
|
||||
ux.f = x;
|
||||
uy.f = y;
|
||||
|
||||
ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U);
|
||||
|
||||
return ux.f;
|
||||
}
|
||||
|
||||
double copysign( double x, double y )
|
||||
{
|
||||
union{ cl_ulong u; double f; }ux, uy;
|
||||
|
||||
ux.f = x;
|
||||
uy.f = y;
|
||||
|
||||
ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);
|
||||
|
||||
return ux.f;
|
||||
}
|
||||
|
||||
long double copysignl( long double x, long double y )
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct{ cl_ulong m; cl_ushort sexp; }u;
|
||||
}ux, uy;
|
||||
|
||||
ux.f = x;
|
||||
uy.f = y;
|
||||
|
||||
ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);
|
||||
|
||||
return ux.f;
|
||||
}
|
||||
|
||||
float rintf(float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
||||
{
|
||||
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
|
||||
float rounded = x + magic;
|
||||
rounded -= magic;
|
||||
x = copysignf( rounded, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
double rint(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
|
||||
{
|
||||
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
|
||||
double rounded = x + magic;
|
||||
rounded -= magic;
|
||||
x = copysign( rounded, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
long double rintl(long double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 9223372036854775808.0L /* 0x1.0p64f */ )
|
||||
{
|
||||
long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
|
||||
long double rounded = x + magic;
|
||||
rounded -= magic;
|
||||
x = copysignl( rounded, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// ilogb, ilogbf, ilogbl
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
#ifndef FP_ILOGB0
|
||||
#define FP_ILOGB0 INT_MIN
|
||||
#endif
|
||||
|
||||
#ifndef FP_ILOGBNAN
|
||||
#define FP_ILOGBNAN INT_MIN
|
||||
#endif
|
||||
|
||||
int ilogb (double x)
|
||||
{
|
||||
union{ double f; cl_ulong u;} u;
|
||||
u.f = x;
|
||||
|
||||
cl_ulong absx = u.u & CL_LONG_MAX;
|
||||
if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
|
||||
{
|
||||
switch( absx )
|
||||
{
|
||||
case 0:
|
||||
return FP_ILOGB0;
|
||||
case 0x7ff0000000000000ULL:
|
||||
return INT_MAX;
|
||||
default:
|
||||
if( absx > 0x7ff0000000000000ULL )
|
||||
return FP_ILOGBNAN;
|
||||
|
||||
// subnormal
|
||||
u.u = absx | 0x3ff0000000000000ULL;
|
||||
u.f -= 1.0;
|
||||
return (u.u >> 52) - (1023 + 1022);
|
||||
}
|
||||
}
|
||||
|
||||
return (absx >> 52) - 1023;
|
||||
}
|
||||
|
||||
|
||||
int ilogbf (float x)
|
||||
{
|
||||
union{ float f; cl_uint u;} u;
|
||||
u.f = x;
|
||||
|
||||
cl_uint absx = u.u & 0x7fffffff;
|
||||
if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
|
||||
{
|
||||
switch( absx )
|
||||
{
|
||||
case 0:
|
||||
return FP_ILOGB0;
|
||||
case 0x7f800000U:
|
||||
return INT_MAX;
|
||||
default:
|
||||
if( absx > 0x7f800000 )
|
||||
return FP_ILOGBNAN;
|
||||
|
||||
// subnormal
|
||||
u.u = absx | 0x3f800000U;
|
||||
u.f -= 1.0f;
|
||||
return (u.u >> 23) - (127 + 126);
|
||||
}
|
||||
}
|
||||
|
||||
return (absx >> 23) - 127;
|
||||
}
|
||||
|
||||
int ilogbl (long double x)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct{ cl_ulong m; cl_ushort sexp; }u;
|
||||
} u;
|
||||
u.f = x;
|
||||
|
||||
int exp = u.u.sexp & 0x7fff;
|
||||
if( 0 == exp )
|
||||
{
|
||||
if( 0 == u.u.m )
|
||||
return FP_ILOGB0;
|
||||
|
||||
//subnormal
|
||||
u.u.sexp = 0x3fff;
|
||||
u.f -= 1.0f;
|
||||
exp = u.u.sexp & 0x7fff;
|
||||
|
||||
return exp - (0x3fff + 0x3ffe);
|
||||
}
|
||||
else if( 0x7fff == exp )
|
||||
{
|
||||
if( u.u.m & CL_LONG_MAX )
|
||||
return FP_ILOGBNAN;
|
||||
|
||||
return INT_MAX;
|
||||
}
|
||||
|
||||
return exp - 0x3fff;
|
||||
}
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// fmax, fmin, fmaxf, fminf
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
static void GET_BITS_SP32(float fx, unsigned int* ux)
|
||||
{
|
||||
volatile union {float f; unsigned int u;} _bitsy;
|
||||
_bitsy.f = (fx);
|
||||
*ux = _bitsy.u;
|
||||
}
|
||||
/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
|
||||
/* { */
|
||||
/* volatile union {float f; unsigned int i;} _bitsy; */
|
||||
/* _bitsy.f = (fx); */
|
||||
/* *ux = _bitsy.i; */
|
||||
/* } */
|
||||
static void PUT_BITS_SP32(unsigned int ux, float* fx)
|
||||
{
|
||||
volatile union {float f; unsigned int u;} _bitsy;
|
||||
_bitsy.u = (ux);
|
||||
*fx = _bitsy.f;
|
||||
}
|
||||
/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
|
||||
/* { */
|
||||
/* volatile union {float f; unsigned int i;} _bitsy; */
|
||||
/* _bitsy.i = (ux); */
|
||||
/* *fx = _bitsy.f; */
|
||||
/* } */
|
||||
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
|
||||
{
|
||||
volatile union {double d; unsigned __int64 l;} _bitsy;
|
||||
_bitsy.d = (dx);
|
||||
*lx = _bitsy.l;
|
||||
}
|
||||
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
|
||||
{
|
||||
volatile union {double d; unsigned __int64 l;} _bitsy;
|
||||
_bitsy.l = (lx);
|
||||
*dx = _bitsy.d;
|
||||
}
|
||||
|
||||
#if 0
|
||||
int SIGNBIT_DP64(double x )
|
||||
{
|
||||
int hx;
|
||||
_GET_HIGH_WORD(hx,x);
|
||||
return((hx>>31));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* fmax(x, y) returns the larger (more positive) of x and y.
|
||||
NaNs are treated as missing values: if one argument is NaN,
|
||||
the other argument is returned. If both arguments are NaN,
|
||||
the first argument is returned. */
|
||||
|
||||
/* This works so long as the compiler knows that (x != x) means
|
||||
that x is NaN; gcc does. */
|
||||
double fmax(double x, double y)
|
||||
{
|
||||
if( isnan(y) )
|
||||
return x;
|
||||
|
||||
return x >= y ? x : y;
|
||||
}
|
||||
|
||||
|
||||
/* fmin(x, y) returns the smaller (more negative) of x and y.
|
||||
NaNs are treated as missing values: if one argument is NaN,
|
||||
the other argument is returned. If both arguments are NaN,
|
||||
the first argument is returned. */
|
||||
|
||||
double fmin(double x, double y)
|
||||
{
|
||||
if( isnan(y) )
|
||||
return x;
|
||||
|
||||
return x <= y ? x : y;
|
||||
}
|
||||
|
||||
|
||||
float fmaxf( float x, float y )
|
||||
{
|
||||
if( isnan(y) )
|
||||
return x;
|
||||
|
||||
return x >= y ? x : y;
|
||||
}
|
||||
|
||||
/* fminf(x, y) returns the smaller (more negative) of x and y.
|
||||
NaNs are treated as missing values: if one argument is NaN,
|
||||
the other argument is returned. If both arguments are NaN,
|
||||
the first argument is returned. */
|
||||
|
||||
float fminf(float x, float y)
|
||||
{
|
||||
if( isnan(y) )
|
||||
return x;
|
||||
|
||||
return x <= y ? x : y;
|
||||
}
|
||||
|
||||
long double scalblnl(long double x, long n)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double d;
|
||||
struct{ cl_ulong m; cl_ushort sexp;}u;
|
||||
}u;
|
||||
u.u.m = CL_LONG_MIN;
|
||||
|
||||
if( x == 0.0L || n < -2200)
|
||||
return copysignl( 0.0L, x );
|
||||
|
||||
if( n > 2200 )
|
||||
return INFINITY;
|
||||
|
||||
if( n < 0 )
|
||||
{
|
||||
u.u.sexp = 0x3fff - 1022;
|
||||
while( n <= -1022 )
|
||||
{
|
||||
x *= u.d;
|
||||
n += 1022;
|
||||
}
|
||||
u.u.sexp = 0x3fff + n;
|
||||
x *= u.d;
|
||||
return x;
|
||||
}
|
||||
|
||||
if( n > 0 )
|
||||
{
|
||||
u.u.sexp = 0x3fff + 1023;
|
||||
while( n >= 1023 )
|
||||
{
|
||||
x *= u.d;
|
||||
n -= 1023;
|
||||
}
|
||||
u.u.sexp = 0x3fff + n;
|
||||
x *= u.d;
|
||||
return x;
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// log2
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
const static cl_double log_e_base2 = 1.4426950408889634074;
|
||||
const static cl_double log_10_base2 = 3.3219280948873623478;
|
||||
|
||||
//double log10(double x);
|
||||
|
||||
double log2(double x)
|
||||
{
|
||||
return 1.44269504088896340735992468100189214 * log(x);
|
||||
}
|
||||
|
||||
long double log2l(long double x)
|
||||
{
|
||||
return 1.44269504088896340735992468100189214L * log(x);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// misc functions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
// This function is commented out because the Windows implementation should never call munmap.
|
||||
// If it is calling it, we have a bug. Please file a bugzilla.
|
||||
int munmap(void *addr, size_t len)
|
||||
{
|
||||
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
||||
|
||||
return (int)VirtualAlloc( (LPVOID)addr, len,
|
||||
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
||||
}
|
||||
*/
|
||||
|
||||
uint64_t ReadTime( void )
|
||||
{
|
||||
LARGE_INTEGER current;
|
||||
QueryPerformanceCounter(¤t);
|
||||
return (uint64_t)current.QuadPart;
|
||||
}
|
||||
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
||||
{
|
||||
static double PerformanceFrequency = 0.0;
|
||||
|
||||
if (PerformanceFrequency == 0.0) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
PerformanceFrequency = (double) frequency.QuadPart;
|
||||
}
|
||||
|
||||
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
||||
}
|
||||
|
||||
float make_nan()
|
||||
{
|
||||
/* This is the IEEE 754 single-precision format:
|
||||
unsigned int mantissa: 22;
|
||||
unsigned int quiet_nan: 1;
|
||||
unsigned int exponent: 8;
|
||||
unsigned int negative: 1;
|
||||
*/
|
||||
//const static unsigned
|
||||
static const int32_t _nan = 0x7fc00000;
|
||||
return *(const float*)(&_nan);
|
||||
}
|
||||
|
||||
float nanf( const char* str)
|
||||
{
|
||||
cl_uint u = atoi( str );
|
||||
u |= 0x7fc00000U;
|
||||
return *( float*)(&u);
|
||||
}
|
||||
|
||||
|
||||
double nan( const char* str)
|
||||
{
|
||||
cl_ulong u = atoi( str );
|
||||
u |= 0x7ff8000000000000ULL;
|
||||
return *( double*)(&u);
|
||||
}
|
||||
|
||||
// double check this implementatation
|
||||
long double nanl( const char* str)
|
||||
{
|
||||
union
|
||||
{
|
||||
long double f;
|
||||
struct { cl_ulong m; cl_ushort sexp; }u;
|
||||
}u;
|
||||
u.u.sexp = 0x7fff;
|
||||
u.u.m = 0x8000000000000000ULL | atoi( str );
|
||||
|
||||
return u.f;
|
||||
}
|
||||
|
||||
double trunc(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
|
||||
{
|
||||
cl_long rounded = x;
|
||||
x = copysign( (double) rounded, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
float truncf(float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
||||
{
|
||||
cl_int rounded = x;
|
||||
x = copysignf( (float) rounded, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
long lround(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 0.5 )
|
||||
return 0;
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
|
||||
{
|
||||
absx += 0.5;
|
||||
cl_long rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysign( absx, x );
|
||||
}
|
||||
|
||||
if( x >= (double) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
return (long) x;
|
||||
}
|
||||
|
||||
long lroundf(float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( absx < 0.5f )
|
||||
return 0;
|
||||
|
||||
if( absx < 8388608.0f )
|
||||
{
|
||||
absx += 0.5f;
|
||||
cl_int rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysignf( absx, x );
|
||||
}
|
||||
|
||||
if( x >= (float) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
return (long) x;
|
||||
}
|
||||
|
||||
double round(double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( absx < 0.5 )
|
||||
return copysign( 0.0, x);
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
|
||||
{
|
||||
absx += 0.5;
|
||||
cl_long rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysign( absx, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
float roundf(float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( absx < 0.5f )
|
||||
return copysignf( 0.0f, x);
|
||||
|
||||
if( absx < 8388608.0f )
|
||||
{
|
||||
absx += 0.5f;
|
||||
cl_int rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysignf( absx, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
long double roundl(long double x)
|
||||
{
|
||||
long double absx = fabsl(x);
|
||||
|
||||
if( absx < 0.5L )
|
||||
return copysignl( 0.0L, x);
|
||||
|
||||
if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
|
||||
{
|
||||
absx += 0.5L;
|
||||
cl_ulong rounded = absx;
|
||||
absx = rounded;
|
||||
x = copysignl( absx, x );
|
||||
}
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
int signbit(double x)
|
||||
{
|
||||
union
|
||||
{
|
||||
double f;
|
||||
cl_ulong u;
|
||||
}u;
|
||||
u.f = x;
|
||||
return u.u >> 63;
|
||||
}
|
||||
|
||||
int signbitf(float x)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f;
|
||||
cl_uint u;
|
||||
}u;
|
||||
u.f = x;
|
||||
return u.u >> 31;
|
||||
}
|
||||
|
||||
float cbrtf( float x )
|
||||
{
|
||||
float z = pow( fabs((double) x), 1.0 / 3.0 );
|
||||
return copysignf( z, x );
|
||||
}
|
||||
|
||||
double cbrt( double x )
|
||||
{
|
||||
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
|
||||
}
|
||||
|
||||
float int2float (int32_t ix)
|
||||
{
|
||||
union {
|
||||
float f;
|
||||
int32_t i;
|
||||
} u;
|
||||
u.i = ix;
|
||||
return u.f;
|
||||
}
|
||||
|
||||
int32_t float2int (float fx)
|
||||
{
|
||||
union {
|
||||
float f;
|
||||
int32_t i;
|
||||
} u;
|
||||
u.f = fx;
|
||||
return u.i;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER) && !defined(_WIN64)
|
||||
/** Returns the number of leading 0-bits in x,
|
||||
starting at the most significant bit position.
|
||||
If x is 0, the result is undefined.
|
||||
*/
|
||||
int __builtin_clz(unsigned int pattern)
|
||||
{
|
||||
#if 0
|
||||
int res;
|
||||
__asm {
|
||||
mov eax, pattern
|
||||
bsr eax, eax
|
||||
mov res, eax
|
||||
}
|
||||
return 31 - res;
|
||||
#endif
|
||||
unsigned long index;
|
||||
unsigned char res = _BitScanReverse( &index, pattern);
|
||||
if (res) {
|
||||
return 8*sizeof(int) - 1 - index;
|
||||
} else {
|
||||
return 8*sizeof(int);
|
||||
}
|
||||
}
|
||||
#else
|
||||
int __builtin_clz(unsigned int pattern)
|
||||
{
|
||||
int count;
|
||||
if (pattern == 0u) {
|
||||
return 32;
|
||||
}
|
||||
count = 31;
|
||||
if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
|
||||
if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; }
|
||||
if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; }
|
||||
if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; }
|
||||
if (pattern >= 1u<<1) { count -= 1; }
|
||||
return count;
|
||||
}
|
||||
|
||||
#endif //defined(_MSC_VER) && !defined(_WIN64)
|
||||
|
||||
#include <intrin.h>
|
||||
#include <emmintrin.h>
|
||||
long int lrint (double x)
|
||||
{
|
||||
double absx = fabs(x);
|
||||
|
||||
if( x >= (double) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
|
||||
{
|
||||
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
|
||||
double rounded = x + magic;
|
||||
rounded -= magic;
|
||||
return (long int) rounded;
|
||||
}
|
||||
|
||||
return (long int) x;
|
||||
}
|
||||
|
||||
long int lrintf (float x)
|
||||
{
|
||||
float absx = fabsf(x);
|
||||
|
||||
if( x >= (float) LONG_MAX )
|
||||
return LONG_MAX;
|
||||
|
||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
||||
{
|
||||
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
|
||||
float rounded = x + magic;
|
||||
rounded -= magic;
|
||||
return (long int) rounded;
|
||||
}
|
||||
|
||||
return (long int) x;
|
||||
}
|
||||
|
||||
int usleep(int usec)
|
||||
{
|
||||
Sleep((usec + 999) / 1000);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int fetestexcept(int excepts)
|
||||
{
|
||||
unsigned int status = _statusfp();
|
||||
return excepts & (
|
||||
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
||||
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
||||
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
||||
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
||||
((status & _SW_INVALID) ? FE_INVALID : 0)
|
||||
);
|
||||
}
|
||||
|
||||
int feclearexcept(int excepts)
|
||||
{
|
||||
_clearfp();
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif //defined(_WIN32)
|
||||
274
test_common/harness/mt19937.c
Normal file
274
test_common/harness/mt19937.c
Normal file
@@ -0,0 +1,274 @@
|
||||
/*
|
||||
A C-program for MT19937, with initialization improved 2002/1/26.
|
||||
Coded by Takuji Nishimura and Makoto Matsumoto.
|
||||
|
||||
Before using, initialize the state by using init_genrand(seed)
|
||||
or init_by_array(init_key, key_length).
|
||||
|
||||
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The names of its contributors may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Any feedback is very welcome.
|
||||
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
|
||||
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
|
||||
|
||||
Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "mt19937.h"
|
||||
#include "mingw_compat.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
static void * align_malloc(size_t size, size_t alignment)
|
||||
{
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
return _aligned_malloc(size, alignment);
|
||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||
void * ptr = NULL;
|
||||
if (0 == posix_memalign(&ptr, alignment, size))
|
||||
return ptr;
|
||||
return NULL;
|
||||
#elif defined(__MINGW32__)
|
||||
return __mingw_aligned_malloc(size, alignment);
|
||||
#else
|
||||
#error "Please add support OS for aligned malloc"
|
||||
#endif
|
||||
}
|
||||
|
||||
static void align_free(void * ptr)
|
||||
{
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
_aligned_free(ptr);
|
||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||
return free(ptr);
|
||||
#elif defined(__MINGW32__)
|
||||
return __mingw_aligned_free(ptr);
|
||||
#else
|
||||
#error "Please add support OS for aligned free"
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* Period parameters */
|
||||
#define N 624 /* vector code requires multiple of 4 here */
|
||||
#define M 397
|
||||
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
|
||||
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
|
||||
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
|
||||
|
||||
typedef struct _MTdata
|
||||
{
|
||||
cl_uint mt[N];
|
||||
#ifdef __SSE2__
|
||||
cl_uint cache[N];
|
||||
#endif
|
||||
cl_int mti;
|
||||
}_MTdata;
|
||||
|
||||
/* initializes mt[N] with a seed */
|
||||
MTdata init_genrand(cl_uint s)
|
||||
{
|
||||
MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
|
||||
if( NULL != r )
|
||||
{
|
||||
cl_uint *mt = r->mt;
|
||||
int mti = 0;
|
||||
mt[0]= s; // & 0xffffffffUL;
|
||||
for (mti=1; mti<N; mti++) {
|
||||
mt[mti] = (cl_uint)
|
||||
(1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
|
||||
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
|
||||
/* In the previous versions, MSBs of the seed affect */
|
||||
/* only MSBs of the array mt[]. */
|
||||
/* 2002/01/09 modified by Makoto Matsumoto */
|
||||
// mt[mti] &= 0xffffffffUL;
|
||||
/* for >32 bit machines */
|
||||
}
|
||||
r->mti = mti;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void free_mtdata( MTdata d )
|
||||
{
|
||||
if(d)
|
||||
align_free(d);
|
||||
}
|
||||
|
||||
/* generates a random number on [0,0xffffffff]-interval */
|
||||
cl_uint genrand_int32( MTdata d)
|
||||
{
|
||||
/* mag01[x] = x * MATRIX_A for x=0,1 */
|
||||
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
|
||||
#ifdef __SSE2__
|
||||
static volatile int init = 0;
|
||||
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
|
||||
#endif
|
||||
|
||||
|
||||
cl_uint *mt = d->mt;
|
||||
cl_uint y;
|
||||
|
||||
if (d->mti == N)
|
||||
{ /* generate N words at one time */
|
||||
int kk;
|
||||
|
||||
#ifdef __SSE2__
|
||||
if( 0 == init )
|
||||
{
|
||||
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
|
||||
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
|
||||
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
|
||||
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
|
||||
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
|
||||
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
|
||||
init = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
kk = 0;
|
||||
#ifdef __SSE2__
|
||||
// vector loop
|
||||
for( ; kk + 4 <= N-M; kk += 4 )
|
||||
{
|
||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
|
||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
||||
}
|
||||
#endif
|
||||
for ( ;kk<N-M;kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
// advance to next aligned location
|
||||
for (;kk<N-1 && (kk & 3);kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
}
|
||||
|
||||
// vector loop
|
||||
for( ; kk + 4 <= N-1; kk += 4 )
|
||||
{
|
||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
|
||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
||||
}
|
||||
#endif
|
||||
|
||||
for (;kk<N-1;kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
}
|
||||
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
|
||||
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
|
||||
#ifdef __SSE2__
|
||||
// Do the tempering ahead of time in vector code
|
||||
for( kk = 0; kk + 4 <= N; kk += 4 )
|
||||
{
|
||||
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
|
||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
|
||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
|
||||
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
|
||||
}
|
||||
#endif
|
||||
|
||||
d->mti = 0;
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
y = d->cache[d->mti++];
|
||||
#else
|
||||
y = mt[d->mti++];
|
||||
|
||||
/* Tempering */
|
||||
y ^= (y >> 11);
|
||||
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||
y ^= (y >> 18);
|
||||
#endif
|
||||
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
cl_ulong genrand_int64( MTdata d)
|
||||
{
|
||||
return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1]-real-interval */
|
||||
double genrand_real1(MTdata d)
|
||||
{
|
||||
return genrand_int32(d)*(1.0/4294967295.0);
|
||||
/* divided by 2^32-1 */
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1)-real-interval */
|
||||
double genrand_real2(MTdata d)
|
||||
{
|
||||
return genrand_int32(d)*(1.0/4294967296.0);
|
||||
/* divided by 2^32 */
|
||||
}
|
||||
|
||||
/* generates a random number on (0,1)-real-interval */
|
||||
double genrand_real3(MTdata d)
|
||||
{
|
||||
return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
|
||||
/* divided by 2^32 */
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||
double genrand_res53(MTdata d)
|
||||
{
|
||||
unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
|
||||
return(a*67108864.0+b)*(1.0/9007199254740992.0);
|
||||
}
|
||||
99
test_common/harness/mt19937.h
Normal file
99
test_common/harness/mt19937.h
Normal file
@@ -0,0 +1,99 @@
|
||||
|
||||
/*
|
||||
* mt19937.h
|
||||
*
|
||||
* Mersenne Twister.
|
||||
*
|
||||
A C-program for MT19937, with initialization improved 2002/1/26.
|
||||
Coded by Takuji Nishimura and Makoto Matsumoto.
|
||||
|
||||
Before using, initialize the state by using init_genrand(seed)
|
||||
or init_by_array(init_key, key_length).
|
||||
|
||||
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The names of its contributors may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Any feedback is very welcome.
|
||||
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
|
||||
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
|
||||
*/
|
||||
|
||||
#ifndef MT19937_H
|
||||
#define MT19937_H 1
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/cl_platform.h>
|
||||
#else
|
||||
#include <CL/cl_platform.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Interfaces here have been modified from original sources so that they
|
||||
* are safe to call reentrantly, so long as a different MTdata is used
|
||||
* on each thread.
|
||||
*/
|
||||
|
||||
typedef struct _MTdata *MTdata;
|
||||
|
||||
/* Create the random number generator with seed */
|
||||
MTdata init_genrand( cl_uint /*seed*/ );
|
||||
|
||||
/* release memory used by a MTdata private data */
|
||||
void free_mtdata( MTdata /*data*/ );
|
||||
|
||||
/* generates a random number on [0,0xffffffff]-interval */
|
||||
cl_uint genrand_int32( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,0xffffffffffffffffULL]-interval */
|
||||
cl_ulong genrand_int64( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1]-real-interval */
|
||||
double genrand_real1( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1)-real-interval */
|
||||
double genrand_real2( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on (0,1)-real-interval */
|
||||
double genrand_real3( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||
double genrand_res53( MTdata /*data*/ );
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MT19937_H */
|
||||
49
test_common/harness/ref_counting.h
Normal file
49
test_common/harness/ref_counting.h
Normal file
@@ -0,0 +1,49 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _ref_counting_h
|
||||
#define _ref_counting_h
|
||||
|
||||
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
|
||||
cl_uint c##_refCount; \
|
||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
|
||||
test_error( error, "Unable to check reference count for " #type );
|
||||
|
||||
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
|
||||
cl_uint c##_refCount_new; \
|
||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
|
||||
test_error( error, "Unable to check reference count for " #type ); \
|
||||
if( c##_refCount != c##_refCount_new ) \
|
||||
{ \
|
||||
log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new ); \
|
||||
return -1; \
|
||||
}
|
||||
|
||||
#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
|
||||
#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
|
||||
|
||||
#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
|
||||
#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
|
||||
|
||||
#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
|
||||
#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
|
||||
|
||||
#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
|
||||
#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
|
||||
|
||||
#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
|
||||
#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
|
||||
|
||||
#endif // _ref_counting_h
|
||||
175
test_common/harness/rounding_mode.c
Normal file
175
test_common/harness/rounding_mode.c
Normal file
@@ -0,0 +1,175 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "rounding_mode.h"
|
||||
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
RoundingMode set_round( RoundingMode r, Type outType )
|
||||
{
|
||||
static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
|
||||
static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
|
||||
const int *p = int_rounds;
|
||||
if( outType == kfloat || outType == kdouble )
|
||||
p = flt_rounds;
|
||||
int oldRound = fegetround();
|
||||
fesetround( p[r] );
|
||||
|
||||
switch( oldRound )
|
||||
{
|
||||
case FE_TONEAREST:
|
||||
return kRoundToNearestEven;
|
||||
case FE_UPWARD:
|
||||
return kRoundUp;
|
||||
case FE_DOWNWARD:
|
||||
return kRoundDown;
|
||||
case FE_TOWARDZERO:
|
||||
return kRoundTowardZero;
|
||||
default:
|
||||
abort(); // ??!
|
||||
}
|
||||
return kDefaultRoundingMode; //never happens
|
||||
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
int oldRound = fegetround();
|
||||
|
||||
switch( oldRound )
|
||||
{
|
||||
case FE_TONEAREST:
|
||||
return kRoundToNearestEven;
|
||||
case FE_UPWARD:
|
||||
return kRoundUp;
|
||||
case FE_DOWNWARD:
|
||||
return kRoundDown;
|
||||
case FE_TOWARDZERO:
|
||||
return kRoundTowardZero;
|
||||
}
|
||||
|
||||
return kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#else
|
||||
RoundingMode set_round( RoundingMode r, Type outType )
|
||||
{
|
||||
static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
|
||||
static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
|
||||
const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
|
||||
unsigned int oldRound;
|
||||
|
||||
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
|
||||
if (err) {
|
||||
vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
|
||||
return kDefaultRoundingMode; //what else never happens
|
||||
}
|
||||
|
||||
oldRound &= _MCW_RC;
|
||||
|
||||
RoundingMode old =
|
||||
(oldRound == _RC_NEAR)? kRoundToNearestEven :
|
||||
(oldRound == _RC_UP)? kRoundUp :
|
||||
(oldRound == _RC_DOWN)? kRoundDown :
|
||||
(oldRound == _RC_CHOP)? kRoundTowardZero:
|
||||
kDefaultRoundingMode;
|
||||
|
||||
_controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
|
||||
return old; //returning old rounding mode
|
||||
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
unsigned int oldRound;
|
||||
|
||||
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
|
||||
oldRound &= _MCW_RC;
|
||||
return
|
||||
(oldRound == _RC_NEAR)? kRoundToNearestEven :
|
||||
(oldRound == _RC_UP)? kRoundUp :
|
||||
(oldRound == _RC_DOWN)? kRoundDown :
|
||||
(oldRound == _RC_CHOP)? kRoundTowardZero:
|
||||
kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
|
||||
// basic_test_conversions.c. Some host processors may not support this mode, which case you'll need to do some clamping in
|
||||
// software by testing against FLT_MIN or DBL_MIN in that file.
|
||||
//
|
||||
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
|
||||
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
|
||||
// operators do (e.g. add, subtract, multiply, divide, etc.)
|
||||
//
|
||||
// Configuring hardware to FTZ mode varies by platform.
|
||||
// CAUTION: Some C implementations may also fail to behave properly in this mode.
|
||||
//
|
||||
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
|
||||
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
|
||||
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
|
||||
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
|
||||
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
|
||||
// in which case, these function are at liberty to do nothing.
|
||||
//
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined( __PPC__ )
|
||||
#include <fpu_control.h>
|
||||
#endif
|
||||
void *FlushToZero( void )
|
||||
{
|
||||
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
|
||||
union{ int i; void *p; }u = { _mm_getcsr() };
|
||||
_mm_setcsr( u.i | 0x8040 );
|
||||
return u.p;
|
||||
#elif defined( __arm__ )
|
||||
// processor is already in FTZ mode -- do nothing
|
||||
return NULL;
|
||||
#elif defined( __PPC__ )
|
||||
fpu_control_t flags = 0;
|
||||
_FPU_GETCW(flags);
|
||||
flags |= _FPU_MASK_NI;
|
||||
_FPU_SETCW(flags);
|
||||
return NULL;
|
||||
#else
|
||||
#error Unknown arch
|
||||
#endif
|
||||
#else
|
||||
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
|
||||
#endif
|
||||
}
|
||||
|
||||
// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
|
||||
void UnFlushToZero( void *p)
|
||||
{
|
||||
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
|
||||
union{ void *p; int i; }u = { p };
|
||||
_mm_setcsr( u.i );
|
||||
#elif defined( __arm__ )
|
||||
// processor is already in FTZ mode -- do nothing
|
||||
#elif defined( __PPC__)
|
||||
fpu_control_t flags = 0;
|
||||
_FPU_GETCW(flags);
|
||||
flags &= ~_FPU_MASK_NI;
|
||||
_FPU_SETCW(flags);
|
||||
#else
|
||||
#error Unknown arch
|
||||
#endif
|
||||
#else
|
||||
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
|
||||
#endif
|
||||
}
|
||||
73
test_common/harness/rounding_mode.h
Normal file
73
test_common/harness/rounding_mode.h
Normal file
@@ -0,0 +1,73 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __ROUNDING_MODE_H__
|
||||
#define __ROUNDING_MODE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#if (defined(_WIN32) && defined (_MSC_VER))
|
||||
// need for _controlfp_s and rouinding modes in RoundingMode
|
||||
#include <float.h>
|
||||
#include "errorHelpers.h"
|
||||
#include "testHarness.h"
|
||||
#else
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
typedef enum
|
||||
{
|
||||
kDefaultRoundingMode = 0,
|
||||
kRoundToNearestEven,
|
||||
kRoundUp,
|
||||
kRoundDown,
|
||||
kRoundTowardZero,
|
||||
|
||||
kRoundingModeCount
|
||||
}RoundingMode;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
kuchar = 0,
|
||||
kchar = 1,
|
||||
kushort = 2,
|
||||
kshort = 3,
|
||||
kuint = 4,
|
||||
kint = 5,
|
||||
kfloat = 6,
|
||||
kdouble = 7,
|
||||
kulong = 8,
|
||||
klong = 9,
|
||||
|
||||
//This goes last
|
||||
kTypeCount
|
||||
}Type;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern RoundingMode set_round( RoundingMode r, Type outType );
|
||||
extern RoundingMode get_round( void );
|
||||
extern void *FlushToZero( void );
|
||||
extern void UnFlushToZero( void *p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* __ROUNDING_MODE_H__ */
|
||||
812
test_common/harness/testHarness.c
Normal file
812
test_common/harness/testHarness.c
Normal file
@@ -0,0 +1,812 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testHarness.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "threadTesting.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "fpcontrol.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
int gTestsPassed = 0;
|
||||
int gTestsFailed = 0;
|
||||
cl_uint gRandomSeed = 0;
|
||||
cl_uint gReSeed = 0;
|
||||
|
||||
int gFlushDenormsToZero = 0;
|
||||
int gInfNanSupport = 1;
|
||||
int gIsEmbedded = 0;
|
||||
int gIsOpenCL_C_1_0_Device = 0;
|
||||
int gIsOpenCL_1_0_Device = 0;
|
||||
int gHasLong = 1;
|
||||
|
||||
#define DEFAULT_NUM_ELEMENTS 0x4000
|
||||
|
||||
int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
|
||||
{
|
||||
return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps,
|
||||
( imageSupportRequired ) ? verifyImageSupport : NULL );
|
||||
}
|
||||
|
||||
int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
|
||||
DeviceCheckFn deviceCheckFn )
|
||||
{
|
||||
test_start();
|
||||
|
||||
cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
cl_uint num_platforms = 0;
|
||||
cl_platform_id *platforms;
|
||||
cl_device_id device;
|
||||
int num_elements = DEFAULT_NUM_ELEMENTS;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_uint choosen_device_index = 0;
|
||||
cl_uint choosen_platform_index = 0;
|
||||
|
||||
int err, ret;
|
||||
char *endPtr;
|
||||
unsigned int i;
|
||||
int based_on_env_var = 0;
|
||||
|
||||
|
||||
/* Check for environment variable to set device type */
|
||||
char *env_mode = getenv( "CL_DEVICE_TYPE" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
based_on_env_var = 1;
|
||||
if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
else
|
||||
{
|
||||
log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
{
|
||||
// report on any unusual library search path indirection
|
||||
char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
|
||||
|
||||
// report on any unusual framework search path indirection
|
||||
char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
|
||||
}
|
||||
#endif
|
||||
|
||||
env_mode = getenv( "CL_DEVICE_INDEX" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
choosen_device_index = atoi(env_mode);
|
||||
}
|
||||
|
||||
env_mode = getenv( "CL_PLATFORM_INDEX" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
choosen_platform_index = atoi(env_mode);
|
||||
}
|
||||
|
||||
/* Process the command line arguments */
|
||||
|
||||
/* Special case: just list the tests */
|
||||
if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
|
||||
{
|
||||
log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
|
||||
log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
|
||||
log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
|
||||
log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
|
||||
log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
|
||||
|
||||
for( i = 0; i < num_fns - 1; i++ )
|
||||
{
|
||||
log_info( "\t\t%s\n", fnNames[ i ] );
|
||||
}
|
||||
test_finish();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* How are we supposed to seed the random # generators? */
|
||||
if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
|
||||
{
|
||||
log_info(" Initializing random seed based on the clock.\n");
|
||||
gRandomSeed = (unsigned)clock();
|
||||
gReSeed = 1;
|
||||
argc--;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(" Initializing random seed to 0.\n");
|
||||
}
|
||||
|
||||
/* Do we have an integer to specify the number of elements to pass to tests? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
|
||||
if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
|
||||
{
|
||||
/* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
|
||||
/* (hence why we stored the result in ret first) */
|
||||
num_elements = ret;
|
||||
log_info( "Testing with num_elements of %d\n", num_elements );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do we have a CPU/GPU specification? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_GPU;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_CPU;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Did we choose a specific device index? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
|
||||
{
|
||||
choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Did we choose a specific platform index? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
|
||||
{
|
||||
choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
switch( device_type )
|
||||
{
|
||||
case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
|
||||
case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
|
||||
case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
|
||||
default: log_error( "Requesting unknown device "); return -1;
|
||||
}
|
||||
log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
|
||||
log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
||||
#define kHasSSE3 0x00000008
|
||||
#define kHasSupplementalSSE3 0x00000100
|
||||
#define kHasSSE4_1 0x00000400
|
||||
#define kHasSSE4_2 0x00000800
|
||||
/* check our environment for a hint to disable SSE variants */
|
||||
{
|
||||
const char *env = getenv( "CL_MAX_SSE" );
|
||||
if( env )
|
||||
{
|
||||
extern int _cpu_capabilities;
|
||||
int mask = 0;
|
||||
if( 0 == strcasecmp( env, "SSE4.1" ) )
|
||||
mask = kHasSSE4_2;
|
||||
else if( 0 == strcasecmp( env, "SSSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1;
|
||||
else if( 0 == strcasecmp( env, "SSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
|
||||
else if( 0 == strcasecmp( env, "SSE2" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
|
||||
else
|
||||
{
|
||||
log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
|
||||
return -2;
|
||||
}
|
||||
|
||||
log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
|
||||
_cpu_capabilities &= ~mask;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Get the platform */
|
||||
err = clGetPlatformIDs(0, NULL, &num_platforms);
|
||||
if (err) {
|
||||
print_error(err, "clGetPlatformIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
|
||||
if (!platforms || choosen_platform_index >= num_platforms) {
|
||||
log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetPlatformIDs(num_platforms, platforms, NULL);
|
||||
if (err) {
|
||||
print_error(err, "clGetPlatformIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Get the number of requested devices */
|
||||
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
|
||||
if (!devices || choosen_device_index >= num_devices) {
|
||||
log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Get the requested device */
|
||||
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
device = devices[choosen_device_index];
|
||||
free(devices);
|
||||
devices = NULL;
|
||||
free(platforms);
|
||||
platforms = NULL;
|
||||
|
||||
if( printDeviceHeader( device ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_device_fp_config fpconfig = 0;
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
|
||||
log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
|
||||
log_info( "sizeof( void*) = %d (host)\n", (int) sizeof( void* ) );
|
||||
|
||||
//detect whether profile of the device is embedded
|
||||
char profile[1024] = "";
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
if (err)
|
||||
{
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
|
||||
|
||||
//detect the floating point capabilities
|
||||
cl_device_fp_config floatCapabilities = 0;
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
|
||||
if (err)
|
||||
{
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check for problems that only embedded will have
|
||||
if( gIsEmbedded )
|
||||
{
|
||||
//If the device is embedded, we need to detect if the device supports Infinity and NaN
|
||||
if ((floatCapabilities & CL_FP_INF_NAN) == 0)
|
||||
gInfNanSupport = 0;
|
||||
|
||||
// check the extensions list to see if ulong and long are supported
|
||||
size_t extensionsStringSize = 0;
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
|
||||
{
|
||||
print_error( err, "Unable to get extensions string size for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
char *extensions_string = (char*) malloc(extensionsStringSize);
|
||||
if( NULL == extensions_string )
|
||||
{
|
||||
print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
|
||||
{
|
||||
print_error( err, "Unable to get extensions string for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( extensions_string[extensionsStringSize-1] != '\0' )
|
||||
{
|
||||
log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( NULL == strstr( extensions_string, "cles_khr_int64" ))
|
||||
gHasLong = 0;
|
||||
|
||||
free(extensions_string);
|
||||
}
|
||||
|
||||
if( getenv( "OPENCL_1_0_DEVICE" ) )
|
||||
{
|
||||
char c_version[1024];
|
||||
gIsOpenCL_1_0_Device = 1;
|
||||
memset( c_version, 0, sizeof( c_version ) );
|
||||
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
|
||||
{
|
||||
log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
|
||||
{
|
||||
gIsOpenCL_C_1_0_Device = 1;
|
||||
log_info( "Device is a OpenCL C 1.0 device\n" );
|
||||
}
|
||||
else
|
||||
log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
|
||||
}
|
||||
|
||||
cl_uint device_address_bits = 0;
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
|
||||
{
|
||||
print_error( err, "Unable to obtain device address bits" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
if( device_address_bits )
|
||||
log_info( "sizeof( void*) = %d (device)\n", device_address_bits/8 );
|
||||
else
|
||||
{
|
||||
log_error("Invalid device address bit size returned by device.\n");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* If we have a device checking function, run it */
|
||||
if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (num_elements <= 0)
|
||||
num_elements = DEFAULT_NUM_ELEMENTS;
|
||||
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
|
||||
int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore the old FP mode before leaving.
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
|
||||
basefn *fnList, const char *fnNames[],
|
||||
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements )
|
||||
{
|
||||
int ret, argIndex;
|
||||
unsigned int i;
|
||||
int fn_to_test = -1; // initialized to test all.
|
||||
// unsigned int threadSize;
|
||||
char partial[512] = { 0 };
|
||||
|
||||
|
||||
/* Now that we have an environment, go through our arguments and run tests that match each argument */
|
||||
if( argc == 1 )
|
||||
{
|
||||
/* No actual arguments, so just run all tests */
|
||||
ret = callTestFunctions( fnList, num_fns - 1, fnNames,
|
||||
device, forceNoContextCreation, num_elements, -1, NULL, queueProps );
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Go through each argument and use it to process a list of functions to run */
|
||||
ret = 0;
|
||||
for( argIndex = 1; argIndex < argc; argIndex++ )
|
||||
{
|
||||
/* Are we a partial test? */
|
||||
fn_to_test = -1;
|
||||
if( strchr( argv[argIndex], '*' ) != NULL )
|
||||
{
|
||||
/* Yes, store the partial test for later */
|
||||
strcpy( partial, argv[argIndex] );
|
||||
strchr( partial, '*' )[0] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Nope, loop through looking for an exact name match */
|
||||
for (i=0; i<num_fns; i++)
|
||||
{
|
||||
if (strcmp(argv[argIndex], fnNames[i]) == 0)
|
||||
{
|
||||
fn_to_test = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == num_fns)
|
||||
{
|
||||
log_error("invalid test name: %s \n", argv[argIndex]);
|
||||
ret = 1;
|
||||
continue; /* Keep processing other arguments */
|
||||
}
|
||||
else if( ( fn_to_test == (int)num_fns - 1 ) && ( strcmp( fnNames[i], "all" ) == 0 ) )
|
||||
{
|
||||
fn_to_test = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Execute this particular test loop (remember to remove 1 from the function count for the lack of "all" at the end!) */
|
||||
ret += callTestFunctions( fnList, num_fns - 1, fnNames,
|
||||
device, forceNoContextCreation, num_elements,
|
||||
fn_to_test, partial, queueProps );
|
||||
}
|
||||
}
|
||||
|
||||
if (gTestsFailed == 0) {
|
||||
if (gTestsPassed > 1)
|
||||
log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
|
||||
else if (gTestsPassed > 0)
|
||||
log_info("PASSED test.\n");
|
||||
} else if (gTestsFailed > 0) {
|
||||
if (gTestsFailed+gTestsPassed > 1)
|
||||
log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
|
||||
else
|
||||
log_error("FAILED test.\n");
|
||||
}
|
||||
|
||||
test_finish();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// The actual function that loops through tests and executes them
|
||||
int callTestFunctions( basefn functionList[], int numFunctions,
|
||||
const char *functionNames[],
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse,
|
||||
int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps )
|
||||
{
|
||||
int numErrors = 0, found = 0, i;
|
||||
|
||||
if( functionIndexToCall >= numFunctions )
|
||||
{
|
||||
log_error( "ERROR: Invalid function index to test!\n" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (functionIndexToCall == -1)
|
||||
{
|
||||
for (i=0; i<numFunctions; i++)
|
||||
{
|
||||
/* If we're matching partial names, skip any that don't match */
|
||||
if( partialName != NULL && strncmp( functionNames[i], partialName, strlen( partialName ) ) != 0 )
|
||||
continue;
|
||||
|
||||
/* Skip any unimplemented tests */
|
||||
if (functionList[i] == 0)
|
||||
{
|
||||
log_info("%s test currently not implemented\n", functionNames[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
found = 1;
|
||||
numErrors += callSingleTestFunction( functionList[i], functionNames[i], deviceToUse, forceNoContextCreation, numElementsToUse, queueProps );
|
||||
}
|
||||
if( found == 0 && partialName != NULL )
|
||||
{
|
||||
log_error( "ERROR: Wildcard test name does not match any tests: %s\n", partialName );
|
||||
return numErrors + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Run a single test */
|
||||
if (functionList[functionIndexToCall])
|
||||
{
|
||||
numErrors += callSingleTestFunction( functionList[functionIndexToCall], functionNames[functionIndexToCall],
|
||||
deviceToUse, forceNoContextCreation, numElementsToUse, queueProps );
|
||||
}
|
||||
else
|
||||
log_info("%s test currently not implemented\n", functionNames[functionIndexToCall]);
|
||||
}
|
||||
return numErrors;
|
||||
}
|
||||
|
||||
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
|
||||
{
|
||||
log_info( "%s\n", errinfo );
|
||||
}
|
||||
|
||||
// Actual function execution
|
||||
int callSingleTestFunction( basefn functionToCall, const char *functionName,
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps )
|
||||
{
|
||||
int numErrors = 0, ret;
|
||||
cl_int error;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue queue = NULL;
|
||||
|
||||
/* Create a context to work with, unless we're told not to */
|
||||
if( !forceNoContextCreation )
|
||||
{
|
||||
context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
|
||||
if (!context)
|
||||
{
|
||||
print_error( error, "Unable to create testing context" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
queue = clCreateCommandQueue( context, deviceToUse, queueProps, &error );
|
||||
if( queue == NULL )
|
||||
{
|
||||
print_error( error, "Unable to create testing command queue" );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Run the test and print the result */
|
||||
log_info( "%s...\n", functionName );
|
||||
fflush( stdout );
|
||||
|
||||
ret = functionToCall( deviceToUse, context, queue, numElementsToUse); //test_threaded_function( ptr_basefn_list[i], group, context, num_elements);
|
||||
if( ret == TEST_NOT_IMPLEMENTED )
|
||||
{
|
||||
/* Tests can also let us know they're not implemented yet */
|
||||
log_info("%s test currently not implemented\n\n", functionName);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Print result */
|
||||
if( ret == 0 ) {
|
||||
log_info( "%s passed\n", functionName );
|
||||
gTestsPassed++;
|
||||
}
|
||||
else
|
||||
{
|
||||
numErrors++;
|
||||
log_error( "%s FAILED\n", functionName );
|
||||
gTestsFailed++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Release the context */
|
||||
if( !forceNoContextCreation )
|
||||
{
|
||||
int error = clFinish(queue);
|
||||
if (error) {
|
||||
log_error("clFinish failed: %d", error);
|
||||
numErrors++;
|
||||
}
|
||||
clReleaseCommandQueue( queue );
|
||||
clReleaseContext( context );
|
||||
}
|
||||
|
||||
return numErrors;
|
||||
}
|
||||
|
||||
void checkDeviceTypeOverride( cl_device_type *inOutType )
|
||||
{
|
||||
/* Check if we are forced to CPU mode */
|
||||
char *force_cpu = getenv( "CL_DEVICE_TYPE" );
|
||||
if( force_cpu != NULL )
|
||||
{
|
||||
if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
*inOutType = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
*inOutType = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
*inOutType = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
*inOutType = CL_DEVICE_TYPE_DEFAULT;
|
||||
}
|
||||
|
||||
switch( *inOutType )
|
||||
{
|
||||
case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
|
||||
case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
|
||||
case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
|
||||
default: break;
|
||||
}
|
||||
log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
{
|
||||
// report on any unusual library search path indirection
|
||||
char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
|
||||
|
||||
// report on any unusual framework search path indirection
|
||||
char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
|
||||
{
|
||||
uint32_t pat = ((uint32_t*) src_pattern)[0];
|
||||
size_t count = bytes / 4;
|
||||
size_t i;
|
||||
uint32_t *d = (uint32_t*)dest;
|
||||
|
||||
for( i = 0; i < count; i++ )
|
||||
d[i] = pat;
|
||||
|
||||
d += i;
|
||||
|
||||
bytes &= 3;
|
||||
if( bytes )
|
||||
memcpy( d, src_pattern, bytes );
|
||||
}
|
||||
#endif
|
||||
|
||||
extern cl_device_type GetDeviceType( cl_device_id d )
|
||||
{
|
||||
cl_device_type result = -1;
|
||||
cl_int err = clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( result ), &result, NULL );
|
||||
if( CL_SUCCESS != err )
|
||||
log_error( "ERROR: Unable to get device type for device %p\n", d );
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
cl_device_id GetOpposingDevice( cl_device_id device )
|
||||
{
|
||||
cl_int error;
|
||||
cl_device_id *otherDevices;
|
||||
cl_uint actualCount;
|
||||
cl_platform_id plat;
|
||||
|
||||
// Get the platform of the device to use for getting a list of devices
|
||||
error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
print_error( error, "Unable to get device's platform" );
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Get a list of all devices
|
||||
error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
print_error( error, "Unable to get list of devices size" );
|
||||
return NULL;
|
||||
}
|
||||
otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
|
||||
error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
print_error( error, "Unable to get list of devices" );
|
||||
free(otherDevices);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if( actualCount == 1 )
|
||||
{
|
||||
free(otherDevices);
|
||||
return device; // NULL means error, returning self means we couldn't find another one
|
||||
}
|
||||
|
||||
// Loop and just find one that isn't the one we were given
|
||||
cl_uint i;
|
||||
for( i = 0; i < actualCount; i++ )
|
||||
{
|
||||
if( otherDevices[ i ] != device )
|
||||
{
|
||||
cl_device_type newType;
|
||||
error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
|
||||
if( error != CL_SUCCESS )
|
||||
{
|
||||
print_error( error, "Unable to get device type for other device" );
|
||||
free(otherDevices);
|
||||
return NULL;
|
||||
}
|
||||
cl_device_id result = otherDevices[ i ];
|
||||
free(otherDevices);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Should never get here
|
||||
free(otherDevices);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
104
test_common/harness/testHarness.h
Normal file
104
test_common/harness/testHarness.h
Normal file
@@ -0,0 +1,104 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testHarness_h
|
||||
#define _testHarness_h
|
||||
|
||||
#include "threadTesting.h"
|
||||
#include "clImageHelper.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern cl_uint gReSeed;
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
|
||||
// setup work, and then call each function in turn as dictatated by the passed arguments.
|
||||
extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
|
||||
|
||||
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
|
||||
typedef int (*DeviceCheckFn)( cl_device_id device );
|
||||
|
||||
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
|
||||
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
|
||||
|
||||
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
|
||||
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
|
||||
basefn *fnList, const char *fnNames[],
|
||||
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
|
||||
|
||||
// Call this function if you need to do all the setup work yourself, and just need the function list called/
|
||||
// managed.
|
||||
// functionIndexToCall can be a valid index into the function list, or -1 to run all of them.
|
||||
// partialName can be a string to partially match function names against and only execute functions who
|
||||
// match, or NULL to not restrict execution (ignored if functionIndexToCall is not -1)
|
||||
// functionList is the actual array of functions
|
||||
// numFunctions is the number of functions in the list (which should NOT have NULL at the end for "all")
|
||||
// functionNames is an array of strings representing the name of each function, to be used in partial matching
|
||||
// contextProps are used to create a testing context for each test
|
||||
// deviceToUse, deviceGroupToUse and numElementsToUse are all just passed to each test function
|
||||
|
||||
extern int callTestFunctions( basefn functionList[], int numFunctions,
|
||||
const char *functionNames[],
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse,
|
||||
int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps );
|
||||
|
||||
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
|
||||
extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps );
|
||||
|
||||
///// Miscellaneous steps
|
||||
|
||||
// Given a pre-existing device type choice, check the environment for an override, then print what
|
||||
// choice was made and how (and return the overridden choice, if there is one)
|
||||
extern void checkDeviceTypeOverride( cl_device_type *inOutType );
|
||||
|
||||
// standard callback function for context pfn_notify
|
||||
extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
|
||||
|
||||
extern cl_device_type GetDeviceType( cl_device_id );
|
||||
|
||||
// Given a device (most likely passed in by the harness, but not required), will attempt to find
|
||||
// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
|
||||
// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
|
||||
// is the only device available, the SAME device is returned, so check!
|
||||
extern cl_device_id GetOpposingDevice( cl_device_id device );
|
||||
|
||||
|
||||
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
|
||||
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
|
||||
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
|
||||
extern int gHasLong; // This is set to 1 if the device suppots long and ulong types in OpenCL C.
|
||||
extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
void memset_pattern4(void *, const void *, size_t);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _testHarness_h
|
||||
|
||||
|
||||
51
test_common/harness/test_mt19937.c
Normal file
51
test_common/harness/test_mt19937.c
Normal file
@@ -0,0 +1,51 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "mt19937.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main( void )
|
||||
{
|
||||
MTdata d = init_genrand(42);
|
||||
int i;
|
||||
const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
|
||||
0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
|
||||
0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
|
||||
0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
|
||||
int errcount = 0;
|
||||
|
||||
for( i = 0; i < 65536; i++ )
|
||||
{
|
||||
cl_uint u = genrand_int32( d );
|
||||
if( 0 == (i & 4095) )
|
||||
{
|
||||
if( u != reference[i>>12] )
|
||||
{
|
||||
printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n", reference[i>>12], i, u );
|
||||
errcount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
if( errcount )
|
||||
printf("mt19937 test failed.\n");
|
||||
else
|
||||
printf("mt19937 test passed.\n");
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
106
test_common/harness/threadTesting.c
Normal file
106
test_common/harness/threadTesting.c
Normal file
@@ -0,0 +1,106 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "threadTesting.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
#if 0 // Disabed for now
|
||||
|
||||
typedef struct
|
||||
{
|
||||
basefn mFunction;
|
||||
cl_device_id mDevice;
|
||||
cl_context mContext;
|
||||
int mNumElements;
|
||||
} TestFnArgs;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Thread-based testing. Spawns a new thread to run the given test function,
|
||||
// then waits for it to complete. The entire idea is that, if the thread crashes,
|
||||
// we can catch it and report it as a failure instead of crashing the entire suite
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void *test_thread_wrapper( void *data )
|
||||
{
|
||||
TestFnArgs *args;
|
||||
int retVal;
|
||||
cl_context context;
|
||||
|
||||
args = (TestFnArgs *)data;
|
||||
|
||||
/* Create a new context to use (contexts can't cross threads) */
|
||||
context = clCreateContext(NULL, args->mDeviceGroup);
|
||||
if( context == NULL )
|
||||
{
|
||||
log_error("clCreateContext failed for new thread\n");
|
||||
return (void *)(-1);
|
||||
}
|
||||
|
||||
/* Call function */
|
||||
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
|
||||
|
||||
clReleaseContext( context );
|
||||
|
||||
return (void *)retVal;
|
||||
}
|
||||
|
||||
int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
|
||||
{
|
||||
int error;
|
||||
pthread_t threadHdl;
|
||||
void *retVal;
|
||||
TestFnArgs args;
|
||||
|
||||
|
||||
args.mFunction = fnToTest;
|
||||
args.mDeviceGroup = deviceGroup;
|
||||
args.mDevice = device;
|
||||
args.mContext = context;
|
||||
args.mNumElements = numElements;
|
||||
|
||||
|
||||
error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to create thread for testing!\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Thread has been started, now just wait for it to complete (or crash) */
|
||||
error = pthread_join( threadHdl, &retVal );
|
||||
if( error != 0 )
|
||||
{
|
||||
log_error( "ERROR: Unable to join testing thread!\n" );
|
||||
return -1;
|
||||
}
|
||||
|
||||
return (int)((intptr_t)retVal);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
32
test_common/harness/threadTesting.h
Normal file
32
test_common/harness/threadTesting.h
Normal file
@@ -0,0 +1,32 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _threadTesting_h
|
||||
#define _threadTesting_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#define TEST_NOT_IMPLEMENTED -99
|
||||
|
||||
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
|
||||
|
||||
#endif // _threadTesting_h
|
||||
|
||||
|
||||
481
test_common/harness/typeWrappers.cpp
Normal file
481
test_common/harness/typeWrappers.cpp
Normal file
@@ -0,0 +1,481 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "typeWrappers.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <stdlib.h>
|
||||
#include "clImageHelper.h"
|
||||
|
||||
#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#define kPageSize 4096
|
||||
#include <sys/mman.h>
|
||||
#include <stdlib.h>
|
||||
#elif defined(__linux__)
|
||||
#include <unistd.h>
|
||||
#define kPageSize (getpagesize())
|
||||
#endif
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, mem_flags, fmt, width );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
|
||||
{
|
||||
cl_int error;
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
size_t number_of_devices;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||
|
||||
number_of_devices /= sizeof(cl_device_id);
|
||||
for (int i=0; i<(int)number_of_devices; i++) {
|
||||
cl_device_type type;
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||
if (type == CL_DEVICE_TYPE_GPU) {
|
||||
protect_pages = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (protect_pages) {
|
||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
||||
size_t rowStride = rowBytes + kPageSize;
|
||||
|
||||
// create backing store
|
||||
backingStoreSize = rowStride + 8 * rowStride;
|
||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
// add guard pages
|
||||
size_t row;
|
||||
char *p = (char*) backingStore;
|
||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p += rowBytes;
|
||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
||||
p -= rowBytes;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
|
||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
||||
{
|
||||
static int spewEnv = 1;
|
||||
if(spewEnv)
|
||||
{
|
||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
||||
spewEnv = 0;
|
||||
}
|
||||
imagePtr += rowBytes - pixelBytes * width;
|
||||
}
|
||||
|
||||
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
|
||||
} else {
|
||||
backingStore = NULL;
|
||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
||||
|
||||
}
|
||||
#else
|
||||
|
||||
backingStore = NULL;
|
||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
||||
|
||||
#endif
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, mem_flags, fmt, width, height );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
|
||||
{
|
||||
cl_int error;
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
size_t number_of_devices;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||
|
||||
number_of_devices /= sizeof(cl_device_id);
|
||||
for (int i=0; i<(int)number_of_devices; i++) {
|
||||
cl_device_type type;
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||
if (type == CL_DEVICE_TYPE_GPU) {
|
||||
protect_pages = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (protect_pages) {
|
||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
||||
size_t rowStride = rowBytes + kPageSize;
|
||||
|
||||
// create backing store
|
||||
backingStoreSize = height * rowStride + 8 * rowStride;
|
||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
// add guard pages
|
||||
size_t row;
|
||||
char *p = (char*) backingStore;
|
||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p += rowBytes;
|
||||
for( row = 0; row < height; row++ )
|
||||
{
|
||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p -= rowBytes;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
|
||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
||||
{
|
||||
static int spewEnv = 1;
|
||||
if(spewEnv)
|
||||
{
|
||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
||||
spewEnv = 0;
|
||||
}
|
||||
imagePtr += rowBytes - pixelBytes * width;
|
||||
}
|
||||
|
||||
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
|
||||
} else {
|
||||
backingStore = NULL;
|
||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
||||
|
||||
}
|
||||
#else
|
||||
|
||||
backingStore = NULL;
|
||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
||||
|
||||
#endif
|
||||
return error;
|
||||
}
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, mem_flags, fmt, width, height, depth );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
|
||||
{
|
||||
cl_int error;
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
size_t number_of_devices;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||
|
||||
number_of_devices /= sizeof(cl_device_id);
|
||||
for (int i=0; i<(int)number_of_devices; i++) {
|
||||
cl_device_type type;
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||
if (type == CL_DEVICE_TYPE_GPU) {
|
||||
protect_pages = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (protect_pages) {
|
||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
||||
size_t rowStride = rowBytes + kPageSize;
|
||||
|
||||
// create backing store
|
||||
backingStoreSize = height * depth * rowStride + 8 * rowStride;
|
||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
// add guard pages
|
||||
size_t row;
|
||||
char *p = (char*) backingStore;
|
||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p += rowBytes;
|
||||
for( row = 0; row < height*depth; row++ )
|
||||
{
|
||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p -= rowBytes;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
|
||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
||||
{
|
||||
static int spewEnv = 1;
|
||||
if(spewEnv)
|
||||
{
|
||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
||||
spewEnv = 0;
|
||||
}
|
||||
imagePtr += rowBytes - pixelBytes * width;
|
||||
}
|
||||
|
||||
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
|
||||
} else {
|
||||
backingStore = NULL;
|
||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
|
||||
}
|
||||
#else
|
||||
|
||||
backingStore = NULL;
|
||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
|
||||
|
||||
#endif
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
|
||||
{
|
||||
cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
|
||||
if( errcode_ret != NULL )
|
||||
*errcode_ret = err;
|
||||
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
|
||||
{
|
||||
cl_int error;
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
size_t number_of_devices;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||
|
||||
number_of_devices /= sizeof(cl_device_id);
|
||||
for (int i=0; i<(int)number_of_devices; i++) {
|
||||
cl_device_type type;
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||
if (type == CL_DEVICE_TYPE_GPU) {
|
||||
protect_pages = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (protect_pages) {
|
||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
||||
size_t rowStride = rowBytes + kPageSize;
|
||||
|
||||
// create backing store
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
backingStoreSize = rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
backingStoreSize = height * rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
backingStoreSize = height * depth * rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
backingStoreSize = arraySize * rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
|
||||
break;
|
||||
}
|
||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
// add guard pages
|
||||
size_t row;
|
||||
char *p = (char*) backingStore;
|
||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p += rowBytes;
|
||||
size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
|
||||
for( row = 0; row < sz; row++ )
|
||||
{
|
||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p -= rowBytes;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
|
||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
||||
{
|
||||
static int spewEnv = 1;
|
||||
if(spewEnv)
|
||||
{
|
||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
||||
spewEnv = 0;
|
||||
}
|
||||
imagePtr += rowBytes - pixelBytes * width;
|
||||
}
|
||||
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
backingStore = NULL;
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
|
||||
backingStore = NULL;
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*******
|
||||
* clProtectedArray implementation
|
||||
*******/
|
||||
clProtectedArray::clProtectedArray()
|
||||
{
|
||||
mBuffer = mValidBuffer = NULL;
|
||||
}
|
||||
|
||||
clProtectedArray::clProtectedArray( size_t sizeInBytes )
|
||||
{
|
||||
mBuffer = mValidBuffer = NULL;
|
||||
Allocate( sizeInBytes );
|
||||
}
|
||||
|
||||
clProtectedArray::~clProtectedArray()
|
||||
{
|
||||
if( mBuffer != NULL ) {
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap( mBuffer, mRealSize );
|
||||
if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
|
||||
#else
|
||||
free( mBuffer );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void clProtectedArray::Allocate( size_t sizeInBytes )
|
||||
{
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
|
||||
// Allocate enough space to: round up our actual allocation to an even number of pages
|
||||
// and allocate two pages on either side
|
||||
mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
|
||||
mRealSize = mRoundedSize + kPageSize * 2;
|
||||
|
||||
// Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
|
||||
mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
mValidBuffer = mBuffer + kPageSize;
|
||||
|
||||
// Protect guard area from access
|
||||
mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
|
||||
mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
|
||||
#else
|
||||
mRoundedSize = mRealSize = sizeInBytes;
|
||||
mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
333
test_common/harness/typeWrappers.h
Normal file
333
test_common/harness/typeWrappers.h
Normal file
@@ -0,0 +1,333 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _typeWrappers_h
|
||||
#define _typeWrappers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "compat.h"
|
||||
#include <stdio.h>
|
||||
#include "mt19937.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "kernelHelpers.h"
|
||||
|
||||
extern "C" cl_uint gReSeed;
|
||||
extern "C" cl_uint gRandomSeed;
|
||||
|
||||
/* cl_context wrapper */
|
||||
|
||||
class clContextWrapper
|
||||
{
|
||||
public:
|
||||
clContextWrapper() { mContext = NULL; }
|
||||
clContextWrapper( cl_context program ) { mContext = program; }
|
||||
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
|
||||
|
||||
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
|
||||
operator cl_context() { return mContext; }
|
||||
|
||||
cl_context * operator&() { return &mContext; }
|
||||
|
||||
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_context mContext;
|
||||
};
|
||||
|
||||
/* cl_program wrapper */
|
||||
|
||||
class clProgramWrapper
|
||||
{
|
||||
public:
|
||||
clProgramWrapper() { mProgram = NULL; }
|
||||
clProgramWrapper( cl_program program ) { mProgram = program; }
|
||||
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
|
||||
|
||||
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
|
||||
operator cl_program() { return mProgram; }
|
||||
|
||||
cl_program * operator&() { return &mProgram; }
|
||||
|
||||
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_program mProgram;
|
||||
};
|
||||
|
||||
/* cl_kernel wrapper */
|
||||
|
||||
class clKernelWrapper
|
||||
{
|
||||
public:
|
||||
clKernelWrapper() { mKernel = NULL; }
|
||||
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
|
||||
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
|
||||
|
||||
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
|
||||
operator cl_kernel() { return mKernel; }
|
||||
|
||||
cl_kernel * operator&() { return &mKernel; }
|
||||
|
||||
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_kernel mKernel;
|
||||
};
|
||||
|
||||
/* cl_mem (stream) wrapper */
|
||||
|
||||
class clMemWrapper
|
||||
{
|
||||
public:
|
||||
clMemWrapper() { mMem = NULL; }
|
||||
clMemWrapper( cl_mem mem ) { mMem = mem; }
|
||||
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
|
||||
|
||||
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_mem() { return mMem; }
|
||||
|
||||
cl_mem * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_mem mMem;
|
||||
};
|
||||
|
||||
class clProtectedImage
|
||||
{
|
||||
public:
|
||||
clProtectedImage() { image = NULL; backingStore = NULL; }
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
|
||||
~clProtectedImage()
|
||||
{
|
||||
if( image != NULL )
|
||||
clReleaseMemObject( image );
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
if(backingStore)
|
||||
munmap(backingStore, backingStoreSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
|
||||
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
|
||||
|
||||
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
|
||||
operator cl_mem() { return image; }
|
||||
|
||||
cl_mem * operator&() { return ℑ }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return image == rhs; }
|
||||
|
||||
protected:
|
||||
void *backingStore;
|
||||
size_t backingStoreSize;
|
||||
cl_mem image;
|
||||
};
|
||||
|
||||
/* cl_command_queue wrapper */
|
||||
|
||||
class clCommandQueueWrapper
|
||||
{
|
||||
public:
|
||||
clCommandQueueWrapper() { mMem = NULL; }
|
||||
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
|
||||
~clCommandQueueWrapper() { if( mMem != NULL ) {int error = clFinish(mMem); if (error) print_error(error, "clFinish failed"); clReleaseCommandQueue( mMem );} }
|
||||
|
||||
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_command_queue() { return mMem; }
|
||||
|
||||
cl_command_queue * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_command_queue mMem;
|
||||
};
|
||||
|
||||
/* cl_sampler wrapper */
|
||||
class clSamplerWrapper
|
||||
{
|
||||
public:
|
||||
clSamplerWrapper() { mMem = NULL; }
|
||||
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
|
||||
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
|
||||
|
||||
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_sampler() { return mMem; }
|
||||
|
||||
cl_sampler * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_sampler mMem;
|
||||
};
|
||||
|
||||
/* cl_event wrapper */
|
||||
class clEventWrapper
|
||||
{
|
||||
public:
|
||||
clEventWrapper() { mMem = NULL; }
|
||||
clEventWrapper( cl_event mem ) { mMem = mem; }
|
||||
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
|
||||
|
||||
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_event() { return mMem; }
|
||||
|
||||
cl_event * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_event mMem;
|
||||
};
|
||||
|
||||
/* Generic protected memory buffer, for verifying access within bounds */
|
||||
class clProtectedArray
|
||||
{
|
||||
public:
|
||||
clProtectedArray();
|
||||
clProtectedArray( size_t sizeInBytes );
|
||||
virtual ~clProtectedArray();
|
||||
|
||||
void Allocate( size_t sizeInBytes );
|
||||
|
||||
operator void *() { return (void *)mValidBuffer; }
|
||||
operator const void *() const { return (const void *)mValidBuffer; }
|
||||
|
||||
protected:
|
||||
|
||||
char * mBuffer;
|
||||
char * mValidBuffer;
|
||||
size_t mRealSize, mRoundedSize;
|
||||
};
|
||||
|
||||
class RandomSeed
|
||||
{
|
||||
public:
|
||||
RandomSeed( cl_uint seed ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
|
||||
~RandomSeed()
|
||||
{
|
||||
if( gReSeed )
|
||||
gRandomSeed = genrand_int32( mtData );
|
||||
free_mtdata(mtData);
|
||||
}
|
||||
|
||||
operator MTdata () {return mtData;}
|
||||
|
||||
protected:
|
||||
MTdata mtData;
|
||||
};
|
||||
|
||||
template <typename T> class BufferOwningPtr
|
||||
{
|
||||
BufferOwningPtr(BufferOwningPtr const &); // do not implement
|
||||
void operator=(BufferOwningPtr const &); // do not implement
|
||||
|
||||
void *ptr;
|
||||
void *map;
|
||||
size_t mapsize; // Bytes allocated total, pointed to by map.
|
||||
size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
|
||||
bool aligned;
|
||||
public:
|
||||
explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
|
||||
explicit BufferOwningPtr(void *p, void *m, size_t s)
|
||||
: ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
|
||||
{
|
||||
#if ! defined( __APPLE__ )
|
||||
if(m)
|
||||
{
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
~BufferOwningPtr() {
|
||||
if (map) {
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap(map, mapsize);
|
||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
|
||||
if (map){
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap(map, mapsize);
|
||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||
#else
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
|
||||
abort();
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
ptr = p;
|
||||
map = m;
|
||||
mapsize = mapsize_;
|
||||
allocsize = allocsize_;
|
||||
aligned = aligned_;
|
||||
#if ! defined( __APPLE__ )
|
||||
if(m)
|
||||
{
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
operator T*() { return (T*)ptr; }
|
||||
|
||||
size_t getSize() const { return allocsize; };
|
||||
};
|
||||
|
||||
#endif // _typeWrappers_h
|
||||
|
||||
|
||||
Reference in New Issue
Block a user