Initial open source release of OpenCL 1.2 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 19:04:36 +05:30
parent 6911ba5116
commit f74871b7a3
563 changed files with 202074 additions and 0 deletions

26
test_common/Makefile Normal file
View File

@@ -0,0 +1,26 @@
# Sub-directories to build. Every entry must end in '/' because the rules
# below derive the directory with $(dir ...). (utils/ is currently disabled.)
PRODUCTS = harness/

TOP=$(shell pwd)

all: $(PRODUCTS)

# Run "make clean" in every product directory. Each directory is cleaned in
# its own subshell so the "cd" does not leak into the next iteration.
# NOTE: the loop must end with a plain "done" - a trailing backslash there
# would splice whatever line follows into this recipe.
clean:
	@for testdir in $(dir $(PRODUCTS)) ; \
	do ( \
		echo "==================================================================================" ; \
		echo "Cleaning $$testdir" ; \
		echo "==================================================================================" ; \
		cd $$testdir && $(MAKE) clean \
	); \
	done

# Build one product directory. $(MAKE) (rather than a literal "make") lets
# command-line flags such as -j, -n and -k propagate to the sub-make.
$(PRODUCTS):
	@echo "==================================================================================" ;
	@echo "(`date "+%H:%M:%S"`) Make $@" ;
	@echo "==================================================================================" ;
	cd $(dir $@) && $(MAKE)

.PHONY: clean $(PRODUCTS) all

View File

@@ -0,0 +1,52 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _gl_headers_h
#define _gl_headers_h
#if defined( __APPLE__ )
#include <OpenGL/OpenGL.h>
#if defined(CGL_VERSION_1_3)
#include <OpenGL/gl3.h>
#include <OpenGL/gl3ext.h>
#else
#include <OpenGL/gl.h>
#include <OpenGL/glext.h>
#endif
#include <GLUT/glut.h>
#else
#ifdef _WIN32
#include <windows.h>
#endif
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glext.h>
#ifdef _WIN32
#include <GL/glut.h>
#else
#include <GL/freeglut.h>
#endif
#endif
#ifdef _WIN32
GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString);
// No glutGetProcAddress in the standard glut v3.7.
#define glutGetProcAddress(procName) wglGetProcAddress(procName)
#endif
#endif // _gl_headers_h

1622
test_common/gl/helpers.cpp Normal file

File diff suppressed because it is too large Load Diff

288
test_common/gl/helpers.h Normal file
View File

@@ -0,0 +1,288 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _helpers_h
#define _helpers_h
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#if !defined(_WIN32)
#include <stdbool.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#if !defined (__APPLE__)
#include <CL/cl.h>
#include "gl_headers.h"
#include <CL/cl_gl.h>
#else
#include "gl_headers.h"
#endif
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/threadTesting.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/mt19937.h"
// Function-pointer types for the OpenCL <-> OpenGL sharing entry points.
// Each type is named after the API function whose signature it mirrors, and
// each has a matching global `<name>_ptr` variable declared further below.

// clCreateFromGLBuffer
// (errcode_ret is declared as cl_int * so it agrees with the sibling typedefs
//  below; it was previously spelled `int *`.)
typedef cl_mem
(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context     context,
                                       cl_mem_flags   flags,
                                       GLuint         bufobj,
                                       cl_int *       errcode_ret);

// clCreateFromGLTexture
typedef cl_mem
(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context       context ,
                                        cl_mem_flags     flags ,
                                        GLenum           target ,
                                        GLint            miplevel ,
                                        GLuint           texture ,
                                        cl_int *         errcode_ret) ;

// clCreateFromGLTexture2D
typedef cl_mem
(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context       context ,
                                          cl_mem_flags     flags ,
                                          GLenum           target ,
                                          GLint            miplevel ,
                                          GLuint           texture ,
                                          cl_int *         errcode_ret) ;

// clCreateFromGLTexture3D
typedef cl_mem
(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context       context ,
                                          cl_mem_flags     flags ,
                                          GLenum           target ,
                                          GLint            miplevel ,
                                          GLuint           texture ,
                                          cl_int *         errcode_ret) ;

// clCreateFromGLRenderbuffer
typedef cl_mem
(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context    context ,
                                             cl_mem_flags  flags ,
                                             GLuint        renderbuffer ,
                                             cl_int *      errcode_ret) ;

// clGetGLObjectInfo
typedef cl_int
(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem                 memobj ,
                                    cl_gl_object_type *    gl_object_type ,
                                    GLuint *               gl_object_name) ;

// clGetGLTextureInfo
typedef cl_int
(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem               memobj ,
                                     cl_gl_texture_info   param_name ,
                                     size_t               param_value_size ,
                                     void *               param_value ,
                                     size_t *             param_value_size_ret) ;

// clEnqueueAcquireGLObjects
typedef cl_int
(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue      command_queue ,
                                            cl_uint               num_objects ,
                                            const cl_mem *        mem_objects ,
                                            cl_uint               num_events_in_wait_list ,
                                            const cl_event *      event_wait_list ,
                                            cl_event *            event) ;

// clEnqueueReleaseGLObjects
typedef cl_int
(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue      command_queue ,
                                            cl_uint               num_objects ,
                                            const cl_mem *        mem_objects ,
                                            cl_uint               num_events_in_wait_list ,
                                            const cl_event *      event_wait_list ,
                                            cl_event *            event) ;

// Global entry-point pointers, one per type above. They are only declared
// here; the definitions and their initialisation live in another translation
// unit (presumably helpers.cpp / init_clgl_ext() - not visible from this header).
extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr;
extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr;
extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
class glBufferWrapper
{
public:
glBufferWrapper() { mBuffer = 0; }
glBufferWrapper( GLuint b ) { mBuffer = b; }
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
operator GLuint() { return mBuffer; }
operator GLuint *() { return &mBuffer; }
GLuint * operator&() { return &mBuffer; }
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
protected:
GLuint mBuffer;
};
class glTextureWrapper
{
public:
glTextureWrapper() { mHandle = 0; }
glTextureWrapper( GLuint b ) { mHandle = b; }
~glTextureWrapper() {
if( mHandle != 0 ) glDeleteTextures( 1, &mHandle );
}
glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; }
operator GLuint() { return mHandle; }
operator GLuint *() { return &mHandle; }
GLuint * operator&() { return &mHandle; }
bool operator==( GLuint rhs ) { return mHandle == rhs; }
protected:
// The texture handle.
GLuint mHandle;
};
class glRenderbufferWrapper
{
public:
glRenderbufferWrapper() { mBuffer = 0; }
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
operator GLuint() { return mBuffer; }
operator GLuint *() { return &mBuffer; }
GLuint * operator&() { return &mBuffer; }
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
protected:
GLuint mBuffer;
};
class glFramebufferWrapper
{
public:
glFramebufferWrapper() { mBuffer = 0; }
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
operator GLuint() { return mBuffer; }
operator GLuint *() { return &mBuffer; }
GLuint * operator&() { return &mBuffer; }
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
protected:
GLuint mBuffer;
};
// Helper functions (defined in helpers.cpp)
//
// Only the prototypes are visible here, so the notes below describe what the
// signatures show. Anything about buffer ownership or error codes is an
// assumption to confirm against helpers.cpp.

// --- Texture creation -------------------------------------------------------
// Each Create* function reports the new GL object name(s) through its GLuint*
// out-parameter(s) and a status through outError, and returns a void* buffer.
// NOTE(review): presumably the returned buffer holds the generated source data
// (random, via the MTdata generator) and must be freed by the caller - confirm.
extern void * CreateGLTexture1DArray( size_t width, size_t length,
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID, int *outError,
bool allocateMem, MTdata d);
extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length,
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID, int *outError,
bool allocateMem, MTdata d);
// Texture-buffer variant: reports both a texture name and a buffer name.
extern void * CreateGLTextureBuffer( size_t width,
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError,
bool allocateMem, MTdata d);
extern void * CreateGLTexture1D(size_t width,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID,
int *outError, bool allocateMem, MTdata d );
extern void * CreateGLTexture2D( size_t width, size_t height,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID,
int *outError, bool allocateMem, MTdata d );
// Note the different parameter order: here MTdata precedes allocateMem, and
// allocateMem defaults to true.
extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID,
int *outError, MTdata d, bool allocateMem = true );
// --- Texture read-back ------------------------------------------------------
extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width,
GLenum glFormat, GLenum glInternalFormat,
GLenum glType, ExplicitType typeToReadAs,
size_t outWidth, size_t outHeight );
// --- Renderbuffers ----------------------------------------------------------
// The "Raw" variant returns an int status and takes no ExplicitType/MTdata.
extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
GLuint *outFramebuffer,
GLuint *outRenderbuffer );
extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type,
GLuint *outFramebuffer,
GLuint *outRenderbuffer,
int *outError, MTdata d, bool allocateMem );
extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
GLenum attachment, GLenum glFormat,
GLenum glInternalFormat, GLenum glType,
ExplicitType typeToReadAs,
size_t outWidth, size_t outHeight );
// --- Diagnostics: dump a buffer / map GL enums to printable names -----------
extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer);
extern const char *GetGLTypeName( GLenum type );
extern const char *GetGLAttachmentName( GLenum att );
extern const char *GetGLTargetName( GLenum tgt );
extern const char *GetGLBaseFormatName( GLenum baseformat );
extern const char *GetGLFormatName( GLenum format );
// --- Data generation and GL <-> harness type mapping ------------------------
extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d );
extern GLenum GetGLFormat(GLenum internalFormat);
extern GLenum GetGLTypeForExplicitType(ExplicitType type);
extern size_t GetGLTypeSize(GLenum type);
extern ExplicitType GetExplicitTypeForGLType(GLenum type);
extern GLenum get_base_gl_target( GLenum target );
// NOTE(review): presumably fills in the cl*_ptr entry points declared in this
// header - confirm in helpers.cpp.
extern int init_clgl_ext( void );
#endif // _helpers_h
#endif // _helpers_h

48
test_common/gl/setup.h Normal file
View File

@@ -0,0 +1,48 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _setup_h
#define _setup_h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gl_headers.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
// Note: the idea here is to have every platform define its own setup.cpp file that implements a GLEnvironment
// subclass internally, then returns an instance of it from its definition of GLEnvironment::Instance
// Abstract interface to the platform's OpenGL environment. Exactly one
// platform-specific subclass is compiled in, and it also supplies the
// definition of the static Instance() factory.
class GLEnvironment
{
public:
GLEnvironment() {}
virtual ~GLEnvironment() {}
// Sets up the GL window/context. argc/argv are forwarded to GLUT by the
// implementations that use it; use_opengl_32 requests an OpenGL 3.2 context
// where the platform implementation supports one. The implementations in
// this repository return 0 on success and -1 on failure.
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
// Creates an OpenCL context that shares with the current GL context.
// Implementations return NULL/0 on failure.
virtual cl_context CreateCLContext( void ) = 0;
// Reports whether any device of device_type advertises the platform's GL
// sharing extension: non-zero if one does, 0 if none does, -1 if a CL query
// fails.
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
// Returns the platform's GLEnvironment object (defined per platform).
static GLEnvironment * Instance( void );
};
#endif // _setup_h

View File

@@ -0,0 +1,156 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "setup.h"
#include "../../test_common/harness/errorHelpers.h"
#include <OpenGL/CGLDevice.h>
class OSXGLEnvironment : public GLEnvironment
{
public:
OSXGLEnvironment()
{
mCGLContext = NULL;
}
virtual int Init( int *argc, char **argv, int use_opengl_32 )
{
if (!use_opengl_32) {
// Create a GLUT window to render into
glutInit( argc, argv );
glutInitWindowSize( 512, 512 );
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
glutCreateWindow( "OpenCL <-> OpenGL Test" );
}
else {
CGLPixelFormatAttribute attribs[] = {
kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core,
kCGLPFAAllowOfflineRenderers,
kCGLPFANoRecovery,
kCGLPFAAccelerated,
kCGLPFADoubleBuffer,
(CGLPixelFormatAttribute)0
};
CGLError err;
CGLPixelFormatObj pix;
GLint npix;
err = CGLChoosePixelFormat (attribs, &pix, &npix);
if(err != kCGLNoError)
{
log_error("Failed to choose pixel format\n");
return -1;
}
err = CGLCreateContext(pix, NULL, &mCGLContext);
if(err != kCGLNoError)
{
log_error("Failed to create GL context\n");
return -1;
}
CGLSetCurrentContext(mCGLContext);
}
return 0;
}
virtual cl_context CreateCLContext( void )
{
int error;
if( mCGLContext == NULL )
mCGLContext = CGLGetCurrentContext();
CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext);
cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 };
cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error);
if (error) {
print_error(error, "clCreateContext failed");
return NULL;
}
// Verify that all devices in the context support the required extension
cl_device_id devices[64];
size_t size_out;
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out);
if (error) {
print_error(error, "clGetContextInfo failed");
return NULL;
}
char extensions[8192];
for (int i=0; i<(int)(size_out/sizeof(cl_device_id)); i++) {
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
if (error) {
print_error(error, "clGetDeviceInfo failed");
return NULL;
}
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
log_error("Device %d does not supporte required extension cl_APPLE_gl_sharing.\n", i);
return NULL;
}
}
return context;
}
virtual int SupportsCLGLInterop( cl_device_type device_type )
{
int found_valid_device = 0;
cl_device_id devices[64];
cl_uint num_of_devices;
int error;
error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices);
if (error) {
print_error(error, "clGetDeviceIDs failed");
return -1;
}
char extensions[8192];
for (int i=0; i<(int)num_of_devices; i++) {
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
if (error) {
print_error(error, "clGetDeviceInfo failed");
return -1;
}
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
} else {
log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
found_valid_device = 1;
}
}
return found_valid_device;
}
virtual ~OSXGLEnvironment()
{
CGLDestroyContext( mCGLContext );
}
CGLContextObj mCGLContext;
};
// Returns the single OS X GL environment, constructing it on the first call.
// The object is never deleted by this function.
GLEnvironment * GLEnvironment::Instance( void )
{
    static OSXGLEnvironment * sEnvironment = NULL;

    if( sEnvironment != NULL )
        return sEnvironment;

    sEnvironment = new OSXGLEnvironment();
    return sEnvironment;
}

View File

@@ -0,0 +1,204 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#define GL_GLEXT_PROTOTYPES
#include "setup.h"
#include "testBase.h"
#include "../../test_common/harness/errorHelpers.h"
#include <GL/gl.h>
#include <GL/glut.h>
#include <GL/glext.h>
#include <GL/glut.h>
#include <CL/cl_ext.h>
// Pointer type for clGetGLContextInfoKHR. WGLEnvironment::CreateCLContext
// fetches the entry point at run time with
// clGetExtensionFunctionAddressForPlatform.
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties *properties,
cl_gl_context_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
// Rename references to this dynamically linked function to avoid
// collision with static link version
// (From this point on, every use of the identifier clGetGLContextInfoKHR in
// this file - including the variable definition below - expands to
// clGetGLContextInfoKHR_proc. String literals are not affected.)
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc
// File-local function pointer; static storage, so it starts out NULL and is
// filled in lazily on the first CreateCLContext call.
static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR;
// Capacity of the device arrays used by WGLEnvironment.
#define MAX_DEVICES 32
class WGLEnvironment : public GLEnvironment
{
private:
cl_device_id m_devices[MAX_DEVICES];
int m_device_count;
cl_platform_id m_platform;
public:
WGLEnvironment()
{
m_device_count = 0;
m_platform = 0;
}
virtual int Init( int *argc, char **argv, int use_opengl_32 )
{
// Create a GLUT window to render into
glutInit( argc, argv );
glutInitWindowSize( 512, 512 );
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
glutCreateWindow( "OpenCL <-> OpenGL Test" );
glewInit();
return 0;
}
virtual cl_context CreateCLContext( void )
{
HGLRC hGLRC = wglGetCurrentContext();
HDC hDC = wglGetCurrentDC();
cl_context_properties properties[] = {
CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform,
CL_GL_CONTEXT_KHR, (cl_context_properties) hGLRC,
CL_WGL_HDC_KHR, (cl_context_properties) hDC,
0
};
cl_device_id devices[MAX_DEVICES];
size_t dev_size;
cl_int status;
if (!hGLRC || !hDC) {
print_error(CL_INVALID_CONTEXT, "No GL context bound");
return 0;
}
if (!clGetGLContextInfoKHR) {
// As OpenCL for the platforms. Warn if more than one platform found,
// since this might not be the platform we want. By default, we simply
// use the first returned platform.
cl_uint nplatforms;
cl_platform_id platform;
clGetPlatformIDs(0, NULL, &nplatforms);
clGetPlatformIDs(1, &platform, NULL);
if (nplatforms > 1) {
log_info("clGetPlatformIDs returned multiple values. This is not "
"an error, but might result in obtaining incorrect function "
"pointers if you do not want the first returned platform.\n");
// Show them the platform name, in case it is a problem.
size_t size;
char *name;
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
name = (char*)malloc(size);
clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
log_info("Using platform with name: %s \n", name);
free(name);
}
clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR");
if (!clGetGLContextInfoKHR) {
print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR");
}
}
status = clGetGLContextInfoKHR(properties,
CL_DEVICES_FOR_GL_CONTEXT_KHR,
sizeof(devices),
devices,
&dev_size);
if (status != CL_SUCCESS) {
print_error(status, "clGetGLContextInfoKHR failed");
return 0;
}
dev_size /= sizeof(cl_device_id);
log_info("GL context supports %d compute devices\n", dev_size);
status = clGetGLContextInfoKHR(properties,
CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
sizeof(devices),
devices,
&dev_size);
if (status != CL_SUCCESS) {
print_error(status, "clGetGLContextInfoKHR failed");
return 0;
}
cl_device_id ctxDevice = m_devices[0];
if (dev_size > 0) {
log_info("GL context current device: 0x%x\n", devices[0]);
for (int i = 0; i < m_device_count; i++) {
if (m_devices[i] == devices[0]) {
ctxDevice = devices[0];
break;
}
}
} else {
log_info("GL context current device is not a CL device, using device %d.\n", ctxDevice);
}
return clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status);
}
virtual int SupportsCLGLInterop( cl_device_type device_type )
{
cl_device_id devices[MAX_DEVICES];
cl_uint num_of_devices;
int error;
error = clGetPlatformIDs(1, &m_platform, NULL);
if (error) {
print_error(error, "clGetPlatformIDs failed");
return -1;
}
error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices);
if (error) {
print_error(error, "clGetDeviceIDs failed");
return -1;
}
// Check all devices, search for one that supports cl_khr_gl_sharing
char extensions[8192];
for (int i=0; i<(int)num_of_devices; i++) {
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
if (error) {
print_error(error, "clGetDeviceInfo failed");
return -1;
}
if (strstr(extensions, "cl_khr_gl_sharing") == NULL) {
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
} else {
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
m_devices[m_device_count++] = devices[i];
}
}
return m_device_count > 0;
}
virtual ~WGLEnvironment()
{
}
};
// Returns the single Windows GL environment, constructing it on the first
// call. The object is never deleted by this function.
GLEnvironment * GLEnvironment::Instance( void )
{
    static WGLEnvironment * sEnvironment = NULL;

    if( sEnvironment != NULL )
        return sEnvironment;

    sEnvironment = new WGLEnvironment();
    return sEnvironment;
}

View File

@@ -0,0 +1,122 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#define GL_GLEXT_PROTOTYPES
#include "setup.h"
#include "testBase.h"
#include "../../test_common/harness/errorHelpers.h"
#include <GL/gl.h>
#include <GL/glut.h>
#include <GL/glext.h>
#include <GL/freeglut.h>
#include <GL/glx.h>
#include <CL/cl_ext.h>
class X11GLEnvironment : public GLEnvironment
{
private:
cl_device_id m_devices[64];
cl_uint m_device_count;
public:
X11GLEnvironment()
{
m_device_count = 0;
}
virtual int Init( int *argc, char **argv, int use_opencl_32 )
{
// Create a GLUT window to render into
glutInit( argc, argv );
glutInitWindowSize( 512, 512 );
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
glutCreateWindow( "OpenCL <-> OpenGL Test" );
glewInit();
return 0;
}
virtual cl_context CreateCLContext( void )
{
GLXContext context = glXGetCurrentContext();
Display *dpy = glXGetCurrentDisplay();
cl_context_properties properties[] = {
CL_GL_CONTEXT_KHR, (cl_context_properties) context,
CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy,
0
};
cl_int status;
if (!context || !dpy) {
print_error(CL_INVALID_CONTEXT, "No GL context bound");
return 0;
}
return clCreateContext(properties, 1, m_devices, NULL, NULL, &status);
}
virtual int SupportsCLGLInterop( cl_device_type device_type )
{
int found_valid_device = 0;
cl_platform_id platform;
cl_device_id devices[64];
cl_uint num_of_devices;
int error;
error = clGetPlatformIDs(1, &platform, NULL);
if (error) {
print_error(error, "clGetPlatformIDs failed");
return -1;
}
error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices);
// If this platform doesn't have any of the requested device_type (namely GPUs) then return 0
if (error == CL_DEVICE_NOT_FOUND)
return 0;
if (error) {
print_error(error, "clGetDeviceIDs failed");
return -1;
}
char extensions[8192];
for (int i=0; i<(int)num_of_devices; i++) {
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
if (error) {
print_error(error, "clGetDeviceInfo failed");
return -1;
}
if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) {
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
} else {
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
found_valid_device = 1;
m_devices[m_device_count++] = devices[i];
}
}
return found_valid_device;
}
virtual ~X11GLEnvironment()
{
}
};
// Returns the single X11 GL environment, constructing it on the first call.
// The object is never deleted by this function.
GLEnvironment * GLEnvironment::Instance( void )
{
    static X11GLEnvironment * sEnvironment = NULL;

    if( sEnvironment != NULL )
        return sEnvironment;

    sEnvironment = new X11GLEnvironment();
    return sEnvironment;
}

View File

@@ -0,0 +1,18 @@
# Boost.Build project for the test harness.
# Requirements: add this directory to the include path, pass -xc++ (gcc) or
# /TP (msvc) so every source is compiled as C++, and do not treat warnings as
# errors. Dependents inherit the include path through usage-requirements.
project
: requirements <include>.
<toolset>gcc:<cflags>"-xc++"
<toolset>msvc:<cflags>"/TP"
<warnings-as-errors>off
: usage-requirements <include>.
;
# One obj target per .c/.cpp file in this directory, named after the file's
# base name.
local harness.objs ;
for source in [ glob *.c *.cpp ]
{
harness.objs += [ obj $(source:B).obj : $(source) ] ;
}
# "harness" aliases all of those objects and references /Runtime//OpenCL.lib
# (<use> for building here, <library> passed on to dependents).
alias harness : $(harness.objs)
: <use>/Runtime//OpenCL.lib :
: <library>/Runtime//OpenCL.lib
;

View File

@@ -0,0 +1,41 @@
# Makefile for the test harness objects (uses Mac OS X framework flags).
# Optional: defining BUILD_WITH_ATF adds the ATF framework and -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Harness sources.
# NOTE(review): testHarness.c and testHarness.cpp both map to testHarness.o
# under the OBJECTS substitutions below - confirm both files are meant to be
# listed.
SRCS = conversions.c \
errorHelpers.c \
genericThread.cpp \
imageHelpers.cpp \
kernelHelpers.c \
mt19937.c \
rounding_mode.c \
testHarness.c \
testHarness.cpp \
ThreadPool.c \
threadTesting.c \
typeWrappers.cpp
# Each word in DEFINES becomes a -D flag in CFLAGS/CXXFLAGS.
DEFINES = DONT_TEST_GARBAGE_POINTERS
SOURCES = $(abspath $(SRCS))
# LIBPATH, HEADERS and LIBRARIES are set here but not referenced by any rule
# in this file (nothing is linked; only objects are built).
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C compiler is set to the C++ driver, so .c sources go through c++ too.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Derive object names: first rewrite the .c suffix, then the .cpp suffix.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
# Objects are produced by make's built-in compile rules; there is no explicit
# pattern rule in this file.
all: $(OBJECTS)
clean:
rm -f $(OBJECTS)
# Fallback for any target that has no rule: print a message instead of failing
# with make's default error.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,899 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "ThreadPool.h"
#include "errorHelpers.h"
#include "fpcontrol.h"
#include <stdio.h>
#include <stdlib.h>
#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system
#if defined( _WIN32 )
#include <windows.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#include "mingw_compat.h"
#include <process.h>
#else // !_WIN32
#include <pthread.h>
#include <unistd.h>
#include <sys/errno.h>
#endif // !_WIN32
// declarations
// The worker entry point has a different return type per platform: void on
// Windows, void* elsewhere (the pthread start-routine shape).
#ifdef _WIN32
void ThreadPool_WorkerFunc( void *p );
#else
void *ThreadPool_WorkerFunc( void *p );
#endif
void ThreadPool_Init(void);
void ThreadPool_Exit(void);
// gAtomicLock only exists on toolchains where ThreadPool_AtomicAdd falls back
// to a lock: MinGW (critical section) and compilers that are neither GCC nor
// MSVC (pthread mutex). GCC and MSVC builds need no lock, hence the empty
// #elif branch.
#if defined (__MINGW32__)
// Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
CRITICAL_SECTION gAtomicLock;
#elif defined( __GNUC__ ) || defined( _MSC_VER)
#else
pthread_mutex_t gAtomicLock;
#endif
// Atomically adds b to *a and returns the value *a held before the addition.
// The operation must also act as a memory barrier, because it protects state
// modified by the worker functions.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
#if defined (__MINGW32__)
    // No atomics on Mingw32
    cl_int previous;

    EnterCriticalSection(&gAtomicLock);
    previous = *a;
    *a = previous + b;
    LeaveCriticalSection(&gAtomicLock);

    return previous;
#elif defined( __GNUC__ )
    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
    // do we need __sync_synchronize() here, too?  GCC docs are unclear whether __sync_fetch_and_add does a synchronize
    return __sync_fetch_and_add( a, b );
#elif defined( _MSC_VER )
    return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
#else
    #warning  Please add a atomic add implementation here, with memory barrier.  Fallback code is slow.
    cl_int previous;

    if( pthread_mutex_lock(&gAtomicLock) )
        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");

    previous = *a;
    *a = previous + b;

    if( pthread_mutex_unlock(&gAtomicLock) )
        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");

    return previous;
#endif
}
#if defined( _WIN32 )
// Uncomment the following line if Windows XP support is not required.
// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
#define _INIT_ONCE INIT_ONCE
#define _PINIT_ONCE PINIT_ONCE
#define _InitOnceExecuteOnce InitOnceExecuteOnce
#else // !HAS_INIT_ONCE_EXECUTE_ONCE
// Fallback one-time-initialisation primitive, used when the native
// InitOnceExecuteOnce is not enabled (HAS_INIT_ONCE_EXECUTE_ONCE undefined).
// The control word is a plain LONG that moves through the three states below.
typedef volatile LONG _INIT_ONCE;
typedef _INIT_ONCE *_PINIT_ONCE;
typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
#define _INIT_ONCE_UNINITIALIZED 0
#define _INIT_ONCE_IN_PROGRESS 1
#define _INIT_ONCE_DONE 2
// Runs InitFn( InitOnce, Parameter, Context ) at most once per control word.
// The caller that wins the compare-exchange runs InitFn and then marks the
// word DONE; every other caller sleeps in 1 ms steps until the word is DONE.
// Always returns TRUE: the BOOL returned by InitFn is ignored, so a failing
// InitFn is not reported to callers and is not retried.
static BOOL _InitOnceExecuteOnce(
_PINIT_ONCE InitOnce,
_PINIT_ONCE_FN InitFn,
PVOID Parameter,
LPVOID *Context
)
{
while ( *InitOnce != _INIT_ONCE_DONE )
{
// The plain read of *InitOnce skips the interlocked operation while another
// caller is already initialising; the compare-exchange then claims the
// UNINITIALIZED -> IN_PROGRESS transition for exactly one caller.
if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
{
InitFn( InitOnce, Parameter, Context );
*InitOnce = _INIT_ONCE_DONE;
return TRUE;
}
// Someone else is initialising: back off briefly and re-check.
Sleep( 1 );
}
return TRUE;
}
#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
// Uncomment the following line if Windows XP support is not required.
// #define HAS_CONDITION_VARIABLE 1
#if defined(HAS_CONDITION_VARIABLE)
#define _CONDITION_VARIABLE CONDITION_VARIABLE
#define _InitializeConditionVariable InitializeConditionVariable
#define _SleepConditionVariableCS SleepConditionVariableCS
#define _WakeAllConditionVariable WakeAllConditionVariable
#else // !HAS_CONDITION_VARIABLE
// Fallback condition variable, used when the native CONDITION_VARIABLE is not
// enabled (HAS_CONDITION_VARIABLE undefined). It is built from an event
// created as manual-reset plus a critical section guarding the counters.
typedef struct
{
HANDLE mEvent; // Used to park the thread.
CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
volatile cl_int mWaiters; // Number of threads waiting on this cond var.
volatile cl_int mGeneration; // Wait generation count.
volatile cl_int mReleaseCount; // Number of releases to execute before resetting the event.
} _CONDITION_VARIABLE;
// Initialises a fallback condition variable: creates its manual-reset,
// initially non-signalled event, initialises the lock and zeroes all counters.
static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
    InitializeCriticalSection( cond_var->mLock );
    cond_var->mWaiters = 0;
    cond_var->mGeneration = 0;
    // Zeroed unconditionally (previously only when NDEBUG was not defined) so
    // the counter never holds an indeterminate value in release builds,
    // whatever the storage class of *cond_var.
    cond_var->mReleaseCount = 0;
}
// Fallback for SleepConditionVariableCS: releases cond_lock, blocks until a
// later _WakeAllConditionVariable call, then re-acquires cond_lock before
// returning. The third parameter (the native API's timeout) is ignored; the
// wait is always infinite. The caller must hold cond_lock on entry.
static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
{
// Register as a waiter and remember the generation we started waiting in.
EnterCriticalSection( cond_var->mLock );
cl_int generation = cond_var->mGeneration;
++cond_var->mWaiters;
LeaveCriticalSection( cond_var->mLock );
LeaveCriticalSection( cond_lock );
while ( TRUE )
{
WaitForSingleObject( cond_var->mEvent, INFINITE );
EnterCriticalSection( cond_var->mLock );
// Only accept the wake-up if it belongs to a wake issued after we started
// waiting (generation changed) and that wake still has releases left.
// Otherwise the event is still signalled from an earlier wake and we loop.
BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
LeaveCriticalSection( cond_var->mLock );
if ( done )
{
break;
}
}
// Re-take the caller's lock first, then consume one release.
EnterCriticalSection( cond_lock );
EnterCriticalSection( cond_var->mLock );
// The waiter that consumes the final release resets the event (it is created
// as manual-reset in _InitializeConditionVariable).
if ( --cond_var->mReleaseCount == 0 )
{
ResetEvent( cond_var->mEvent );
}
--cond_var->mWaiters;
LeaveCriticalSection( cond_var->mLock );
}
// Fallback for WakeAllConditionVariable: if any thread is currently waiting,
// starts a new wait generation, schedules one release per waiter and signals
// the event. Does nothing when there are no waiters.
static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    EnterCriticalSection( cond_var->mLock );

    if ( cond_var->mWaiters <= 0 )
    {
        LeaveCriticalSection( cond_var->mLock );
        return;
    }

    ++cond_var->mGeneration;
    cond_var->mReleaseCount = cond_var->mWaiters;
    SetEvent( cond_var->mEvent );

    LeaveCriticalSection( cond_var->mLock );
}
#endif // !HAS_CONDITION_VARIABLE
#endif // _WIN32
// Worker threads exit when the run count they fetch is at or above MAX_COUNT;
// ThreadPool_Do rejects job counts this large.
#define MAX_COUNT (1<<29)
// Global state to coordinate whether the threads have been launched successfully or not
#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
static _INIT_ONCE threadpool_init_control;
#elif defined (_WIN32) // MinGW or XP
static int threadpool_init_control;
#else // Posix platforms
pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
#endif
cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
// Critical region lock around ThreadPool_Do. Only one ThreadPool_Do can run at a time,
// because no job queue is set up here and none is expected to be needed.
#if defined( _WIN32 )
CRITICAL_SECTION gThreadPoolLock[1];
#else // !_WIN32
pthread_mutex_t gThreadPoolLock;
#endif // !_WIN32
// Condition variable to park ThreadPool threads when not working
#if defined( _WIN32 )
CRITICAL_SECTION cond_lock[1];
_CONDITION_VARIABLE cond_var[1];
#else // !_WIN32
pthread_mutex_t cond_lock;
pthread_cond_t cond_var;
#endif // !_WIN32
volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run.
// set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
// State that only changes when the threadpool is not working.
volatile TPFuncPtr gFunc_ptr = NULL; // job function of the current ThreadPool_Do call
volatile void *gUserInfo = NULL; // opaque user pointer handed to every job
volatile cl_int gJobCount = 0; // number of jobs requested by the current ThreadPool_Do call
// State that may change while the thread pool is working
volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
// Condition variable to park caller while waiting
#if defined( _WIN32 )
HANDLE caller_event;
#else // !_WIN32
pthread_mutex_t caller_cond_lock;
pthread_cond_t caller_cond_var;
#endif // !_WIN32
volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
// The total number of threads launched.
volatile cl_int gThreadCount = 0;
// Entry point of every thread pool worker.
//
// p points to a shared counter from which the worker draws its 0-based thread ID.
// The worker then repeatedly claims job numbers by atomically decrementing gRunCount:
//   - a value in [1, MAX_COUNT) is a job: gFunc_ptr is called with job ID (item - 1);
//   - a value <= 0 means there is no work: the worker parks on cond_var, and the last
//     worker to stop running wakes the caller;
//   - a value >= MAX_COUNT is the shutdown request: the worker exits.
// The first non-zero result returned by a job is stored in jobError and gRunCount is
// zeroed; once jobError is set, workers stop calling the job function.
// On exit the worker decrements gThreadCount. The POSIX variant always returns NULL.
#ifdef _WIN32
void ThreadPool_WorkerFunc( void *p )
#else
void *ThreadPool_WorkerFunc( void *p )
#endif
{
    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
    ThreadPool_AtomicAdd( &gRunning, 1 );
//    log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );

    while( MAX_COUNT > item )
    {
        cl_int err;

        // check for more work to do
        if( 0 >= item )
        {
//            log_info( "Thread %d has run out of work.\n", threadID );

            // No work to do. Attempt to block waiting for work
#if defined( _WIN32 )
            EnterCriticalSection( cond_lock );
#else // !_WIN32
            if((err = pthread_mutex_lock( &cond_lock) ))
            {
                log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
                goto exit;
            }
#endif // !_WIN32

            cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
//            log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
            if( 1 == remaining )
            { // last thread out signal the main thread to wake up
#if defined( _WIN32 )
                SetEvent( caller_event );
#else // !_WIN32
                // Every failure below releases the locks this thread still holds before
                // bailing out, so that the caller and the other workers are not blocked forever.
                if((err = pthread_mutex_lock( &caller_cond_lock) ))
                {
                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
                    pthread_mutex_unlock( &cond_lock );
                    goto exit;
                }
                if( (err = pthread_cond_broadcast( &caller_cond_var )))
                {
                    log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
                    pthread_mutex_unlock( &caller_cond_lock );
                    pthread_mutex_unlock( &cond_lock );
                    goto exit;
                }
                if((err = pthread_mutex_unlock( &caller_cond_lock) ))
                {
                    log_error("Error %d from pthread_mutex_unlock. Unable to wake caller.\n", err );
                    pthread_mutex_unlock( &cond_lock );
                    goto exit;
                }
#endif // !_WIN32
            }

            // loop in case we are woken only to discover that some other thread already did all the work
            while( 0 >= item )
            {
#if defined( _WIN32 )
                _SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
#else // !_WIN32
                if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
                {
                    log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
                    pthread_mutex_unlock( &cond_lock);
                    goto exit;
                }
#endif // !_WIN32

                // try again to get a valid item id
                item = ThreadPool_AtomicAdd( &gRunCount, -1 );
                if( MAX_COUNT <= item ) // exit if we are done
                {
#if defined( _WIN32 )
                    LeaveCriticalSection( cond_lock );
#else // !_WIN32
                    pthread_mutex_unlock( &cond_lock);
#endif // !_WIN32
                    goto exit;
                }
            }

            ThreadPool_AtomicAdd( &gRunning, 1 );
//            log_info( "Thread %d has found work.\n", threadID);

#if defined( _WIN32 )
            LeaveCriticalSection( cond_lock );
#else // !_WIN32
            if((err = pthread_mutex_unlock( &cond_lock) ))
            {
                log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
                goto exit;
            }
#endif // !_WIN32
        }

        // we have a valid item, so do the work
        if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error
        {
//            log_info( "Thread %d doing job %d\n", threadID, item - 1);

#if defined(__APPLE__) && defined(__arm__)
            // On most platforms which support denorm, default is FTZ off. However,
            // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
            // This creates issues in result verification. Since spec allows the implementation to either flush or
            // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
            // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
            // where reference is being computed to make sure we get non-flushed reference result. If implementation
            // returns flushed result, we correctly take care of that in verification code.
            FPU_mode_type oldMode;
            DisableFTZ( &oldMode );
#endif

            // Call the user's function with this item ID
            err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );

#if defined(__APPLE__) && defined(__arm__)
            // Restore FP state
            RestoreFPState( &oldMode );
#endif

            if( err )
            {
#if (__MINGW32__)
                EnterCriticalSection(&gAtomicLock);
                // set the new error only if we are the first one there.
                if( jobError == CL_SUCCESS )
                    jobError = err;
                // drop run count to 0
                gRunCount = 0;
                LeaveCriticalSection(&gAtomicLock);
#elif defined( __GNUC__ )
                // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
                // set the new error if we are the first one there.
                __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );

                // drop run count to 0
                gRunCount = 0;
                __sync_synchronize();
#elif defined( _MSC_VER )
                // set the new error if we are the first one there.
                _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );

                // drop run count to 0
                gRunCount = 0;
                _mm_mfence();
#else
                if( pthread_mutex_lock(&gAtomicLock) )
                    log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
                // set the new error only if we are the first one there.
                if( jobError == CL_SUCCESS )
                    jobError = err;
                // drop run count to 0
                gRunCount = 0;
                if( pthread_mutex_unlock(&gAtomicLock) )
                    log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
#endif
            }
        }

        // get the next item
        item = ThreadPool_AtomicAdd( &gRunCount, -1 );
    }

exit:
    log_info( "ThreadPool: thread %d exiting.\n", threadID );
    ThreadPool_AtomicAdd( &gThreadCount, -1 );
#if !defined(_WIN32)
    return NULL;
#endif
}
// SetThreadCount() artificially overrides the number of worker threads.
//
// A count of 0 (the default) lets ThreadPool_Init derive the number from the CPU
// count; a machine reporting a single CPU still gets 2 threads so that thread
// safety is exercised.
//
// If count < 2, or the CL_TEST_SINGLE_THREADED environment variable is set, the
// code runs single threaded and reports an error to flag the test as invalid.
// This is meant for debugging only. By convention test apps set the thread count
// to 1 in response to the -m flag.
//
// SetThreadCount() must be called before the first call to GetThreadCount() or
// ThreadPool_Do(); otherwise the behavior is undefined. Calling it after the pool
// has been launched successfully aborts the process.
void SetThreadCount( int count )
{
    if( threadPoolInitErr != CL_SUCCESS )
    {
        // Pool not launched yet: just record the request.
        gThreadCount = count;
        return;
    }

    log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
    abort();
}
// One-time set-up of the thread pool.
//
// Decides how many worker threads to use (gThreadCount, or the CPU count when it is 0),
// initializes the synchronization primitives, launches the workers and blocks until all
// of them have started. threadPoolInitErr is set to CL_SUCCESS only when the pool is
// usable; in every other case it stays non-zero and ThreadPool_Do runs jobs single threaded.
//
// If only some of the workers could be launched, the pool runs with the ones that did start.
void ThreadPool_Init(void)
{
    cl_int i;
    int err;
    // Counter from which each worker draws its thread ID. Workers are handed its address,
    // so it has static storage: this function can return on an error path while workers
    // are still starting up.
    static volatile cl_uint threadID = 0;

    // Check for manual override of multithreading code. We add this for better debuggability.
    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
    {
        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
        gThreadCount = 1;
        return;
    }

    // Figure out how many threads to run -- check first for non-zero to give the implementation the chance
    if( 0 == gThreadCount )
    {
#if defined(_MSC_VER) || defined (__MINGW64__)
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
        DWORD length = 0;

        // The first call only reports the buffer size needed.
        GetLogicalProcessorInformation( NULL, &length );
        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
        if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
        {
            PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
            while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
            {
                if( ptr->Relationship == RelationProcessorCore )
                {
                    // Count the number of bits in ProcessorMask (number of logical cores).
                    // ProcessorMask is a ULONG_PTR; a ULONG would drop the upper 32 bits on Win64.
                    ULONG_PTR mask = ptr->ProcessorMask;
                    while( mask )
                    {
                        ++gThreadCount;
                        mask &= mask - 1; // Remove 1 bit at a time
                    }
                }
                ++ptr;
            }
        }
        // Released on both the success and the failure path (free(NULL) is a no-op).
        free(buffer);
#elif defined (__MINGW32__)
        {
            #warning How about this, instead of hard coding it to 2?
            SYSTEM_INFO sysinfo;
            GetSystemInfo( &sysinfo );
            gThreadCount = sysinfo.dwNumberOfProcessors;
        }
#else // !_WIN32
        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
#endif // !_WIN32

        // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
        if( 1 == gThreadCount )
            gThreadCount = 2;
    }

    // Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
    if( gThreadCount < 2 )
    {
        log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
        gThreadCount = 1;
        return;
    }

#if defined( _WIN32 )
    InitializeCriticalSection( gThreadPoolLock );
    InitializeCriticalSection( cond_lock );
    _InitializeConditionVariable( cond_var );
    caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
#else // !_WIN32
    // The pthread objects are not statically initialized, so they are initialized here
    // for every POSIX compiler, not only GCC.
    pthread_cond_init(&cond_var, NULL);
    pthread_mutex_init(&cond_lock ,NULL);
    pthread_cond_init(&caller_cond_var, NULL);
    pthread_mutex_init(&caller_cond_lock, NULL);
    pthread_mutex_init(&gThreadPoolLock, NULL);
#endif // !_WIN32

#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
    // Mutex backing the lock-based fallback for atomic operations.
    pthread_mutex_init( &gAtomicLock, NULL );
#elif defined (__MINGW32__)
    InitializeCriticalSection(&gAtomicLock);
#endif

    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
    // That would cause a deadlock.
#if !defined( _WIN32 )
    if((err = pthread_mutex_lock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
        gThreadCount = 1;
        return;
    }
#endif // !_WIN32

    // init threads
    for( i = 0; i < gThreadCount; i++ )
    {
#if defined( _WIN32 )
        // _beginthread reports failure with -1L; 0 is also treated as failure to be safe.
        uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
        err = ( handle == 0 || handle == (uintptr_t) -1L );
#else // !_WIN32
        pthread_t tid = 0;
        err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
#endif // !_WIN32
        if( err )
        {
            log_error( "Error %d launching thread %d\n", err, i );
            threadPoolInitErr = err;
            gThreadCount = i;
            break;
        }
    }

    // If not a single worker could be launched nobody will ever signal us, so waiting
    // below would block forever. Fall back to single threaded execution instead;
    // threadPoolInitErr already holds the launch error.
    if( gThreadCount < 1 )
    {
        log_error( "ERROR: Unable to launch any worker threads. Running single threaded.\n*** TEST IS INVALID! ***\n");
#if !defined( _WIN32 )
        pthread_mutex_unlock( &caller_cond_lock );
#endif // !_WIN32
        gThreadCount = 1;
        return;
    }

    atexit( ThreadPool_Exit );

    // block until they are done launching.
    do
    {
#if defined( _WIN32 )
        WaitForSingleObject( caller_event, INFINITE );
#else // !_WIN32
        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
        {
            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
            pthread_mutex_unlock( &caller_cond_lock);
            return;
        }
#endif // !_WIN32
    }
    while( gRunCount != -gThreadCount ); // every worker decrements gRunCount once when it starts

#if !defined( _WIN32 )
    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
        return;
    }
#endif // !_WIN32

    threadPoolInitErr = CL_SUCCESS;
}
#if defined(_MSC_VER)
// Adapter with the callback signature expected by _InitOnceExecuteOnce.
// It ignores all three arguments, runs ThreadPool_Init and always returns TRUE.
static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
{
    (void) InitOnce;
    (void) Parameter;
    (void) lpContex;

    ThreadPool_Init();

    return TRUE;
}
#endif
// Asks every worker thread to exit and waits for them to do so.
//
// gRunCount is set to CL_INT_MAX, which workers read as the shutdown request. The
// parked workers are then woken up to 1000 times, with a 1 ms pause after each
// wake-up, until gThreadCount has dropped to 0. The outcome is only logged.
void ThreadPool_Exit(void)
{
    int err, count;

    gRunCount = CL_INT_MAX;

    // Publish the shutdown request before waking anybody.
#if defined( __GNUC__ )
    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
    __sync_synchronize();
#elif defined( _MSC_VER )
    _mm_mfence();
#else
    #warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
#endif

    // spin waiting for threads to die
    count = 0;
    while( gThreadCount != 0 && count < 1000 )
    {
#if defined( _WIN32 )
        _WakeAllConditionVariable( cond_var );
        Sleep(1);
#else // !_WIN32
        err = pthread_cond_broadcast( &cond_var );
        if( err )
        {
            log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
            break;
        }
        usleep(1000);
#endif // !_WIN32
        count++;
    }

    if( 0 == gThreadCount )
        log_info( "Thread pool exited in a orderly fashion.\n" );
    else
        log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
}
// Blocking API that farms out count jobs to a thread pool.
// It may return with some work undone if func_ptr() returns a non-zero
// result.
//
// func_ptr: job function, called with job IDs 0 .. count-1.
// count:    number of jobs; must be below MAX_COUNT. A count of 0 returns CL_SUCCESS
//           immediately.
// userInfo: opaque pointer handed unchanged to every job.
//
// Returns CL_SUCCESS if every job returned 0, otherwise the recorded job error, or the
// error code of a failing threading call.
//
// This function obviously has its shortcomings. Only one call to ThreadPool_Do
// can be running at a time. It is not intended for general purpose use.
// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
// all available then it would make more sense to use those features.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
                      cl_uint count,
                      void *userInfo )
{
    cl_int newErr;
    cl_int err = 0;

    // Lazily set up our threads
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
    err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
#elif defined (_WIN32)
    if (threadpool_init_control == 0) {
        #warning This is buggy and race prone. Find a better way.
        ThreadPool_Init();
        threadpool_init_control = 1;
    }
#else //posix platform
    err = pthread_once( &threadpool_init_control, ThreadPool_Init );
    if( err )
    {
        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
        return err;
    }
#endif

    // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
    if( threadPoolInitErr )
    {
        cl_uint currentJob = 0;
        cl_int result = CL_SUCCESS;

#if defined(__APPLE__) && defined(__arm__)
        // On most platforms which support denorm, default is FTZ off. However,
        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
        // This creates issues in result verification. Since spec allows the implementation to either flush or
        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
        // where reference is being computed to make sure we get non-flushed reference result. If implementation
        // returns flushed result, we correctly take care of that in verification code.
        FPU_mode_type oldMode;
        DisableFTZ( &oldMode );
#endif
        for( currentJob = 0; currentJob < count; currentJob++ )
            if((result = func_ptr( currentJob, 0, userInfo )))
            {
#if defined(__APPLE__) && defined(__arm__)
                // Restore FP state before leaving
                RestoreFPState( &oldMode );
#endif
                return result;
            }

#if defined(__APPLE__) && defined(__arm__)
        // Restore FP state before leaving
        RestoreFPState( &oldMode );
#endif

        return CL_SUCCESS;
    }

    // Nothing to run. Return before waking the pool: workers that find no job never
    // signal the caller, so waiting for them below would block forever.
    if( 0 == count )
        return CL_SUCCESS;

    if( count >= MAX_COUNT )
    {
        log_error("Error: ThreadPool_Do count %u >= max threadpool count of %d\n", count, MAX_COUNT );
        return -1;
    }

    // Enter critical region
#if defined( _WIN32 )
    EnterCriticalSection( gThreadPoolLock );
#else // !_WIN32
    if( (err = pthread_mutex_lock( &gThreadPoolLock )))
    {
        switch (err)
        {
            case EDEADLK:
                log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
                break;
            case EINVAL:
                log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
                break;
            default:
                break;
        }
        return err;
    }
#endif // !_WIN32

    // Start modifying the job state observable by worker threads
#if defined( _WIN32 )
    EnterCriticalSection( cond_lock );
#else // !_WIN32
    if((err = pthread_mutex_lock( &cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
        goto exit;
    }
#endif // !_WIN32

    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
    // That would cause a deadlock.
#if !defined( _WIN32 )
    if((err = pthread_mutex_lock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
        // Do not leave the workers' lock held on the way out.
        pthread_mutex_unlock( &cond_lock );
        goto exit;
    }
#endif // !_WIN32

    // Prime the worker threads to get going
    jobError = CL_SUCCESS;
    gRunCount = gJobCount = count;
    gFunc_ptr = func_ptr;
    gUserInfo = userInfo;

#if defined( _WIN32 )
    _WakeAllConditionVariable( cond_var );
    LeaveCriticalSection( cond_lock );
#else // !_WIN32
    if( (err = pthread_cond_broadcast( &cond_var )))
    {
        log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
        // Release both locks taken above before leaving.
        pthread_mutex_unlock( &caller_cond_lock );
        pthread_mutex_unlock( &cond_lock );
        goto exit;
    }
    if((err = pthread_mutex_unlock( &cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
        pthread_mutex_unlock( &caller_cond_lock );
        goto exit;
    }
#endif // !_WIN32

    // block until they are done. It would be slightly more efficient to do some of the work here though.
    do
    {
#if defined( _WIN32 )
        WaitForSingleObject( caller_event, INFINITE );
#else // !_WIN32
        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
        {
            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
            pthread_mutex_unlock( &caller_cond_lock);
            goto exit;
        }
#endif // !_WIN32
    }
    while( gRunning );

#if !defined(_WIN32)
    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
        goto exit;
    }
#endif // !_WIN32

    err = jobError;

exit:
    // exit critical region
#if defined( _WIN32 )
    LeaveCriticalSection( gThreadPoolLock );
#else // !_WIN32
    newErr = pthread_mutex_unlock( &gThreadPoolLock );
    if( newErr)
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
        return err;
    }
#endif // !_WIN32

    return err;
}
cl_uint GetThreadCount( void )
{
// Lazily set up our threads
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
#elif defined (_WIN32)
if (threadpool_init_control == 0) {
#warning This is buggy and race prone. Find a better way.
ThreadPool_Init();
threadpool_init_control = 1;
}
#else
cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
if( err )
{
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
return err;
}
#endif // !_WIN32
if( gThreadCount < 1 )
return 1;
return gThreadCount;
}
#else
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
#endif
//
// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
// of OpenCL API, while also checking
//
// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
// It is not okay to use this for conformance testing!!!
//
// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code.
//
// Single-threaded stand-in for the atomic add: adds b to *a and returns the value
// *a held before the addition.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
    // This fallback code path is not multithreaded, so a plain read followed by a
    // plain write is enough. If your operating system supports memory-barrier-atomics,
    // use those here.
    const cl_uint previous = *a;

    *a = previous + b;

    return previous;
}
// Single-threaded reference version of ThreadPool_Do.
// Runs jobs 0 .. count-1 in order on the calling thread, always with thread ID 0,
// and stops at the first job that returns a non-zero result, which is then returned.
// Returns CL_SUCCESS when every job returned zero.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
                      cl_uint count,
                      void *userInfo )
{
    cl_uint currentJob = 0;
    cl_int result = CL_SUCCESS;

#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
    // THIS FUNCTION IS NOT INTENDED FOR USE!!
    log_error( "ERROR: Test must be multithreaded!\n" );
    exit(-1);
#else
    // Warn once per process that the test runs without threads.
    static int spewCount = 0;

    if( !spewCount )
    {
        spewCount = 1;
        log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
    }
#endif

    // The multithreaded code should mimic this behavior:
    while( currentJob < count )
    {
        result = func_ptr( currentJob, 0, userInfo );
        if( result )
            return result;
        currentJob++;
    }

    return CL_SUCCESS;
}
// The single-threaded fallback always reports exactly one thread.
cl_uint GetThreadCount( void )
{
    const cl_uint onlyThread = 1;

    return onlyThread;
}
// The single-threaded fallback cannot change its thread count: requests for more
// than one thread are logged and otherwise ignored.
void SetThreadCount( int count )
{
    if( count <= 1 )
        return;

    log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
}
#endif

View File

@@ -0,0 +1,76 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#if defined( __APPLE__ )
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#if defined(__cplusplus)
extern "C" {
#endif
//
// An atomic add operator
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value
// Your function prototype
//
// A function pointer to the function you want to execute in a multithreaded context. No
// synchronization primitives are provided, other than the atomic add above. You may not
// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
// work, however.
//
// Job ids and thread ids are 0 based. If the number of jobs or threads is 8, they will be
// numbered 0 through 7.
// Note that while every job will be run, it is not guaranteed that every thread will wake up before
// the work is done.
typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
// Returns the first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
// Some work items may not run if a non-zero result is returned from func_ptr().
// This function may not be called from a TPFuncPtr.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
cl_uint count,
void *userInfo );
// Returns the number of worker threads that underlie the threadpool. The value passed
// as the TPFuncPtr's thread_id will be between 0 and this value less one, inclusive.
// This is safe to call from a TPFuncPtr.
cl_uint GetThreadCount( void );
// SetThreadCount() may be used to artificially set the number of worker threads.
// If the value is 0 (the default) the number of threads will be determined based on
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
// that we still get some testing for thread safety.
//
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
// code will run single threaded, but will report an error to indicate that the test
// is invalid. This option is intended for debugging purposes only. It is suggested
// as a convention that test apps set the thread count to 1 in response to the -m flag.
//
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
// otherwise the behavior is undefined. It may not be called from a TPFuncPtr.
void SetThreadCount( int count );
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* THREAD_POOL_H */

View File

@@ -0,0 +1,253 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef test_conformance_clImageHelper_h
#define test_conformance_clImageHelper_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <stdio.h>
#include "errorHelpers.h"
#ifdef __cplusplus
extern "C" {
#endif
// Drop-in replacement for clCreateImage2D that lets existing code build against
// either API level: with CL_VERSION_1_2 defined the image is created through
// clCreateImage, otherwise through clCreateImage2D.
//
// The arguments are forwarded unchanged. If errcode_ret is non-NULL and receives a
// non-zero code, an info message is logged; reporting the failure is left to the caller.
// Returns whatever the underlying create call returned.
inline cl_mem create_image_2d (cl_context context,
                               cl_mem_flags flags,
                               const cl_image_format *image_format,
                               size_t image_width,
                               size_t image_height,
                               size_t image_row_pitch,
                               void *host_ptr,
                               cl_int *errcode_ret)
{
    cl_mem image = NULL;

#ifdef CL_VERSION_1_2
    cl_image_desc desc;

    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width = image_width;
    desc.image_height = image_height;
    desc.image_row_pitch = image_row_pitch;
    // The remaining fields are not used for a plain 2D image.
    desc.image_depth = 0;
    desc.image_array_size = 0;
    desc.image_slice_pitch = 0;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL; // only meaningful for CL_MEM_OBJECT_IMAGE1D_BUFFER

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0) {
        // Info only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }
#else
    image = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0) {
        // Info only; the caller decides whether this is an error.
        log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
    }
#endif

    return image;
}
// Drop-in replacement for clCreateImage3D: with CL_VERSION_1_2 defined the image is
// created through clCreateImage, otherwise through clCreateImage3D.
//
// The arguments are forwarded unchanged. If errcode_ret is non-NULL and receives a
// non-zero code, an info message is logged; reporting the failure is left to the caller.
// Returns whatever the underlying create call returned.
inline cl_mem create_image_3d (cl_context context,
                               cl_mem_flags flags,
                               const cl_image_format *image_format,
                               size_t image_width,
                               size_t image_height,
                               size_t image_depth,
                               size_t image_row_pitch,
                               size_t image_slice_pitch,
                               void *host_ptr,
                               cl_int *errcode_ret)
{
    cl_mem image;

#ifdef CL_VERSION_1_2
    cl_image_desc desc;

    desc.image_type = CL_MEM_OBJECT_IMAGE3D;
    desc.image_width = image_width;
    desc.image_height = image_height;
    desc.image_depth = image_depth;
    desc.image_row_pitch = image_row_pitch;
    desc.image_slice_pitch = image_slice_pitch;
    desc.image_array_size = 0; // not used for one image
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL; // only meaningful for CL_MEM_OBJECT_IMAGE1D_BUFFER

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0) {
        // Info only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }
#else
    image = clCreateImage3D( context, flags, image_format,
                             image_width, image_height, image_depth,
                             image_row_pitch, image_slice_pitch,
                             host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0) {
        // Info only; the caller decides whether this is an error.
        log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
    }
#endif

    return image;
}
// Creates a 2D image array through clCreateImage.
//
// The image depth is fixed to 1; the array size and both pitches are forwarded as given.
// If errcode_ret is non-NULL and receives a non-zero code, an info message is logged;
// reporting the failure is left to the caller.
// Returns whatever clCreateImage returned.
inline cl_mem create_image_2d_array (cl_context context,
                                     cl_mem_flags flags,
                                     const cl_image_format *image_format,
                                     size_t image_width,
                                     size_t image_height,
                                     size_t image_array_size,
                                     size_t image_row_pitch,
                                     size_t image_slice_pitch,
                                     void *host_ptr,
                                     cl_int *errcode_ret)
{
    cl_image_desc desc;
    cl_mem image;

    desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    desc.image_array_size = image_array_size;
    desc.image_width = image_width;
    desc.image_height = image_height;
    desc.image_depth = 1;
    desc.image_row_pitch = image_row_pitch;
    desc.image_slice_pitch = image_slice_pitch;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL;

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );

    if (errcode_ret != NULL && *errcode_ret != 0) {
        // Info only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }

    return image;
}
// Creates a 1D image array through clCreateImage.
//
// Height and depth are fixed to 1; the array size and both pitches are forwarded as given.
// If errcode_ret is non-NULL and receives a non-zero code, an info message is logged;
// reporting the failure is left to the caller.
// Returns whatever clCreateImage returned.
inline cl_mem create_image_1d_array (cl_context context,
                                     cl_mem_flags flags,
                                     const cl_image_format *image_format,
                                     size_t image_width,
                                     size_t image_array_size,
                                     size_t image_row_pitch,
                                     size_t image_slice_pitch,
                                     void *host_ptr,
                                     cl_int *errcode_ret)
{
    cl_image_desc desc;
    cl_mem image;

    desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    desc.image_array_size = image_array_size;
    desc.image_width = image_width;
    desc.image_height = 1;
    desc.image_depth = 1;
    desc.image_row_pitch = image_row_pitch;
    desc.image_slice_pitch = image_slice_pitch;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL;

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );

    if (errcode_ret != NULL && *errcode_ret != 0) {
        // Info only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }

    return image;
}
// Creates a 1D image through clCreateImage, or a 1D image buffer when `buffer` is non-NULL.
//
// context, flags, image_format, host_ptr, errcode_ret: forwarded to clCreateImage.
// image_width, image_row_pitch: stored in the image descriptor.
// buffer: if non-NULL the image type is CL_MEM_OBJECT_IMAGE1D_BUFFER and this
//         memory object is stored in the descriptor; otherwise the type is
//         CL_MEM_OBJECT_IMAGE1D.
//
// If errcode_ret is non-NULL and receives a non-zero code, an info message is logged;
// reporting the failure is left to the caller.
// Returns whatever clCreateImage returned.
inline cl_mem create_image_1d (cl_context context,
                               cl_mem_flags flags,
                               const cl_image_format *image_format,
                               size_t image_width,
                               size_t image_row_pitch,
                               void *host_ptr,
                               cl_mem buffer,
                               cl_int *errcode_ret)
{
    cl_mem mImage;
    cl_image_desc image_desc;

    image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
    image_desc.image_width = image_width;
    image_desc.image_height = 1;
    image_desc.image_depth = 1;
    // This is not an array image, but the field must still be given a defined value:
    // the whole descriptor is handed to clCreateImage.
    image_desc.image_array_size = 0;
    image_desc.image_row_pitch = image_row_pitch;
    image_desc.image_slice_pitch = 0;
    image_desc.num_mip_levels = 0;
    image_desc.num_samples = 0;
    image_desc.buffer = buffer;

    mImage = clCreateImage( context,
                            flags,
                            image_format,
                            &image_desc,
                            host_ptr,
                            errcode_ret );
    if (errcode_ret && (*errcode_ret)) {
        // Log an info message and rely on the calling function to produce an error
        // if necessary.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }

    return mImage;
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,200 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _COMPAT_H_
#define _COMPAT_H_
#if defined(_WIN32) && defined (_MSC_VER)
#include <Windows.h>
#include <Winbase.h>
#include <CL/cl.h>
#include <float.h>
#include <xmmintrin.h>
// MSVC build: the hexadecimal literal `x` is ignored and the value is rebuilt
// as y * 2^z with ldexp() (presumably because this compiler cannot parse C99
// hex-float literals -- confirm). The non-MSVC branch of this header expands
// the same macros to `x` directly.
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
// Route C99 isfinite() to _finite().
#define isfinite(x) _finite(x)
#if !defined(__cplusplus)
// C builds: supply a `bool` type and erase the `inline` keyword entirely.
typedef char bool;
#define inline
#else
// C++ builds: everything below (up to the matching brace near the end of this
// branch) has C linkage.
extern "C" {
#endif
// Fixed-width integer names, declared by hand rather than through <stdint.h>.
// NOTE(review): int8_t is plain `char`, whose signedness is
// implementation-defined -- confirm the build never makes `char` unsigned.
typedef unsigned char uint8_t;
typedef char int8_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
// BSD-style name for the Win32 MAX_PATH limit.
#define MAXPATHLEN MAX_PATH
// Short unsigned type names.
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
// INFINITY is produced by overflowing float addition.
#define INFINITY (FLT_MAX + FLT_MAX)
//#define NAN (INFINITY | 1)
//const static int PINFBITPATT_SP32 = INFINITY;
#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif
// A NaN is the only value that compares unequal to itself.
// Both macros evaluate their argument more than once, so avoid arguments with
// side effects.
#define isnan( x ) ((x) != (x))
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
// Prototypes for C99-named math functions. Only the declarations are visible
// here; the definitions live elsewhere in the harness.
double rint( double x);
float rintf( float x);
long double rintl( long double x);
float cbrtf( float );
double cbrt( double );
int ilogb( double x);
int ilogbf (float x);
int ilogbl(long double x);
double fmax(double x, double y);
double fmin(double x, double y);
float fmaxf( float x, float y );
float fminf(float x, float y);
double log2(double x);
long double log2l(long double x);
double exp2(double x);
long double exp2l(long double x);
double fdim(double x, double y);
float fdimf(float x, float y);
long double fdiml(long double x, long double y);
// The remquo family additionally stores a quotient value through `quo`.
double remquo( double x, double y, int *quo);
float remquof( float x, float y, int *quo);
long double remquol( long double x, long double y, int *quo);
long double scalblnl(long double x, long n);
// Absolute value of a long long. Negating the most negative value overflows,
// exactly as it does for the standard library function of the same name.
inline long long
llabs(long long value)
{
    if (value < 0)
        return -value;
    return value;
}
// end of math functions
// Timing helpers (defined elsewhere). ReadTime() yields a 64-bit timestamp and
// SubtractTime() turns a pair of such timestamps into a double.
// NOTE(review): the unit of both values is not visible from this header.
uint64_t ReadTime( void );
double SubtractTime( uint64_t endTime, uint64_t startTime );
// sleep() with its argument scaled by 1000 and forwarded to Sleep().
// The argument is parenthesized so that an expression such as sleep(a + b)
// is scaled as a whole rather than expanding to Sleep(1000*a + b).
#define sleep(X) Sleep(1000*(X))
// snprintf is redirected to sprintf_s.
// NOTE(review): sprintf_s does not truncate the way C99 snprintf does --
// confirm callers never rely on silent truncation.
#define snprintf sprintf_s
//#define hypotl _hypot
// NaN constructors (defined elsewhere).
float make_nan();
float nanf( const char* str);
double nan( const char* str);
long double nanl( const char* str);
//#if defined USE_BOOST
//#include <boost/math/tr1.hpp>
//double hypot(double x, double y);
// Further C99-named math prototypes; definitions are not visible here.
float hypotf(float x, float y);
long double hypotl(long double x, long double y) ;
double lgamma(double x);
float lgammaf(float x);
double trunc(double x);
float truncf(float x);
double log1p(double x);
float log1pf(float x);
long double log1pl(long double x);
double copysign(double x, double y);
float copysignf(float x, float y);
long double copysignl(long double x, long double y);
long lround(double x);
long lroundf(float x);
//long lroundl(long double x)
double round(double x);
float roundf(float x);
long double roundl(long double x);
int signbit(double x);
int signbitf(float x);
//bool signbitl(long double x) { return boost::math::tr1::signbit<long double>(x); }
//#endif // USE_BOOST
long int lrint (double flt);
long int lrintf (float flt);
// Conversions between float and 32-bit integer.
// NOTE(review): whether these convert by value or reinterpret bits is not
// visible from the prototypes.
float int2float (int32_t ix);
int32_t float2int (float fx);
/** Returns the number of leading 0-bits in pattern,
starting at the most significant bit position.
If pattern is 0, the result is undefined.
*/
int __builtin_clz(unsigned int pattern);
// Named zero constant; each translation unit including this header gets its own copy.
static const double zero= 0.00000000000000000000e+00;
// NaN is produced by subtracting infinity from itself.
#define NAN (INFINITY - INFINITY)
// Float-typed HUGE_VAL. The expansion is a bare cast expression, not wrapped in parentheses.
#define HUGE_VALF (float)HUGE_VAL
// usleep() prototype (defined elsewhere).
int usleep(int usec);
// Reimplement the parts of fenv.h that the harness needs, because Windows
// does not provide it.
// Exception flag bits. FE_ALL_EXCEPT (0x3D) is the bitwise OR of the five
// flags defined here.
#define FE_INEXACT 0x0020
#define FE_UNDERFLOW 0x0010
#define FE_OVERFLOW 0x0008
#define FE_DIVBYZERO 0x0004
#define FE_INVALID 0x0001
#define FE_ALL_EXCEPT 0x003D
// Both take a mask built from the FE_* bits above; defined elsewhere.
int fetestexcept(int excepts);
int feclearexcept(int excepts);
#ifdef __cplusplus
}
#endif
#else // !((defined(_WIN32) && defined(_MSC_VER)
#if defined(__MINGW32__)
#include <windows.h>
// sleep() with its argument scaled by 1000 and forwarded to Sleep().
// The argument is parenthesized so that an expression such as sleep(a + b)
// is scaled as a whole rather than expanding to Sleep(1000*a + b).
#define sleep(X) Sleep(1000*(X))
#endif
// Outside the MSVC branch the literal `x` is used as written; the `y` and `z`
// arguments are ignored.
#define MAKE_HEX_FLOAT(x,y,z) x
#define MAKE_HEX_DOUBLE(x,y,z) x
#define MAKE_HEX_LONG(x,y,z) x
#endif // !((defined(_WIN32) && defined(_MSC_VER)
#endif // _COMPAT_H_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,127 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _conversions_h
#define _conversions_h
#include "errorHelpers.h"
#include "mt19937.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>
#include <string.h>
#include <sys/types.h>
#include "compat.h"
#if defined(__cplusplus)
extern "C" {
#endif
/* Note: the next three all have to match in size and order!! */
/* NOTE(review): which three items are meant is not visible in this header;
   presumably per-type tables in the implementation file that are indexed by
   this enum -- confirm before adding or reordering entries. */
/* Identifies a scalar element type. kNumExplicitTypes is the number of
   enumerators preceding it. */
enum ExplicitTypes
{
kBool = 0,
kChar,
kUChar,
kUnsignedChar,
kShort,
kUShort,
kUnsignedShort,
kInt,
kUInt,
kUnsignedInt,
kLong,
kULong,
kUnsignedLong,
kFloat,
kHalf,
kDouble,
kNumExplicitTypes
};
typedef enum ExplicitTypes ExplicitType;
/* Rounding mode selector. kNumRoundingTypes counts the modes listed before
   it; kDefaultRoundingType is an alias for kRoundToNearest, not an extra mode. */
enum RoundingTypes
{
kRoundToEven = 0,
kRoundToZero,
kRoundToPosInf,
kRoundToNegInf,
kRoundToNearest,
kNumRoundingTypes,
kDefaultRoundingType = kRoundToNearest
};
typedef enum RoundingTypes RoundingType;
// Conversion and random-data helpers; only the prototypes are visible here.
// Writes a textual form of the value at `data` into `string`.
// NOTE(review): no buffer length is passed -- confirm the size callers must provide.
extern void print_type_to_string(ExplicitType type, void *data, char* string);
extern size_t get_explicit_type_size( ExplicitType type );
extern const char * get_explicit_type_name( ExplicitType type );
// Converts one value from inType (read at inRaw) to outType (written at outRaw).
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
// NOTE(review): presumably returns storage the caller must release -- confirm against the definition.
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
extern float read_as_float( void *inRaw, ExplicitType inType );
// Random value generators driven by the MTdata state `d`.
extern float get_random_float(float low, float high, MTdata d);
extern double get_random_double(double low, double high, MTdata d);
extern float any_float( MTdata d );
extern double any_double( MTdata d );
extern int random_in_range( int minV, int maxV, MTdata d );
size_t get_random_size_t(size_t low, size_t high, MTdata d);
// Returns non-zero when the single-precision value x is subnormal.
// Zero (of either sign) is not subnormal.
static inline int IsFloatSubnormal( float x )
{
#if 2 == FLT_RADIX
    // Examine the bit pattern with integer arithmetic to avoid problems with
    // FTZ behavior.
    union { float asFloat; uint32_t asBits; } pun;
    uint32_t magnitude;

    pun.asFloat = fabsf( x );
    magnitude = pun.asBits;
    // Subnormal magnitudes are 0x00000001..0x007fffff. Subtracting one makes
    // zero wrap around to 0xffffffff so that it fails the comparison too.
    return ( magnitude - 1 ) < 0x007fffffU;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    if( x == 0.0 )
        return 0;
    return fabs( x ) < (double) FLT_MIN;
#endif
}
// Returns non-zero when the double-precision value x is subnormal.
// Zero (of either sign) is not subnormal.
static inline int IsDoubleSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Examine the bit pattern with integer arithmetic to avoid problems with
    // FTZ behavior.
    union { double asDouble; uint64_t asBits; } pun;
    uint64_t magnitude;

    pun.asDouble = fabs( x );
    magnitude = pun.asBits;
    // Subnormal magnitudes are 1..0x000fffffffffffff. Subtracting one makes
    // zero wrap around so that it fails the comparison too.
    return ( magnitude - 1 ) < 0x000fffffffffffffULL;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    if( x == 0.0 )
        return 0;
    return fabs( x ) < (double) DBL_MIN;
#endif
}
#if defined(__cplusplus)
}
#endif
#endif // _conversions_h

View File

@@ -0,0 +1,585 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <stdbool.h>
#endif
#include <math.h>
#include <float.h>
#include <string.h>
#include "errorHelpers.h"
#include "compat.h"
// Translate an OpenCL status code into the spelling of its constant.
// Codes that are not listed yield the string "(unknown)".
const char *IGetErrorString( int clErrorCode )
{
    // One row per status code the harness knows about.
    static const struct { int code; const char *name; } kErrorNames[] =
    {
        { CL_SUCCESS, "CL_SUCCESS" },
        { CL_DEVICE_NOT_FOUND, "CL_DEVICE_NOT_FOUND" },
        { CL_DEVICE_NOT_AVAILABLE, "CL_DEVICE_NOT_AVAILABLE" },
        { CL_COMPILER_NOT_AVAILABLE, "CL_COMPILER_NOT_AVAILABLE" },
        { CL_MEM_OBJECT_ALLOCATION_FAILURE, "CL_MEM_OBJECT_ALLOCATION_FAILURE" },
        { CL_OUT_OF_RESOURCES, "CL_OUT_OF_RESOURCES" },
        { CL_OUT_OF_HOST_MEMORY, "CL_OUT_OF_HOST_MEMORY" },
        { CL_PROFILING_INFO_NOT_AVAILABLE, "CL_PROFILING_INFO_NOT_AVAILABLE" },
        { CL_MEM_COPY_OVERLAP, "CL_MEM_COPY_OVERLAP" },
        { CL_IMAGE_FORMAT_MISMATCH, "CL_IMAGE_FORMAT_MISMATCH" },
        { CL_IMAGE_FORMAT_NOT_SUPPORTED, "CL_IMAGE_FORMAT_NOT_SUPPORTED" },
        { CL_BUILD_PROGRAM_FAILURE, "CL_BUILD_PROGRAM_FAILURE" },
        { CL_MAP_FAILURE, "CL_MAP_FAILURE" },
        { CL_MISALIGNED_SUB_BUFFER_OFFSET, "CL_MISALIGNED_SUB_BUFFER_OFFSET" },
        { CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST" },
        { CL_COMPILE_PROGRAM_FAILURE, "CL_COMPILE_PROGRAM_FAILURE" },
        { CL_LINKER_NOT_AVAILABLE, "CL_LINKER_NOT_AVAILABLE" },
        { CL_LINK_PROGRAM_FAILURE, "CL_LINK_PROGRAM_FAILURE" },
        { CL_DEVICE_PARTITION_FAILED, "CL_DEVICE_PARTITION_FAILED" },
        { CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE" },
        { CL_INVALID_VALUE, "CL_INVALID_VALUE" },
        { CL_INVALID_DEVICE_TYPE, "CL_INVALID_DEVICE_TYPE" },
        { CL_INVALID_DEVICE, "CL_INVALID_DEVICE" },
        { CL_INVALID_CONTEXT, "CL_INVALID_CONTEXT" },
        { CL_INVALID_QUEUE_PROPERTIES, "CL_INVALID_QUEUE_PROPERTIES" },
        { CL_INVALID_COMMAND_QUEUE, "CL_INVALID_COMMAND_QUEUE" },
        { CL_INVALID_HOST_PTR, "CL_INVALID_HOST_PTR" },
        { CL_INVALID_MEM_OBJECT, "CL_INVALID_MEM_OBJECT" },
        { CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" },
        { CL_INVALID_IMAGE_SIZE, "CL_INVALID_IMAGE_SIZE" },
        { CL_INVALID_SAMPLER, "CL_INVALID_SAMPLER" },
        { CL_INVALID_BINARY, "CL_INVALID_BINARY" },
        { CL_INVALID_BUILD_OPTIONS, "CL_INVALID_BUILD_OPTIONS" },
        { CL_INVALID_PROGRAM, "CL_INVALID_PROGRAM" },
        { CL_INVALID_PROGRAM_EXECUTABLE, "CL_INVALID_PROGRAM_EXECUTABLE" },
        { CL_INVALID_KERNEL_NAME, "CL_INVALID_KERNEL_NAME" },
        { CL_INVALID_KERNEL_DEFINITION, "CL_INVALID_KERNEL_DEFINITION" },
        { CL_INVALID_KERNEL, "CL_INVALID_KERNEL" },
        { CL_INVALID_ARG_INDEX, "CL_INVALID_ARG_INDEX" },
        { CL_INVALID_ARG_VALUE, "CL_INVALID_ARG_VALUE" },
        { CL_INVALID_ARG_SIZE, "CL_INVALID_ARG_SIZE" },
        { CL_INVALID_KERNEL_ARGS, "CL_INVALID_KERNEL_ARGS" },
        { CL_INVALID_WORK_DIMENSION, "CL_INVALID_WORK_DIMENSION" },
        { CL_INVALID_WORK_GROUP_SIZE, "CL_INVALID_WORK_GROUP_SIZE" },
        { CL_INVALID_WORK_ITEM_SIZE, "CL_INVALID_WORK_ITEM_SIZE" },
        { CL_INVALID_GLOBAL_OFFSET, "CL_INVALID_GLOBAL_OFFSET" },
        { CL_INVALID_EVENT_WAIT_LIST, "CL_INVALID_EVENT_WAIT_LIST" },
        { CL_INVALID_EVENT, "CL_INVALID_EVENT" },
        { CL_INVALID_OPERATION, "CL_INVALID_OPERATION" },
        { CL_INVALID_GL_OBJECT, "CL_INVALID_GL_OBJECT" },
        { CL_INVALID_BUFFER_SIZE, "CL_INVALID_BUFFER_SIZE" },
        { CL_INVALID_MIP_LEVEL, "CL_INVALID_MIP_LEVEL" },
        { CL_INVALID_GLOBAL_WORK_SIZE, "CL_INVALID_GLOBAL_WORK_SIZE" },
        { CL_INVALID_PROPERTY, "CL_INVALID_PROPERTY" },
        { CL_INVALID_IMAGE_DESCRIPTOR, "CL_INVALID_IMAGE_DESCRIPTOR" },
        { CL_INVALID_COMPILER_OPTIONS, "CL_INVALID_COMPILER_OPTIONS" },
        { CL_INVALID_LINKER_OPTIONS, "CL_INVALID_LINKER_OPTIONS" },
        { CL_INVALID_DEVICE_PARTITION_COUNT, "CL_INVALID_DEVICE_PARTITION_COUNT" },
    };
    size_t row;

    for( row = 0; row < sizeof( kErrorNames ) / sizeof( kErrorNames[ 0 ] ); row++ )
    {
        if( kErrorNames[ row ].code == clErrorCode )
            return kErrorNames[ row ].name;
    }
    return "(unknown)";
}
// Return the spelling of an image channel-order constant, or NULL when the
// value is not one of the orders listed here. The two Apple-specific orders
// are only recognized when the headers define them.
const char *GetChannelOrderName( cl_channel_order order )
{
    static const struct { cl_channel_order order; const char *name; } kOrderNames[] =
    {
        { CL_R, "CL_R" },
        { CL_A, "CL_A" },
        { CL_Rx, "CL_Rx" },
        { CL_RG, "CL_RG" },
        { CL_RA, "CL_RA" },
        { CL_RGx, "CL_RGx" },
        { CL_RGB, "CL_RGB" },
        { CL_RGBx, "CL_RGBx" },
        { CL_RGBA, "CL_RGBA" },
        { CL_ARGB, "CL_ARGB" },
        { CL_BGRA, "CL_BGRA" },
        { CL_INTENSITY, "CL_INTENSITY" },
        { CL_LUMINANCE, "CL_LUMINANCE" },
#if defined CL_1RGB_APPLE
        { CL_1RGB_APPLE, "CL_1RGB_APPLE" },
#endif
#if defined CL_BGR1_APPLE
        { CL_BGR1_APPLE, "CL_BGR1_APPLE" },
#endif
    };
    size_t row;

    for( row = 0; row < sizeof( kOrderNames ) / sizeof( kOrderNames[ 0 ] ); row++ )
    {
        if( kOrderNames[ row ].order == order )
            return kOrderNames[ row ].name;
    }
    return NULL;
}
// Returns 1 when `order` is one of the channel orders listed below (including
// the Apple-specific ones when the headers define them), 0 otherwise.
int IsChannelOrderSupported( cl_channel_order order )
{
    int known;

    switch( order )
    {
        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_RG:
        case CL_RA:
        case CL_RGx:
        case CL_RGB:
        case CL_RGBx:
        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
        case CL_INTENSITY:
        case CL_LUMINANCE:
#if defined CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#if defined CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            known = 1;
            break;
        default:
            known = 0;
            break;
    }
    return known;
}
// Return the spelling of an image channel data-type constant, or NULL when the
// value is not one of the types listed here. CL_SFIXED14_APPLE is only
// recognized when the headers define it.
const char *GetChannelTypeName( cl_channel_type type )
{
    static const struct { cl_channel_type type; const char *name; } kTypeNames[] =
    {
        { CL_SNORM_INT8, "CL_SNORM_INT8" },
        { CL_SNORM_INT16, "CL_SNORM_INT16" },
        { CL_UNORM_INT8, "CL_UNORM_INT8" },
        { CL_UNORM_INT16, "CL_UNORM_INT16" },
        { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
        { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
        { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
        { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
        { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
        { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
        { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
        { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
        { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
        { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
        { CL_FLOAT, "CL_FLOAT" },
#ifdef CL_SFIXED14_APPLE
        { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" },
#endif
    };
    size_t row;

    for( row = 0; row < sizeof( kTypeNames ) / sizeof( kTypeNames[ 0 ] ); row++ )
    {
        if( kTypeNames[ row ].type == type )
            return kTypeNames[ row ].name;
    }
    return NULL;
}
// Returns 1 when `type` is one of the channel data types listed below
// (including CL_SFIXED14_APPLE when the headers define it), 0 otherwise.
int IsChannelTypeSupported( cl_channel_type type )
{
    int known;

    switch( type )
    {
        case CL_SNORM_INT8:
        case CL_SNORM_INT16:
        case CL_UNORM_INT8:
        case CL_UNORM_INT16:
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
        case CL_SIGNED_INT8:
        case CL_SIGNED_INT16:
        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT8:
        case CL_UNSIGNED_INT16:
        case CL_UNSIGNED_INT32:
        case CL_HALF_FLOAT:
        case CL_FLOAT:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            known = 1;
            break;
        default:
            known = 0;
            break;
    }
    return known;
}
// Return the spelling of a sampler addressing-mode constant, or NULL when the
// value is not one of the five modes listed here.
const char *GetAddressModeName( cl_addressing_mode mode )
{
    if( mode == CL_ADDRESS_NONE )
        return "CL_ADDRESS_NONE";
    if( mode == CL_ADDRESS_CLAMP_TO_EDGE )
        return "CL_ADDRESS_CLAMP_TO_EDGE";
    if( mode == CL_ADDRESS_CLAMP )
        return "CL_ADDRESS_CLAMP";
    if( mode == CL_ADDRESS_REPEAT )
        return "CL_ADDRESS_REPEAT";
    if( mode == CL_ADDRESS_MIRRORED_REPEAT )
        return "CL_ADDRESS_MIRRORED_REPEAT";
    return NULL;
}
// Return the spelling of a device-type constant. Only exact matches against
// the four values below are recognized; anything else, including
// combinations of bits, yields NULL.
const char *GetDeviceTypeName( cl_device_type type )
{
    if( type == CL_DEVICE_TYPE_GPU )
        return "CL_DEVICE_TYPE_GPU";
    if( type == CL_DEVICE_TYPE_CPU )
        return "CL_DEVICE_TYPE_CPU";
    if( type == CL_DEVICE_TYPE_ACCELERATOR )
        return "CL_DEVICE_TYPE_ACCELERATOR";
    if( type == CL_DEVICE_TYPE_ALL )
        return "CL_DEVICE_TYPE_ALL";
    return NULL;
}
// Format `vecSize` elements of `typeSize` bytes each, starting at
// `dataBuffer`, as lowercase hex. Each element is printed from its
// highest-addressed byte down to its lowest, and elements are separated by a
// single space.
//
// buffer: destination supplied by the caller, or NULL to use an internal
//         static buffer (which makes the call non-reentrant). A caller
//         buffer must hold vecSize * (2 * typeSize + 1) characters; its size
//         is unknown here and cannot be checked.
//
// Returns the buffer that was written. With the internal buffer, output stops
// after the last whole element that fits instead of writing past its end.
const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
{
    static const char hexDigits[] = "0123456789abcdef";
    static char scratch[ 1024 ];
    const unsigned char *element = (const unsigned char *)dataBuffer;
    const int bounded = ( buffer == NULL );   // only the scratch capacity is known
    size_t room = sizeof( scratch ) - 1;      // characters left, NUL excluded
    char *out;
    size_t i, j;

    if( bounded )
        buffer = scratch;
    out = buffer;

    for( i = 0; i < vecSize; i++ )
    {
        const size_t separator = ( i > 0 ) ? 1 : 0;

        if( bounded )
        {
            // Phrased with a division so that 2 * typeSize cannot overflow.
            if( room < separator || ( room - separator ) / 2 < typeSize )
                break;
            room -= separator + 2 * typeSize;
        }

        if( separator )
            *out++ = ' ';

        for( j = 0; j < typeSize; j++ )
        {
            const unsigned char byte = element[ typeSize - j - 1 ];
            *out++ = hexDigits[ byte >> 4 ];
            *out++ = hexDigits[ byte & 0x0f ];
        }
        element += typeSize;
    }

    *out = 0;
    return buffer;
}
// Larger of two values. Each argument may be evaluated twice, so avoid
// arguments with side effects.
#ifndef MAX
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
#endif
// On MSVC the scalbn family is expressed through the ldexp family.
#if defined( _MSC_VER )
#define scalbnf(_a, _i ) ldexpf( _a, _i )
#define scalbn(_a, _i ) ldexp( _a, _i )
#define scalbnl(_a, _i ) ldexpl( _a, _i )
#endif
// Forward declarations for the file-local half-precision helpers below.
static float Ulp_Error_Half_Float( float test, double reference );
static inline float half2float( cl_ushort half );
// taken from math tests
// Half-precision counterparts of FLT_MIN_EXP and FLT_MANT_DIG.
#define HALF_MIN_EXP -13
#define HALF_MANT_DIG 11
// Error of `test` relative to `reference`, expressed in units of the last
// place for half precision (HALF_MANT_DIG significand bits, with the exponent
// clamped at HALF_MIN_EXP-1). The sign of the result is the sign of
// (test - reference).
static float Ulp_Error_Half_Float( float test, double reference )
{
union{ double d; uint64_t u; }u; u.d = reference;
// Note: This function presumes that someone has already tested whether the result is correctly
// rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
double testVal = test;
// Any bit set in the low 52 bits of the reference means it is not an exact
// power of two (this also catches NaN).
if( u.u & 0x000fffffffffffffULL )
{ // Non-power of two and NaN
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// Infinite reference: zero error only for an identical infinity; otherwise
// return the raw difference.
if( isinf( reference ) )
{
if( (double) test == reference )
return 0.0f;
return (float) (testVal - reference );
}
// reference is a normal power of two or a zero
// The exponent is lowered by one here compared with the branch above.
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// Taken from vLoadHalf test
// Expand a 16-bit half-precision bit pattern into the float holding the same
// value. Infinities stay infinities, NaNs become quiet NaNs that keep their
// payload bits, and subnormal halves are renormalized.
static inline float half2float( cl_ushort us )
{
    union { unsigned int u; float f; } result;
    const uint32_t halfBits = us;
    const uint32_t signBit = ( halfBits << 16 ) & 0x80000000;
    int32_t exponent = ( halfBits & 0x7c00 ) >> 10;
    uint32_t fraction = ( halfBits & 0x03ff ) << 13;

    if( exponent == 31 )
    {
        // Inf (fraction == 0) or NaN (fraction != 0)
        result.u = fraction | signBit | ( fraction ? 0x7fc00000 : 0x7f800000 );
        return result.f;
    }

    if( exponent == 0 )
    {
        if( fraction == 0 )
            return signBit ? -0.0f : 0.0f;

        // Subnormal half: shift the leading one up into the implicit-bit
        // position, lower the exponent to match, then drop that bit.
        const int shift = __builtin_clz( fraction ) - 8;
        exponent -= shift - 1;
        fraction <<= shift;
        fraction &= 0x007fffff;
    }

    // Re-bias from half (15) to float (127) and assemble the fields.
    exponent += 127 - 15;
    result.u = ( (uint32_t) exponent << 23 ) | fraction | signBit;
    return result.f;
}
// ULP error of a half-precision result, given as raw bits in `test`, against
// `reference`. The bits are first expanded to float and the comparison is
// then delegated to Ulp_Error_Half_Float.
float Ulp_Error_Half( cl_ushort test, float reference )
{
    const float testAsFloat = half2float( test );

    return Ulp_Error_Half_Float( testAsFloat, reference );
}
// Error of the single-precision result `test` relative to the double
// `reference`, expressed in units of the last place for float (FLT_MANT_DIG
// significand bits, with the exponent clamped at FLT_MIN_EXP-1). The sign of
// the result is the sign of (test - reference).
float Ulp_Error( float test, double reference )
{
union{ double d; uint64_t u; }u; u.d = reference;
double testVal = test;
// Note: This function presumes that someone has already tested whether the result is correctly
// rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
// Infinite reference: zero error only for an identical infinity; otherwise
// return the raw difference.
if( isinf( reference ) )
{
if( testVal == reference )
return 0.0f;
return (float) (testVal - reference );
}
if( isinf( testVal) )
{ // infinite test value, but finite (but possibly overflowing in float) reference.
//
// The function probably overflowed prematurely here. Formally, the spec says this is
// an infinite ulp error and should not be tolerated. Unfortunately, this would mean
// that the internal precision of some half_pow implementations would have to be 29+ bits
// at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
// is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
// after rounding to single is 4*32 = 128, which will ultimately result in premature
// overflow, even though a good faith representation would be correct to within 2**-29
// internally.
// In the interest of not requiring the implementation to go to extraordinary lengths to
// deliver a half precision function, we allow premature overflow within the limit
// of the allowed ulp error. Towards that end, we "pretend" the test value is actually
// 2**128, the next value that would appear in the number line if float had sufficient range.
testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
// Note that the same hack may not work in long double, which is not guaranteed to have
// more range than double. It is not clear that premature overflow should be tolerated for
// double.
}
// Any bit set in the low 52 bits of the reference means it is not an exact
// power of two (this also catches NaN).
if( u.u & 0x000fffffffffffffULL )
{ // Non-power of two and NaN
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
// The exponent is lowered by one here compared with the branch above.
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// Error of the double-precision result `test` relative to the long double
// `reference`, expressed in units of the last place for double (DBL_MANT_DIG
// significand bits, with the exponent clamped at DBL_MIN_EXP-1). When long
// double is no wider than double, half an ulp is added to the magnitude of
// the result.
float Ulp_Error_Double( double test, long double reference )
{
// Deal with long double = double
// On most systems long double is a higher precision type than double. They provide either
// a 80-bit or greater floating point type, or they provide a head-tail double double format.
// That is sufficient to represent the accuracy of a floating point result to many more bits
// than double and we can calculate sub-ulp errors. This is the standard system for which this
// test suite is designed.
//
// On some systems double and long double are the same thing. Then we run into a problem,
// because our representation of the infinitely precise result (passed in as reference above)
// can be off by as much as a half double precision ulp itself. In this case, we inflate the
// reported error by half an ulp to take this into account. A more correct and permanent fix
// would be to undertake refactoring the reference code to return results in this format:
//
// typedef struct DoubleReference
// { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise)
// double correctlyRoundedResult; // as best we can
// double ulps; // plus a fractional amount to account for the difference
// }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps.
//
// This would provide a useful higher-than-double precision format for everyone that we can use,
// and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
// that use a head to tail double double for long double.
// Note: This function presumes that someone has already tested whether the result is correctly
// rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
// NOTE(review): the note above is worded for float (cast to float, FLT_MAX);
// presumably the double equivalents are intended in this function -- confirm.
int x;
long double testVal = test;
// frexpl() yields a significand of exactly 0.5 only for positive powers of
// two, so every other reference value takes this branch.
if( 0.5L != frexpl( reference, &x) )
{ // Non-power of two and NaN
if( isinf( reference ) )
{
if( testVal == reference )
return 0.0f;
return (float) ( testVal - reference );
}
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
// Scale the exponent of the error
float result = (float) scalbnl( testVal - reference, ulp_exp );
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
if( sizeof(long double) == sizeof( double ) )
result += copysignf( 0.5f, result);
return result;
}
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
// The exponent is lowered by one here compared with the branch above.
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
// Scale the exponent of the error
float result = (float) scalbnl( testVal - reference, ulp_exp );
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
if( sizeof(long double) == sizeof( double ) )
result += copysignf( 0.5f, result);
return result;
}
// Log the build status and build log of `program` for every device on which
// the build did not succeed.
//
// program:     program to inspect; NULL is accepted and does nothing.
// num_devices: number of entries in device_list. When 0, device_list is
//              ignored and the devices are queried from the program's context.
// device_list: devices to inspect (used only when num_devices != 0).
//
// Returns CL_SUCCESS when every query succeeded (whether or not any build
// failed), the failing OpenCL error code when a query failed, or
// CL_OUT_OF_HOST_MEMORY when an allocation failed. Everything allocated here
// is released on every path.
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
{
    // All locals are declared up front so that the jumps to `cleanup` never
    // cross an initialization (this also keeps the function valid as C++).
    int error = CL_SUCCESS;
    size_t size_ret;
    size_t log_size;
    cl_device_id *queried_devices = NULL;   // owned here; set only when we query the list
    char *build_log = NULL;                 // owned here; one log at a time
    unsigned int i;

    // Does the program object exist?
    if (program == NULL)
        return CL_SUCCESS;

    // Was the number of devices given?
    if (num_devices == 0) {
        // If zero devices were specified then allocate and query the device list from the context
        cl_context context;

        error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query program's context" );
            goto cleanup;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query context's device size" );
            goto cleanup;
        }

        num_devices = size_ret / sizeof(cl_device_id);
        queried_devices = (cl_device_id *) malloc(size_ret);
        if (queried_devices == NULL) {
            error = CL_OUT_OF_HOST_MEMORY;
            print_error( error, "malloc failed" );
            goto cleanup;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, queried_devices, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query context's devices" );
            goto cleanup;
        }
        device_list = queried_devices;
    }

    // For each device in the device_list
    for (i = 0; i < num_devices; i++) {
        // Get the build status
        cl_build_status build_status;

        error = clGetProgramBuildInfo(program,
                                      device_list[i],
                                      CL_PROGRAM_BUILD_STATUS,
                                      sizeof(build_status),
                                      &build_status,
                                      &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build status" );
            goto cleanup;
        }

        if (build_status == CL_BUILD_SUCCESS)
            continue;

        // The build failed: log the status, then fetch, log and free the build log
        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);

        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log size" );
            goto cleanup;
        }

        // One extra byte so the text is terminated even if the reported size is 0.
        build_log = (char *) malloc(log_size + 1);
        if (build_log == NULL) {
            error = CL_OUT_OF_HOST_MEMORY;
            print_error( error, "malloc failed" );
            goto cleanup;
        }
        build_log[0] = '\0';
        build_log[log_size] = '\0';

        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log" );
            goto cleanup;
        }

        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
        free(build_log);
        build_log = NULL;
    }

cleanup:
    free(build_log);
    // Freed only if this function allocated it; a caller-supplied list is never touched.
    free(queried_devices);
    return error;
}

View File

@@ -0,0 +1,149 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _errorHelpers_h
#define _errorHelpers_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
// Values for the _higherBetter argument of log_perf / vlog_perf.
#define LOWER_IS_BETTER 0
#define HIGHER_IS_BETTER 1
// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
// functions as described below. This is helpful for integration into an automated testing
// system.
#if USE_ATF
// export BUILD_WITH_ATF=1
#include <ATF/ATF.h>
#define test_start() ATFTestStart()
#define log_info ATFLogInfo
#define log_error ATFLogError
#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
#define test_finish() ATFTestFinish()
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
#define vlog ATFLogInfo
#define vlog_error ATFLogError
#else
// Without ATF: test_start/test_finish expand to nothing and all logging goes
// through printf. log_error is the same function as log_info, so errors are
// not sent to a separate stream.
#define test_start()
#define log_info printf
#define log_error printf
// _format must be a string literal: it is pasted between two other literals.
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
_higherBetter?"higher is better":"lower is better", _number )
#define test_finish()
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
_higherBetter?"higher is better":"lower is better" , _number)
#ifdef _WIN32
#ifdef __MINGW32__
// Use __mingw_printf since it supports "%a" format specifier
#define vlog __mingw_printf
#define vlog_error __mingw_printf
#else
// Use home-baked function that treats "%a" as "%f"
// Forward declaration; the definition appears further down in this header.
static int vlog_win32(const char *format, ...);
#define vlog vlog_win32
#define vlog_error vlog_win32
#endif
#else
#define vlog_error printf
#define vlog printf
#endif
#endif
// Compile-time assertion: declares an array whose size is -1 (a compile
// error) when `b` is false. The two helper levels exist so that __LINE__ is
// expanded before it is pasted into the array name.
#define ct_assert(b) ct_assert_i(b, __LINE__)
#define ct_assert_i(b, line) ct_assert_ii(b, line)
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
// Error-checking helpers for test code.
//
// test_error / test_error_ret: if errCode is not CL_SUCCESS, log msg together
// with the error name and return from the ENCLOSING function (with errCode,
// or with retValue for the _ret form).
//
// The code arguments are parenthesized in every comparison so that arguments
// such as `status & mask` or `err = call()` are compared as a whole. Note
// that errCode may still be evaluated more than once, so avoid arguments with
// side effects. The print_* macros carry their own trailing semicolon.
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
#define test_error_ret(errCode,msg,retValue) { if( (errCode) != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
// expected error code vs. what we got
// test_failure_error returns 1 (the result of the comparison) from the
// enclosing function when the two codes differ.
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, (errCode) != (expectedErrCode))
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( (errCode) != (expectedErrCode) ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
// The warning variants never return: they log and increment a variable named
// `warnings` that must be in scope at the point of use. retValue is accepted
// but unused.
#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, (errCode) != (expectedErrCode))
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( (errCode) != (expectedErrCode) ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
// Name of an OpenCL error code.
extern const char *IGetErrorString( int clErrorCode );
// Error of `test` relative to `reference`, in units of the last place for
// half, float and double results respectively.
extern float Ulp_Error_Half( cl_ushort test, float reference );
extern float Ulp_Error( float test, double reference );
extern float Ulp_Error_Double( double test, long double reference );
// Name and recognition helpers for image channel types and orders.
extern const char *GetChannelTypeName( cl_channel_type type );
extern int IsChannelTypeSupported( cl_channel_type type );
extern const char *GetChannelOrderName( cl_channel_order order );
extern int IsChannelOrderSupported( cl_channel_order order );
extern const char *GetAddressModeName( cl_addressing_mode mode );
extern const char *GetDeviceTypeName( cl_device_type type );
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
#if defined (_WIN32) && !defined(__MINGW32__)
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
// printf-style logger for Windows builds other than MinGW: every "%a"
// conversion in `format` is rewritten to "%f" before the text is handed to
// vprintf.
//
// Returns 0 after printing, or -1 when the temporary copy of the format
// string could not be allocated (nothing is printed from `format` then).
//
// An escaped percent sign ("%%") is skipped as a unit, so literal text such as
// "%%a" is left untouched instead of being turned into "%%f".
static int vlog_win32(const char *format, ...)
{
    const char *new_format = format;
    char *patched = NULL;   // private, writable copy of format; NULL if no rewrite was needed
    va_list args;

    if (strstr(format, "%a")) {
        char *cursor;

        if ((patched = strdup(format)) == NULL) {
            printf("vlog_win32: Failed to allocate memory for strdup\n");
            return -1;
        }
        for (cursor = patched; *cursor; cursor++) {
            if (*cursor != '%')
                continue;
            if (*(cursor+1) == '%') {
                // "%%" is a literal percent: step over its second half too
                cursor++;
            } else if (*(cursor+1) == 'a') {
                // replace %a with %f
                *(cursor+1) = 'f';
            }
        }
        new_format = patched;
    }

    va_start(args, format);
    vprintf(new_format, args);
    va_end(args);

    free(patched);
    return 0;
}
#endif
#ifdef __cplusplus
}
#endif
#endif // _errorHelpers_h

View File

@@ -0,0 +1,89 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
// in integer code, and have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
typedef int FPU_mode_type;
#if defined( __i386__ ) || defined( __x86_64__ )
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#endif
// Switch the reference (host) floating point unit into flush-to-zero mode.
// The control word that was active before the change is stored in *mode so that
// RestoreFPState() can put it back later.
static inline void ForceFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
    // 0x8040 sets MXCSR bits 15 and 6 (flush-to-zero and denormals-are-zero per the Intel SDM)
    const FPU_mode_type savedCsr = _mm_getcsr();
    *mode = savedCsr;
    _mm_setcsr( savedCsr | 0x8040 );
#elif defined( __PPC__ )
    *mode = fpu_control;
    fpu_control = fpu_control | _FPU_MASK_NI;
#elif defined ( __arm__ )
    // read FPSCR, remember it, then write it back with bit 24 set
    unsigned current;
    unsigned flushed;
    __asm__ volatile ("fmrx %0, fpscr" : "=r"(current));
    *mode = current;
    flushed = current | (1U << 24);
    __asm__ volatile ("fmxr fpscr, %0" :: "r"(flushed));
#else
#error ForceFTZ needs an implentation
#endif
}
// Turn denormal flush-to-zero off on the reference (host) floating point unit.
// As with ForceFTZ(), the previously active control word is saved in *mode.
static inline void DisableFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
    // clear the same MXCSR bits (mask 0x8040) that ForceFTZ sets
    const FPU_mode_type savedCsr = _mm_getcsr();
    *mode = savedCsr;
    _mm_setcsr( savedCsr & ~0x8040 );
#elif defined( __PPC__ )
    *mode = fpu_control;
    fpu_control = fpu_control & ~_FPU_MASK_NI;
#elif defined ( __arm__ )
    // read FPSCR, remember it, then write it back with bit 24 cleared
    unsigned current;
    unsigned unflushed;
    __asm__ volatile ("fmrx %0, fpscr" : "=r"(current));
    *mode = current;
    unflushed = current & ~(1U << 24);
    __asm__ volatile ("fmxr fpscr, %0" :: "r"(unflushed));
#else
#error DisableFTZ needs an implentation
#endif
}
// Write the control word held in *mode back to the reference (host) floating point unit.
// *mode is expected to hold a value previously saved by ForceFTZ() or DisableFTZ().
static inline void RestoreFPState( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
    const FPU_mode_type saved = *mode;
    _mm_setcsr( saved );
#elif defined( __PPC__)
    const FPU_mode_type saved = *mode;
    fpu_control = saved;
#elif defined (__arm__)
    __asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
#else
#error RestoreFPState needs an implementation
#endif
}
#else
#error ForceFTZ and RestoreFPState need implentations
#endif
#endif

View File

@@ -0,0 +1,53 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "genericThread.h"
#if defined(_WIN32)
#include <windows.h>
#else // !_WIN32
#include <pthread.h>
#endif
void * genericThread::IStaticReflector( void * data )
{
genericThread *t = (genericThread *)data;
return t->IRun();
}
// Launches a new thread that executes IRun() (via IStaticReflector) on this object.
// The native thread handle/id is stored in mHandle. Returns true when the thread was created.
bool genericThread::Start( void )
{
#if defined(_WIN32)
    LPTHREAD_START_ROUTINE entryPoint = (LPTHREAD_START_ROUTINE) IStaticReflector;
    mHandle = CreateThread( NULL, 0, entryPoint, this, 0, NULL );
    return ( NULL != mHandle );
#else // !_WIN32
    // mHandle's storage is reused to hold the pthread_t identifier
    pthread_t *threadSlot = (pthread_t*)&mHandle;
    const int status = pthread_create( threadSlot, NULL, IStaticReflector, (void *)this );
    return ( 0 == status );
#endif // !_WIN32
}
// Blocks until the thread started by Start() has finished.
//
// Returns the value IRun() returned on pthread platforms, or NULL if pthread_join failed.
// On Windows the thread's return value is not retrieved and NULL is always returned.
//
// On Windows the handle obtained from CreateThread is now closed once the wait completes; previously
// it was never released, leaking one kernel handle per joined thread.
void * genericThread::Join( void )
{
#if defined(_WIN32)
    WaitForSingleObject( (HANDLE)mHandle, INFINITE );
    if( mHandle != NULL )
    {
        CloseHandle( (HANDLE)mHandle );
        mHandle = NULL;   // guards against closing the same handle twice on a repeated Join()
    }
    return NULL;
#else // !_WIN32
    void * retVal;
    int error = pthread_join( (pthread_t)mHandle, &retVal );
    if( error != 0 )
        retVal = NULL;
    return retVal;
#endif // !_WIN32
}

View File

@@ -0,0 +1,42 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _genericThread_h
#define _genericThread_h
#include <stdio.h>
// Minimal portable thread wrapper. Derive from it, implement IRun(), then call Start() to run IRun()
// on a new thread (CreateThread on Windows, pthread_create elsewhere) and Join() to wait for it.
class genericThread
{
public:
virtual ~genericThread() {}
// Creates the thread; returns true on success.
bool Start( void );
// Waits for the thread to finish. Returns IRun()'s result on pthread platforms, NULL on Windows or on error.
void * Join( void );
protected:
// Thread body supplied by the derived class; its return value is what Join() hands back (pthreads only).
virtual void * IRun( void ) = 0;
private:
// Native thread handle (Windows HANDLE) or storage for the pthread_t id.
// NOTE: there is no constructor, so this is uninitialized until Start() has been called.
void* mHandle;
// Static entry point passed to the OS thread API; `data` is the genericThread instance.
static void * IStaticReflector( void * data );
};
#endif // _genericThread_h

View File

@@ -0,0 +1,249 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "imageHelpers.h"
size_t get_format_type_size( const cl_image_format *format )
{
return get_channel_data_type_size( format->image_channel_data_type );
}
/*
 * Storage size in bytes associated with a channel data type.
 * For the packed types (565, 555, 101010 and the obsolete 8888 variants) the value is the size of the
 * whole packed pixel. Unrecognised types yield 0.
 * Note: the OBSOLETE_FORAMT spelling is the macro name actually tested here and must not be "corrected"
 * in isolation.
 */
size_t get_channel_data_type_size( cl_channel_type channelType )
{
    size_t bytes = 0;

    switch( channelType )
    {
        /* 32-bit packed pixels */
        case CL_UNORM_INT_101010:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_INT_101010_REV:
        case CL_UNORM_INT_8888:
        case CL_UNORM_INT_8888_REV:
#endif
            bytes = 4;
            break;

        /* 16-bit packed pixels */
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_SHORT_565_REV:
        case CL_UNORM_SHORT_555_REV:
#endif
            bytes = 2;
            break;

        case CL_FLOAT:
            bytes = sizeof( cl_float );
            break;

        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
            bytes = sizeof( cl_int );
            break;

        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_SIGNED_INT16:
        case CL_UNSIGNED_INT16:
        case CL_HALF_FLOAT:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            bytes = sizeof( cl_short );
            break;

        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SIGNED_INT8:
        case CL_UNSIGNED_INT8:
            bytes = 1;
            break;

        default:
            break;
    }

    return bytes;
}
size_t get_format_channel_count( const cl_image_format *format )
{
return get_channel_order_channel_count( format->image_channel_order );
}
/* Number of channels stored per pixel for a channel order; 0 for orders this helper does not know. */
size_t get_channel_order_channel_count( cl_channel_order order )
{
    size_t channels = 0;

    switch( order )
    {
        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            channels = 4;
            break;

        case CL_RGB:
        case CL_RGBx:
            channels = 3;
            break;

        case CL_RG:
        case CL_RA:
        case CL_RGx:
            channels = 2;
            break;

        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_INTENSITY:
        case CL_LUMINANCE:
            channels = 1;
            break;

        default:
            break;
    }

    return channels;
}
/*
 * Returns 1 when the format's channel data type can represent negative values
 * (signed/snorm integers, half, float, and SFIXED14 where available), otherwise 0.
 */
int is_format_signed( const cl_image_format *format )
{
    int isSigned;

    switch( format->image_channel_data_type )
    {
        case CL_FLOAT:
        case CL_HALF_FLOAT:
        case CL_SIGNED_INT32:
        case CL_SIGNED_INT16:
        case CL_SNORM_INT16:
        case CL_SIGNED_INT8:
        case CL_SNORM_INT8:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            isSigned = 1;
            break;

        default:
            isSigned = 0;
            break;
    }

    return isSigned;
}
/*
 * Size in bytes of one pixel of the given format.
 * Packed data types occupy a fixed 2 or 4 bytes regardless of channel order; every other type is
 * (channel count) x (bytes per channel). Unknown data types or channel orders give 0.
 */
size_t get_pixel_size( cl_image_format *format )
{
    const cl_channel_type dataType = format->image_channel_data_type;
    const size_t bytesPerElement = get_channel_data_type_size( dataType );

    switch( dataType )
    {
        /* packed types: the element size already is the size of the whole pixel */
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_SHORT_565_REV:
        case CL_UNORM_SHORT_555_REV:
        case CL_UNORM_INT_8888:
        case CL_UNORM_INT_8888_REV:
        case CL_UNORM_INT_101010_REV:
#endif
            return bytesPerElement;

        default:
            /* bytesPerElement is 0 for unknown types, so the product is 0 as well */
            return get_format_channel_count( format ) * bytesPerElement;
    }
}
/*
 * Finds a supported image format whose channels are 8 bits wide.
 *
 * context/objType/flags select the list queried from clGetSupportedImageFormats.
 * channelCount: required number of channels, or 0 to accept any channel order.
 * outFormat:    receives the first matching format.
 *
 * Returns 0 on success, the CL error code if the query fails, or -1 if no matching format exists.
 * Only the first 128 formats reported by the implementation are examined.
 */
int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
{
    cl_image_format formatList[ 128 ];
    const unsigned int maxFormats = (unsigned int)( sizeof( formatList ) / sizeof( formatList[ 0 ] ) );
    unsigned int outFormatCount = 0, i;
    int error;

    /* Make sure each image format is supported */
    if ((error = clGetSupportedImageFormats( context, flags, objType, maxFormats, formatList, &outFormatCount )))
        return error;

    /* The returned count is the total number of supported formats, which may be larger than the
       number of entries actually written into formatList. Never read past the buffer. */
    if( outFormatCount > maxFormats )
        outFormatCount = maxFormats;

    /* Look for one that is an 8-bit format */
    for( i = 0; i < outFormatCount; i++ )
    {
        const cl_channel_type type = formatList[ i ].image_channel_data_type;

        if( type != CL_SNORM_INT8 && type != CL_UNORM_INT8 &&
            type != CL_SIGNED_INT8 && type != CL_UNSIGNED_INT8 )
            continue;

        if( channelCount == 0 || get_format_channel_count( &formatList[ i ] ) == channelCount )
        {
            *outFormat = formatList[ i ];
            return 0;
        }
    }
    return -1;
}
/*
 * Finds a supported image format that uses 32 bits of storage per element
 * (CL_UNORM_INT_101010, CL_FLOAT, CL_SIGNED_INT32 or CL_UNSIGNED_INT32).
 *
 * context/objType/flags select the list queried from clGetSupportedImageFormats.
 * channelCount: required number of channels, or 0 to accept any channel order.
 * outFormat:    receives the first matching format.
 *
 * Returns 0 on success, the CL error code if the query fails, or -1 if no matching format exists.
 * Only the first 128 formats reported by the implementation are examined.
 */
int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
{
    cl_image_format formatList[ 128 ];
    const unsigned int maxFormats = (unsigned int)( sizeof( formatList ) / sizeof( formatList[ 0 ] ) );
    unsigned int outFormatCount = 0, i;
    int error;

    /* Make sure each image format is supported */
    if ((error = clGetSupportedImageFormats( context, flags, objType, maxFormats, formatList, &outFormatCount )))
        return error;

    /* The returned count is the total number of supported formats, which may be larger than the
       number of entries actually written into formatList. Never read past the buffer. */
    if( outFormatCount > maxFormats )
        outFormatCount = maxFormats;

    /* Look for one that is a 32-bit format */
    for( i = 0; i < outFormatCount; i++ )
    {
        const cl_channel_type type = formatList[ i ].image_channel_data_type;

        if( type != CL_UNORM_INT_101010 && type != CL_FLOAT &&
            type != CL_SIGNED_INT32 && type != CL_UNSIGNED_INT32 )
            continue;

        if( channelCount == 0 || get_format_channel_count( &formatList[ i ] ) == channelCount )
        {
            *outFormat = formatList[ i ];
            return 0;
        }
    }
    return -1;
}

View File

@@ -0,0 +1,37 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _imageHelpers_h
#define _imageHelpers_h
#include "errorHelpers.h"
/* Bytes associated with the format's channel data type (whole pixel for packed types); 0 if unknown */
extern size_t get_format_type_size( const cl_image_format *format );
/* Same as above, keyed directly by channel data type */
extern size_t get_channel_data_type_size( cl_channel_type channelType );
/* Number of channels implied by the format's channel order; 0 if unknown */
extern size_t get_format_channel_count( const cl_image_format *format );
/* Same as above, keyed directly by channel order */
extern size_t get_channel_order_channel_count( cl_channel_order order );
/* Non-zero when the format's channel data type can hold negative values */
extern int is_format_signed( const cl_image_format *format );
/* Bytes occupied by one pixel of the format; 0 if the data type or channel order is unknown */
extern size_t get_pixel_size( cl_image_format *format );
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
/* (channelCount == 0 accepts any channel count; returns 0 on success, a CL error, or -1 when nothing matches) */
extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
/* (channelCount == 0 accepts any channel count; returns 0 on success, a CL error, or -1 when nothing matches) */
extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
#endif // _imageHelpers_h

View File

@@ -0,0 +1,684 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "kernelHelpers.h"
#include "errorHelpers.h"
#include "imageHelpers.h"
#if defined(__MINGW32__)
#include "mingw_compat.h"
#endif
/*
 * Creates a program from the numKernelLines source strings in kernelProgram, builds it with default
 * options (no explicit device list), checks the build status reported for every device of the program,
 * and finally creates the kernel named kernelName.
 *
 * On success returns 0 with *outProgram and *outKernel set.
 * On failure returns a CL error code or -1 after logging the source and, where available, the build log.
 * NOTE: once created, *outProgram is not released by this function on any failure path.
 * NOTE: the build log is fetched into a fixed 10240-byte buffer, and only the log of the first failing
 * device is printed because the function returns right after printing it.
 */
int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName )
{
int error = CL_SUCCESS;
/* Create the program object from source */
*outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
if( *outProgram == NULL || error != CL_SUCCESS)
{
print_error( error, "clCreateProgramWithSource failed" );
return error;
}
/* Compile the program */
int buildProgramFailed = 0;
int printedSource = 0;
error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL );
if (error != CL_SUCCESS)
{
// A failed build is not fatal yet: dump the source now and keep going so the per-device
// status and build logs can be reported below.
unsigned int i;
print_error(error, "clBuildProgram failed");
buildProgramFailed = 1;
printedSource = 1;
log_error( "Original source is: ------------\n" );
for( i = 0; i < numKernelLines; i++ )
log_error( "%s", kernelProgram[ i ] );
}
// Verify the build status on all devices
cl_uint deviceCount = 0;
error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
if (error != CL_SUCCESS) {
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
return error;
}
if (deviceCount == 0) {
log_error("No devices found for program.\n");
return -1;
}
// From here on `devices` is heap-allocated and must be freed on every exit path.
cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
if( NULL == devices )
return -1;
memset( devices, 0, deviceCount * sizeof( cl_device_id ));
error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
if (error != CL_SUCCESS) {
print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
free( devices );
return error;
}
cl_uint z;
for( z = 0; z < deviceCount; z++ )
{
// The device name is only used for diagnostics, so a failure to obtain it is logged but not fatal.
char deviceName[4096] = "";
error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
if (error != CL_SUCCESS || deviceName[0] == '\0') {
log_error("Device \"%d\" failed to return a name\n", z);
print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
}
cl_build_status buildStatus;
error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
if (error != CL_SUCCESS) {
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
free( devices );
return error;
}
// Report a failure if either this device's status is bad or clBuildProgram itself returned an error.
if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
char log[10240] = "";
if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
// Translate the status value into its symbolic name for the log message.
char statusString[64] = "";
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
sprintf(statusString, "CL_BUILD_SUCCESS");
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
sprintf(statusString, "CL_BUILD_NONE");
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
sprintf(statusString, "CL_BUILD_ERROR");
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
sprintf(statusString, "CL_BUILD_IN_PROGRESS");
else
sprintf(statusString, "UNKNOWN (%d)", buildStatus);
if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
if (error != CL_SUCCESS || log[0]=='\0'){
// A missing or empty build log is itself treated as a failure.
log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
if (error) {
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
free( devices );
return error;
} else {
log_error("clGetProgramBuildInfo returned an empty log.\n");
free( devices );
return -1;
}
}
// In this case we've already printed out the code above.
if (!printedSource)
{
unsigned int i;
log_error( "Original source is: ------------\n" );
for( i = 0; i < numKernelLines; i++ )
log_error( "%s", kernelProgram[ i ] );
printedSource = 1;
}
log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
log_error( "%s\n", log );
log_error( "\n----------\n" );
free( devices );
return -1;
}
}
/* And create a kernel from it */
*outKernel = clCreateKernel( *outProgram, kernelName, &error );
if( *outKernel == NULL || error != CL_SUCCESS)
{
print_error( error, "Unable to create kernel" );
free( devices );
return error;
}
free( devices );
return 0;
}
/*
 * Parses the numeric version out of a device's CL_DEVICE_VERSION string.
 *
 * The string is expected to match "OpenCL [0-9]+\.[0-9]+ *.*". On success *major and *minor receive
 * the two numbers and 0 is returned. A non-zero value is returned (after logging through test_error)
 * when the query fails, the "OpenCL " prefix is missing, or no '.' follows the major number.
 *
 * The prefix is now verified before it is skipped; previously a string shorter than the prefix made
 * the parser start reading beyond the string terminator.
 */
int get_device_version( cl_device_id id, size_t* major, size_t* minor)
{
    static const char prefix[] = "OpenCL ";
    cl_char buffer[ 4098 ];
    char *p1;
    char *p2;

    // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
    cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, NULL );
    test_error( error, "Unable to get device version string" );

    error = ( strncmp( (const char *)buffer, prefix, sizeof( prefix ) - 1 ) != 0 );
    test_error( error, "ERROR: Version string must begin with \"OpenCL \"!" );

    // tolerate extra blanks between the prefix and the number
    p1 = (char *)buffer + ( sizeof( prefix ) - 1 );
    while( *p1 == ' ' )
        p1++;

    *major = strtol( p1, &p2, 10 );
    error = *p2 != '.';
    test_error(error, "ERROR: Version number must contain a decimal point!");
    *minor = strtol( ++p2, NULL, 10 );
    return error;
}
/*
 * Determines work-group limits that hold for every device in `context` when running `kernel`.
 *
 * outMaxSize: receives the smallest value found among each device's CL_DEVICE_MAX_WORK_GROUP_SIZE and
 *             the kernel's CL_KERNEL_WORK_GROUP_SIZE on that device.
 * outLimits:  optional (may be NULL) array of 3 entries receiving the per-dimension minimum of
 *             CL_DEVICE_MAX_WORK_ITEM_SIZES over all devices.
 *
 * Returns 0 on success; on failure a CL error code (or -1 for an allocation failure) is returned after
 * logging, and *outMaxSize is left untouched.
 *
 * Fixes over the previous implementation:
 *  - devices reporting more than 3 work-item dimensions no longer overflow the local 3-entry array
 *    (nor the caller's outLimits); the sizes are queried into a buffer of the right length;
 *  - the device list is released on every error path and its allocation is checked.
 */
int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
{
    cl_device_id *devices = NULL;
    size_t *dimSizes = NULL;
    size_t size, maxCommonSize = 0;
    size_t outSize = 0;
    size_t sizeLimit[ 3 ] = { 1, 1, 1 };
    cl_uint numDims = 0, dimsToUse, d;
    int numDevices, i, j, error;

    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
    test_error( error, "Unable to obtain list of devices size for context" );

    devices = (cl_device_id *)malloc( outSize );
    if( devices == NULL )
    {
        log_error( "ERROR: Unable to allocate %lu bytes for the context device list\n", (unsigned long)outSize );
        return -1;
    }

    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to obtain list of devices for context" );
        goto cleanup;
    }

    numDevices = (int)( outSize / sizeof( cl_device_id ) );
    for( i = 0; i < numDevices; i++ )
    {
        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "Unable to obtain max work group size for device" );
            goto cleanup;
        }
        if( size < maxCommonSize || maxCommonSize == 0 )
            maxCommonSize = size;

        error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "Unable to obtain max work group size for device and kernel combo" );
            goto cleanup;
        }
        if( size < maxCommonSize || maxCommonSize == 0 )
            maxCommonSize = size;

        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS" );
            goto cleanup;
        }

        /* The device writes numDims entries, which may be more than the 3 tracked here. */
        dimSizes = (size_t *)malloc( numDims * sizeof( size_t ) );
        if( dimSizes == NULL )
        {
            log_error( "ERROR: Unable to allocate buffer for %u work item sizes\n", (unsigned int)numDims );
            error = -1;
            goto cleanup;
        }
        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims * sizeof( size_t ), dimSizes, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES" );
            goto cleanup;
        }

        dimsToUse = ( numDims < 3 ) ? numDims : 3;
        sizeLimit[ 0 ] = 1;
        for( d = 0; d < dimsToUse; d++ )
            sizeLimit[ d ] = dimSizes[ d ];
        free( dimSizes );
        dimSizes = NULL;

        if( outLimits != NULL )
        {
            if( i == 0 )
            {
                /* first device seeds the limits */
                for( j = 0; j < 3; j++ )
                    outLimits[ j ] = sizeLimit[ j ];
            }
            else
            {
                /* later devices can only tighten them */
                for( d = 0; d < dimsToUse; d++ )
                {
                    if( sizeLimit[ d ] < outLimits[ d ] )
                        outLimits[ d ] = sizeLimit[ d ];
                }
            }
        }
    }

    *outMaxSize = maxCommonSize;
    error = 0;

cleanup:
    free( dimSizes );
    free( devices );
    return error;
}
/*
 * 1-D helper: picks the largest work-group size that (a) is allowed for `kernel` on every device of
 * `context`, (b) does not exceed the first-dimension work-item limit, and (c) divides
 * globalThreadSize evenly. The result is written to *outMaxSize.
 * Returns 0 on success or the error reported by get_max_allowed_work_group_size().
 */
int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
                                    size_t globalThreadSize, size_t *outMaxSize )
{
    size_t dimLimits[ 3 ];
    size_t candidate;
    const int status = get_max_allowed_work_group_size( context, kernel, outMaxSize, dimLimits );

    if( status != 0 )
        return status;

    /* Walk downwards from the common maximum until we hit a factor of globalThreadSize that also
       respects the dimension limit. No lower bound check is needed: 1 divides everything. */
    candidate = *outMaxSize;
    while( ( globalThreadSize % candidate ) != 0 || candidate > dimLimits[ 0 ] )
        candidate--;

    *outMaxSize = candidate;
    return 0;
}
/*
 * Shared implementation for the 2-D and 3-D helpers below.
 *
 * Chooses, for each of the numDims dimensions, a local size that divides globalThreadSizes[i], does
 * not exceed sizeLimit[i], and keeps the product of all chosen sizes within maxSize.
 * If the global sizes themselves already satisfy all of these limits they are used unchanged.
 */
static void choose_common_work_group_sizes( const size_t *globalThreadSizes, const size_t *sizeLimit,
                                            size_t maxSize, int numDims, size_t *outMaxSizes )
{
    size_t totalSize = 1;
    size_t remainingSize = maxSize;
    int withinLimits = 1;
    int i;

    /* Simple case: the whole global range fits into a single work-group */
    for( i = 0; i < numDims; i++ )
    {
        totalSize *= globalThreadSizes[ i ];
        if( globalThreadSizes[ i ] > sizeLimit[ i ] )
            withinLimits = 0;
    }
    if( totalSize <= maxSize && withinLimits )
    {
        for( i = 0; i < numDims; i++ )
            outMaxSizes[ i ] = globalThreadSizes[ i ];
        return;
    }

    /* Otherwise hand out the budget dimension by dimension */
    for( i = 0; i < numDims; i++ )
    {
        size_t sizeForThisOne = ( globalThreadSizes[ i ] > remainingSize ) ? remainingSize : globalThreadSizes[ i ];

        /* largest factor of the global size that respects the per-dimension limit */
        while( ( globalThreadSizes[ i ] % sizeForThisOne ) != 0 || sizeForThisOne > sizeLimit[ i ] )
            sizeForThisOne--;

        outMaxSizes[ i ] = sizeForThisOne;
        /* budget left for the following dimensions: maxSize divided by every size chosen so far */
        remainingSize /= sizeForThisOne;
    }
}

/*
 * 2-D variant: fills outMaxSizes[0..1] with local sizes that divide globalThreadSizes[0..1] and fit
 * the limits common to all devices of `context` for `kernel`.
 * Returns 0 on success or the error reported by get_max_allowed_work_group_size().
 */
int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
                                       size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t sizeLimit[ 3 ];
    size_t maxSize;
    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
    if( error != 0 )
        return error;

    choose_common_work_group_sizes( globalThreadSizes, sizeLimit, maxSize, 2, outMaxSizes );
    return 0;
}

/*
 * 3-D variant: fills outMaxSizes[0..2] with local sizes that divide globalThreadSizes[0..2] and fit
 * the limits common to all devices of `context` for `kernel`.
 * Returns 0 on success or the error reported by get_max_allowed_work_group_size().
 */
int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
                                       size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t sizeLimit[ 3 ];
    size_t maxSize;
    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
    if( error != 0 )
        return error;

    choose_common_work_group_sizes( globalThreadSizes, sizeLimit, maxSize, 3, outMaxSizes );
    return 0;
}
/*
 * Helper to determine if an extension is supported by a device.
 *
 * Returns 1 when extensionName appears as a complete, space-delimited entry of the device's
 * CL_DEVICE_EXTENSIONS string, and 0 otherwise (including on query/allocation errors, which are
 * logged, and for a NULL or empty name).
 *
 * Matching whole entries avoids the false positives of a plain substring search, where a name that
 * is merely a prefix (or any other part) of a longer extension name was reported as available.
 */
int is_extension_available( cl_device_id device, const char *extensionName )
{
    char *extString;
    const char *hit;
    size_t size = 0;
    size_t nameLen;
    int err;
    int result = 0;

    if( NULL == extensionName || '\0' == extensionName[ 0 ] )
        return 0;
    nameLen = strlen( extensionName );

    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) ))
    {
        log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        return 0;
    }

    if( 0 == size )
        return 0;

    extString = (char*) malloc( size );
    if( NULL == extString )
    {
        log_error( "Error: unable to allocate %ld byte buffer for extension string at %s:%d (err = %d)\n", (long) size, __FILE__, __LINE__, err );
        return 0;
    }

    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) ))
    {
        log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        free( extString );
        return 0;
    }

    /* Examine every occurrence of the name and accept only one bounded by the string ends or blanks */
    for( hit = strstr( extString, extensionName ); hit != NULL; hit = strstr( hit + 1, extensionName ) )
    {
        const int startsEntry = ( hit == extString ) || ( hit[ -1 ] == ' ' );
        const int endsEntry = ( hit[ nameLen ] == '\0' ) || ( hit[ nameLen ] == ' ' );

        if( startsEntry && endsEntry )
        {
            result = 1;
            break;
        }
    }

    free( extString );
    return result;
}
/*
 * Helper to determine if a device supports an image format.
 *
 * Returns 1 when `fmt` (same channel order and channel data type) is among the formats reported by
 * clGetSupportedImageFormats for the given context, flags and image type; 0 otherwise, including
 * when the query or the allocation of the format list fails.
 *
 * Fixes over the previous implementation: the count query passes num_entries = 0 together with the
 * NULL list and its status is honoured; the failure message of the second query reports the status
 * of that query instead of the stale status of the first one; the byte count is cast to match its
 * format specifier.
 */
int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
{
    cl_image_format *list;
    cl_uint count = 0;
    cl_uint i;
    int found = 0;

    /* first ask how many formats there are */
    cl_int err = clGetSupportedImageFormats( context, flags, image_type, 0, NULL, &count );
    if( err != CL_SUCCESS || count == 0 )
        return 0;

    list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
    if( NULL == list )
    {
        log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", (long)( count * sizeof( cl_image_format ) ), __FILE__, __LINE__, err );
        return 0;
    }

    err = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
    if( err )
    {
        log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        free( list );
        return 0;
    }

    // iterate looking for a match.
    for( i = 0; i < count; i++ )
    {
        if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
            fmt->image_channel_order == list[ i ].image_channel_order )
        {
            found = 1;
            break;
        }
    }

    free( list );
    return found;
}
size_t get_pixel_bytes( const cl_image_format *fmt );

/*
 * Number of bytes one pixel of `fmt` occupies.
 * Packed data types have a fixed size (2 bytes for 565/555, 4 bytes for 101010); all others are the
 * channel count of the order multiplied by 1, 2 or 4 bytes per channel.
 * An unrecognised channel order or data type is logged and terminates the process via abort().
 */
size_t get_pixel_bytes( const cl_image_format *fmt )
{
    size_t channels = 0;
    size_t pixelBytes = 0;

    /* Step 1: how many channels does the order carry? */
    switch( fmt->image_channel_order )
    {
        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            channels = 4;
            break;

        case CL_RGB:
        case CL_RGBx:
            channels = 3;
            break;

        case CL_RG:
        case CL_RA:
        case CL_RGx:
            channels = 2;
            break;

        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_INTENSITY:
        case CL_LUMINANCE:
            channels = 1;
            break;

        default:
            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
            abort();
            break;
    }

    /* Step 2: scale by the per-channel size, or use the fixed size of a packed type */
    switch( fmt->image_channel_data_type )
    {
        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
        case CL_FLOAT:
            pixelBytes = channels * 4;
            break;

        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_HALF_FLOAT:
        case CL_SIGNED_INT16:
        case CL_UNSIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            pixelBytes = channels * 2;
            break;

        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SIGNED_INT8:
        case CL_UNSIGNED_INT8:
            pixelBytes = channels;
            break;

        case CL_UNORM_INT_101010:
            pixelBytes = 4;
            break;

        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
            pixelBytes = 2;
            break;

        default:
            log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
            abort();
    }

    return pixelBytes;
}
/*
 * Like checkForImageSupport(), but additionally logs an error when that check does not return 0.
 * Returns 0 when images are supported, CL_IMAGE_FORMAT_NOT_SUPPORTED otherwise.
 */
int verifyImageSupport( cl_device_id device )
{
    const int checkResult = checkForImageSupport( device );

    if( checkResult == 0 )
        return 0;

    log_error( "ERROR: Device does not supported images as required by this test!\n" );
    return CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
/*
 * Queries CL_DEVICE_IMAGE_SUPPORT for the device.
 * Returns 0 when images are supported and CL_IMAGE_FORMAT_NOT_SUPPORTED when they are not; a failing
 * query is handled by test_error.
 */
int checkForImageSupport( cl_device_id device )
{
    cl_uint imageSupport;
    int error;

    /* Check the device props to see if images are supported at all first */
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( imageSupport ), &imageSupport, NULL );
    test_error( error, "Unable to query device for image support" );

    /* A zero answer means no image support at all */
    return ( imageSupport != 0 ) ? 0 : CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
/*
 * Checks whether the device can be used for 3D image tests.
 * Returns CL_IMAGE_FORMAT_NOT_SUPPORTED when the device reports no image support at all, or when it
 * is an EMBEDDED_PROFILE device whose maximum 3D image width, height and depth are all zero.
 * Returns 0 otherwise; failing queries are handled by test_error.
 */
int checkFor3DImageSupport( cl_device_id device )
{
    cl_uint imageSupport;
    char profile[128];
    size_t maxWidth = -1L;
    size_t maxHeight = -1L;
    size_t maxDepth = -1L;
    int error;

    /* Check the device props to see if images are supported at all first */
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( imageSupport ), &imageSupport, NULL );
    test_error( error, "Unable to query device for image support" );
    if( 0 == imageSupport )
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
    test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );

    /* Only embedded-profile devices get the extra dimension check below */
    if( 0 != strcmp( profile, "EMBEDDED_PROFILE" ) )
        return 0;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(maxWidth), &maxWidth, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(maxHeight), &maxHeight, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(maxDepth), &maxDepth, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );

    /* All three maxima being zero signals that 3D images are unavailable */
    if( ( maxHeight | maxWidth | maxDepth ) == 0 )
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    /* So our support is good */
    return 0;
}
/*
 * Allocates `size` bytes aligned to `alignment` bytes; release the block with align_free().
 * Returns NULL when the allocation fails.
 *
 * On the POSIX path, alignments smaller than sizeof(void *) (including 0) are raised to
 * sizeof(void *): posix_memalign rejects such values with EINVAL, which previously made small
 * alignment requests fail on Linux/macOS only.
 */
void * align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    void * ptr = NULL;
    if (alignment < sizeof(void *))
        alignment = sizeof(void *);
    if (0 == posix_memalign(&ptr, alignment, size))
        return ptr;
    return NULL;
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif
}
/*
 * Releases a block obtained from align_malloc() using the deallocator that matches the platform's
 * allocator. The calls are plain statements now: `return expr;` with a void expression is not valid
 * in a void function in C.
 */
void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
/*
 * Returns the largest CL_DEVICE_MEM_BASE_ADDR_ALIGN (converted from bits to bytes) among the devices
 * of `context`.
 *
 * The value is computed once and cached in a function-local static, so later calls return the cached
 * value regardless of the context passed in (a cached value of 0 triggers a new query).
 * Returns 0 if the device list cannot be obtained. Devices whose alignment query fails are logged
 * and skipped.
 *
 * The device list is now released when the second clGetContextInfo call fails (it used to leak).
 */
size_t get_min_alignment(cl_context context)
{
    static cl_uint align_size = 0;

    if( 0 == align_size )
    {
        cl_device_id * devices;
        size_t devices_size = 0;
        size_t device_count, i;
        cl_uint result = 0;
        cl_int error;

        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  0,
                                  NULL,
                                  &devices_size);
        test_error_ret(error, "clGetContextInfo failed", 0);

        devices = (cl_device_id*)malloc(devices_size);
        if (devices == NULL) {
            print_error( error, "malloc failed" );
            return 0;
        }

        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  devices_size,
                                  (void*)devices,
                                  NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "clGetContextInfo failed" );
            free(devices);
            return 0;
        }

        device_count = devices_size / sizeof(cl_device_id);
        for (i = 0; i < device_count; i++)
        {
            cl_uint alignment = 0;

            error = clGetDeviceInfo (devices[i],
                                     CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                                     sizeof(cl_uint),
                                     (void*)&alignment,
                                     NULL);
            if (error == CL_SUCCESS)
            {
                alignment >>= 3;    // convert bits to bytes
                result = (alignment > result) ? alignment : result;
            }
            else
                print_error( error, "clGetDeviceInfo failed" );
        }

        align_size = result;
        free(devices);
    }

    return align_size;
}
/*
 * Determines the default single-precision rounding mode of a device.
 *
 * Returns CL_FP_ROUND_TO_NEAREST when the device's CL_DEVICE_SINGLE_FP_CONFIG advertises it.
 * Returns CL_FP_ROUND_TO_ZERO only for EMBEDDED_PROFILE devices that advertise round-to-zero instead.
 * Returns 0 (after logging through test_error_ret) when a query fails, when neither mode is
 * advertised, or when a non-embedded device lacks round-to-nearest.
 *
 * Fixes: the non-embedded failure used to hand the (successful, i.e. zero) status of the preceding
 * query to test_error_ret, so the failure was never reported and CL_FP_ROUND_TO_ZERO was returned;
 * it now forces the failure with -1 like the check above it. The first failure message also named
 * CL_DEVICE_SINGLE_FP_CONFIG where CL_FP_ROUND_TO_ZERO was meant.
 */
cl_device_fp_config get_default_rounding_mode( cl_device_id device )
{
    char profileStr[128] = "";
    cl_device_fp_config single = 0;
    int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
    if( error )
    {
        test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );
    }

    if( single & CL_FP_ROUND_TO_NEAREST )
        return CL_FP_ROUND_TO_NEAREST;

    if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
    {
        test_error_ret( -1, "FAILURE: device must support either CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST", 0 );
    }

    // Make sure we are an embedded device before allowing a pass
    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), profileStr, NULL ) ))
    {
        test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );
    }

    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) != 0 )
    {
        test_error_ret( -1, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );
    }

    return CL_FP_ROUND_TO_ZERO;
}
/*
 * Returns 1 when any of the bits in `prop` is set in the device's CL_DEVICE_QUEUE_PROPERTIES,
 * 0 otherwise. A failing query is reported through test_error_ret with a return value of 0.
 */
int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
{
    cl_command_queue_properties supported;
    cl_int error;

    error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( supported ), &supported, NULL );
    test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );

    if( supported & prop )
        return 1;
    return 0;
}
/*
 * Logs a one-line summary of the device: name, vendor, version and OpenCL C version.
 * Each string is queried with clGetDeviceInfo; a failing query is handled by test_error.
 * Returns CL_SUCCESS once the line has been logged.
 */
int printDeviceHeader( cl_device_id device )
{
    char deviceName[ 512 ];
    char deviceVendor[ 512 ];
    char deviceVersion[ 512 ];
    char cLangVersion[ 512 ];
    const char *cVersionLabel;
    const char *cVersionText;
    int error;

    error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
    test_error( error, "Unable to get CL_DEVICE_NAME for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
    test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
    test_error( error, "Unable to get CL_DEVICE_VERSION for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
    test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );

    /* The C version part is appended only when its query succeeded */
    if( error == CL_SUCCESS )
    {
        cVersionLabel = ", CL C Version = ";
        cVersionText = cLangVersion;
    }
    else
    {
        cVersionLabel = "";
        cVersionText = "";
    }

    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
             deviceName, deviceVendor, deviceVersion, cVersionLabel, cVersionText );

    return CL_SUCCESS;
}

View File

@@ -0,0 +1,131 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _kernelHelpers_h
#define _kernelHelpers_h
#include <stdio.h>
#include <stdlib.h>
#if defined (__MINGW32__)
#include <malloc.h>
#endif
#if !defined(_WIN32)
#include <stdbool.h>
#endif
#include <math.h>
#include <string.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/*
* The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
*
* const char *source = {
* INIT_OPENCL_DEBUG_INFO
* "__kernel void foo( int x )\n"
* "{\n"
* " ...\n"
* "}\n"
* };
*/
/* Expands to a string literal of the form "#line <line> <file>\n" describing the
 * location where the macro is used, for pasting at the top of an inline kernel source. */
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
/* Builds the "#line" directive by stringifying both arguments. The arguments are
 * macro-expanded before STRINGIFY sees them, so __LINE__ becomes its number and
 * __FILE__ (already a string literal) keeps its surrounding quotes. */
#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
/* STRINGIFY_VALUE: stringify the argument after it has been macro-expanded. */
#ifndef STRINGIFY_VALUE
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
#endif
/* STRINGIFY: turn the argument tokens into a string literal. */
#ifndef STRINGIFY
#define STRINGIFY(_x) #_x
#endif
/* Helper that creates a single program and kernel from a single-kernel program source */
extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName );
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
/* 2D variant of get_max_common_work_group_size.
   NOTE(review): globalThreadSize and outSizes presumably hold one entry per dimension (2) - confirm against the implementation. */
extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
/* 3D variant of get_max_common_work_group_size.
   NOTE(review): globalThreadSize and outSizes presumably hold one entry per dimension (3) - confirm against the implementation. */
extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
/* Helper to get major/minor number for a device */
extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
/* Helper to determine if an extension is supported by a device */
extern int is_extension_available( cl_device_id device, const char *extensionName );
/* Helper to determine if a device supports an image format */
extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
/* Helper to get pixel size for a pixel format */
size_t get_pixel_bytes( const cl_image_format *fmt );
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
extern int verifyImageSupport( cl_device_id device );
/* Checks that the given device supports images. Same as verify, but doesn't print an error */
extern int checkForImageSupport( cl_device_id device );
/* 3D-image counterpart of checkForImageSupport; used by PASSIVE_REQUIRE_3D_IMAGE_SUPPORT, which treats a non-zero result as "not supported". */
extern int checkFor3DImageSupport( cl_device_id device );
/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
/* Helper for aligned memory allocation */
void * align_malloc(size_t size, size_t alignment);
/* Releases memory obtained from align_malloc. */
void align_free(void *);
/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
size_t get_min_alignment(cl_context context);
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
/* Skips the calling test when the device lacks image support: if
 * checkForImageSupport() returns non-zero, a note is logged and the enclosing
 * function returns 0. The expansion is a bare if-statement (not wrapped in
 * do { } while (0)), so it must be used inside a function returning an integer
 * and should not be placed directly before an 'else'. */
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
if( checkForImageSupport( device ) ) \
{ \
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
return 0; \
}
/* Same as PASSIVE_REQUIRE_IMAGE_SUPPORT, but keyed on checkFor3DImageSupport(). */
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
if( checkFor3DImageSupport( device ) ) \
{ \
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
return 0; \
}
/* Prints out the standard device header for all tests given the device to print for */
extern int printDeviceHeader( cl_device_id device );
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // _kernelHelpers_h

View File

@@ -0,0 +1,59 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#if defined(__MINGW32__)
#include "mingw_compat.h"
#include <stdio.h>
#include <string.h>
// basename() is unavailable on various MinGW toolchains (especially the
// 64-bit ones), so it is implemented here.
const char *basename_dot=".";

/*
 * Minimal basename() replacement for MinGW.
 *
 *  - NULL or empty path        -> "."
 *  - path without a drive spec -> returned unchanged (the historical
 *    behaviour of this helper: only "X:..." paths are split)
 *  - otherwise                 -> pointer to the component following the
 *    last '\\' or '/' separator, after stripping one trailing separator.
 *
 * The input buffer may be modified (a trailing separator is overwritten with
 * '\0'); the returned pointer aliases the input buffer or a static string.
 */
char*
basename(char *path)
{
    char *p;
    char *base;
    size_t len;

    // Check for NULL before touching the string at all.
    if (path == NULL || path[0] == '\0') {
        return (char*)basename_dot;
    }

    // Not absolute path on windows
    if (path[1] != ':') {
        return path;
    }

    len = strlen(path);

    // Trim a trailing path separator
    if (path[len - 1] == '\\' ||
        path[len - 1] == '/' ) {
        len--;
        path[len] = '\0';
    }

    // Remember the start of the component after the last separator.
    base = path;
    for (p = path; *p != '\0'; p++) {
        if (*p == '\\' || *p == '/') {
            base = p + 1;
        }
    }

    return base;
}
#endif

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef MINGW_COMPAT_H
#define MINGW_COMPAT_H
#if defined(__MINGW32__)
/* basename() replacement implemented in mingw_compat.c. */
char *basename(char *path);
#include <malloc.h>
#if defined(__MINGW64__)
// mingw-w64 does not have __mingw_aligned_malloc/__mingw_aligned_free; it
// provides _aligned_malloc/_aligned_free instead, so map the names.
#define __mingw_aligned_malloc _aligned_malloc
#define __mingw_aligned_free _aligned_free
#include <stddef.h>
#endif //(__MINGW64__)
#endif //(__MINGW32__)
#endif // MINGW_COMPAT_H

749
test_common/harness/msvc9.c Normal file
View File

@@ -0,0 +1,749 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#if defined(_WIN32) && defined (_MSC_VER)
#include "compat.h"
#include <math.h>
#include <float.h>
#include <assert.h>
#include <CL/cl_platform.h>
///////////////////////////////////////////////////////////////////
//
// rint, rintf
//
///////////////////////////////////////////////////////////////////
// Returns a value with the magnitude bits of x and the sign bit of y.
float copysignf( float x, float y )
{
    union{ cl_uint u; float f; } mag, sgn;
    cl_uint magnitudeBits;
    cl_uint signBit;

    mag.f = x;
    sgn.f = y;
    magnitudeBits = mag.u & 0x7fffffffU;
    signBit = sgn.u & 0x80000000U;
    mag.u = magnitudeBits | signBit;
    return mag.f;
}
// Returns a value with the magnitude bits of x and the sign bit of y.
double copysign( double x, double y )
{
    union{ cl_ulong u; double f; } mag, sgn;
    cl_ulong magnitudeBits;
    cl_ulong signBit;

    mag.f = x;
    sgn.f = y;
    magnitudeBits = mag.u & 0x7fffffffffffffffULL;
    signBit = sgn.u & 0x8000000000000000ULL;
    mag.u = magnitudeBits | signBit;
    return mag.f;
}
// Returns x with the top bit of its 16-bit sign/exponent word replaced by
// that of y.
// NOTE(review): the union overlays a 64-bit significand followed by a 16-bit
// sign+exponent field, i.e. it assumes an 80-bit extended long double layout.
// Confirm this holds for every compiler this file is built with; where
// long double is 8 bytes the sexp field lies outside the value.
long double copysignl( long double x, long double y )
{
union
{
long double f;
struct{ cl_ulong m; cl_ushort sexp; }u;
}ux, uy;
ux.f = x;
uy.f = y;
// keep exponent bits of x (low 15 bits), take sign bit (0x8000) from y
ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);
return ux.f;
}
// Rounds x to an integral value by adding and then subtracting 2^23 (carrying
// the sign of x), so that the floating-point addition discards the fraction.
// Values with magnitude >= 2^23, and NaN, are returned unchanged.
float rintf(float x)
{
    const float two23 = 8388608.0f; /* 0x1.0p23f */
    float bias;
    float sum;

    if( !( fabsf(x) < two23 ) )
        return x;

    bias = copysignf( two23, x );
    sum = x + bias;
    sum -= bias;
    // restore the sign so that e.g. -0.25 yields -0.0
    return copysignf( sum, x );
}
// Rounds x to an integral value by adding and then subtracting 2^52 (carrying
// the sign of x), so that the floating-point addition discards the fraction.
// Values with magnitude >= 2^52, and NaN, are returned unchanged.
double rint(double x)
{
    const double two52 = 4503599627370496.0; /* 0x1.0p52 */
    double bias;
    double sum;

    if( !( fabs(x) < two52 ) )
        return x;

    bias = copysign( two52, x );
    sum = x + bias;
    sum -= bias;
    // restore the sign so that e.g. -0.25 yields -0.0
    return copysign( sum, x );
}
// Rounds x to an integral value using the add/subtract "magic number" trick.
//
// The magic number must be 2^(p-1), where p is the number of significand bits
// of long double: adding it pushes every fractional bit out of the
// significand. It is derived from LDBL_MANT_DIG (from <float.h>) instead of
// being hard-coded to 2^63, which is only correct for a 64-bit significand.
// The magnitude test uses fabsl so that x is not first narrowed to double.
// Values with magnitude >= 2^(p-1), and NaN, are returned unchanged.
long double rintl(long double x)
{
    long double absx = fabsl(x);
    long double limit = 1.0L;
    int i;

    // limit = 2^(LDBL_MANT_DIG - 1); repeated doubling is exact
    for( i = 1; i < LDBL_MANT_DIG; i++ )
        limit *= 2.0L;

    if( absx < limit )
    {
        long double magic = copysignl( limit, x );
        long double rounded = x + magic;
        rounded -= magic;
        x = copysignl( rounded, x );
    }
    return x;
}
///////////////////////////////////////////////////////////////////
//
// ilogb, ilogbf, ilogbl
//
///////////////////////////////////////////////////////////////////
#ifndef FP_ILOGB0
    #define FP_ILOGB0 INT_MIN
#endif
#ifndef FP_ILOGBNAN
    #define FP_ILOGBNAN INT_MIN
#endif

// Returns the unbiased binary exponent of x as an int.
//   zero      -> FP_ILOGB0
//   infinity  -> INT_MAX
//   NaN       -> FP_ILOGBNAN
//   subnormal -> exponent of the value as if it were normalized
int ilogb (double x)
{
    union{ double f; uint64_t u;} u;
    uint64_t absx;

    u.f = x;
    absx = u.u & 0x7fffffffffffffffULL;     // drop the sign bit

    // One unsigned range check catches everything that is not a normal
    // number: values below the smallest normal (0x0010000000000000) wrap
    // around, and Inf/NaN are >= 0x7ff0000000000000.
    if( absx - 0x0010000000000000ULL >= 0x7ff0000000000000ULL - 0x0010000000000000ULL)
    {
        if( absx == 0 )
            return FP_ILOGB0;
        if( absx == 0x7ff0000000000000ULL )
            return INT_MAX;
        if( absx > 0x7ff0000000000000ULL )
            return FP_ILOGBNAN;

        // subnormal: place the fraction bits under an exponent of 0 (1.f),
        // subtract 1.0 to let the FPU normalize them, then read the exponent
        u.u = absx | 0x3ff0000000000000ULL;
        u.f -= 1.0;
        return (int)(u.u >> 52) - (1023 + 1022);
    }

    return (int)(absx >> 52) - 1023;
}
// Returns the unbiased binary exponent of x as an int.
//   zero      -> FP_ILOGB0
//   infinity  -> INT_MAX
//   NaN       -> FP_ILOGBNAN
//   subnormal -> exponent of the value as if it were normalized
int ilogbf (float x)
{
    union{ float f; cl_uint u;} bits;
    cl_uint mag;

    bits.f = x;
    mag = bits.u & 0x7fffffff;      // drop the sign bit

    // Normal numbers: one unsigned range check covers [0x00800000, 0x7f800000)
    if( mag - 0x00800000U < 0x7f800000U - 0x00800000U )
        return (mag >> 23) - 127;

    if( mag == 0 )
        return FP_ILOGB0;
    if( mag == 0x7f800000U )
        return INT_MAX;
    if( mag > 0x7f800000 )
        return FP_ILOGBNAN;

    // subnormal: normalize the fraction by forming 1.f and subtracting 1.0f
    bits.u = mag | 0x3f800000U;
    bits.f -= 1.0f;
    return (bits.u >> 23) - (127 + 126);
}
// Returns the unbiased binary exponent of a long double.
//   zero -> FP_ILOGB0, infinity -> INT_MAX, NaN -> FP_ILOGBNAN.
// NOTE(review): the union assumes a 64-bit significand followed by a 16-bit
// sign+exponent word (80-bit extended layout) - confirm for the compilers
// this file is built with.
int ilogbl (long double x)
{
union
{
long double f;
struct{ cl_ulong m; cl_ushort sexp; }u;
} u;
u.f = x;
// 15-bit biased exponent, sign bit masked off
int exp = u.u.sexp & 0x7fff;
if( 0 == exp )
{
if( 0 == u.u.m )
return FP_ILOGB0;
//subnormal
// give the significand an exponent of 0 and subtract 1.0 so the result is
// renormalized, then read back the new exponent
u.u.sexp = 0x3fff;
u.f -= 1.0f;
exp = u.u.sexp & 0x7fff;
return exp - (0x3fff + 0x3ffe);
}
else if( 0x7fff == exp )
{
// all-ones exponent: any significand bit below the top one set means NaN
if( u.u.m & CL_LONG_MAX )
return FP_ILOGBNAN;
return INT_MAX;
}
return exp - 0x3fff;
}
///////////////////////////////////////////////////////////////////
//
// fmax, fmin, fmaxf, fminf
//
///////////////////////////////////////////////////////////////////
// Stores the raw 32-bit pattern of the float fx into *ux.
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
    volatile union {float value; unsigned int raw;} pun;

    pun.value = (fx);
    *ux = pun.raw;
}
/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
/* { */
/* volatile union {float f; unsigned int i;} _bitsy; */
/* _bitsy.f = (fx); */
/* *ux = _bitsy.i; */
/* } */
// Stores into *fx the float whose raw 32-bit pattern is ux.
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
    volatile union {float value; unsigned int raw;} pun;

    pun.raw = (ux);
    *fx = pun.value;
}
/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
/* { */
/* volatile union {float f; unsigned int i;} _bitsy; */
/* _bitsy.i = (ux); */
/* *fx = _bitsy.f; */
/* } */
// Stores the raw 64-bit pattern of the double dx into *lx.
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
    volatile union {double value; unsigned __int64 raw;} pun;

    pun.value = (dx);
    *lx = pun.raw;
}
// Stores into *dx the double whose raw 64-bit pattern is lx.
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
    volatile union {double value; unsigned __int64 raw;} pun;

    pun.raw = (lx);
    *dx = pun.value;
}
#if 0
int SIGNBIT_DP64(double x )
{
int hx;
_GET_HIGH_WORD(hx,x);
return((hx>>31));
}
#endif
/* fmax(x, y) returns the larger (more positive) of x and y.
   A NaN argument is treated as missing: if exactly one argument is NaN the
   other one is returned; if both are NaN, x is returned. */
double fmax(double x, double y)
{
    /* y wins only when it is a number and x is not >= y
       (which also covers x being NaN) */
    if( !isnan(y) && !(x >= y) )
        return y;
    return x;
}
/* fmin(x, y) returns the smaller (more negative) of x and y.
   A NaN argument is treated as missing: if exactly one argument is NaN the
   other one is returned; if both are NaN, x is returned. */
double fmin(double x, double y)
{
    /* y wins only when it is a number and x is not <= y
       (which also covers x being NaN) */
    if( !isnan(y) && !(x <= y) )
        return y;
    return x;
}
/* fmaxf(x, y) returns the larger (more positive) of x and y.
   A NaN argument is treated as missing: if exactly one argument is NaN the
   other one is returned; if both are NaN, x is returned. */
float fmaxf( float x, float y )
{
    if( isnan(y) )
        return x;
    if( x >= y )
        return x;
    /* reached when y is larger, or when x is NaN */
    return y;
}
/* fminf(x, y) returns the smaller (more negative) of x and y.
   A NaN argument is treated as missing: if exactly one argument is NaN the
   other one is returned; if both are NaN, x is returned. */
float fminf(float x, float y)
{
    if( isnan(y) )
        return x;
    if( x <= y )
        return x;
    /* reached when y is smaller, or when x is NaN */
    return y;
}
// Scales x by 2^n through repeated multiplication with powers of two that
// are built by writing the exponent field of a long double directly.
//   x == 0 or n < -2200 -> zero carrying the sign of x
//   n > 2200            -> INFINITY
// NOTE(review): the union assumes a 64-bit significand followed by a 16-bit
// sign+exponent word (80-bit extended layout) - confirm for the compilers
// this file is built with.
long double scalblnl(long double x, long n)
{
union
{
long double d;
struct{ cl_ulong m; cl_ushort sexp;}u;
}u;
// significand with only the top bit set, so u.d is a pure power of two
u.u.m = CL_LONG_MIN;
if( x == 0.0L || n < -2200)
return copysignl( 0.0L, x );
if( n > 2200 )
return INFINITY;
if( n < 0 )
{
// scale down in steps of 2^-1022, then apply the remaining exponent
u.u.sexp = 0x3fff - 1022;
while( n <= -1022 )
{
x *= u.d;
n += 1022;
}
u.u.sexp = 0x3fff + n;
x *= u.d;
return x;
}
if( n > 0 )
{
// scale up in steps of 2^1023, then apply the remaining exponent
u.u.sexp = 0x3fff + 1023;
while( n >= 1023 )
{
x *= u.d;
n -= 1023;
}
u.u.sexp = 0x3fff + n;
x *= u.d;
return x;
}
return x;
}
///////////////////////////////////////////////////////////////////
//
// log2
//
///////////////////////////////////////////////////////////////////
// log2(e) and log2(10) as double constants.
// NOTE(review): neither constant is referenced by the log2/log2l definitions
// that follow - confirm whether anything else in this file uses them.
const static cl_double log_e_base2 = 1.4426950408889634074;
const static cl_double log_10_base2 = 3.3219280948873623478;
//double log10(double x);
double log2(double x)
{
return 1.44269504088896340735992468100189214 * log(x);
}
long double log2l(long double x)
{
return 1.44269504088896340735992468100189214L * log(x);
}
///////////////////////////////////////////////////////////////////
//
// misc functions
//
///////////////////////////////////////////////////////////////////
/*
// This function is commented out because the Windows implementation should never call munmap.
// If it is calling it, we have a bug. Please file a bugzilla.
int munmap(void *addr, size_t len)
{
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
return (int)VirtualAlloc( (LPVOID)addr, len,
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
}
*/
uint64_t ReadTime( void )
{
LARGE_INTEGER current;
QueryPerformanceCounter(&current);
return (uint64_t)current.QuadPart;
}
// Converts the difference between two ReadTime() tick counts to nanoseconds.
// The counter frequency is queried on first use and cached in a static.
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    static double ticksPerSecond = 0.0;
    const uint64_t elapsedTicks = endTime - startTime;

    if( ticksPerSecond == 0.0 )
    {
        LARGE_INTEGER freq;
        QueryPerformanceFrequency( &freq );
        ticksPerSecond = (double) freq.QuadPart;
    }

    return (double) elapsedTicks / ticksPerSecond * 1e9;
}
// Returns a single-precision NaN with bit pattern 0x7fc00000
// (exponent all ones, top mantissa bit set, sign clear).
float make_nan()
{
    /* This is the IEEE 754 single-precision format:
         unsigned int mantissa: 22;
         unsigned int quiet_nan: 1;
         unsigned int exponent: 8;
         unsigned int negative: 1;
     */
    // Type-pun through a union (as int2float does) instead of dereferencing
    // an int32_t object through a float pointer, which breaks strict aliasing.
    union { int32_t i; float f; } bits;

    bits.i = 0x7fc00000;
    return bits.f;
}
// Returns a single-precision NaN whose bit pattern is 0x7fc00000 OR-ed with
// the integer parsed from str by atoi().
float nanf( const char* str)
{
    // Type-pun through a union instead of casting the integer's address to
    // float*, which breaks strict aliasing.
    union { cl_uint u; float f; } bits;

    bits.u = atoi( str );
    bits.u |= 0x7fc00000U;
    return bits.f;
}
// Returns a double-precision NaN whose bit pattern is 0x7ff8000000000000
// OR-ed with the integer parsed from str by atoi().
double nan( const char* str)
{
    // Type-pun through a union instead of casting the integer's address to
    // double*, which breaks strict aliasing.
    union { cl_ulong u; double f; } bits;

    bits.u = atoi( str );
    bits.u |= 0x7ff8000000000000ULL;
    return bits.f;
}
// TODO: double check this implementation.
// Builds a long double NaN by writing an all-ones exponent and a significand
// with the top bit set, OR-ed with the integer parsed from str by atoi().
// NOTE(review): assumes a 64-bit significand followed by a 16-bit
// sign+exponent word (80-bit extended layout) - confirm for the compilers
// this file is built with.
long double nanl( const char* str)
{
union
{
long double f;
struct { cl_ulong m; cl_ushort sexp; }u;
}u;
u.u.sexp = 0x7fff;
u.u.m = 0x8000000000000000ULL | atoi( str );
return u.f;
}
// Rounds x toward zero to an integral value, preserving the sign of x.
// Values with magnitude >= 2^52, and NaN, are returned unchanged.
double trunc(double x)
{
    cl_long whole;

    if( !( fabs(x) < 4503599627370496.0 /* 0x1.0p52 */ ) )
        return x;

    whole = x;      // integer conversion discards the fraction
    return copysign( (double) whole, x );
}
// Rounds x toward zero to an integral value, preserving the sign of x.
// Values with magnitude >= 2^23, and NaN, are returned unchanged.
float truncf(float x)
{
    cl_int whole;

    if( !( fabsf(x) < 8388608.0f /* 0x1.0p23f */ ) )
        return x;

    whole = x;      // integer conversion discards the fraction
    return copysignf( (float) whole, x );
}
// Rounds x to the nearest integer, halfway cases away from zero, and returns
// it as a long. Results at or above LONG_MAX are clamped to LONG_MAX.
long lround(double x)
{
    double mag = fabs(x);
    cl_long nearest;

    if( mag < 0.5 )
        return 0;

    if( mag < 4503599627370496.0 /* 0x1.0p52 */)
    {
        // add one half, drop the fraction, then put the sign back
        mag += 0.5;
        nearest = mag;
        mag = nearest;
        x = copysign( mag, x );
    }

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    return (long) x;
}
// Rounds x to the nearest integer, halfway cases away from zero, and returns
// it as a long. Results at or above LONG_MAX are clamped to LONG_MAX.
long lroundf(float x)
{
    float mag = fabsf(x);
    cl_int nearest;

    if( mag < 0.5f )
        return 0;

    if( mag < 8388608.0f )
    {
        // add one half, drop the fraction, then put the sign back
        mag += 0.5f;
        nearest = mag;
        mag = nearest;
        x = copysignf( mag, x );
    }

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    return (long) x;
}
// Rounds x to the nearest integral value, halfway cases away from zero.
// Magnitudes below 0.5 give a zero with the sign of x; values with
// magnitude >= 2^52, and NaN, are returned unchanged.
double round(double x)
{
    double mag = fabs(x);
    cl_long nearest;

    if( mag < 0.5 )
        return copysign( 0.0, x);

    if( mag < 4503599627370496.0 /* 0x1.0p52 */)
    {
        // add one half, drop the fraction, then put the sign back
        mag += 0.5;
        nearest = mag;
        mag = nearest;
        x = copysign( mag, x );
    }

    return x;
}
// Rounds x to the nearest integral value, halfway cases away from zero.
// Magnitudes below 0.5 give a zero with the sign of x; values with
// magnitude >= 2^23, and NaN, are returned unchanged.
float roundf(float x)
{
    float mag = fabsf(x);
    cl_int nearest;

    if( mag < 0.5f )
        return copysignf( 0.0f, x);

    if( mag < 8388608.0f )
    {
        // add one half, drop the fraction, then put the sign back
        mag += 0.5f;
        nearest = mag;
        mag = nearest;
        x = copysignf( mag, x );
    }

    return x;
}
// Rounds x to the nearest integral value, halfway cases away from zero.
// Magnitudes below 0.5 give a zero with the sign of x; values with
// magnitude >= 2^63, and NaN, are returned unchanged.
long double roundl(long double x)
{
    long double mag = fabsl(x);
    cl_ulong nearest;

    if( mag < 0.5L )
        return copysignl( 0.0L, x);

    if( mag < 9223372036854775808.0L /*0x1.0p63L*/ )
    {
        // add one half, drop the fraction, then put the sign back
        mag += 0.5L;
        nearest = mag;
        mag = nearest;
        x = copysignl( mag, x );
    }

    return x;
}
// Returns the sign bit (bit 63) of a double: 1 if set, 0 otherwise.
int signbit(double x)
{
    union
    {
        double   value;
        cl_ulong raw;
    } bits;

    bits.value = x;
    return bits.raw >> 63;
}
// Returns the sign bit (bit 31) of a float: 1 if set, 0 otherwise.
int signbitf(float x)
{
    union
    {
        float   value;
        cl_uint raw;
    } bits;

    bits.value = x;
    return bits.raw >> 31;
}
float cbrtf( float x )
{
float z = pow( fabs((double) x), 1.0 / 3.0 );
return copysignf( z, x );
}
double cbrt( double x )
{
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
}
// Reinterprets the 32-bit pattern ix as a float (no numeric conversion).
float int2float (int32_t ix)
{
    union {
        float   asFloat;
        int32_t asInt;
    } pun;

    pun.asInt = ix;
    return pun.asFloat;
}
// Returns the 32-bit pattern of the float fx (no numeric conversion).
int32_t float2int (float fx)
{
    union {
        float   asFloat;
        int32_t asInt;
    } pun;

    pun.asFloat = fx;
    return pun.asInt;
}
#if defined(_MSC_VER) && !defined(_WIN64)
/** Returns the number of leading 0-bits in x,
starting at the most significant bit position.
If x is 0, this implementation returns the bit width of int
(8*sizeof(int)).
*/
int __builtin_clz(unsigned int pattern)
{
#if 0
int res;
__asm {
mov eax, pattern
bsr eax, eax
mov res, eax
}
return 31 - res;
#endif
// index receives the position of the highest set bit; res is zero when
// pattern has no bits set
unsigned long index;
unsigned char res = _BitScanReverse( &index, pattern);
if (res) {
return 8*sizeof(int) - 1 - index;
} else {
return 8*sizeof(int);
}
}
#else
/* Portable fallback: returns the number of leading 0-bits of a 32-bit
   pattern, or 32 when pattern is 0. */
int __builtin_clz(unsigned int pattern)
{
int count;
if (pattern == 0u) {
return 32;
}
count = 31;
// binary search for the highest set bit: each step halves the search range
if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; }
if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; }
if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; }
if (pattern >= 1u<<1) { count -= 1; }
return count;
}
#endif //defined(_MSC_VER) && !defined(_WIN64)
#include <intrin.h>
#include <emmintrin.h>
// Rounds x to an integral value with the add/subtract 2^52 trick and returns
// it as a long. Inputs at or above LONG_MAX are clamped to LONG_MAX.
long int lrint (double x)
{
    const double two52 = 4503599627370496.0; /* 0x1.0p52 */
    double bias;
    double sum;

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    // already integral (or NaN): convert directly
    if( !( fabs(x) < two52 ) )
        return (long int) x;

    bias = copysign( two52, x );
    sum = x + bias;
    sum -= bias;
    return (long int) sum;
}
// Rounds x to an integral value with the add/subtract 2^23 trick and returns
// it as a long. Inputs at or above LONG_MAX are clamped to LONG_MAX.
long int lrintf (float x)
{
    const float two23 = 8388608.0f; /* 0x1.0p23f */
    float bias;
    float sum;

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    // already integral (or NaN): convert directly
    if( !( fabsf(x) < two23 ) )
        return (long int) x;

    bias = copysignf( two23, x );
    sum = x + bias;
    sum -= bias;
    return (long int) sum;
}
// Sleeps for usec microseconds, rounded up to whole milliseconds, by calling
// Sleep(). Always returns 0.
int usleep(int usec)
{
    const int millis = (usec + 999) / 1000;

    Sleep( millis );
    return 0;
}
// Translates the _statusfp() status bits into FE_* flags and returns the
// subset of 'excepts' that is currently raised.
int fetestexcept(int excepts)
{
    const unsigned int fpStatus = _statusfp();
    int raised = 0;

    if( fpStatus & _SW_INEXACT )
        raised |= FE_INEXACT;
    if( fpStatus & _SW_UNDERFLOW )
        raised |= FE_UNDERFLOW;
    if( fpStatus & _SW_OVERFLOW )
        raised |= FE_OVERFLOW;
    if( fpStatus & _SW_ZERODIVIDE )
        raised |= FE_DIVBYZERO;
    if( fpStatus & _SW_INVALID )
        raised |= FE_INVALID;

    return excepts & raised;
}
// Clears the floating-point status word via _clearfp(). The 'excepts' mask
// is not consulted. Always returns 0.
int feclearexcept(int excepts)
{
    (void) excepts;
    _clearfp();
    return 0;
}
#endif //defined(_WIN32)

View File

@@ -0,0 +1,274 @@
/*
A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.
Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mt19937.h"
#include "mingw_compat.h"
#ifdef __SSE2__
#include <emmintrin.h>
#endif
/* Allocates 'size' bytes aligned to 'alignment' using the platform's aligned
   allocator. Returns NULL on failure. Release with align_free(). */
static void * align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    void * block = NULL;
    if (posix_memalign(&block, alignment, size) != 0)
        return NULL;
    return block;
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif
}
/* Releases memory obtained from align_malloc() with the matching platform
   deallocator. */
static void align_free(void * ptr)
{
    /* No 'return <expr>;' here: C does not allow a return statement with an
       expression in a function returning void. */
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
/* Period parameters */
#define N 624 /* vector code requires multiple of 4 here */
#define M 397
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
/* Private generator state behind the opaque MTdata handle. */
typedef struct _MTdata
{
/* the N-word state vector */
cl_uint mt[N];
#ifdef __SSE2__
/* tempered outputs, filled by the vector code each time the state is regenerated */
cl_uint cache[N];
#endif
/* index of the next word to hand out; N means the state must be regenerated first */
cl_int mti;
}_MTdata;
/* Allocates a generator state (16-byte aligned) and initializes mt[N] from
   the seed s. Returns NULL if the allocation fails. Release the result with
   free_mtdata(). */
MTdata init_genrand(cl_uint s)
{
    MTdata state = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
    cl_uint *words;
    cl_uint prev;
    int i;

    if( NULL == state )
        return state;

    words = state->mt;
    words[0] = s;
    for( i = 1; i < N; i++ )
    {
        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
        /* The cast keeps the low 32 bits of the product. */
        prev = words[i-1];
        words[i] = (cl_uint)
            (1812433253UL * (prev ^ (prev >> 30)) + i);
    }

    /* i == N here, so the first draw regenerates the state */
    state->mti = i;
    return state;
}
/* Releases a generator state created by init_genrand(). NULL is ignored. */
void free_mtdata( MTdata d )
{
    if( !d )
        return;

    align_free(d);
}
/* generates a random number on [0,0xffffffff]-interval */
/* When d->mti reaches N, all N state words are regenerated before the next
   word is returned. With __SSE2__ the regeneration runs four words at a time
   where possible and the tempering step is applied up front into d->cache;
   without it, tempering is applied to each word as it is returned. */
cl_uint genrand_int32( MTdata d)
{
/* mag01[x] = x * MATRIX_A for x=0,1 */
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
#ifdef __SSE2__
/* vector constants, filled in on the first regeneration (guarded by 'init') */
static volatile int init = 0;
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
#endif
cl_uint *mt = d->mt;
cl_uint y;
if (d->mti == N)
{ /* generate N words at one time */
int kk;
#ifdef __SSE2__
if( 0 == init )
{
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
init = 1;
}
#endif
kk = 0;
#ifdef __SSE2__
// vector loop
for( ; kk + 4 <= N-M; kk += 4 )
{
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
_mm_store_si128( (__m128i*) (mt + kk ), vr );
}
#endif
/* scalar loop: finishes whatever part of [0, N-M) the vector loop left over */
for ( ;kk<N-M;kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
#ifdef __SSE2__
// advance to next aligned location
for (;kk<N-1 && (kk & 3);kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
// vector loop
for( ; kk + 4 <= N-1; kk += 4 )
{
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M-N] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
_mm_store_si128( (__m128i*) (mt + kk ), vr );
}
#endif
/* scalar loop: finishes whatever part of [N-M, N-1) is still left */
for (;kk<N-1;kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
/* last word wraps around and pairs with mt[0] */
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
#ifdef __SSE2__
// Do the tempering ahead of time in vector code
for( kk = 0; kk + 4 <= N; kk += 4 )
{
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
}
#endif
d->mti = 0;
}
#ifdef __SSE2__
/* already tempered during regeneration */
y = d->cache[d->mti++];
#else
y = mt[d->mti++];
/* Tempering */
y ^= (y >> 11);
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
y ^= (y >> 18);
#endif
return y;
}
/* generates a random number on [0,0xffffffffffffffffULL]-interval from two
   consecutive 32-bit draws: the first draw becomes the high word, the second
   the low word. */
cl_ulong genrand_int64( MTdata d)
{
    /* The draws are made in separate statements so that their order is fixed.
       As operands of a single '|' expression the two calls could be evaluated
       in either order, making the result compiler dependent. */
    const cl_ulong high = genrand_int32(d);
    const cl_ulong low = genrand_int32(d);

    return (high << 32) | low;
}
/* generates a random number on [0,1]-real-interval */
double genrand_real1(MTdata d)
{
    /* divide by 2^32-1 so that the largest draw maps to 1.0 */
    const double scale = 1.0/4294967295.0;
    const cl_uint draw = genrand_int32(d);

    return draw*scale;
}
/* generates a random number on [0,1)-real-interval */
double genrand_real2(MTdata d)
{
    /* divide by 2^32 so that 1.0 is never reached */
    const double scale = 1.0/4294967296.0;
    const cl_uint draw = genrand_int32(d);

    return draw*scale;
}
/* generates a random number on (0,1)-real-interval */
double genrand_real3(MTdata d)
{
    /* shift by one half before dividing by 2^32 so neither end is reached */
    const double scale = 1.0/4294967296.0;
    const double draw = (double)genrand_int32(d);

    return (draw + 0.5)*scale;
}
/* generates a random number on [0,1) with 53-bit resolution*/
double genrand_res53(MTdata d)
{
    /* 27 bits from the first draw and 26 bits from the second */
    unsigned long upper = genrand_int32(d)>>5;
    unsigned long lower = genrand_int32(d)>>6;

    /* upper * 2^26 + lower, divided by 2^53 */
    return (upper*67108864.0+lower)*(1.0/9007199254740992.0);
}

View File

@@ -0,0 +1,99 @@
/*
* mt19937.h
*
* Mersenne Twister.
*
A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.
Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
*/
#ifndef MT19937_H
#define MT19937_H 1
#if defined( __APPLE__ )
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* Interfaces here have been modified from original sources so that they
* are safe to call reentrantly, so long as a different MTdata is used
* on each thread.
*/
/* Opaque handle to a generator state; create with init_genrand(), release with free_mtdata(). */
typedef struct _MTdata *MTdata;
/* Create the random number generator with seed. Returns NULL if the state cannot be allocated. */
MTdata init_genrand( cl_uint /*seed*/ );
/* release memory used by a MTdata private data (a NULL handle is ignored) */
void free_mtdata( MTdata /*data*/ );
/* generates a random number on [0,0xffffffff]-interval */
cl_uint genrand_int32( MTdata /*data*/);
/* generates a random number on [0,0xffffffffffffffffULL]-interval */
cl_ulong genrand_int64( MTdata /*data*/);
/* generates a random number on [0,1]-real-interval */
double genrand_real1( MTdata /*data*/);
/* generates a random number on [0,1)-real-interval */
double genrand_real2( MTdata /*data*/);
/* generates a random number on (0,1)-real-interval */
double genrand_real3( MTdata /*data*/);
/* generates a random number on [0,1) with 53-bit resolution*/
double genrand_res53( MTdata /*data*/ );
#ifdef __cplusplus
}
#endif
#endif /* MT19937_H */

View File

@@ -0,0 +1,49 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _ref_counting_h
#define _ref_counting_h
// Records the current reference count of CL object `c`.
//   c       - identifier of the object variable (also used to build the local's name)
//   type    - middle part of the clGet<type>Info query function name
//   bigType - middle part of the CL_<bigType>_REFERENCE_COUNT query constant
// Expands to several statements: it declares a local `cl_uint <c>_refCount`
// in the enclosing scope, assigns the query result to a variable named
// `error` that the caller must already have declared, and hands that result
// to test_error (defined elsewhere). Because it declares a variable it can
// only be used once per object per scope.
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
cl_uint c##_refCount; \
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
test_error( error, "Unable to check reference count for " #type );
// Re-queries the reference count of CL object `c` and compares it with the
// value captured earlier by MARK_REF_COUNT_BASE for the same object.
//   c       - identifier of the object variable (must match the MARK_ use)
//   type    - middle part of the clGet<type>Info query function name
//   bigType - middle part of the CL_<bigType>_REFERENCE_COUNT query constant
// Declares a local `cl_uint <c>_refCount_new`, requires `error` and
// `<c>_refCount` to exist in the enclosing scope, and makes the enclosing
// function return -1 when the count changed.
// The counts are cl_uint, so they are printed with %u rather than %d.
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
cl_uint c##_refCount_new; \
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
test_error( error, "Unable to check reference count for " #type ); \
if( c##_refCount != c##_refCount_new ) \
{ \
log_error( "ERROR: Reference count for " #type " changed! (was %u, now %u)\n", c##_refCount, c##_refCount_new ); \
return -1; \
}
// Per-object-kind convenience wrappers. Each pair forwards to the *_BASE
// macros with the name fragments needed to form the matching
// clGet<type>Info call and CL_<bigType>_REFERENCE_COUNT constant:
// use MARK_* first to capture the count, then TEST_* on the same variable
// to verify that it has not changed.
#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
#endif // _ref_counting_h

View File

@@ -0,0 +1,175 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "rounding_mode.h"
#if !(defined(_WIN32) && defined(_MSC_VER))
// Install the host rounding mode that corresponds to `r` for conversions
// producing `outType`, and report which mode was active beforehand.
//   r       - requested rounding mode; used directly as a table index
//   outType - destination type; kfloat/kdouble select the floating-point
//             table, every other type selects the integer table
// Returns the previously active mode. Aborts if fegetround() reported a mode
// that is not one of the four standard <fenv.h> directions.
RoundingMode set_round( RoundingMode r, Type outType )
{
    // Both tables are indexed by RoundingMode. They differ only in entry 0
    // (kDefaultRoundingMode): nearest for float outputs, toward zero for
    // integer outputs.
    static const int float_modes[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
    static const int integer_modes[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };

    const int isFloatingOutput = ( outType == kfloat || outType == kdouble );
    const int *table = isFloatingOutput ? float_modes : integer_modes;

    // Capture the current mode before replacing it.
    const int previous = fegetround();
    fesetround( table[r] );

    if( previous == FE_TONEAREST )
        return kRoundToNearestEven;
    if( previous == FE_UPWARD )
        return kRoundUp;
    if( previous == FE_DOWNWARD )
        return kRoundDown;
    if( previous == FE_TOWARDZERO )
        return kRoundTowardZero;

    abort(); // ??!
    return kDefaultRoundingMode; //never happens
}
// Report the host's current rounding mode as a RoundingMode value.
// Returns kDefaultRoundingMode when fegetround() yields something other than
// the four standard <fenv.h> directions.
RoundingMode get_round( void )
{
    const int current = fegetround();

    if( current == FE_TONEAREST )
        return kRoundToNearestEven;
    if( current == FE_UPWARD )
        return kRoundUp;
    if( current == FE_DOWNWARD )
        return kRoundDown;
    if( current == FE_TOWARDZERO )
        return kRoundTowardZero;

    return kDefaultRoundingMode;
}
#else
// MSVC variant: install the rounding mode that corresponds to `r` for
// conversions producing `outType`, and report the mode active beforehand.
//   r       - requested rounding mode; used directly as a table index
//   outType - destination type; kfloat/kdouble select the floating-point
//             table, every other type selects the integer table
// Returns the previous mode, or kDefaultRoundingMode if the control word
// could not be read (in which case the mode is left untouched).
RoundingMode set_round( RoundingMode r, Type outType )
{
    // Both tables are indexed by RoundingMode; only entry 0 differs.
    static const int float_modes[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    static const int integer_modes[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };

    const int *table = integer_modes;
    if( outType == kfloat || outType == kdouble )
        table = float_modes;

    // A zero mask reads the control word without modifying it.
    unsigned int control;
    if( _controlfp_s( &control, 0, 0 ) )
    {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode; //what else never happens
    }

    // Translate the rounding-control field before it gets overwritten below.
    RoundingMode previous;
    switch( control & _MCW_RC )
    {
        case _RC_NEAR: previous = kRoundToNearestEven;  break;
        case _RC_UP:   previous = kRoundUp;             break;
        case _RC_DOWN: previous = kRoundDown;           break;
        case _RC_CHOP: previous = kRoundTowardZero;     break;
        default:       previous = kDefaultRoundingMode; break;
    }

    _controlfp_s( &control, table[r], _MCW_RC ); // install the new rounding mode
    return previous;
}
// MSVC variant: report the host's current rounding mode as a RoundingMode.
// Returns kDefaultRoundingMode when the control word cannot be read or its
// rounding field holds an unexpected value.
RoundingMode get_round( void )
{
    unsigned int oldRound = 0;
    // A zero mask reads the control word without modifying it.
    int err = _controlfp_s(&oldRound, 0, 0);
    if (err) {
        // Previously the result was ignored and oldRound was read even though
        // it had never been written; report the failure like set_round does.
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }
    oldRound &= _MCW_RC;
    return
        (oldRound == _RC_NEAR)? kRoundToNearestEven :
        (oldRound == _RC_UP)?   kRoundUp :
        (oldRound == _RC_DOWN)? kRoundDown :
        (oldRound == _RC_CHOP)? kRoundTowardZero:
        kDefaultRoundingMode;
}
#endif
//
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
// software by testing against FLT_MIN or DBL_MIN in that file.
//
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
// operators do (e.g. add, subtract, multiply, divide, etc.)
//
// Configuring hardware to FTZ mode varies by platform.
// CAUTION: Some C implementations may also fail to behave properly in this mode.
//
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
// in which case, these functions are at liberty to do nothing.
//
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
#endif
// Put the host floating-point unit into flush-to-zero mode.
// Returns a token to hand back to UnFlushToZero(): on x86 it carries the
// MXCSR value that was in effect before the change; on ARM and PowerPC it is
// NULL. Compilation fails with #error on any other OS or architecture.
void *FlushToZero( void )
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
// Stash the current MXCSR in the int member of the union so it can be
// returned through the pointer member.
union{ int i; void *p; }u = { _mm_getcsr() };
// 0x8040 = FZ (0x8000) | DAZ (0x0040) bits of the MXCSR.
_mm_setcsr( u.i | 0x8040 );
return u.p;
#elif defined( __arm__ )
// processor is already in FTZ mode -- do nothing
return NULL;
#elif defined( __PPC__ )
// Set the non-IEEE bit in the FPSCR; the previous state is not returned.
fpu_control_t flags = 0;
_FPU_GETCW(flags);
flags |= _FPU_MASK_NI;
_FPU_SETCW(flags);
return NULL;
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
//   p - the token returned by FlushToZero(). Only the x86 path reads it; the
//       ARM path does nothing and the PowerPC path ignores it.
void UnFlushToZero( void *p)
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
// Recover the saved MXCSR from the pointer token (mirror of the union used
// in FlushToZero) and write it back.
union{ void *p; int i; }u = { p };
_mm_setcsr( u.i );
#elif defined( __arm__ )
// processor is already in FTZ mode -- do nothing
#elif defined( __PPC__)
// Clear the non-IEEE bit unconditionally rather than restoring a saved value.
fpu_control_t flags = 0;
_FPU_GETCW(flags);
flags &= ~_FPU_MASK_NI;
_FPU_SETCW(flags);
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}

View File

@@ -0,0 +1,73 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __ROUNDING_MODE_H__
#define __ROUNDING_MODE_H__
#include <stdlib.h>
#if (defined(_WIN32) && defined (_MSC_VER))
// needed for _controlfp_s and the rounding modes used by RoundingMode
#include <float.h>
#include "errorHelpers.h"
#include "testHarness.h"
#else
#include <fenv.h>
#endif
// Rounding modes understood by set_round()/get_round(). The values are used
// directly as indices into set_round()'s lookup tables, so the order matters.
typedef enum
{
// Maps to round-to-nearest for float/double outputs and to round-toward-zero
// for all other output types (see the tables in set_round()).
kDefaultRoundingMode = 0,
kRoundToNearestEven,
kRoundUp,
kRoundDown,
kRoundTowardZero,
// Number of modes above; sizes the lookup tables. Not a valid mode itself.
kRoundingModeCount
}RoundingMode;
// Identifiers for scalar element types. set_round() only distinguishes
// kfloat/kdouble (floating-point outputs) from all the other values.
typedef enum
{
kuchar = 0,
kchar = 1,
kushort = 2,
kshort = 3,
kuint = 4,
kint = 5,
kfloat = 6,
kdouble = 7,
kulong = 8,
klong = 9,
//This goes last
// Number of type identifiers above; not a valid type itself.
kTypeCount
}Type;
#ifdef __cplusplus
extern "C" {
#endif
// Sets the host rounding mode for rounding mode r and output type outType;
// returns the mode that was in effect before the call.
extern RoundingMode set_round( RoundingMode r, Type outType );
// Returns the host's current rounding mode.
extern RoundingMode get_round( void );
// Puts the host FPU into flush-to-zero mode; returns a token for UnFlushToZero().
extern void *FlushToZero( void );
// Reverts FlushToZero(); p is the token that FlushToZero() returned.
extern void UnFlushToZero( void *p);
#ifdef __cplusplus
}
#endif
#endif /* __ROUNDING_MODE_H__ */

View File

@@ -0,0 +1,812 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testHarness.h"
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <stdbool.h>
#endif
#include <math.h>
#include <string.h>
#include "threadTesting.h"
#include "errorHelpers.h"
#include "kernelHelpers.h"
#include "fpcontrol.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <time.h>
#if !defined (__APPLE__)
#include <CL/cl.h>
#endif
#include "compat.h"
// Running totals, incremented by callSingleTestFunction() and reported by
// parseAndCallCommandLineTests().
int gTestsPassed = 0;
int gTestsFailed = 0;
// Seed for the random number generators; stays 0 unless the last command
// line argument is "randomize", in which case it is taken from clock().
cl_uint gRandomSeed = 0;
// Set to 1 together with gRandomSeed when "randomize" is given.
cl_uint gReSeed = 0;
// 1 when the device's CL_DEVICE_SINGLE_FP_CONFIG lacks CL_FP_DENORM.
int gFlushDenormsToZero = 0;
// Cleared for embedded-profile devices whose single-precision config lacks CL_FP_INF_NAN.
int gInfNanSupport = 1;
// 1 when CL_DEVICE_PROFILE contains "EMBEDDED_PROFILE".
int gIsEmbedded = 0;
// 1 when OPENCL_1_0_DEVICE is set and CL_DEVICE_OPENCL_C_VERSION starts with "OpenCL C 1.0 ".
int gIsOpenCL_C_1_0_Device = 0;
// 1 when the OPENCL_1_0_DEVICE environment variable is set.
int gIsOpenCL_1_0_Device = 0;
// Cleared for embedded-profile devices that do not list cles_khr_int64.
int gHasLong = 1;
// num_elements handed to tests when none (or a non-positive value) is given on the command line.
#define DEFAULT_NUM_ELEMENTS 0x4000
// Convenience front end for runTestHarnessWithCheck(): forwards every
// argument unchanged and installs verifyImageSupport as the device check
// when imageSupportRequired is non-zero (no device check otherwise).
// Returns whatever runTestHarnessWithCheck() returns.
int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
                    basefn fnList[], const char *fnNames[],
                    int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
{
    DeviceCheckFn deviceCheck = NULL;
    if( imageSupportRequired )
        deviceCheck = verifyImageSupport;

    return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames,
                                    imageSupportRequired, forceNoContextCreation, queueProps,
                                    deviceCheck );
}
// Main harness entry point.
//
// Selects a platform and device from the CL_DEVICE_TYPE / CL_DEVICE_INDEX /
// CL_PLATFORM_INDEX environment variables and from trailing command line
// arguments, prints the device header, records device capabilities in the
// g* globals, runs the optional device check and finally hands the remaining
// arguments to parseAndCallCommandLineTests().
//
// Trailing arguments are consumed from the end of argv in this order:
//   "randomize", <num_elements>, <device type>, id<num>, pid<num>
//
//   argc, argv             - command line; argc is reduced as trailing options are consumed
//   num_fns                - number of entries in fnNames (includes the trailing "all" entry)
//   fnList, fnNames        - test functions and their names
//   imageSupportRequired   - not used by this function itself
//   forceNoContextCreation - forwarded to the test runner
//   queueProps             - forwarded to the test runner
//   deviceCheckFn          - optional predicate; a result other than CL_SUCCESS aborts the run
//
// Returns 0 for the -list/-h/--help case, -1 (or -2 for a bad CL_MAX_SSE
// value) on setup failure, otherwise the result of
// parseAndCallCommandLineTests().
//
// Fixes relative to the previous revision:
//   * DYLD_FRAMEWORK_PATH is reported based on its own value rather than on
//     DYLD_LIBRARY_PATH's (which could pass NULL to %s or miss the report).
//   * the platform, device and extension-string buffers are released on
//     every error exit.
//   * the -list loop no longer underflows when num_fns is 0.
//   * a zero-length extensions string is rejected instead of being indexed at -1.
int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
                             basefn fnList[], const char *fnNames[],
                             int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
                             DeviceCheckFn deviceCheckFn )
{
    test_start();

    cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
    cl_uint num_platforms = 0;
    cl_platform_id *platforms = NULL;
    cl_device_id device;
    int num_elements = DEFAULT_NUM_ELEMENTS;
    cl_uint num_devices = 0;
    cl_device_id *devices = NULL;
    cl_uint choosen_device_index = 0;
    cl_uint choosen_platform_index = 0;

    int err, ret;
    char *endPtr;
    unsigned int i;
    int based_on_env_var = 0;

    /* Check for environment variable to set device type */
    char *env_mode = getenv( "CL_DEVICE_TYPE" );
    if( env_mode != NULL )
    {
        based_on_env_var = 1;
        if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
            device_type = CL_DEVICE_TYPE_GPU;
        else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
            device_type = CL_DEVICE_TYPE_CPU;
        else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
            device_type = CL_DEVICE_TYPE_ACCELERATOR;
        else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
            device_type = CL_DEVICE_TYPE_DEFAULT;
        else
        {
            log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
            abort();
        }
    }

#if defined( __APPLE__ )
    {
        // report on any unusual library search path indirection
        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
        if( libSearchPath )
            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );

        // report on any unusual framework search path indirection
        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
        if( frameworkSearchPath )
            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
    }
#endif

    env_mode = getenv( "CL_DEVICE_INDEX" );
    if( env_mode != NULL )
    {
        choosen_device_index = atoi(env_mode);
    }

    env_mode = getenv( "CL_PLATFORM_INDEX" );
    if( env_mode != NULL )
    {
        choosen_platform_index = atoi(env_mode);
    }

    /* Process the command line arguments */

    /* Special case: just list the tests */
    if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
    {
        log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
        log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
        log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
        log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
        log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );

        /* The last name is the "all" entry, which is not listed. Written as
           i + 1 < num_fns so that num_fns == 0 does not wrap around. */
        for( i = 0; i + 1 < num_fns; i++ )
        {
            log_info( "\t\t%s\n", fnNames[ i ] );
        }
        test_finish();
        return 0;
    }

    /* How are we supposed to seed the random # generators? */
    if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
    {
        log_info(" Initializing random seed based on the clock.\n");
        gRandomSeed = (unsigned)clock();
        gReSeed = 1;
        argc--;
    }
    else
    {
        log_info(" Initializing random seed to 0.\n");
    }

    /* Do we have an integer to specify the number of elements to pass to tests? */
    if( argc > 1 )
    {
        ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
        if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
        {
            /* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
            /* (hence why we stored the result in ret first) */
            num_elements = ret;
            log_info( "Testing with num_elements of %d\n", num_elements );
            argc--;
        }
    }

    /* Do we have a CPU/GPU specification? */
    if( argc > 1 )
    {
        if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
        {
            device_type = CL_DEVICE_TYPE_GPU;
            argc--;
        }
        else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
        {
            device_type = CL_DEVICE_TYPE_CPU;
            argc--;
        }
        else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
        {
            device_type = CL_DEVICE_TYPE_ACCELERATOR;
            argc--;
        }
        else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
        {
            device_type = CL_DEVICE_TYPE_DEFAULT;
            argc--;
        }
    }

    /* Did we choose a specific device index? */
    if( argc > 1 )
    {
        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
        {
            choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
            argc--;
        }
    }

    /* Did we choose a specific platform index? */
    if( argc > 1 )
    {
        if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
        {
            choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
            argc--;
        }
    }

    switch( device_type )
    {
        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
        default:                            log_error( "Requesting unknown device "); return -1;
    }
    log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
    log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);

#if defined( __APPLE__ )
#if defined( __i386__ ) || defined( __x86_64__ )
#define kHasSSE3                0x00000008
#define kHasSupplementalSSE3    0x00000100
#define kHasSSE4_1              0x00000400
#define kHasSSE4_2              0x00000800
    /* check our environment for a hint to disable SSE variants */
    {
        const char *env = getenv( "CL_MAX_SSE" );
        if( env )
        {
            extern int _cpu_capabilities;
            int mask = 0;
            if( 0 == strcasecmp( env, "SSE4.1" ) )
                mask = kHasSSE4_2;
            else if( 0 == strcasecmp( env, "SSSE3" ) )
                mask = kHasSSE4_2 | kHasSSE4_1;
            else if( 0 == strcasecmp( env, "SSE3" ) )
                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
            else if( 0 == strcasecmp( env, "SSE2" ) )
                mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
            else
            {
                log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
                return -2;
            }

            log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
            _cpu_capabilities &= ~mask;
        }
    }
#endif
#endif

    /* Get the platform */
    err = clGetPlatformIDs(0, NULL, &num_platforms);
    if (err) {
        print_error(err, "clGetPlatformIDs failed");
        test_finish();
        return -1;
    }

    platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
    if (!platforms || choosen_platform_index >= num_platforms) {
        log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
        free(platforms);
        test_finish();
        return -1;
    }

    err = clGetPlatformIDs(num_platforms, platforms, NULL);
    if (err) {
        print_error(err, "clGetPlatformIDs failed");
        free(platforms);
        test_finish();
        return -1;
    }

    /* Get the number of requested devices */
    err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices );
    if (err) {
        print_error(err, "clGetDeviceIDs failed");
        free(platforms);
        test_finish();
        return -1;
    }

    devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
    if (!devices || choosen_device_index >= num_devices) {
        log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
        free(devices);
        free(platforms);
        test_finish();
        return -1;
    }

    /* Get the requested device */
    err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL );
    if (err) {
        print_error(err, "clGetDeviceIDs failed");
        free(devices);
        free(platforms);
        test_finish();
        return -1;
    }

    device = devices[choosen_device_index];
    free(devices);
    devices = NULL;
    free(platforms);
    platforms = NULL;

    if( printDeviceHeader( device ) != CL_SUCCESS )
    {
        test_finish();
        return -1;
    }

    cl_device_fp_config fpconfig = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
    if (err) {
        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
        test_finish();
        return -1;
    }

    gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
    log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
    log_info( "sizeof( void*) = %d  (host)\n", (int) sizeof( void* ) );

    //detect whether profile of the device is embedded
    char profile[1024] = "";
    err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
    if (err)
    {
        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
        test_finish();
        return -1;
    }
    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");

    //detect the floating point capabilities
    cl_device_fp_config floatCapabilities = 0;
    err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
    if (err)
    {
        print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
        test_finish();
        return -1;
    }

    // Check for problems that only embedded will have
    if( gIsEmbedded )
    {
        //If the device is embedded, we need to detect if the device supports Infinity and NaN
        if ((floatCapabilities & CL_FP_INF_NAN) == 0)
            gInfNanSupport = 0;

        // check the extensions list to see if ulong and long are supported
        size_t extensionsStringSize = 0;
        if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
        {
            print_error( err, "Unable to get extensions string size for embedded device" );
            test_finish();
            return -1;
        }
        char *extensions_string = (char*) malloc(extensionsStringSize);
        if( NULL == extensions_string )
        {
            print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
            test_finish();
            return -1;
        }

        if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
        {
            print_error( err, "Unable to get extensions string for embedded device" );
            free(extensions_string);
            test_finish();
            return -1;
        }

        // A zero-length result cannot contain the terminating NUL either.
        if( extensionsStringSize == 0 || extensions_string[extensionsStringSize-1] != '\0' )
        {
            log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
            free(extensions_string);
            test_finish();
            return -1;
        }

        if( NULL == strstr( extensions_string, "cles_khr_int64" ))
            gHasLong = 0;

        free(extensions_string);
    }

    if( getenv( "OPENCL_1_0_DEVICE" ) )
    {
        char c_version[1024];
        gIsOpenCL_1_0_Device = 1;
        memset( c_version, 0, sizeof( c_version ) );

        if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
        {
            log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
            test_finish();
            return -1;
        }

        if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
        {
            gIsOpenCL_C_1_0_Device = 1;
            log_info( "Device is a OpenCL C 1.0 device\n" );
        }
        else
            log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
    }

    cl_uint device_address_bits = 0;
    if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
    {
        print_error( err, "Unable to obtain device address bits" );
        test_finish();
        return -1;
    }
    if( device_address_bits )
        log_info( "sizeof( void*) = %d  (device)\n", device_address_bits/8 );
    else
    {
        log_error("Invalid device address bit size returned by device.\n");
        test_finish();
        return -1;
    }

    /* If we have a device checking function, run it */
    if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
    {
        test_finish();
        return -1;
    }

    if (num_elements <= 0)
        num_elements = DEFAULT_NUM_ELEMENTS;

    // On most platforms which support denorm, default is FTZ off. However,
    // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
    // This creates issues in result verification. Since spec allows the implementation to either flush or
    // not flush denorms to zero, an implementation may choose not to flush i.e. return denorm result whereas
    // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
    // where reference is being computed to make sure we get non-flushed reference result. If implementation
    // returns flushed result, we correctly take care of that in verification code.
#if defined(__APPLE__) && defined(__arm__)
    FPU_mode_type oldMode;
    DisableFTZ( &oldMode );
#endif

    int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );

#if defined(__APPLE__) && defined(__arm__)
    // Restore the old FP mode before leaving.
    RestoreFPState( &oldMode );
#endif

    return error;
}
// Runs the tests selected by the command line and prints the pass/fail summary.
//
// With no arguments (argc == 1) every test is run. Otherwise each argument is
// either
//   * a wildcard ("prefix*"): every test whose name starts with the text
//     before the first '*' is run,
//   * an exact test name: that test is run, or
//   * "all" (when it is the last entry of fnNames): every test is run.
// An argument that matches nothing is reported and sets the result to 1, and
// the remaining arguments are still processed.
//
//   num_fns - number of entries in fnNames, including the trailing "all";
//             only the first num_fns - 1 entries are handed to callTestFunctions().
//
// Returns the accumulated result of callTestFunctions(); test_finish() is
// always called before returning.
//
// Fixes relative to the previous revision:
//   * the wildcard prefix is copied with an explicit bound; an overlong
//     argument used to overflow `partial` via strcpy, and could then
//     dereference a NULL strchr() result.
//   * `partial` is cleared for every argument, so a wildcard seen earlier no
//     longer restricts a later "all" argument to that stale prefix.
int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
                                  basefn *fnList, const char *fnNames[],
                                  int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements )
{
    int ret, argIndex;
    unsigned int i;
    int fn_to_test = -1; // initialized to test all.
    char partial[512] = { 0 };

    /* Now that we have an environment, go through our arguments and run tests that match each argument */
    if( argc == 1 )
    {
        /* No actual arguments, so just run all tests */
        ret = callTestFunctions( fnList, num_fns - 1, fnNames,
                                 device, forceNoContextCreation, num_elements, -1, NULL, queueProps );
    }
    else
    {
        /* Go through each argument and use it to process a list of functions to run */
        ret = 0;
        for( argIndex = 1; argIndex < argc; argIndex++ )
        {
            const char *arg = argv[argIndex];
            const char *star = strchr( arg, '*' );

            fn_to_test = -1;
            /* An empty prefix matches every test name; start from that for each argument. */
            partial[0] = 0;

            /* Are we a partial test? */
            if( star != NULL )
            {
                /* Yes, keep the text before the first '*', truncated to fit the buffer */
                size_t prefixLen = (size_t)( star - arg );
                if( prefixLen >= sizeof( partial ) )
                    prefixLen = sizeof( partial ) - 1;
                memcpy( partial, arg, prefixLen );
                partial[prefixLen] = 0;
            }
            else
            {
                /* Nope, loop through looking for an exact name match */
                for (i=0; i<num_fns; i++)
                {
                    if (strcmp(arg, fnNames[i]) == 0)
                    {
                        fn_to_test = i;
                        break;
                    }
                }

                if (i == num_fns)
                {
                    log_error("invalid test name: %s \n", arg);
                    ret = 1;
                    continue; /* Keep processing other arguments */
                }
                else if( ( fn_to_test == (int)num_fns - 1 ) && ( strcmp( fnNames[i], "all" ) == 0 ) )
                {
                    /* The trailing "all" entry means: run everything */
                    fn_to_test = -1;
                }
            }

            /* Execute this particular test loop (remember to remove 1 from the function count for the lack of "all" at the end!) */
            ret += callTestFunctions( fnList, num_fns - 1, fnNames,
                                      device, forceNoContextCreation, num_elements,
                                      fn_to_test, partial, queueProps );
        }
    }

    if (gTestsFailed == 0) {
        if (gTestsPassed > 1)
            log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
        else if (gTestsPassed > 0)
            log_info("PASSED test.\n");
    } else if (gTestsFailed > 0) {
        if (gTestsFailed+gTestsPassed > 1)
            log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
        else
            log_error("FAILED test.\n");
    }

    test_finish();

    return ret;
}
// Dispatches either one test or a filtered sweep over all tests.
//   functionList / functionNames - parallel arrays of numFunctions entries
//   functionIndexToCall          - index of the single test to run, or -1 to
//                                  sweep the whole list
//   partialName                  - for a sweep: only names starting with this
//                                  prefix are run (NULL disables filtering)
// The remaining arguments are forwarded to callSingleTestFunction().
// Returns the number of failures, plus one when the index is out of range or
// a prefix sweep ran nothing.
int callTestFunctions( basefn functionList[], int numFunctions,
                       const char *functionNames[],
                       cl_device_id deviceToUse, int forceNoContextCreation,
                       int numElementsToUse,
                       int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps )
{
    if( functionIndexToCall >= numFunctions )
    {
        log_error( "ERROR: Invalid function index to test!\n" );
        return 1;
    }

    /* Single-test request: handle it and leave */
    if( functionIndexToCall != -1 )
    {
        basefn chosen = functionList[functionIndexToCall];
        const char *chosenName = functionNames[functionIndexToCall];

        if( !chosen )
        {
            log_info("%s test currently not implemented\n", chosenName);
            return 0;
        }
        return callSingleTestFunction( chosen, chosenName,
                                       deviceToUse, forceNoContextCreation, numElementsToUse, queueProps );
    }

    /* Sweep: run every implemented test whose name passes the prefix filter */
    int failures = 0;
    int ranAny = 0;
    const size_t prefixLength = ( partialName != NULL ) ? strlen( partialName ) : 0;

    for( int index = 0; index < numFunctions; index++ )
    {
        const int nameMatches = ( partialName == NULL )
                             || ( strncmp( functionNames[index], partialName, prefixLength ) == 0 );
        if( !nameMatches )
            continue;

        if( functionList[index] == 0 )
        {
            log_info("%s test currently not implemented\n", functionNames[index]);
            continue;
        }

        ranAny = 1;
        failures += callSingleTestFunction( functionList[index], functionNames[index], deviceToUse, forceNoContextCreation, numElementsToUse, queueProps );
    }

    if( !ranAny && partialName != NULL )
    {
        log_error( "ERROR: Wildcard test name does not match any tests: %s\n", partialName );
        return failures + 1;
    }

    return failures;
}
// Context error callback handed to clCreateContext(): logs the error text
// supplied by the OpenCL implementation. The other arguments are not used.
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    (void)private_info;
    (void)cb;
    (void)user_data;

    log_info( "%s\n", errinfo );
}
// Actual function execution.
//
// Runs one test. Unless forceNoContextCreation is set, a context and a
// command queue are created for deviceToUse and released again afterwards;
// otherwise the test receives NULL for both.
//
//   functionToCall   - the test entry point
//   functionName     - name used in log output
//   numElementsToUse - forwarded to the test
//   queueProps       - properties for the command queue
//
// Updates gTestsPassed / gTestsFailed, except when the test returns
// TEST_NOT_IMPLEMENTED. Returns the number of errors: 1 if setup failed, else
// 1 for a failing test plus 1 if the final clFinish() failed.
//
// Fixes relative to the previous revision:
//   * the context is released when command queue creation fails (it leaked).
//   * the clFinish() status reuses the outer `error` variable rather than
//     shadowing it with a second declaration.
//   * the clFinish failure message ends with a newline.
int callSingleTestFunction( basefn functionToCall, const char *functionName,
                            cl_device_id deviceToUse, int forceNoContextCreation,
                            int numElementsToUse, cl_command_queue_properties queueProps )
{
    int numErrors = 0, ret;
    cl_int error;
    cl_context context = NULL;
    cl_command_queue queue = NULL;

    /* Create a context to work with, unless we're told not to */
    if( !forceNoContextCreation )
    {
        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
        if (!context)
        {
            print_error( error, "Unable to create testing context" );
            return 1;
        }

        queue = clCreateCommandQueue( context, deviceToUse, queueProps, &error );
        if( queue == NULL )
        {
            print_error( error, "Unable to create testing command queue" );
            clReleaseContext( context );
            return 1;
        }
    }

    /* Run the test and print the result */
    log_info( "%s...\n", functionName );
    fflush( stdout );

    ret = functionToCall( deviceToUse, context, queue, numElementsToUse);
    if( ret == TEST_NOT_IMPLEMENTED )
    {
        /* Tests can also let us know they're not implemented yet */
        log_info("%s test currently not implemented\n\n", functionName);
    }
    else
    {
        /* Print result */
        if( ret == 0 ) {
            log_info( "%s passed\n", functionName );
            gTestsPassed++;
        }
        else
        {
            numErrors++;
            log_error( "%s FAILED\n", functionName );
            gTestsFailed++;
        }
    }

    /* Release the context */
    if( !forceNoContextCreation )
    {
        /* Drain outstanding work first so late failures are still reported */
        error = clFinish(queue);
        if (error) {
            log_error("clFinish failed: %d\n", error);
            numErrors++;
        }
        clReleaseCommandQueue( queue );
        clReleaseContext( context );
    }

    return numErrors;
}
// Applies the CL_DEVICE_TYPE environment variable, if set, to *inOutType and
// logs which device type is being requested and where the choice came from.
//   inOutType - in: device type chosen so far; out: the type named by the
//               environment variable when it holds a recognized value.
//               Unrecognized values leave it unchanged.
// On Apple platforms the DYLD_LIBRARY_PATH and DYLD_FRAMEWORK_PATH settings
// are reported as well.
//
// Fix relative to the previous revision: DYLD_FRAMEWORK_PATH is reported
// based on its own value. The check used to test DYLD_LIBRARY_PATH, which
// could pass NULL to %s or skip the report.
void checkDeviceTypeOverride( cl_device_type *inOutType )
{
    /* Check if we are forced to CPU mode */
    char *force_cpu = getenv( "CL_DEVICE_TYPE" );
    if( force_cpu != NULL )
    {
        if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_GPU;
        else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_CPU;
        else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_ACCELERATOR;
        else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_DEFAULT;
    }

    switch( *inOutType )
    {
        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
        default: break;
    }
    log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );

#if defined( __APPLE__ )
    {
        // report on any unusual library search path indirection
        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
        if( libSearchPath )
            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );

        // report on any unusual framework search path indirection
        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
        if( frameworkSearchPath )
            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
    }
#endif
}
#if ! defined( __APPLE__ )
// Portable stand-in for Apple's memset_pattern4(): fills `bytes` bytes at
// `dest` with repetitions of the 4-byte pattern at `src_pattern`. A trailing
// partial repetition receives the leading bytes of the pattern.
//   dest        - destination buffer of at least `bytes` bytes, any alignment
//   src_pattern - the 4-byte pattern; all 4 bytes are read even when bytes < 4
//   bytes       - number of bytes to fill
//
// The pattern is copied into a local buffer and written with memcpy. The
// previous version cast both pointers to uint32_t*, which discarded const
// and relied on 4-byte alignment and on type-punned loads/stores.
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
{
    unsigned char pattern[4];
    unsigned char *out = (unsigned char *)dest;
    const size_t wholeCopies = bytes / 4;
    const size_t tailBytes = bytes & 3;
    size_t i;

    memcpy( pattern, src_pattern, sizeof( pattern ) );

    for( i = 0; i < wholeCopies; i++ )
        memcpy( out + 4 * i, pattern, sizeof( pattern ) );

    if( tailBytes )
        memcpy( out + 4 * wholeCopies, pattern, tailBytes );
}
#endif
// Queries CL_DEVICE_TYPE for device d.
// On failure an error is logged and the returned value is whatever the
// variable holds afterwards; it is pre-set to -1 (all bits set) and only the
// query itself can overwrite it.
extern cl_device_type GetDeviceType( cl_device_id d )
{
    cl_device_type deviceType = -1;

    if( clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ) != CL_SUCCESS )
        log_error( "ERROR: Unable to get device type for device %p\n", d );

    return deviceType;
}
// Finds a device on the same platform as `device` that is not `device` itself.
// Returns
//   * another device when the platform exposes one,
//   * `device` itself when it is the platform's only device,
//   * NULL on any error (query failure, allocation failure, or no usable
//     alternative found).
//
// Fix relative to the previous revision: the device list allocation is
// checked, and a reported count of zero is rejected. The buffer used to be
// passed to clGetDeviceIDs() and indexed without a NULL check.
cl_device_id GetOpposingDevice( cl_device_id device )
{
    cl_int error;
    cl_device_id *otherDevices;
    cl_uint actualCount = 0;
    cl_platform_id plat;

    // Get the platform of the device to use for getting a list of devices
    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get device's platform" );
        return NULL;
    }

    // Get a list of all devices
    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices size" );
        return NULL;
    }

    if( actualCount == 0 )
    {
        log_error( "ERROR: Platform reported no devices\n" );
        return NULL;
    }

    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
    if( otherDevices == NULL )
    {
        print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for list of devices" );
        return NULL;
    }

    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices" );
        free(otherDevices);
        return NULL;
    }

    if( actualCount == 1 )
    {
        free(otherDevices);
        return device;    // NULL means error, returning self means we couldn't find another one
    }

    // Loop and just find one that isn't the one we were given
    cl_uint i;
    for( i = 0; i < actualCount; i++ )
    {
        if( otherDevices[ i ] != device )
        {
            // The type itself is not used; the query only confirms that the
            // candidate device answers.
            cl_device_type newType;
            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
            if( error != CL_SUCCESS )
            {
                print_error( error, "Unable to get device type for other device" );
                free(otherDevices);
                return NULL;
            }
            cl_device_id result = otherDevices[ i ];
            free(otherDevices);
            return result;
        }
    }

    // Should never get here
    free(otherDevices);
    return NULL;
}

View File

@@ -0,0 +1,104 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testHarness_h
#define _testHarness_h
#include "threadTesting.h"
#include "clImageHelper.h"
#ifdef __cplusplus
extern "C" {
#endif
// Seed globals. The RandomSeed class in typeWrappers.h stores a freshly generated value
// into gRandomSeed on destruction whenever gReSeed is non-zero.
extern cl_uint gReSeed;
extern cl_uint gRandomSeed;
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
// setup work, and then call each function in turn as dictated by the passed arguments.
extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
typedef int (*DeviceCheckFn)( cl_device_id device );
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
basefn *fnList, const char *fnNames[],
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
// Call this function if you need to do all the setup work yourself, and just need the function list called/
// managed.
// functionIndexToCall can be a valid index into the function list, or -1 to run all of them.
// partialName can be a string to partially match function names against and only execute functions that
// match, or NULL to not restrict execution (ignored if functionIndexToCall is not -1)
// functionList is the actual array of functions
// numFunctions is the number of functions in the list (which should NOT have NULL at the end for "all")
// functionNames is an array of strings representing the name of each function, to be used in partial matching
// forceNoContextCreation and queueProps presumably control the context/queue set up for each test
// (NOTE(review): confirm against the implementation; this comment previously named a "contextProps" parameter)
// deviceToUse and numElementsToUse are just passed to each test function
extern int callTestFunctions( basefn functionList[], int numFunctions,
const char *functionNames[],
cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse,
int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps );
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse, cl_command_queue_properties queueProps );
///// Miscellaneous steps
// Given a pre-existing device type choice, check the environment for an override, then print what
// choice was made and how (the possibly overridden choice is written back through inOutType)
extern void checkDeviceTypeOverride( cl_device_type *inOutType );
// standard callback function for context pfn_notify
extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
// Returns the CL_DEVICE_TYPE of the given device (an error is logged if the query fails)
extern cl_device_type GetDeviceType( cl_device_id );
// Given a device (most likely passed in by the harness, but not required), will attempt to find
// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
// is the only device available, the SAME device is returned, so check!
extern cl_device_id GetOpposingDevice( cl_device_id device );
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
extern int gHasLong; // This is set to 1 if the device supports long and ulong types in OpenCL C.
extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
// Apple platforms are excluded here; elsewhere the harness supplies its own implementation.
// Fills a buffer with repeated copies of a 4-byte pattern.
#if ! defined( __APPLE__ )
void memset_pattern4(void *, const void *, size_t);
#endif
#ifdef __cplusplus
}
#endif
#endif // _testHarness_h

View File

@@ -0,0 +1,51 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "mt19937.h"
#include <stdio.h>
// Self-test for the mt19937 generator: seeds it with 42, draws 65536 values and compares
// every 4096th draw (i = 0, 4096, ..., 61440) against a table of 16 reference values.
// Prints one line per mismatch plus a pass/fail summary.
// Returns 0 when every sampled value matches and 1 otherwise, so scripts and build
// systems can detect a failure from the exit status.
int main( void )
{
    MTdata d = init_genrand(42);
    int i;
    const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
                                    0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
                                    0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
                                    0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
    int errcount = 0;

    for( i = 0; i < 65536; i++ )
    {
        cl_uint u = genrand_int32( d );

        // Only every 4096th output has a reference value; i >> 12 is its table index
        if( 0 == (i & 4095) )
        {
            if( u != reference[i>>12] )
            {
                printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n", reference[i>>12], i, u );
                errcount++;
            }
        }
    }

    free_mtdata(d);

    if( errcount )
        printf("mt19937 test failed.\n");
    else
        printf("mt19937 test passed.\n");

    // Report failure through the exit status as well as on stdout
    return errcount ? 1 : 0;
}

View File

@@ -0,0 +1,106 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "threadTesting.h"
#include "errorHelpers.h"
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <stdbool.h>
#endif
#include <math.h>
#include <string.h>
#if !defined(_WIN32)
#include <pthread.h>
#endif
#if 0 // Disabled for now
// Argument bundle handed to test_thread_wrapper through pthread_create's void* parameter.
// NOTE(review): the disabled functions below also access an mDeviceGroup member that is
// not declared here, so this code does not compile as written.
typedef struct
{
basefn mFunction;
cl_device_id mDevice;
cl_context mContext;
int mNumElements;
} TestFnArgs;
////////////////////////////////////////////////////////////////////////////////
// Thread-based testing. Spawns a new thread to run the given test function,
// then waits for it to complete. The entire idea is that, if the thread crashes,
// we can catch it and report it as a failure instead of crashing the entire suite
////////////////////////////////////////////////////////////////////////////////
// Thread entry point: `data` is a TestFnArgs*. Creates a context, runs the test function
// and returns its int result packed into the void* return value (-1 if the context
// could not be created).
// NOTE(review): currently compiled out. It uses args->mDeviceGroup, which TestFnArgs does
// not declare, and calls mFunction with arguments that do not match the basefn typedef in
// threadTesting.h (device, context, queue, num_elements).
void *test_thread_wrapper( void *data )
{
TestFnArgs *args;
int retVal;
cl_context context;
args = (TestFnArgs *)data;
/* Create a new context to use (contexts can't cross threads) */
context = clCreateContext(NULL, args->mDeviceGroup);
if( context == NULL )
{
log_error("clCreateContext failed for new thread\n");
return (void *)(-1);
}
/* Call function */
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
clReleaseContext( context );
return (void *)retVal;
}
// Runs fnToTest on a freshly created pthread and blocks until that thread finishes.
// Returns the value the thread returned (converted back from void*), or -1 if the thread
// could not be created or joined.
// NOTE(review): currently compiled out. `deviceGroup` and the mDeviceGroup member are not
// declared anywhere visible here, and the `queue` parameter is not used in this body.
int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
int error;
pthread_t threadHdl;
void *retVal;
TestFnArgs args;
args.mFunction = fnToTest;
args.mDeviceGroup = deviceGroup;
args.mDevice = device;
args.mContext = context;
args.mNumElements = numElements;
// args lives on this stack frame; that is safe because the thread is joined before returning
error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
if( error != 0 )
{
log_error( "ERROR: Unable to create thread for testing!\n" );
return -1;
}
/* Thread has been started, now just wait for it to complete (or crash) */
error = pthread_join( threadHdl, &retVal );
if( error != 0 )
{
log_error( "ERROR: Unable to join testing thread!\n" );
return -1;
}
return (int)((intptr_t)retVal);
}
#endif

View File

@@ -0,0 +1,32 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _threadTesting_h
#define _threadTesting_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
// Status code for a test that is not implemented
// (NOTE(review): how the harness treats this value is not visible here)
#define TEST_NOT_IMPLEMENTED -99
// Signature shared by every test entry point: the device, context and command queue to
// run against, plus an element-count hint. Returns an int status.
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
// Runs fnToTest on a separate thread and waits for it to finish.
// NOTE(review): the definition in threadTesting.c sits inside an `#if 0` block, so nothing
// visible there currently provides this symbol.
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
#endif // _threadTesting_h

View File

@@ -0,0 +1,481 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "typeWrappers.h"
#include "kernelHelpers.h"
#include "errorHelpers.h"
#include <stdlib.h>
#include "clImageHelper.h"
// Rounds _size up to the next multiple of _align. The "& -align" mask is only correct
// when _align is a power of two.
#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
// kPageSize: granularity used for the guard-page layout below. It is a fixed 4096 on Apple
// and queried at run time on Linux; no definition is provided for other platforms.
#if defined( __APPLE__ )
#define kPageSize 4096
#include <sys/mman.h>
#include <stdlib.h>
#elif defined(__linux__)
#include <unistd.h>
#define kPageSize (getpagesize())
#endif
// 1D image constructor: forwards to Create() and, when the caller supplied errcode_ret,
// reports Create()'s status through it.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
{
    const cl_int status = Create( context, mem_flags, fmt, width );

    if( errcode_ret )
        *errcode_ret = status;
}
// Creates a 1D image of `width` pixels.
//
// On Apple platforms, when no device in the context is a GPU, the image is backed by a
// private mmap'ed region (CL_MEM_USE_HOST_PTR) laid out in "rows" of rowStride bytes:
// four fully protected rows, the image row followed by one protected page, and four more
// fully protected rows. Setting the CL_ALIGN_RIGHT environment variable shifts the image so
// its last pixel abuts the protected page. Everywhere else a plain image is created.
//
// Returns the CL error code from image creation, the failing query's error code, or
// CL_OUT_OF_HOST_MEMORY if the backing store could not be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
{
    cl_int error;

    // Put the members in a known state first so that every early return below leaves
    // the destructor safe to run.
    image = NULL;
    backingStore = NULL;
    backingStoreSize = 0;

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // clGetContextInfo reports a byte count; convert to a device count
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        if (type == CL_DEVICE_TYPE_GPU) {
            // Guard pages are skipped as soon as any GPU is present in the context
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store: one image row plus 4 guard rows on each side
        size_t mappingSize = rowStride + 8 * rowStride;
        void *mapping = mmap(0, mappingSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if( mapping == MAP_FAILED )
        {
            log_error( "ERROR: mmap failed in clProtectedImage::Create\n" );
            return CL_OUT_OF_HOST_MEMORY;
        }
        backingStore = mapping;
        backingStoreSize = mappingSize;

        // add guard pages
        size_t row;
        char *p = (char*) backingStore;
        char *imagePtr = (char*) backingStore + 4 * rowStride;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }
        // protect the page that follows the image row
        p += rowBytes;
        mprotect( p, kPageSize, PROT_NONE );        p += rowStride;
        p -= rowBytes;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }

        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
    } else {
        image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
    }
#else
    image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
#endif
    return error;
}
// 2D image constructor: forwards to Create() and, when the caller supplied errcode_ret,
// reports Create()'s status through it.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
{
    const cl_int status = Create( context, mem_flags, fmt, width, height );

    if( errcode_ret )
        *errcode_ret = status;
}
// Creates a 2D image of width x height pixels.
//
// On Apple platforms, when no device in the context is a GPU, the image is backed by a
// private mmap'ed region (CL_MEM_USE_HOST_PTR) laid out in rows of rowStride bytes:
// four fully protected rows, `height` image rows each followed by one protected page, and
// four more fully protected rows. Setting the CL_ALIGN_RIGHT environment variable shifts
// the image so each row's last pixel abuts its protected page. Everywhere else a plain
// image is created.
//
// Returns the CL error code from image creation, the failing query's error code, or
// CL_OUT_OF_HOST_MEMORY if the backing store could not be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
{
    cl_int error;

    // Put the members in a known state first so that every early return below leaves
    // the destructor safe to run.
    image = NULL;
    backingStore = NULL;
    backingStoreSize = 0;

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // clGetContextInfo reports a byte count; convert to a device count
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        if (type == CL_DEVICE_TYPE_GPU) {
            // Guard pages are skipped as soon as any GPU is present in the context
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store: `height` image rows plus 4 guard rows on each side
        size_t mappingSize = height * rowStride + 8 * rowStride;
        void *mapping = mmap(0, mappingSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if( mapping == MAP_FAILED )
        {
            log_error( "ERROR: mmap failed in clProtectedImage::Create\n" );
            return CL_OUT_OF_HOST_MEMORY;
        }
        backingStore = mapping;
        backingStoreSize = mappingSize;

        // add guard pages
        size_t row;
        char *p = (char*) backingStore;
        char *imagePtr = (char*) backingStore + 4 * rowStride;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }
        // protect the page that follows each image row
        p += rowBytes;
        for( row = 0; row < height; row++ )
        {
            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
        }
        p -= rowBytes;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }

        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
    } else {
        image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
    }
#else
    image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
#endif
    return error;
}
// 3D image constructor: forwards to Create() and, when the caller supplied errcode_ret,
// reports Create()'s status through it.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
{
    const cl_int status = Create( context, mem_flags, fmt, width, height, depth );

    if( errcode_ret )
        *errcode_ret = status;
}
// Creates a 3D image of width x height x depth pixels.
//
// On Apple platforms, when no device in the context is a GPU, the image is backed by a
// private mmap'ed region (CL_MEM_USE_HOST_PTR) laid out in rows of rowStride bytes:
// four fully protected rows, height*depth image rows each followed by one protected page,
// and four more fully protected rows. Setting the CL_ALIGN_RIGHT environment variable
// shifts the image so each row's last pixel abuts its protected page. Everywhere else a
// plain image is created.
//
// Returns the CL error code from image creation, the failing query's error code, or
// CL_OUT_OF_HOST_MEMORY if the backing store could not be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
{
    cl_int error;

    // Put the members in a known state first so that every early return below leaves
    // the destructor safe to run.
    image = NULL;
    backingStore = NULL;
    backingStoreSize = 0;

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // clGetContextInfo reports a byte count; convert to a device count
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        if (type == CL_DEVICE_TYPE_GPU) {
            // Guard pages are skipped as soon as any GPU is present in the context
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store: height*depth image rows plus 4 guard rows on each side
        size_t mappingSize = height * depth * rowStride + 8 * rowStride;
        void *mapping = mmap(0, mappingSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if( mapping == MAP_FAILED )
        {
            log_error( "ERROR: mmap failed in clProtectedImage::Create\n" );
            return CL_OUT_OF_HOST_MEMORY;
        }
        backingStore = mapping;
        backingStoreSize = mappingSize;

        // add guard pages
        size_t row;
        char *p = (char*) backingStore;
        char *imagePtr = (char*) backingStore + 4 * rowStride;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }
        // protect the page that follows each image row
        p += rowBytes;
        for( row = 0; row < height*depth; row++ )
        {
            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
        }
        p -= rowBytes;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }

        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
    } else {
        image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
    }
#else
    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
#endif
    return error;
}
// Typed image constructor: forwards to the imageType-aware Create() and, when the caller
// supplied errcode_ret, reports Create()'s status through it.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
{
    const cl_int status = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );

    if( errcode_ret )
        *errcode_ret = status;
}
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
{
cl_int error;
#if defined( __APPLE__ )
int protect_pages = 1;
cl_device_id devices[16];
size_t number_of_devices;
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
number_of_devices /= sizeof(cl_device_id);
for (int i=0; i<(int)number_of_devices; i++) {
cl_device_type type;
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
if (type == CL_DEVICE_TYPE_GPU) {
protect_pages = 0;
break;
}
}
if (protect_pages) {
size_t pixelBytes = get_pixel_bytes(fmt);
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
size_t rowStride = rowBytes + kPageSize;
// create backing store
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
backingStoreSize = rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE2D:
backingStoreSize = height * rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE3D:
backingStoreSize = height * depth * rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
backingStoreSize = arraySize * rowStride + 8 * rowStride;
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
break;
}
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
// add guard pages
size_t row;
char *p = (char*) backingStore;
char *imagePtr = (char*) backingStore + 4 * rowStride;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
p += rowBytes;
size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
for( row = 0; row < sz; row++ )
{
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
}
p -= rowBytes;
for( row = 0; row < 4; row++ )
{
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
}
if( getenv( "CL_ALIGN_RIGHT" ) )
{
static int spewEnv = 1;
if(spewEnv)
{
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
spewEnv = 0;
}
imagePtr += rowBytes - pixelBytes * width;
}
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D:
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
break;
case CL_MEM_OBJECT_IMAGE3D:
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
break;
}
} else {
backingStore = NULL;
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D:
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE3D:
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
break;
}
}
#else
backingStore = NULL;
switch (imageType)
{
case CL_MEM_OBJECT_IMAGE1D:
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D:
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE3D:
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
break;
}
#endif
return error;
}
/*******
* clProtectedArray implementation
*******/
// Default constructor: no storage is owned until Allocate() is called.
clProtectedArray::clProtectedArray()
    : mBuffer( NULL ), mValidBuffer( NULL )
{
}
// Sized constructor: starts with no storage, then allocates sizeInBytes via Allocate().
clProtectedArray::clProtectedArray( size_t sizeInBytes )
    : mBuffer( NULL ), mValidBuffer( NULL )
{
    Allocate( sizeInBytes );
}
// Releases the buffer, if any: munmap of the whole mapping on Apple (a failure is only
// logged), free() everywhere else.
clProtectedArray::~clProtectedArray()
{
    if( mBuffer == NULL )
        return;

#if defined( __APPLE__ )
    if( munmap( mBuffer, mRealSize ) != 0 )
        log_error("WARNING: munmap failed in clProtectedArray.\n");
#else
    free( mBuffer );
#endif
}
// Allocates the array's storage.
//
// Apple: maps sizeInBytes rounded up to whole pages plus one guard page on each side,
// then revokes all access to the two guard pages so out-of-bounds accesses fault.
// Other platforms: a plain zero-filled calloc of sizeInBytes, with no guard pages.
//
// On allocation failure both buffer pointers are left NULL (on Apple an error is also
// logged), so the void* conversion operators yield NULL.
void clProtectedArray::Allocate( size_t sizeInBytes )
{
#if defined( __APPLE__ )

    // Allocate enough space to: round up our actual allocation to a whole number of pages
    // and add one guard page on either side
    mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
    mRealSize = mRoundedSize + kPageSize * 2;

    // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
    void *mapping = mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
    if( mapping == MAP_FAILED )
    {
        // MAP_FAILED is not NULL; never let it reach mprotect or the destructor's munmap
        log_error( "ERROR: mmap failed in clProtectedArray::Allocate\n" );
        mBuffer = mValidBuffer = NULL;
        return;
    }
    mBuffer = (char *)mapping;

    mValidBuffer = mBuffer + kPageSize;

    // Protect guard area from access
    mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
    mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
#else
    mRoundedSize = mRealSize = sizeInBytes;
    mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
#endif
}

View File

@@ -0,0 +1,333 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _typeWrappers_h
#define _typeWrappers_h
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
#include "compat.h"
#include <stdio.h>
#include "mt19937.h"
#include "errorHelpers.h"
#include "kernelHelpers.h"
extern "C" cl_uint gReSeed;
extern "C" cl_uint gRandomSeed;
/* cl_context wrapper */
class clContextWrapper
{
public:
clContextWrapper() { mContext = NULL; }
clContextWrapper( cl_context program ) { mContext = program; }
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
operator cl_context() { return mContext; }
cl_context * operator&() { return &mContext; }
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
protected:
cl_context mContext;
};
/* cl_program wrapper */
class clProgramWrapper
{
public:
clProgramWrapper() { mProgram = NULL; }
clProgramWrapper( cl_program program ) { mProgram = program; }
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
operator cl_program() { return mProgram; }
cl_program * operator&() { return &mProgram; }
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
protected:
cl_program mProgram;
};
/* cl_kernel wrapper */
class clKernelWrapper
{
public:
clKernelWrapper() { mKernel = NULL; }
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
operator cl_kernel() { return mKernel; }
cl_kernel * operator&() { return &mKernel; }
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
protected:
cl_kernel mKernel;
};
/* cl_mem (stream) wrapper */
class clMemWrapper
{
public:
clMemWrapper() { mMem = NULL; }
clMemWrapper( cl_mem mem ) { mMem = mem; }
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
operator cl_mem() { return mMem; }
cl_mem * operator&() { return &mMem; }
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
protected:
cl_mem mMem;
};
class clProtectedImage
{
public:
clProtectedImage() { image = NULL; backingStore = NULL; }
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
~clProtectedImage()
{
if( image != NULL )
clReleaseMemObject( image );
#if defined( __APPLE__ )
if(backingStore)
munmap(backingStore, backingStoreSize);
#endif
}
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
operator cl_mem() { return image; }
cl_mem * operator&() { return &image; }
bool operator==( const cl_mem &rhs ) { return image == rhs; }
protected:
void *backingStore;
size_t backingStoreSize;
cl_mem image;
};
/* cl_command_queue wrapper */
class clCommandQueueWrapper
{
public:
clCommandQueueWrapper() { mMem = NULL; }
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
~clCommandQueueWrapper() { if( mMem != NULL ) {int error = clFinish(mMem); if (error) print_error(error, "clFinish failed"); clReleaseCommandQueue( mMem );} }
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
operator cl_command_queue() { return mMem; }
cl_command_queue * operator&() { return &mMem; }
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
protected:
cl_command_queue mMem;
};
/* cl_sampler wrapper */
class clSamplerWrapper
{
public:
clSamplerWrapper() { mMem = NULL; }
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
operator cl_sampler() { return mMem; }
cl_sampler * operator&() { return &mMem; }
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
protected:
cl_sampler mMem;
};
/* cl_event wrapper */
class clEventWrapper
{
public:
clEventWrapper() { mMem = NULL; }
clEventWrapper( cl_event mem ) { mMem = mem; }
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
operator cl_event() { return mMem; }
cl_event * operator&() { return &mMem; }
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
protected:
cl_event mMem;
};
/* Generic protected memory buffer, for verifying access within bounds.
   Converts implicitly to a pointer to the start of the usable region. */
class clProtectedArray
{
    public:
        clProtectedArray();
        clProtectedArray( size_t sizeInBytes );
        virtual ~clProtectedArray();

        void Allocate( size_t sizeInBytes );

        operator void *() { return static_cast<void *>( mValidBuffer ); }
        operator const void *() const { return static_cast<const void *>( mValidBuffer ); }

    protected:
        char * mBuffer;         // start of the whole allocation
        char * mValidBuffer;    // start of the region handed out to callers
        size_t mRealSize, mRoundedSize;
};
/*
 * Scoped holder for an MTdata generator state.
 *
 * Construction logs the seed (only when it is non-zero) and then creates the
 * state with init_genrand. Destruction optionally refreshes the global
 * gRandomSeed from the generator -- when gReSeed is set -- and then frees the
 * state with free_mtdata.
 */
class RandomSeed
{
public:
    RandomSeed( cl_uint seed )
    {
        /* A zero seed is not logged. */
        if( seed )
            log_info( "(seed = %10.10u) ", seed );

        mtData = init_genrand( seed );
    }

    ~RandomSeed()
    {
        /* Draw one more value to become the next global seed, if requested. */
        if( gReSeed )
            gRandomSeed = genrand_int32( mtData );

        free_mtdata( mtData );
    }

    /* Implicit conversion to the underlying generator state. */
    operator MTdata () { return mtData; }

protected:
    MTdata mtData;
};
/*
 * Non-copyable owner of a buffer that is viewed as T*.
 *
 * Two release paths exist:
 *   - map != 0 : the mapping (map, mapsize) is released with munmap. This is
 *                only compiled on __APPLE__; elsewhere supplying a mapping to
 *                the constructor or to reset() logs an error and aborts.
 *   - map == 0 : ptr is released with align_free when `aligned` is set,
 *                otherwise with free.
 *
 * getSize() reports allocsize, which is only ever set through reset(); both
 * constructors leave it at 0.
 */
template <typename T> class BufferOwningPtr
{
    BufferOwningPtr(BufferOwningPtr const &); // do not implement
    void operator=(BufferOwningPtr const &);  // do not implement

    void *ptr;
    void *map;
    size_t mapsize;   // Bytes allocated total, pointed to by map.
    size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
    bool aligned;

    /* Releases ptr with the deallocator selected by the `aligned` flag. */
    void releaseUnmapped()
    {
        if( !aligned )
        {
            free(ptr);
            return;
        }
        align_free(ptr);
    }

#if defined( __APPLE__ )
    /* Unmaps the current mapping; a failure is only logged. */
    void releaseMapping()
    {
        int error = munmap(map, mapsize);
        if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
    }
#else
    /* Mapped buffers are not supported on this platform: abort if one is given. */
    static void rejectMapping( const void *m )
    {
        if( m )
        {
            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
            abort();
        }
    }
#endif

public:
    /* Takes over p as an unmapped, non-aligned allocation. */
    explicit BufferOwningPtr(void *p = 0)
        : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false)
    {
    }

    /* Takes over p together with the mapping (m, s) that backs it. */
    explicit BufferOwningPtr(void *p, void *m, size_t s)
        : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
    {
#if ! defined( __APPLE__ )
        rejectMapping( m );
#endif
    }

    ~BufferOwningPtr()
    {
        if( !map )
        {
            releaseUnmapped();
            return;
        }
#if defined( __APPLE__ )
        releaseMapping();
#endif
    }

    /*
     * Releases whatever is currently held, then takes over the new buffer
     * description. On platforms other than __APPLE__ this aborts if a mapping
     * is currently held or if a new mapping m is supplied.
     */
    void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false)
    {
        if( !map )
        {
            releaseUnmapped();
        }
        else
        {
#if defined( __APPLE__ )
            releaseMapping();
#else
            log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
            abort();
#endif
        }

        ptr       = p;
        map       = m;
        mapsize   = mapsize_;
        allocsize = allocsize_;
        aligned   = aligned_;

#if ! defined( __APPLE__ )
        rejectMapping( m );
#endif
    }

    /* Typed view of the owned pointer. */
    operator T*() { return (T*)ptr; }

    /* Size recorded by the last reset() call (0 if reset() was never called). */
    size_t getSize() const { return allocsize; }
};
#endif // _typeWrappers_h