Initial open source release of OpenCL 2.0 CTS.

This commit is contained in:
Kedar Patil
2017-05-16 18:50:35 +05:30
parent 6911ba5116
commit 3a440d17c8
883 changed files with 318212 additions and 0 deletions

View File

@@ -0,0 +1 @@
# Descend into the test_conformance directory and process its CMakeLists.txt.
add_subdirectory(test_conformance)

View File

@@ -0,0 +1,26 @@
# Sub-directories to build. Each one is entered and built with its own
# Makefile. utils/ is currently left out of the list.
PRODUCTS = harness/
# utils/

TOP=$(shell pwd)

all: $(PRODUCTS)

# Run the "clean" target of every product directory. $(MAKE) is used rather
# than a literal "make" so that flags such as -n, -k and -j are handed down to
# the sub-make.
clean:
	@for testdir in $(dir $(PRODUCTS)) ; \
	do ( \
	echo "==================================================================================" ; \
	echo "Cleaning $$testdir" ; \
	echo "==================================================================================" ; \
	cd $$testdir && $(MAKE) clean \
	); \
	done

# Build one product directory. No trailing backslash may precede this rule:
# a continuation at the end of the clean recipe would swallow the rule header.
$(PRODUCTS):
	@echo "==================================================================================" ;
	@echo "(`date "+%H:%M:%S"`) Make $@" ;
	@echo "==================================================================================" ;
	cd $(dir $@) && $(MAKE)

.PHONY: clean $(PRODUCTS) all

View File

@@ -0,0 +1,52 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _gl_headers_h
#define _gl_headers_h
#if defined( __APPLE__ )
#include <OpenGL/OpenGL.h>
#if defined(CGL_VERSION_1_3)
#include <OpenGL/gl3.h>
#include <OpenGL/gl3ext.h>
#else
#include <OpenGL/gl.h>
#include <OpenGL/glext.h>
#endif
#include <GLUT/glut.h>
#else
#ifdef _WIN32
#include <windows.h>
#endif
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glext.h>
#ifdef _WIN32
#include <GL/glut.h>
#else
#include <GL/freeglut.h>
#endif
#endif
#ifdef _WIN32
// Prototype only; the definition is not part of this header.
GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString);
// No glutGetProcAddress in the standard glut v3.7.
// Route the lookup through wglGetProcAddress instead.
#define glutGetProcAddress(procName) wglGetProcAddress(procName)
#endif
#endif // _gl_headers_h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,283 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _helpers_h
#define _helpers_h
#include "../harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#if !defined (__APPLE__)
#include <CL/cl.h>
#include "gl_headers.h"
#include <CL/cl_gl.h>
#else
#include "gl_headers.h"
#endif
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/threadTesting.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/mt19937.h"
// Function-pointer type for the clCreateFromGLBuffer entry point.
// errcode_ret is declared as cl_int *, matching every other function-pointer
// type in this header (it was previously spelled as a plain int *).
typedef cl_mem
(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context,
                                       cl_mem_flags flags,
                                       GLuint bufobj,
                                       cl_int * errcode_ret);
// Function-pointer types for the CL/GL sharing entry points. Each type has a
// matching extern pointer variable declared at the end of this group.

// Creates a cl_mem from a GL texture (target/miplevel/texture triple).
typedef cl_mem
(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context ,
cl_mem_flags flags ,
GLenum target ,
GLint miplevel ,
GLuint texture ,
cl_int * errcode_ret) ;
// Same parameter list as clCreateFromGLTexture_fn, for the 2D-specific entry point.
typedef cl_mem
(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context context ,
cl_mem_flags flags ,
GLenum target ,
GLint miplevel ,
GLuint texture ,
cl_int * errcode_ret) ;
// Same parameter list as clCreateFromGLTexture_fn, for the 3D-specific entry point.
typedef cl_mem
(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context context ,
cl_mem_flags flags ,
GLenum target ,
GLint miplevel ,
GLuint texture ,
cl_int * errcode_ret) ;
// Creates a cl_mem from a GL renderbuffer name.
typedef cl_mem
(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context ,
cl_mem_flags flags ,
GLuint renderbuffer ,
cl_int * errcode_ret) ;
// Reports the GL object type and GL object name behind a cl_mem.
typedef cl_int
(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj ,
cl_gl_object_type * gl_object_type ,
GLuint * gl_object_name) ;
// Generic "get info" query on the GL texture behind a cl_mem.
typedef cl_int
(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem memobj ,
cl_gl_texture_info param_name ,
size_t param_value_size ,
void * param_value ,
size_t * param_value_size_ret) ;
// Enqueues an acquire of the given GL-backed memory objects.
typedef cl_int
(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue ,
cl_uint num_objects ,
const cl_mem * mem_objects ,
cl_uint num_events_in_wait_list ,
const cl_event * event_wait_list ,
cl_event * event) ;
// Enqueues a release of the given GL-backed memory objects.
typedef cl_int
(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue ,
cl_uint num_objects ,
const cl_mem * mem_objects ,
cl_uint num_events_in_wait_list ,
const cl_event * event_wait_list ,
cl_event * event) ;
// Pointer variables for the entry points above. They are only declared here;
// the definitions live in another translation unit.
// NOTE(review): presumably filled in by init_clgl_ext() - confirm against its definition.
extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr;
extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr;
extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
class glBufferWrapper
{
public:
glBufferWrapper() { mBuffer = 0; }
glBufferWrapper( GLuint b ) { mBuffer = b; }
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
operator GLuint() { return mBuffer; }
operator GLuint *() { return &mBuffer; }
GLuint * operator&() { return &mBuffer; }
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
protected:
GLuint mBuffer;
};
class glTextureWrapper
{
public:
glTextureWrapper() { mHandle = 0; }
glTextureWrapper( GLuint b ) { mHandle = b; }
~glTextureWrapper() {
if( mHandle != 0 ) glDeleteTextures( 1, &mHandle );
}
glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; }
operator GLuint() { return mHandle; }
operator GLuint *() { return &mHandle; }
GLuint * operator&() { return &mHandle; }
bool operator==( GLuint rhs ) { return mHandle == rhs; }
protected:
// The texture handle.
GLuint mHandle;
};
class glRenderbufferWrapper
{
public:
glRenderbufferWrapper() { mBuffer = 0; }
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
operator GLuint() { return mBuffer; }
operator GLuint *() { return &mBuffer; }
GLuint * operator&() { return &mBuffer; }
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
protected:
GLuint mBuffer;
};
class glFramebufferWrapper
{
public:
glFramebufferWrapper() { mBuffer = 0; }
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
operator GLuint() { return mBuffer; }
operator GLuint *() { return &mBuffer; }
GLuint * operator&() { return &mBuffer; }
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
protected:
GLuint mBuffer;
};
// Helper functions (defined in helpers.cpp)
// Texture creation helpers. Each returns a void * and reports the new GL
// texture name through outTextureID (outTex/outBuf for the texture-buffer
// variant) and a status through outError.
// NOTE(review): presumably the returned pointer is the host copy of the
// texture data and the caller owns it - confirm in helpers.cpp.
extern void * CreateGLTexture1DArray( size_t width, size_t length,
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID, int *outError,
bool allocateMem, MTdata d);
extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length,
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID, int *outError,
bool allocateMem, MTdata d);
extern void * CreateGLTextureBuffer( size_t width,
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError,
bool allocateMem, MTdata d);
extern void * CreateGLTexture1D(size_t width,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID,
int *outError, bool allocateMem, MTdata d );
extern void * CreateGLTexture2D( size_t width, size_t height,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID,
int *outError, bool allocateMem, MTdata d );
// Note the parameter order: unlike the helpers above, this one takes MTdata
// before allocateMem, and allocateMem defaults to true.
extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type, GLuint *outTextureID,
int *outError, MTdata d, bool allocateMem = true );
// Reads a GL texture back, interpreting the data as typeToReadAs.
extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width,
GLenum glFormat, GLenum glInternalFormat,
GLenum glType, ExplicitType typeToReadAs,
size_t outWidth, size_t outHeight );
// Renderbuffer helpers. Both creation functions report the framebuffer and
// renderbuffer names through outFramebuffer / outRenderbuffer.
extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
GLuint *outFramebuffer,
GLuint *outRenderbuffer );
extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
GLenum target, GLenum glFormat,
GLenum internalFormat, GLenum glType,
ExplicitType type,
GLuint *outFramebuffer,
GLuint *outRenderbuffer,
int *outError, MTdata d, bool allocateMem );
extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
GLenum attachment, GLenum glFormat,
GLenum glInternalFormat, GLenum glType,
ExplicitType typeToReadAs,
size_t outWidth, size_t outHeight );
// Diagnostics: buffer dump and enum-to-name lookups.
extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer);
extern const char *GetGLTypeName( GLenum type );
extern const char *GetGLAttachmentName( GLenum att );
extern const char *GetGLTargetName( GLenum tgt );
extern const char *GetGLBaseFormatName( GLenum baseformat );
extern const char *GetGLFormatName( GLenum format );
// Random data generation and GL <-> ExplicitType mapping helpers.
extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d );
extern GLenum GetGLFormat(GLenum internalFormat);
extern GLenum GetGLTypeForExplicitType(ExplicitType type);
extern size_t GetGLTypeSize(GLenum type);
extern ExplicitType GetExplicitTypeForGLType(GLenum type);
extern GLenum get_base_gl_target( GLenum target );
// NOTE(review): presumably resolves the *_ptr entry points declared in this
// header; returns an int status - confirm the convention in the definition.
extern int init_clgl_ext( void );
#endif // _helpers_h

View File

@@ -0,0 +1,48 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _setup_h
#define _setup_h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gl_headers.h"
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
// Note: the idea here is to have every platform define their own setup.cpp file that implements a GLEnvironment
// subclass internally, then return it as the definition of GLEnvironment::Instance
class GLEnvironment
{
public:
GLEnvironment() {}
virtual ~GLEnvironment() {}
// Sets up the platform's GL state. use_opengl_32 selects an OpenGL 3.2
// setup where the platform implementation supports one.
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
// Creates an OpenCL context that shares with the current GL context.
virtual cl_context CreateCLContext( void ) = 0;
// Reports whether a device of device_type supports CL/GL interop.
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
// Returns the platform-specific environment object; defined once per
// platform in that platform's setup source file.
static GLEnvironment * Instance( void );
};
#endif // _setup_h

View File

@@ -0,0 +1,156 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "setup.h"
#include "../../test_common/harness/errorHelpers.h"
#include <OpenGL/CGLDevice.h>
class OSXGLEnvironment : public GLEnvironment
{
public:
OSXGLEnvironment()
{
mCGLContext = NULL;
}
virtual int Init( int *argc, char **argv, int use_opengl_32 )
{
if (!use_opengl_32) {
// Create a GLUT window to render into
glutInit( argc, argv );
glutInitWindowSize( 512, 512 );
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
glutCreateWindow( "OpenCL <-> OpenGL Test" );
}
else {
CGLPixelFormatAttribute attribs[] = {
kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core,
kCGLPFAAllowOfflineRenderers,
kCGLPFANoRecovery,
kCGLPFAAccelerated,
kCGLPFADoubleBuffer,
(CGLPixelFormatAttribute)0
};
CGLError err;
CGLPixelFormatObj pix;
GLint npix;
err = CGLChoosePixelFormat (attribs, &pix, &npix);
if(err != kCGLNoError)
{
log_error("Failed to choose pixel format\n");
return -1;
}
err = CGLCreateContext(pix, NULL, &mCGLContext);
if(err != kCGLNoError)
{
log_error("Failed to create GL context\n");
return -1;
}
CGLSetCurrentContext(mCGLContext);
}
return 0;
}
virtual cl_context CreateCLContext( void )
{
int error;
if( mCGLContext == NULL )
mCGLContext = CGLGetCurrentContext();
CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext);
cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 };
cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error);
if (error) {
print_error(error, "clCreateContext failed");
return NULL;
}
// Verify that all devices in the context support the required extension
cl_device_id devices[64];
size_t size_out;
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out);
if (error) {
print_error(error, "clGetContextInfo failed");
return NULL;
}
char extensions[8192];
for (int i=0; i<(int)(size_out/sizeof(cl_device_id)); i++) {
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
if (error) {
print_error(error, "clGetDeviceInfo failed");
return NULL;
}
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
log_error("Device %d does not supporte required extension cl_APPLE_gl_sharing.\n", i);
return NULL;
}
}
return context;
}
virtual int SupportsCLGLInterop( cl_device_type device_type )
{
int found_valid_device = 0;
cl_device_id devices[64];
cl_uint num_of_devices;
int error;
error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices);
if (error) {
print_error(error, "clGetDeviceIDs failed");
return -1;
}
char extensions[8192];
for (int i=0; i<(int)num_of_devices; i++) {
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
if (error) {
print_error(error, "clGetDeviceInfo failed");
return -1;
}
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
} else {
log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
found_valid_device = 1;
}
}
return found_valid_device;
}
virtual ~OSXGLEnvironment()
{
CGLDestroyContext( mCGLContext );
}
CGLContextObj mCGLContext;
};
// Returns the single OSXGLEnvironment, allocating it on the first call.
// The object is created with new and is not released by this function.
GLEnvironment * GLEnvironment::Instance( void )
{
    static OSXGLEnvironment * sInstance = NULL;

    if( !sInstance )
    {
        sInstance = new OSXGLEnvironment();
    }
    return sInstance;
}

View File

@@ -0,0 +1,204 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#define GL_GLEXT_PROTOTYPES
#include "setup.h"
#include "testBase.h"
#include "../../test_common/harness/errorHelpers.h"
#include <GL/gl.h>
#include <GL/glut.h>
#include <GL/glext.h>
#include <GL/glut.h>
#include <CL/cl_ext.h>
// Function-pointer type for clGetGLContextInfoKHR, which this file looks up
// at run time instead of linking against directly.
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties *properties,
cl_gl_context_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
// Rename references to this dynamically linked function to avoid
// collision with static link version
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc
// File-local pointer; zero-initialised (static storage) until it is resolved.
static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR;
// Capacity of the device arrays used in this file.
#define MAX_DEVICES 32
// Windows (WGL) implementation of GLEnvironment.
// SupportsCLGLInterop() records the platform and the devices that advertise
// cl_khr_gl_sharing; CreateCLContext() then builds a context on one of them.
class WGLEnvironment : public GLEnvironment
{
private:
    cl_device_id m_devices[MAX_DEVICES];    // devices that advertise cl_khr_gl_sharing
    int m_device_count;                     // number of valid entries in m_devices
    cl_platform_id m_platform;              // platform chosen by SupportsCLGLInterop()

public:
    WGLEnvironment()
    {
        m_device_count = 0;
        m_platform = 0;
    }

    // Opens a GLUT window (which makes a GL context current) and initialises
    // GLEW. use_opengl_32 is not used on this platform. Always returns 0.
    virtual int Init( int *argc, char **argv, int use_opengl_32 )
    {
        // Create a GLUT window to render into
        glutInit( argc, argv );
        glutInitWindowSize( 512, 512 );
        glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
        glutCreateWindow( "OpenCL <-> OpenGL Test" );
        glewInit();
        return 0;
    }

    // Creates a CL context sharing with the current WGL context.
    // Uses the device currently driving the GL context when it is one of the
    // recorded devices, otherwise the first recorded device.
    // Returns 0 (a null context) on any failure.
    virtual cl_context CreateCLContext( void )
    {
        HGLRC hGLRC = wglGetCurrentContext();
        HDC hDC = wglGetCurrentDC();
        cl_context_properties properties[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform,
            CL_GL_CONTEXT_KHR, (cl_context_properties) hGLRC,
            CL_WGL_HDC_KHR, (cl_context_properties) hDC,
            0
        };
        cl_device_id devices[MAX_DEVICES];
        size_t dev_size = 0;
        cl_int status;

        if (!hGLRC || !hDC) {
            print_error(CL_INVALID_CONTEXT, "No GL context bound");
            return 0;
        }

        // m_devices[0] is the fallback device below, so it must be valid.
        if (m_device_count <= 0) {
            print_error(CL_DEVICE_NOT_FOUND, "No device supporting cl_khr_gl_sharing has been recorded");
            return 0;
        }

        if (!clGetGLContextInfoKHR) {
            // Ask OpenCL for the platforms. Warn if more than one platform found,
            // since this might not be the platform we want. By default, we simply
            // use the first returned platform.
            cl_uint nplatforms = 0;
            cl_platform_id platform = NULL;

            status = clGetPlatformIDs(1, &platform, &nplatforms);
            if (status != CL_SUCCESS) {
                print_error(status, "clGetPlatformIDs failed");
                return 0;
            }
            if (nplatforms == 0 || platform == NULL) {
                print_error(CL_INVALID_PLATFORM, "clGetPlatformIDs returned no platform");
                return 0;
            }

            if (nplatforms > 1) {
                log_info("clGetPlatformIDs returned multiple values. This is not "
                         "an error, but might result in obtaining incorrect function "
                         "pointers if you do not want the first returned platform.\n");

                // Show them the platform name, in case it is a problem.
                // This is purely informational, so failures here are not fatal.
                size_t size = 0;
                if (clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size) == CL_SUCCESS && size > 0) {
                    char *name = (char*)malloc(size);
                    if (name != NULL) {
                        if (clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL) == CL_SUCCESS) {
                            log_info("Using platform with name: %s \n", name);
                        }
                        free(name);
                    }
                }
            }

            clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR");
            if (!clGetGLContextInfoKHR) {
                print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR");
                // Calling through the null pointer below would crash.
                return 0;
            }
        }

        status = clGetGLContextInfoKHR(properties,
                                       CL_DEVICES_FOR_GL_CONTEXT_KHR,
                                       sizeof(devices),
                                       devices,
                                       &dev_size);
        if (status != CL_SUCCESS) {
            print_error(status, "clGetGLContextInfoKHR failed");
            return 0;
        }
        dev_size /= sizeof(cl_device_id);
        // dev_size is a size_t; cast it to match the %d conversion.
        log_info("GL context supports %d compute devices\n", (int)dev_size);

        status = clGetGLContextInfoKHR(properties,
                                       CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
                                       sizeof(devices),
                                       devices,
                                       &dev_size);
        if (status != CL_SUCCESS) {
            print_error(status, "clGetGLContextInfoKHR failed");
            return 0;
        }

        cl_device_id ctxDevice = m_devices[0];
        if (dev_size > 0) {
            // cl_device_id is a pointer type, hence %p.
            log_info("GL context current device: %p\n", (void *)devices[0]);
            for (int i = 0; i < m_device_count; i++) {
                if (m_devices[i] == devices[0]) {
                    ctxDevice = devices[0];
                    break;
                }
            }
        } else {
            log_info("GL context current device is not a CL device, using device %p.\n", (void *)ctxDevice);
        }

        cl_context context = clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status);
        if (status != CL_SUCCESS) {
            print_error(status, "clCreateContext failed");
            return 0;
        }
        return context;
    }

    // Picks the first platform, then records every device of device_type on
    // it that advertises cl_khr_gl_sharing (appending to devices recorded by
    // earlier calls). Returns non-zero if any device is recorded, 0 if none,
    // -1 if a CL query fails.
    virtual int SupportsCLGLInterop( cl_device_type device_type )
    {
        cl_device_id devices[MAX_DEVICES];
        cl_uint num_of_devices = 0;
        int error;

        error = clGetPlatformIDs(1, &m_platform, NULL);
        if (error) {
            print_error(error, "clGetPlatformIDs failed");
            return -1;
        }
        error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices);
        if (error) {
            print_error(error, "clGetDeviceIDs failed");
            return -1;
        }

        // num_of_devices is the number of matching devices available, which
        // may exceed the MAX_DEVICES entries that were actually written.
        const int usable_devices = (num_of_devices > MAX_DEVICES) ? MAX_DEVICES : (int)num_of_devices;

        // Check all devices, search for one that supports cl_khr_gl_sharing
        char extensions[8192];
        for (int i=0; i<usable_devices; i++) {
            error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
            if (error) {
                print_error(error, "clGetDeviceInfo failed");
                return -1;
            }
            if (strstr(extensions, "cl_khr_gl_sharing") == NULL) {
                log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
            } else {
                log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
                // Repeated calls append, so guard the member array's capacity.
                if (m_device_count < MAX_DEVICES) {
                    m_devices[m_device_count++] = devices[i];
                }
            }
        }
        return m_device_count > 0;
    }

    virtual ~WGLEnvironment()
    {
    }
};
// Returns the single WGLEnvironment, allocating it on the first call.
// The object is created with new and is not released by this function.
GLEnvironment * GLEnvironment::Instance( void )
{
    static WGLEnvironment * sInstance = NULL;

    if( !sInstance )
    {
        sInstance = new WGLEnvironment();
    }
    return sInstance;
}

View File

@@ -0,0 +1,122 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#define GL_GLEXT_PROTOTYPES
#include "setup.h"
#include "testBase.h"
#include "../../test_common/harness/errorHelpers.h"
#include <GL/gl.h>
#include <GL/glut.h>
#include <GL/glext.h>
#include <GL/freeglut.h>
#include <GL/glx.h>
#include <CL/cl_ext.h>
class X11GLEnvironment : public GLEnvironment
{
private:
cl_device_id m_devices[64];
cl_uint m_device_count;
public:
X11GLEnvironment()
{
m_device_count = 0;
}
virtual int Init( int *argc, char **argv, int use_opencl_32 )
{
// Create a GLUT window to render into
glutInit( argc, argv );
glutInitWindowSize( 512, 512 );
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
glutCreateWindow( "OpenCL <-> OpenGL Test" );
glewInit();
return 0;
}
virtual cl_context CreateCLContext( void )
{
GLXContext context = glXGetCurrentContext();
Display *dpy = glXGetCurrentDisplay();
cl_context_properties properties[] = {
CL_GL_CONTEXT_KHR, (cl_context_properties) context,
CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy,
0
};
cl_int status;
if (!context || !dpy) {
print_error(CL_INVALID_CONTEXT, "No GL context bound");
return 0;
}
return clCreateContext(properties, 1, m_devices, NULL, NULL, &status);
}
virtual int SupportsCLGLInterop( cl_device_type device_type )
{
int found_valid_device = 0;
cl_platform_id platform;
cl_device_id devices[64];
cl_uint num_of_devices;
int error;
error = clGetPlatformIDs(1, &platform, NULL);
if (error) {
print_error(error, "clGetPlatformIDs failed");
return -1;
}
error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices);
// If this platform doesn't have any of the requested device_type (namely GPUs) then return 0
if (error == CL_DEVICE_NOT_FOUND)
return 0;
if (error) {
print_error(error, "clGetDeviceIDs failed");
return -1;
}
char extensions[8192];
for (int i=0; i<(int)num_of_devices; i++) {
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
if (error) {
print_error(error, "clGetDeviceInfo failed");
return -1;
}
if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) {
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
} else {
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
found_valid_device = 1;
m_devices[m_device_count++] = devices[i];
}
}
return found_valid_device;
}
virtual ~X11GLEnvironment()
{
}
};
// Returns the single X11GLEnvironment, allocating it on the first call.
// The object is created with new and is not released by this function.
GLEnvironment * GLEnvironment::Instance( void )
{
    static X11GLEnvironment * sInstance = NULL;

    if( !sInstance )
    {
        sInstance = new X11GLEnvironment();
    }
    return sInstance;
}

View File

@@ -0,0 +1,18 @@
# Project-wide settings: add this directory to the include path, compile every
# source as C++ (-xc++ for gcc, /TP for msvc) and do not treat warnings as errors.
project
: requirements <include>.
<toolset>gcc:<cflags>"-xc++"
<toolset>msvc:<cflags>"/TP"
<warnings-as-errors>off
: usage-requirements <include>.
;
# Build one object per .c/.cpp file found in this directory.
local harness.objs ;
for source in [ glob *.c *.cpp ]
{
harness.objs += [ obj $(source:B).obj : $(source) ] ;
}
# "harness" is an alias for the collected objects.
alias harness : $(harness.objs)
: <use>/Runtime//OpenCL.lib :
: <library>/Runtime//OpenCL.lib
;

View File

@@ -0,0 +1,41 @@
# Optional ATF support: defining BUILD_WITH_ATF adds the framework to the link
# line and -DUSE_ATF to the compile flags.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Harness sources.
# NOTE(review): testHarness.c and testHarness.cpp both map to testHarness.o
# under the suffix substitutions below - confirm that both are meant to be listed.
SRCS = conversions.c \
errorHelpers.c \
genericThread.cpp \
imageHelpers.cpp \
kernelHelpers.c \
mt19937.c \
rounding_mode.c \
testHarness.c \
testHarness.cpp \
ThreadPool.c \
threadTesting.c \
typeWrappers.cpp
DEFINES = DONT_TEST_GARBAGE_POINTERS
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C++ driver is used for .c files as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Derive the object list by replacing the .c and then the .cpp suffixes with .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
# Only objects are built here; nothing is linked by this Makefile.
all: $(OBJECTS)
clean:
rm -f $(OBJECTS)
# Fallback for targets that have no rule.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,899 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "ThreadPool.h"
#include "errorHelpers.h"
#include "fpcontrol.h"
#include <stdio.h>
#include <stdlib.h>
#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system
#if defined( _WIN32 )
#include <windows.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif
#include "mingw_compat.h"
#include <process.h>
#else // !_WIN32
#include <pthread.h>
#include <unistd.h>
#include <sys/errno.h>
#endif // !_WIN32
// declarations
// The worker entry point returns void on Windows and void * elsewhere.
#ifdef _WIN32
void ThreadPool_WorkerFunc( void *p );
#else
void *ThreadPool_WorkerFunc( void *p );
#endif
void ThreadPool_Init(void);
void ThreadPool_Exit(void);
// gAtomicLock backs the lock-based fallback in ThreadPool_AtomicAdd. It is
// only defined for MinGW and for compilers that are neither GCC nor MSVC;
// GCC and MSVC builds use atomic intrinsics and need no lock.
#if defined (__MINGW32__)
// Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
CRITICAL_SECTION gAtomicLock;
#elif defined( __GNUC__ ) || defined( _MSC_VER)
#else
pthread_mutex_t gAtomicLock;
#endif
// Atomically adds b to *a and returns the value *a held before the addition.
// The operation must also act as a memory barrier, because it protects state
// that the worker functions modify.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
#if defined (__MINGW32__)
    // No atomics on Mingw32: serialise the read-modify-write through gAtomicLock.
    cl_int previous;

    EnterCriticalSection(&gAtomicLock);
    previous = *a;
    *a = previous + b;
    LeaveCriticalSection(&gAtomicLock);

    return previous;
#elif defined( __GNUC__ )
    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
    // Open question kept from the original author: is an extra
    // __sync_synchronize() needed? The GCC docs are unclear on whether
    // __sync_fetch_and_add already synchronizes.
    return __sync_fetch_and_add( a, b );
#elif defined( _MSC_VER )
    return (cl_int) _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
#else
    #warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow.
    // Generic fallback: guard the read-modify-write with a pthread mutex.
    // Lock failures are logged but the update is carried out regardless.
    cl_int previous;

    if( 0 != pthread_mutex_lock(&gAtomicLock) )
        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");

    previous = *a;
    *a = previous + b;

    if( 0 != pthread_mutex_unlock(&gAtomicLock) )
        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");

    return previous;
#endif
}
#if defined( _WIN32 )
// Uncomment the following line if Windows XP support is not required.
// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
// One-time initialisation support. With HAS_INIT_ONCE_EXECUTE_ONCE defined the
// native Win32 names are used directly; otherwise a small emulation built on
// an interlocked state word is provided.
#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
#define _INIT_ONCE INIT_ONCE
#define _PINIT_ONCE PINIT_ONCE
#define _InitOnceExecuteOnce InitOnceExecuteOnce
#else // !HAS_INIT_ONCE_EXECUTE_ONCE
// Emulated once-control: a state word taking one of the three values below.
typedef volatile LONG _INIT_ONCE;
typedef _INIT_ONCE *_PINIT_ONCE;
typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
#define _INIT_ONCE_UNINITIALIZED 0
#define _INIT_ONCE_IN_PROGRESS 1
#define _INIT_ONCE_DONE 2
// Runs InitFn exactly once per InitOnce control word.
// The thread that moves the word from UNINITIALIZED to IN_PROGRESS calls
// InitFn, marks the word DONE and returns; every other thread sleeps in 1 ms
// steps until the word reads DONE.
// Always returns TRUE: the value returned by InitFn is not examined.
static BOOL _InitOnceExecuteOnce(
_PINIT_ONCE InitOnce,
_PINIT_ONCE_FN InitFn,
PVOID Parameter,
LPVOID *Context
)
{
while ( *InitOnce != _INIT_ONCE_DONE )
{
// The plain read skips the interlocked operation while another thread is
// known to be initialising; the compare-exchange elects a single winner.
if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
{
InitFn( InitOnce, Parameter, Context );
*InitOnce = _INIT_ONCE_DONE;
return TRUE;
}
// Someone else is initialising: back off and re-check.
Sleep( 1 );
}
return TRUE;
}
#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
// Uncomment the following line if Windows XP support is not required.
// #define HAS_CONDITION_VARIABLE 1
#if defined(HAS_CONDITION_VARIABLE)
#define _CONDITION_VARIABLE CONDITION_VARIABLE
#define _InitializeConditionVariable InitializeConditionVariable
#define _SleepConditionVariableCS SleepConditionVariableCS
#define _WakeAllConditionVariable WakeAllConditionVariable
#else // !HAS_CONDITION_VARIABLE
// Emulated condition variable, used when HAS_CONDITION_VARIABLE is not defined.
typedef struct
{
HANDLE mEvent; // Used to park the thread.
CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
volatile cl_int mWaiters; // Number of threads waiting on this cond var.
volatile cl_int mGeneration; // Wait generation count.
volatile cl_int mReleaseCount; // Number of releases to execute before resetting the event.
} _CONDITION_VARIABLE;
typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
// Prepares an emulated condition variable for use: creates the event that
// waiters park on, initialises the internal lock and zeroes all counters.
static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    // Manual-reset event, initially non-signalled.
    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
    InitializeCriticalSection( cond_var->mLock );
    cond_var->mWaiters = 0;
    cond_var->mGeneration = 0;
    // Zeroed in every build configuration. This used to happen only when
    // NDEBUG was not defined, which left the field indeterminate in release
    // builds for any object without static storage.
    cond_var->mReleaseCount = 0;
}
// Emulation of SleepConditionVariableCS: releases cond_lock, blocks until
// _WakeAllConditionVariable starts a new generation, then re-acquires
// cond_lock before returning. The third parameter (a timeout in the native
// API) is ignored; the wait is always infinite.
static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
{
// Register as a waiter and remember the generation we went to sleep in.
EnterCriticalSection( cond_var->mLock );
cl_int generation = cond_var->mGeneration;
++cond_var->mWaiters;
LeaveCriticalSection( cond_var->mLock );
LeaveCriticalSection( cond_lock );
while ( TRUE )
{
WaitForSingleObject( cond_var->mEvent, INFINITE );
EnterCriticalSection( cond_var->mLock );
// Only leave if releases are still pending and they belong to a generation
// newer than ours; otherwise the signal was meant for earlier waiters.
BOOL done = cond_var->mReleaseCount > 0 && cond_var->mGeneration != generation;
LeaveCriticalSection( cond_var->mLock );
if ( done )
{
break;
}
}
EnterCriticalSection( cond_lock );
EnterCriticalSection( cond_var->mLock );
// The last thread released in this round resets the event.
if ( --cond_var->mReleaseCount == 0 )
{
ResetEvent( cond_var->mEvent );
}
--cond_var->mWaiters;
LeaveCriticalSection( cond_var->mLock );
}
// Emulation of WakeAllConditionVariable: when at least one thread is waiting,
// starts a new generation, schedules one release per current waiter and
// signals the event. Does nothing when there are no waiters.
static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    EnterCriticalSection( cond_var->mLock );

    if ( !( cond_var->mWaiters > 0 ) )
    {
        // Nobody is parked on the event.
        LeaveCriticalSection( cond_var->mLock );
        return;
    }

    ++cond_var->mGeneration;
    cond_var->mReleaseCount = cond_var->mWaiters;
    SetEvent( cond_var->mEvent );

    LeaveCriticalSection( cond_var->mLock );
}
#endif // !HAS_CONDITION_VARIABLE
#endif // _WIN32
#define MAX_COUNT (1<<29)
// Global state to coordinate whether the threads have been launched successfully or not
#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
static _INIT_ONCE threadpool_init_control;
#elif defined (_WIN32) // MinGW or XP
static int threadpool_init_control;
#else // Posix platforms
pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
#endif
cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time,
// because we are too lazy to set up a queue here, and don't expect to need one.
#if defined( _WIN32 )
CRITICAL_SECTION gThreadPoolLock[1];
#else // !_WIN32
pthread_mutex_t gThreadPoolLock;
#endif // !_WIN32
// Condition variable to park ThreadPool threads when not working
#if defined( _WIN32 )
CRITICAL_SECTION cond_lock[1];
_CONDITION_VARIABLE cond_var[1];
#else // !_WIN32
pthread_mutex_t cond_lock;
pthread_cond_t cond_var;
#endif // !_WIN32
volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run.
// set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
// State that only changes when the threadpool is not working.
volatile TPFuncPtr gFunc_ptr = NULL;
volatile void *gUserInfo = NULL;
volatile cl_int gJobCount = 0;
// State that may change while the thread pool is working
volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
// Condition variable to park caller while waiting
#if defined( _WIN32 )
HANDLE caller_event;
#else // !_WIN32
pthread_mutex_t caller_cond_lock;
pthread_cond_t caller_cond_var;
#endif // !_WIN32
volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
// The total number of threads launched.
volatile cl_int gThreadCount = 0;
// Body of every pool thread.
//
// p points at a shared counter; the thread atomically post-increments it to
// obtain its own zero-based thread id. It then loops, claiming work by
// atomically decrementing gRunCount:
//   - a claimed value in [1, MAX_COUNT) is a job: gFunc_ptr is called with
//     job id (value - 1), this thread's id and gUserInfo;
//   - a claimed value <= 0 means no work is left, so the thread parks on
//     cond_var; the thread that brings gRunning to zero wakes the caller first;
//   - a claimed value >= MAX_COUNT ends the thread (ThreadPool_Exit() stores
//     CL_INT_MAX in gRunCount to request this).
// The first non-zero job result is recorded in jobError and gRunCount is set
// to 0 so that the jobs not yet claimed are abandoned. On the way out the
// thread decrements gThreadCount.
#ifdef _WIN32
void ThreadPool_WorkerFunc( void *p )
#else
void *ThreadPool_WorkerFunc( void *p )
#endif
{
    cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
    cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
    // log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );

    while( MAX_COUNT > item )
    {
        cl_int err;

        // check for more work to do
        if( 0 >= item )
        {
            // log_info( "Thread %d has run out of work.\n", threadID );

            // No work to do. Attempt to block waiting for work
#if defined( _WIN32 )
            EnterCriticalSection( cond_lock );
#else // !_WIN32
            if((err = pthread_mutex_lock( &cond_lock) ))
            {
                log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
                goto exit;
            }
#endif // !_WIN32

            cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
            // log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
            if( 1 == remaining )
            { // last thread out signal the main thread to wake up
#if defined( _WIN32 )
                SetEvent( caller_event );
#else // !_WIN32
                // Every failure below releases the mutexes this thread holds
                // before leaving, so a dying worker cannot block the others.
                if((err = pthread_mutex_lock( &caller_cond_lock) ))
                {
                    log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
                    pthread_mutex_unlock( &cond_lock );
                    goto exit;
                }
                if( (err = pthread_cond_broadcast( &caller_cond_var )))
                {
                    log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
                    pthread_mutex_unlock( &caller_cond_lock );
                    pthread_mutex_unlock( &cond_lock );
                    goto exit;
                }
                if((err = pthread_mutex_unlock( &caller_cond_lock) ))
                {
                    log_error("Error %d from pthread_mutex_unlock. Unable to wake caller.\n", err );
                    pthread_mutex_unlock( &cond_lock );
                    goto exit;
                }
#endif // !_WIN32
            }

            // loop in case we are woken only to discover that some other thread already did all the work
            while( 0 >= item )
            {
#if defined( _WIN32 )
                _SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
#else // !_WIN32
                if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
                {
                    log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
                    pthread_mutex_unlock( &cond_lock);
                    goto exit;
                }
#endif // !_WIN32

                // try again to get a valid item id
                item = ThreadPool_AtomicAdd( &gRunCount, -1 );
                if( MAX_COUNT <= item ) // exit if we are done
                {
#if defined( _WIN32 )
                    LeaveCriticalSection( cond_lock );
#else // !_WIN32
                    pthread_mutex_unlock( &cond_lock);
#endif // !_WIN32
                    goto exit;
                }
            }

            ThreadPool_AtomicAdd( &gRunning, 1 );
            // log_info( "Thread %d has found work.\n", threadID);

#if defined( _WIN32 )
            LeaveCriticalSection( cond_lock );
#else // !_WIN32
            if((err = pthread_mutex_unlock( &cond_lock) ))
            {
                log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
                goto exit;
            }
#endif // !_WIN32
        }

        // we have a valid item, so do the work
        if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error
        {
            // log_info( "Thread %d doing job %d\n", threadID, item - 1);

#if defined(__APPLE__) && defined(__arm__)
            // On most platforms which support denorm, default is FTZ off. However,
            // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
            // This creates issues in result verification. Since spec allows the implementation to either flush or
            // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
            // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
            // where reference is being computed to make sure we get non-flushed reference result. If implementation
            // returns flushed result, we correctly take care of that in verification code.
            FPU_mode_type oldMode;
            DisableFTZ( &oldMode );
#endif

            // Call the user's function with this item ID
            err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
#if defined(__APPLE__) && defined(__arm__)
            // Restore FP state
            RestoreFPState( &oldMode );
#endif

            if( err )
            {
#if (__MINGW32__)
                EnterCriticalSection(&gAtomicLock);
                // set the new error only if we are the first one there.
                if( jobError == CL_SUCCESS )
                    jobError = err;
                gRunCount = 0;
                LeaveCriticalSection(&gAtomicLock);
#elif defined( __GNUC__ )
                // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
                // set the new error if we are the first one there.
                __sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );

                // drop run count to 0
                gRunCount = 0;
                __sync_synchronize();
#elif defined( _MSC_VER )
                // set the new error if we are the first one there.
                _InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );

                // drop run count to 0
                gRunCount = 0;
                _mm_mfence();
#else
                if( pthread_mutex_lock(&gAtomicLock) )
                    log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
                // set the new error only if we are the first one there.
                if( jobError == CL_SUCCESS )
                    jobError = err;
                gRunCount = 0;
                if( pthread_mutex_unlock(&gAtomicLock) )
                    log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
#endif
            }
        }

        // get the next item
        item = ThreadPool_AtomicAdd( &gRunCount, -1 );
    }

exit:
    log_info( "ThreadPool: thread %d exiting.\n", threadID );
    ThreadPool_AtomicAdd( &gThreadCount, -1 );
#if !defined(_WIN32)
    return NULL;
#endif
}
// SetThreadCount() artificially overrides the number of worker threads.
// A value of 0 (the default) lets ThreadPool_Init() derive the count from the
// number of CPU cores, using 2 on a unicore machine so that thread safety is
// still exercised.
//
// A count below 2, or a set CL_TEST_SINGLE_THREADED environment variable, makes
// the code run single threaded and report an error marking the test as invalid.
// That mode exists for debugging only; by convention test apps select a count
// of 1 in response to the -m flag.
//
// Call this before the first GetThreadCount() or ThreadPool_Do(); once the pool
// has been initialized successfully the call logs an error and aborts.
void SetThreadCount( int count )
{
    if( threadPoolInitErr != CL_SUCCESS )
    {
        // Pool not running yet: just record the request.
        gThreadCount = count;
        return;
    }

    log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
    abort();
}
// One-time pool setup, invoked through the init-once guards in ThreadPool_Do()
// and GetThreadCount().
//
// Chooses the worker count (an explicit SetThreadCount() value, otherwise the
// processor count, with 1 promoted to 2), initializes the synchronization
// objects, launches the workers and blocks until each of them has made its
// first claim on gRunCount. threadPoolInitErr is set to CL_SUCCESS only when
// the end of the function is reached; on the single-threaded early exits it
// keeps its non-zero value, which makes ThreadPool_Do() run jobs inline.
void ThreadPool_Init(void)
{
    cl_int i;
    int err;
    // Shared counter from which every worker draws its thread id. It lives on
    // this stack frame, which stays alive while the launch wait below runs.
    volatile cl_uint threadID = 0;

    // Check for manual override of multithreading code. We add this for better debuggability.
    if( getenv( "CL_TEST_SINGLE_THREADED" ) )
    {
        log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
        gThreadCount = 1;
        return;
    }

    // Figure out how many threads to run -- check first for non-zero to give the implementation the chance
    if( 0 == gThreadCount )
    {
#if defined(_MSC_VER) || defined (__MINGW64__)
        PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
        DWORD length = 0;

        // First call only reports the required buffer size in length.
        GetLogicalProcessorInformation( NULL, &length );
        buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
        if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
        {
            PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
            while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
            {
                if( ptr->Relationship == RelationProcessorCore )
                {
                    // Count the number of bits in ProcessorMask (number of logical cores)
                    ULONG mask = ptr->ProcessorMask;
                    while( mask )
                    {
                        ++gThreadCount;
                        mask &= mask - 1; // Remove 1 bit at a time
                    }
                }
                ++ptr;
            }
        }
        // Release the buffer whether or not the second query succeeded;
        // free(NULL) is a no-op.
        free(buffer);
#elif defined (__MINGW32__)
        {
#warning How about this, instead of hard coding it to 2?
            SYSTEM_INFO sysinfo;
            GetSystemInfo( &sysinfo );
            gThreadCount = sysinfo.dwNumberOfProcessors;
        }
#else // !_WIN32
        gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
#endif // !_WIN32

        // Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
        if( 1 == gThreadCount )
            gThreadCount = 2;
    }

    //Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
    if( gThreadCount < 2 )
    {
        log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
        gThreadCount = 1;
        return;
    }

#if defined( _WIN32 )
    InitializeCriticalSection( gThreadPoolLock );
    InitializeCriticalSection( cond_lock );
    _InitializeConditionVariable( cond_var );
    caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
#elif defined (__GNUC__)
    // Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
    // with some flavors of gcc compilers.
    pthread_cond_init(&cond_var, NULL);
    pthread_mutex_init(&cond_lock ,NULL);
    pthread_cond_init(&caller_cond_var, NULL);
    pthread_mutex_init(&caller_cond_lock, NULL);
    pthread_mutex_init(&gThreadPoolLock, NULL);
#endif

#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
    // NOTE(review): pthread_mutex_initialize is not a POSIX function name
    // (POSIX spells it pthread_mutex_init) - confirm what this branch expects.
    pthread_mutex_initialize(gAtomicLock);
#elif defined (__MINGW32__)
    InitializeCriticalSection(&gAtomicLock);
#endif

    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
    // That would cause a deadlock.
#if !defined( _WIN32 )
    if((err = pthread_mutex_lock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
        gThreadCount = 1;
        return;
    }
#endif // !_WIN32

    gRunning = gThreadCount;

    // init threads
    for( i = 0; i < gThreadCount; i++ )
    {
#if defined( _WIN32 )
        uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
        // _beginthread reports failure as -1L (0 is the _beginthreadex
        // convention); treat either value as a failed launch.
        err = ( handle == 0 || handle == (uintptr_t) -1L );
#else // !_WIN32
        pthread_t tid = 0;
        err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
#endif // !_WIN32
        if( err )
        {
            // NOTE(review): gRunning keeps the originally requested count here, so
            // the "last thread out" wake-up that the wait below relies on may never
            // be sent after a partial launch - confirm and handle if required.
            log_error( "Error %d launching thread %d\n", err, i );
            threadPoolInitErr = err;
            gThreadCount = i;
            break;
        }
    }

    atexit( ThreadPool_Exit );

    // block until they are done launching.
    do
    {
#if defined( _WIN32 )
        WaitForSingleObject( caller_event, INFINITE );
#else // !_WIN32
        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
        {
            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
            pthread_mutex_unlock( &caller_cond_lock);
            return;
        }
#endif // !_WIN32
    }
    while( gRunCount != -gThreadCount ); // every worker has decremented gRunCount once

#if !defined( _WIN32 )
    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
        return;
    }
#endif // !_WIN32

    threadPoolInitErr = CL_SUCCESS;
}
#if defined(_MSC_VER)
// Init-once callback passed to _InitOnceExecuteOnce(): forwards to
// ThreadPool_Init() and always reports TRUE. None of the arguments is used.
static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
{
    (void) InitOnce;
    (void) Parameter;
    (void) lpContex;

    ThreadPool_Init();
    return TRUE;
}
#endif
// Shuts the pool down: stores CL_INT_MAX in gRunCount so that woken workers
// exit, then repeatedly wakes the parked workers, pausing about a millisecond
// between attempts, until gThreadCount reaches zero or 1000 attempts were made.
// The outcome is logged.
void ThreadPool_Exit(void)
{
    int attempts = 0;

    gRunCount = CL_INT_MAX;

#if defined( __GNUC__ )
    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
    __sync_synchronize();
#elif defined( _MSC_VER )
    _mm_mfence();
#else
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
#endif

    // spin waiting for threads to die
    while( 0 != gThreadCount && attempts < 1000 )
    {
#if defined( _WIN32 )
        _WakeAllConditionVariable( cond_var );
        Sleep(1);
#else // !_WIN32
        int err = pthread_cond_broadcast( &cond_var );
        if( err )
        {
            log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
            break;
        }
        usleep(1000);
#endif // !_WIN32
        ++attempts;
    }

    if( 0 == gThreadCount )
    {
        log_info( "Thread pool exited in a orderly fashion.\n" );
    }
    else
    {
        log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
    }
}
// Blocking API that farms out count jobs to a thread pool.
// It may return with some work undone if func_ptr() returns a non-zero
// result.
//
// func_ptr is called with job ids 0 .. count-1, a thread id and userInfo.
// Returns the first non-zero result recorded for the batch, CL_SUCCESS when
// every job returned zero, -1 when count >= MAX_COUNT, or the error code of a
// failing synchronization call. When the pool is not available
// (threadPoolInitErr is non-zero) the jobs run inline on the calling thread
// with thread id 0.
//
// This function obviously has its shortcomings. Only one call to ThreadPool_Do
// can be running at a time. It is not intended for general purpose use.
// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
// all available then it would make more sense to use those features.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
                      cl_uint count,
                      void *userInfo )
{
    cl_int newErr;
    cl_int err = 0;

    // Lazily set up our threads
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
    err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
#elif defined (_WIN32)
    if (threadpool_init_control == 0) {
#warning This is buggy and race prone. Find a better way.
        ThreadPool_Init();
        threadpool_init_control = 1;
    }
#else //posix platform
    err = pthread_once( &threadpool_init_control, ThreadPool_Init );
    if( err )
    {
        log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
        return err;
    }
#endif

    // Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
    if( threadPoolInitErr )
    {
        cl_uint currentJob = 0;
        cl_int result = CL_SUCCESS;

#if defined(__APPLE__) && defined(__arm__)
        // On most platforms which support denorm, default is FTZ off. However,
        // on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
        // This creates issues in result verification. Since spec allows the implementation to either flush or
        // not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
        // reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
        // where reference is being computed to make sure we get non-flushed reference result. If implementation
        // returns flushed result, we correctly take care of that in verification code.
        FPU_mode_type oldMode;
        DisableFTZ( &oldMode );
#endif
        for( currentJob = 0; currentJob < count; currentJob++ )
            if((result = func_ptr( currentJob, 0, userInfo )))
            {
#if defined(__APPLE__) && defined(__arm__)
                // Restore FP state before leaving
                RestoreFPState( &oldMode );
#endif
                return result;
            }

#if defined(__APPLE__) && defined(__arm__)
        // Restore FP state before leaving
        RestoreFPState( &oldMode );
#endif

        return CL_SUCCESS;
    }

    if( count >= MAX_COUNT )
    {
        log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
        return -1;
    }

    // Enter critical region
#if defined( _WIN32 )
    EnterCriticalSection( gThreadPoolLock );
#else // !_WIN32
    if( (err = pthread_mutex_lock( &gThreadPoolLock )))
    {
        switch (err)
        {
            case EDEADLK:
                log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
                break;
            case EINVAL:
                log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
                break;
            default:
                break;
        }
        return err;
    }
#endif // !_WIN32

    // Start modifying the job state observable by worker threads
#if defined( _WIN32 )
    EnterCriticalSection( cond_lock );
#else // !_WIN32
    if((err = pthread_mutex_lock( &cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
        goto exit;
    }
#endif // !_WIN32

    // Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
    // That would cause a deadlock.
#if !defined( _WIN32 )
    if((err = pthread_mutex_lock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
        // cond_lock is already held; release it so later calls are not blocked.
        pthread_mutex_unlock( &cond_lock );
        goto exit;
    }
#endif // !_WIN32

    // Prime the worker threads to get going
    jobError = CL_SUCCESS;
    gRunCount = gJobCount = count;
    gFunc_ptr = func_ptr;
    gUserInfo = userInfo;

#if defined( _WIN32 )
    _WakeAllConditionVariable( cond_var );
    LeaveCriticalSection( cond_lock );
#else // !_WIN32
    if( (err = pthread_cond_broadcast( &cond_var )))
    {
        log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
        // Both mutexes are held at this point; release them before bailing out.
        pthread_mutex_unlock( &caller_cond_lock );
        pthread_mutex_unlock( &cond_lock );
        goto exit;
    }
    if((err = pthread_mutex_unlock( &cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
        // caller_cond_lock is still held; release it before bailing out.
        pthread_mutex_unlock( &caller_cond_lock );
        goto exit;
    }
#endif // !_WIN32

    // block until they are done. It would be slightly more efficient to do some of the work here though.
    do
    {
#if defined( _WIN32 )
        WaitForSingleObject( caller_event, INFINITE );
#else // !_WIN32
        if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
        {
            log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
            pthread_mutex_unlock( &caller_cond_lock);
            goto exit;
        }
#endif // !_WIN32
    }
    while( gRunning );

#if !defined(_WIN32)
    if((err = pthread_mutex_unlock( &caller_cond_lock) ))
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
        goto exit;
    }
#endif // !_WIN32

    err = jobError;

exit:
    // exit critical region
#if defined( _WIN32 )
    LeaveCriticalSection( gThreadPoolLock );
#else // !_WIN32
    newErr = pthread_mutex_unlock( &gThreadPoolLock );
    if( newErr)
    {
        log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
        return err;
    }
#endif // !_WIN32

    return err;
}
cl_uint GetThreadCount( void )
{
// Lazily set up our threads
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
#elif defined (_WIN32)
if (threadpool_init_control == 0) {
#warning This is buggy and race prone. Find a better way.
ThreadPool_Init();
threadpool_init_control = 1;
}
#else
cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
if( err )
{
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
return err;
}
#endif // !_WIN32
if( gThreadCount < 1 )
return 1;
return gThreadCount;
}
#else
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
#endif
//
// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
// of OpenCL API, while also checking
//
// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
// It is not okay to use this for conformance testing!!!
//
// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code.
//
// Single-threaded stand-in for the atomic add: stores *a + b into *a and
// returns the value *a held before the addition.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
    // since this fallback code path is not multithreaded, a plain add is enough.
    // If your operating system supports memory-barrier-atomics, use those here.
    // The sum is formed in unsigned arithmetic, as before.
    const cl_uint before = (cl_uint) *a;
    const cl_uint after = before + (cl_uint) b;

    *a = (cl_int) after;
    return (cl_int) before;
}
// Sample single-threaded ThreadPool_Do: runs jobs 0 .. count-1 in order on the
// calling thread with thread id 0 and stops at the first non-zero result,
// which is returned; CL_SUCCESS is returned when every job returned zero.
// Unless MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS is defined the function
// logs an error and terminates the process instead.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
                      cl_uint count,
                      void *userInfo )
{
    cl_uint job = 0;

#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
    // THIS FUNCTION IS NOT INTENDED FOR USE!!
    log_error( "ERROR: Test must be multithreaded!\n" );
    exit(-1);
#else
    // Emit the warning on the first call only.
    static int spewCount = 0;

    if( !spewCount )
    {
        log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
        spewCount = 1;
    }
#endif

    // The multithreaded code should mimic this behavior:
    while( job < count )
    {
        cl_int rc = func_ptr( job, 0, userInfo );
        if( rc )
        {
            return rc;
        }
        ++job;
    }

    return CL_SUCCESS;
}
// Single-threaded fallback: the calling thread is the only worker.
cl_uint GetThreadCount( void )
{
    const cl_uint onlyThread = 1;

    return onlyThread;
}
// Single-threaded fallback: the request cannot be honoured. Counts above 1
// are reported as ignored; other values are dropped silently.
void SetThreadCount( int count )
{
    if( count <= 1 )
    {
        return;
    }

    log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
}
#endif

View File

@@ -0,0 +1,76 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef THREAD_POOL_H
#define THREAD_POOL_H
#if defined( __APPLE__ )
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#if defined(__cplusplus)
extern "C" {
#endif
//
// An atomic add operator: adds b to *a and returns the value *a held before
// the addition.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value
// Your function prototype
//
// A function pointer to the function you want to execute in a multithreaded context. No
// synchronization primitives are provided, other than the atomic add above. You may not
// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
// work, however.
//
// Job ids and thread ids are 0 based. If the number of jobs or threads is 8, they will be numbered 0 through 7.
// Note that while every job will be run, it is not guaranteed that every thread will wake up before
// the work is done.
typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
// Runs func_ptr once for each of count jobs and blocks until the work is done.
// Returns the first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
// Some workitems may not run if a non-zero result is returned from func_ptr().
// userInfo is passed through unchanged to every func_ptr call.
// This function may not be called from a TPFuncPtr.
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
cl_uint count,
void *userInfo );
// Returns the number of worker threads that underlie the threadpool. The value passed
// as the TPFuncPtr's thread_id will be between 0 and this value less one, inclusive.
// This is safe to call from a TPFuncPtr.
cl_uint GetThreadCount( void );
// SetThreadCount() may be used to artificially set the number of worker threads.
// If the value is 0 (the default) the number of threads will be determined based on
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
// that we still get some testing for thread safety.
//
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
// code will run single threaded, but will report an error to indicate that the test
// is invalid. This option is intended for debugging purposes only. It is suggested
// as a convention that test apps set the thread count to 1 in response to the -m flag.
//
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
// otherwise the behavior is undefined. It may not be called from a TPFuncPtr.
void SetThreadCount( int count );
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* THREAD_POOL_H */

View File

@@ -0,0 +1,253 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef test_conformance_clImageHelper_h
#define test_conformance_clImageHelper_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include <stdio.h>
#include "errorHelpers.h"
#ifdef __cplusplus
extern "C" {
#endif
// Stand-in for clCreateImage2D so existing callers work with either header
// version: the image is created through clCreateImage when CL_VERSION_1_2 is
// defined and through clCreateImage2D otherwise. A non-zero *errcode_ret is
// only logged; turning it into a test failure is left to the caller.
inline cl_mem create_image_2d (cl_context context,
                               cl_mem_flags flags,
                               const cl_image_format *image_format,
                               size_t image_width,
                               size_t image_height,
                               size_t image_row_pitch,
                               void *host_ptr,
                               cl_int *errcode_ret)
{
    cl_mem image = NULL;

#ifdef CL_VERSION_1_2
    cl_image_desc desc;

    desc.image_type = CL_MEM_OBJECT_IMAGE2D;
    desc.image_width = image_width;
    desc.image_height = image_height;
    desc.image_row_pitch = image_row_pitch;
    // Everything below is unused for a plain 2D image.
    desc.image_depth = 0;
    desc.image_array_size = 0;
    desc.image_slice_pitch = 0;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL; // only meaningful for CL_MEM_OBJECT_IMAGE1D_BUFFER

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0)
    {
        // Informational only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }
#else
    image = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0)
    {
        // Informational only; the caller decides whether this is an error.
        log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
    }
#endif

    return image;
}
// Stand-in for clCreateImage3D: the image is created through clCreateImage
// when CL_VERSION_1_2 is defined and through clCreateImage3D otherwise.
// A non-zero *errcode_ret is only logged; the caller decides what to do.
inline cl_mem create_image_3d (cl_context context,
                               cl_mem_flags flags,
                               const cl_image_format *image_format,
                               size_t image_width,
                               size_t image_height,
                               size_t image_depth,
                               size_t image_row_pitch,
                               size_t image_slice_pitch,
                               void *host_ptr,
                               cl_int *errcode_ret)
{
    cl_mem image;

#ifdef CL_VERSION_1_2
    cl_image_desc desc;

    desc.image_type = CL_MEM_OBJECT_IMAGE3D;
    desc.image_width = image_width;
    desc.image_height = image_height;
    desc.image_depth = image_depth;
    desc.image_row_pitch = image_row_pitch;
    desc.image_slice_pitch = image_slice_pitch;
    desc.image_array_size = 0; // not used for one image
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL; // only meaningful for CL_MEM_OBJECT_IMAGE1D_BUFFER

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0)
    {
        // Informational only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }
#else
    image = clCreateImage3D( context, flags, image_format,
                             image_width, image_height, image_depth,
                             image_row_pitch, image_slice_pitch,
                             host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0)
    {
        // Informational only; the caller decides whether this is an error.
        log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
    }
#endif

    return image;
}
// Creates a CL_MEM_OBJECT_IMAGE2D_ARRAY image through clCreateImage.
// A non-zero *errcode_ret is only logged; the caller decides what to do.
inline cl_mem create_image_2d_array (cl_context context,
                                     cl_mem_flags flags,
                                     const cl_image_format *image_format,
                                     size_t image_width,
                                     size_t image_height,
                                     size_t image_array_size,
                                     size_t image_row_pitch,
                                     size_t image_slice_pitch,
                                     void *host_ptr,
                                     cl_int *errcode_ret)
{
    cl_image_desc desc;
    cl_mem image;

    desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    desc.image_width = image_width;
    desc.image_height = image_height;
    desc.image_array_size = image_array_size;
    desc.image_row_pitch = image_row_pitch;
    desc.image_slice_pitch = image_slice_pitch;
    desc.image_depth = 1;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL;

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0)
    {
        // Informational only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }

    return image;
}
// Creates a CL_MEM_OBJECT_IMAGE1D_ARRAY image through clCreateImage.
// A non-zero *errcode_ret is only logged; the caller decides what to do.
inline cl_mem create_image_1d_array (cl_context context,
                                     cl_mem_flags flags,
                                     const cl_image_format *image_format,
                                     size_t image_width,
                                     size_t image_array_size,
                                     size_t image_row_pitch,
                                     size_t image_slice_pitch,
                                     void *host_ptr,
                                     cl_int *errcode_ret)
{
    cl_image_desc desc;
    cl_mem image;

    desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    desc.image_width = image_width;
    desc.image_array_size = image_array_size;
    desc.image_row_pitch = image_row_pitch;
    desc.image_slice_pitch = image_slice_pitch;
    desc.image_height = 1;
    desc.image_depth = 1;
    desc.num_mip_levels = 0;
    desc.num_samples = 0;
    desc.buffer = NULL;

    image = clCreateImage( context, flags, image_format, &desc, host_ptr, errcode_ret );
    if (errcode_ret != NULL && *errcode_ret != 0)
    {
        // Informational only; the caller decides whether this is an error.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }

    return image;
}
// Creates a 1D image through clCreateImage. When buffer is non-NULL the image
// type is CL_MEM_OBJECT_IMAGE1D_BUFFER and buffer is stored in the descriptor;
// otherwise the type is CL_MEM_OBJECT_IMAGE1D.
// A non-zero *errcode_ret is only logged; the caller decides what to do.
inline cl_mem create_image_1d (cl_context context,
                               cl_mem_flags flags,
                               const cl_image_format *image_format,
                               size_t image_width,
                               size_t image_row_pitch,
                               void *host_ptr,
                               cl_mem buffer,
                               cl_int *errcode_ret)
{
    cl_mem mImage;
    cl_image_desc image_desc;

    image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
    image_desc.image_width = image_width;
    image_desc.image_height = 1;
    image_desc.image_depth = 1;
    // Not an array image, but the field must still hold a defined value:
    // the whole descriptor is handed to clCreateImage.
    image_desc.image_array_size = 0;
    image_desc.image_row_pitch = image_row_pitch;
    image_desc.image_slice_pitch = 0;
    image_desc.num_mip_levels = 0;
    image_desc.num_samples = 0;
    image_desc.buffer = buffer;

    mImage = clCreateImage( context,
                            flags,
                            image_format,
                            &image_desc,
                            host_ptr,
                            errcode_ret );
    if (errcode_ret && (*errcode_ret)) {
        // Log an info message and rely on the calling function to produce an error
        // if necessary.
        log_info("clCreateImage failed (%d)\n", *errcode_ret);
    }

    return mImage;
}
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,210 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _COMPAT_H_
#define _COMPAT_H_
#if defined(_WIN32) && defined (_MSC_VER)
#include <Windows.h>
#include <Winbase.h>
#include <CL/cl.h>
#include <float.h>
#include <xmmintrin.h>
#include <math.h>
// Build a floating-point constant as y * 2^z through ldexp(). The first
// argument is accepted but not used by these definitions.
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
// Map the C99 classification name onto _finite().
#define isfinite(x) _finite(x)
#if !defined(__cplusplus)
typedef char bool;
#define inline
#else
extern "C" {
#endif
// Fixed-width integer typedefs for this MSVC branch, which does not include <stdint.h>.
typedef unsigned char uint8_t;
// int8_t must be explicitly signed: the signedness of plain char is implementation-defined,
// and every other supported platform's <stdint.h> uses signed char.
typedef signed char int8_t;
typedef unsigned short uint16_t;
typedef short int16_t;
typedef unsigned int uint32_t;
typedef int int32_t;
typedef unsigned long long uint64_t;
typedef long long int64_t;
#define MAXPATHLEN MAX_PATH
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong;
#define INFINITY (FLT_MAX + FLT_MAX)
//#define NAN (INFINITY | 1)
//const static int PINFBITPATT_SP32 = INFINITY;
#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif
#define isnan( x ) ((x) != (x))
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
double rint( double x);
float rintf( float x);
long double rintl( long double x);
float cbrtf( float );
double cbrt( double );
int ilogb( double x);
int ilogbf (float x);
int ilogbl(long double x);
double fmax(double x, double y);
double fmin(double x, double y);
float fmaxf( float x, float y );
float fminf(float x, float y);
double log2(double x);
long double log2l(long double x);
double exp2(double x);
long double exp2l(long double x);
double fdim(double x, double y);
float fdimf(float x, float y);
long double fdiml(long double x, long double y);
double remquo( double x, double y, int *quo);
float remquof( float x, float y, int *quo);
long double remquol( long double x, long double y, int *quo);
long double scalblnl(long double x, long n);
// Absolute value of a long long: negative inputs are negated, others returned unchanged.
inline long long
llabs(long long value)
{
    if (value < 0)
        return -value;
    return value;
}
// end of math functions
uint64_t ReadTime( void );
double SubtractTime( uint64_t endTime, uint64_t startTime );
// sleep(seconds) expressed through Sleep(); the argument is multiplied by 1000.
// The argument is parenthesized so that expressions such as sleep(a + b) scale as a whole.
#define sleep(X) Sleep(1000*(X))
#define snprintf sprintf_s
//#define hypotl _hypot
float make_nan();
float nanf( const char* str);
double nan( const char* str);
long double nanl( const char* str);
//#if defined USE_BOOST
//#include <boost/math/tr1.hpp>
//double hypot(double x, double y);
float hypotf(float x, float y);
long double hypotl(long double x, long double y) ;
double lgamma(double x);
float lgammaf(float x);
double trunc(double x);
float truncf(float x);
double log1p(double x);
float log1pf(float x);
long double log1pl(long double x);
double copysign(double x, double y);
float copysignf(float x, float y);
long double copysignl(long double x, long double y);
long lround(double x);
long lroundf(float x);
//long lroundl(long double x)
double round(double x);
float roundf(float x);
long double roundl(long double x);
int signbit(double x);
int signbitf(float x);
//bool signbitl(long double x) { return boost::math::tr1::signbit<long double>(x); }
//#endif // USE_BOOST
long int lrint (double flt);
long int lrintf (float flt);
float int2float (int32_t ix);
int32_t float2int (float fx);
/** Returns the number of leading 0-bits in x,
starting at the most significant bit position.
If x is 0, the result is undefined.
*/
int __builtin_clz(unsigned int pattern);
static const double zero= 0.00000000000000000000e+00;
#define NAN (INFINITY - INFINITY)
#define HUGE_VALF (float)HUGE_VAL
int usleep(int usec);
// reimplement fenv.h because windows doesn't have it
#define FE_INEXACT 0x0020
#define FE_UNDERFLOW 0x0010
#define FE_OVERFLOW 0x0008
#define FE_DIVBYZERO 0x0004
#define FE_INVALID 0x0001
#define FE_ALL_EXCEPT 0x003D
int fetestexcept(int excepts);
int feclearexcept(int excepts);
#ifdef __cplusplus
}
#endif
#else // !((defined(_WIN32) && defined(_MSC_VER)
#if defined(__MINGW32__)
#include <windows.h>
// sleep(seconds) expressed through Sleep(); the argument is multiplied by 1000.
// The argument is parenthesized so that expressions such as sleep(a + b) scale as a whole.
#define sleep(X) Sleep(1000*(X))
#endif
#if defined(__linux__) || defined(__MINGW32__) || defined(__APPLE__)
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <fenv.h>
#include <math.h>
#include <float.h>
#include <stdint.h>
#endif
// Outside the MSVC branch the hexadecimal literal x is used directly;
// the significand (y) and exponent (z) arguments are ignored.
#define MAKE_HEX_FLOAT(x,y,z) x
#define MAKE_HEX_DOUBLE(x,y,z) x
#define MAKE_HEX_LONG(x,y,z) x
#endif // !((defined(_WIN32) && defined(_MSC_VER)
#endif // _COMPAT_H_

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,126 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _conversions_h
#define _conversions_h
#include "compat.h"
#include "errorHelpers.h"
#include "mt19937.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#if defined(__cplusplus)
extern "C" {
#endif
/* Note: the next three all have to match in size and order!! */
// Identifies the element type handled by the conversion and random-data helpers declared
// below (get_explicit_type_size, convert_explicit_value, generate_random_data, ...).
// Values are consecutive starting at 0, so the order matters to any table indexed by them.
// NOTE(review): kUChar/kUnsignedChar (and the Short/Int/Long pairs) are distinct values here;
// presumably alternate spellings of the same underlying type -- confirm against the tables.
enum ExplicitTypes
{
kBool = 0,
kChar,
kUChar,
kUnsignedChar,
kShort,
kUShort,
kUnsignedShort,
kInt,
kUInt,
kUnsignedInt,
kLong,
kULong,
kUnsignedLong,
kFloat,
kHalf,
kDouble,
// Count of the enumerators above; not a type itself.
kNumExplicitTypes
};
typedef enum ExplicitTypes ExplicitType;
// Rounding mode selector passed to convert_explicit_value.
enum RoundingTypes
{
kRoundToEven = 0,
kRoundToZero,
kRoundToPosInf,
kRoundToNegInf,
kRoundToNearest,
// Count of the rounding modes above.
kNumRoundingTypes,
// Alias for kRoundToNearest; shares its value and does not add to the count.
kDefaultRoundingType = kRoundToNearest
};
typedef enum RoundingTypes RoundingType;
extern void print_type_to_string(ExplicitType type, void *data, char* string);
extern size_t get_explicit_type_size( ExplicitType type );
extern const char * get_explicit_type_name( ExplicitType type );
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
extern float read_as_float( void *inRaw, ExplicitType inType );
extern float get_random_float(float low, float high, MTdata d);
extern double get_random_double(double low, double high, MTdata d);
extern float any_float( MTdata d );
extern double any_double( MTdata d );
extern int random_in_range( int minV, int maxV, MTdata d );
size_t get_random_size_t(size_t low, size_t high, MTdata d);
// Returns 1 when the single-precision value x is subnormal (nonzero with a magnitude
// below the smallest normal float) and 0 otherwise.
static inline int IsFloatSubnormal( float x )
{
#if 2 == FLT_RADIX
    // Classify by bit pattern with integer compares so the answer does not depend on
    // whether the host flushes subnormals to zero.
    union{ float value; uint32_t bits; } pun;
    pun.value = fabsf( x );
    // With the sign cleared, subnormals are the nonzero patterns below 0x00800000.
    return pun.bits != 0 && pun.bits < 0x00800000U;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return x != 0.0 && fabs(x) < (double) FLT_MIN;
#endif
}
// Returns 1 when the double-precision value x is subnormal (nonzero with a magnitude
// below the smallest normal double) and 0 otherwise.
static inline int IsDoubleSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Classify by bit pattern with integer compares so the answer does not depend on
    // whether the host flushes subnormals to zero.
    union{ double value; uint64_t bits; } pun;
    pun.value = fabs( x );
    // With the sign cleared, subnormals are the nonzero patterns below 0x0010000000000000.
    return pun.bits != 0 && pun.bits < 0x0010000000000000ULL;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return x != 0.0 && fabs(x) < (double) DBL_MIN;
#endif
}
#if defined(__cplusplus)
}
#endif
#endif // _conversions_h

View File

@@ -0,0 +1,579 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "errorHelpers.h"
// Maps an OpenCL error code to its symbolic name.
// Returns a pointer to a string literal; unrecognized codes yield "(unknown)".
// The OpenCL 2.0 codes are guarded so the file still builds against older headers.
const char *IGetErrorString( int clErrorCode )
{
    switch( clErrorCode )
    {
        case CL_SUCCESS: return "CL_SUCCESS";
        case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
        case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
        case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
        case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
        case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
        case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
        case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
        case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
        case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
        case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
        case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
        case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
        case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
        case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
        case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
        case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
        case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
        case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
        case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
        case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
        case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
        case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
        case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
        case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
        case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
        case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
        case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
        case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
        case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
        case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
        case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
        case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
        case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
        case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
        case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
        case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
        case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
        case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
        case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
        case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
        case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
        case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
        case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
        case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
        case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
        case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
        case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
        case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
        case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
        case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
        case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
        case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
        case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
        case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
        case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
        case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
        case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
        // Error codes introduced with OpenCL 2.0.
#ifdef CL_INVALID_PIPE_SIZE
        case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE";
#endif
#ifdef CL_INVALID_DEVICE_QUEUE
        case CL_INVALID_DEVICE_QUEUE: return "CL_INVALID_DEVICE_QUEUE";
#endif
        default: return "(unknown)";
    }
}
// Returns the symbolic name of an image channel order, or NULL when the value is not
// one of the orders listed in the table below.
const char *GetChannelOrderName( cl_channel_order order )
{
    static const struct { cl_channel_order value; const char *name; } knownOrders[] =
    {
        { CL_R, "CL_R" },
        { CL_A, "CL_A" },
        { CL_Rx, "CL_Rx" },
        { CL_RG, "CL_RG" },
        { CL_RA, "CL_RA" },
        { CL_RGx, "CL_RGx" },
        { CL_RGB, "CL_RGB" },
        { CL_RGBx, "CL_RGBx" },
        { CL_RGBA, "CL_RGBA" },
        { CL_ARGB, "CL_ARGB" },
        { CL_BGRA, "CL_BGRA" },
        { CL_INTENSITY, "CL_INTENSITY" },
        { CL_LUMINANCE, "CL_LUMINANCE" },
#if defined CL_1RGB_APPLE
        { CL_1RGB_APPLE, "CL_1RGB_APPLE" },
#endif
#if defined CL_BGR1_APPLE
        { CL_BGR1_APPLE, "CL_BGR1_APPLE" },
#endif
    };
    const size_t count = sizeof( knownOrders ) / sizeof( knownOrders[ 0 ] );
    size_t n;

    for( n = 0; n < count; n++ )
    {
        if( knownOrders[ n ].value == order )
            return knownOrders[ n ].name;
    }
    return NULL;
}
// Returns 1 when the channel order is one of the orders listed below, 0 otherwise.
int IsChannelOrderSupported( cl_channel_order order )
{
    static const cl_channel_order supportedOrders[] =
    {
        CL_R,
        CL_A,
        CL_Rx,
        CL_RG,
        CL_RA,
        CL_RGx,
        CL_RGB,
        CL_RGBx,
        CL_RGBA,
        CL_ARGB,
        CL_BGRA,
        CL_INTENSITY,
        CL_LUMINANCE,
#if defined CL_1RGB_APPLE
        CL_1RGB_APPLE,
#endif
#if defined CL_BGR1_APPLE
        CL_BGR1_APPLE,
#endif
    };
    const size_t count = sizeof( supportedOrders ) / sizeof( supportedOrders[ 0 ] );
    size_t n;

    for( n = 0; n < count; n++ )
    {
        if( supportedOrders[ n ] == order )
            return 1;
    }
    return 0;
}
// Returns the symbolic name of an image channel data type, or NULL when the value is
// not one of the types listed in the table below.
const char *GetChannelTypeName( cl_channel_type type )
{
    static const struct { cl_channel_type value; const char *name; } knownTypes[] =
    {
        { CL_SNORM_INT8, "CL_SNORM_INT8" },
        { CL_SNORM_INT16, "CL_SNORM_INT16" },
        { CL_UNORM_INT8, "CL_UNORM_INT8" },
        { CL_UNORM_INT16, "CL_UNORM_INT16" },
        { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
        { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
        { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
        { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
        { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
        { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
        { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
        { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
        { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
        { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
        { CL_FLOAT, "CL_FLOAT" },
#ifdef CL_SFIXED14_APPLE
        { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" },
#endif
    };
    const size_t count = sizeof( knownTypes ) / sizeof( knownTypes[ 0 ] );
    size_t n;

    for( n = 0; n < count; n++ )
    {
        if( knownTypes[ n ].value == type )
            return knownTypes[ n ].name;
    }
    return NULL;
}
// Returns 1 when the channel data type is one of the types listed below, 0 otherwise.
int IsChannelTypeSupported( cl_channel_type type )
{
    static const cl_channel_type supportedTypes[] =
    {
        CL_SNORM_INT8,
        CL_SNORM_INT16,
        CL_UNORM_INT8,
        CL_UNORM_INT16,
        CL_UNORM_SHORT_565,
        CL_UNORM_SHORT_555,
        CL_UNORM_INT_101010,
        CL_SIGNED_INT8,
        CL_SIGNED_INT16,
        CL_SIGNED_INT32,
        CL_UNSIGNED_INT8,
        CL_UNSIGNED_INT16,
        CL_UNSIGNED_INT32,
        CL_HALF_FLOAT,
        CL_FLOAT,
#ifdef CL_SFIXED14_APPLE
        CL_SFIXED14_APPLE,
#endif
    };
    const size_t count = sizeof( supportedTypes ) / sizeof( supportedTypes[ 0 ] );
    size_t n;

    for( n = 0; n < count; n++ )
    {
        if( supportedTypes[ n ] == type )
            return 1;
    }
    return 0;
}
// Returns the symbolic name of a sampler addressing mode, or NULL for any other value.
const char *GetAddressModeName( cl_addressing_mode mode )
{
    if( mode == CL_ADDRESS_NONE )
        return "CL_ADDRESS_NONE";
    if( mode == CL_ADDRESS_CLAMP_TO_EDGE )
        return "CL_ADDRESS_CLAMP_TO_EDGE";
    if( mode == CL_ADDRESS_CLAMP )
        return "CL_ADDRESS_CLAMP";
    if( mode == CL_ADDRESS_REPEAT )
        return "CL_ADDRESS_REPEAT";
    if( mode == CL_ADDRESS_MIRRORED_REPEAT )
        return "CL_ADDRESS_MIRRORED_REPEAT";
    return NULL;
}
// Returns the symbolic name of a device type. Only the exact values GPU, CPU, ACCELERATOR
// and ALL are recognized; any other value (including other bit combinations) yields NULL.
const char *GetDeviceTypeName( cl_device_type type )
{
    if( type == CL_DEVICE_TYPE_GPU )
        return "CL_DEVICE_TYPE_GPU";
    if( type == CL_DEVICE_TYPE_CPU )
        return "CL_DEVICE_TYPE_CPU";
    if( type == CL_DEVICE_TYPE_ACCELERATOR )
        return "CL_DEVICE_TYPE_ACCELERATOR";
    if( type == CL_DEVICE_TYPE_ALL )
        return "CL_DEVICE_TYPE_ALL";
    return NULL;
}
// Formats a vector of vecSize elements, each typeSize bytes wide, as lowercase hex.
// Each element is printed from its last byte to its first, and elements are separated
// by a single space.
//
// dataBuffer - start of the raw element data (vecSize * typeSize bytes are read)
// buffer     - destination string, or NULL to use an internal static buffer
//
// Returns the destination string. With buffer == NULL the result lives in static storage
// (not reentrant) and output stops at the last whole element that fits, instead of
// writing past the end of that storage. A caller-supplied buffer has no known size, so
// the caller must provide at least vecSize * (2 * typeSize + 1) bytes.
const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
{
    static const char hexDigits[] = "0123456789abcdef";
    static char scratch[ 1024 ];
    const unsigned char *p = (const unsigned char *)dataBuffer;
    // Characters that may still be written, excluding the terminating NUL.
    size_t remaining = (size_t)-1;
    size_t i, j;
    char *bPtr;

    if( buffer == NULL )
    {
        buffer = scratch;
        remaining = sizeof( scratch ) - 1;
    }
    bPtr = buffer;

    for( i = 0; i < vecSize; i++ )
    {
        const size_t separator = ( i > 0 ) ? 1 : 0;

        // Stop before an element that would not fit completely.
        if( remaining < separator || typeSize > ( remaining - separator ) / 2 )
            break;

        if( separator )
            *bPtr++ = ' ';

        for( j = 0; j < typeSize; j++ )
        {
            const unsigned char byte = p[ typeSize - j - 1 ];
            *bPtr++ = hexDigits[ byte >> 4 ];
            *bPtr++ = hexDigits[ byte & 0x0f ];
        }

        remaining -= separator + 2 * typeSize;
        p += typeSize;
    }

    *bPtr = 0;
    return buffer;
}
#ifndef MAX
// Larger of two values. As a function-like macro each argument may be evaluated twice,
// so do not pass expressions with side effects.
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
#endif
#if defined( _MSC_VER )
// On MSVC the scalbn family is routed to the matching ldexp functions.
#define scalbnf(_a, _i ) ldexpf( _a, _i )
#define scalbn(_a, _i ) ldexp( _a, _i )
#define scalbnl(_a, _i ) ldexpl( _a, _i )
#endif
// Forward declarations of the half-precision helpers defined further down in this file.
static float Ulp_Error_Half_Float( float test, double reference );
static inline float half2float( cl_ushort half );
// taken from math tests
// Exponent floor and significand width used by Ulp_Error_Half_Float.
#define HALF_MIN_EXP -13
#define HALF_MANT_DIG 11
// Returns the signed error of `test` relative to `reference`, scaled to ulps using the
// HALF_MANT_DIG / HALF_MIN_EXP parameters: scalbn( test - reference, ulp_exp ).
// Returns 0 when both values are NaN or when an infinite reference is matched exactly.
static float Ulp_Error_Half_Float( float test, double reference )
{
// Alias the reference's bits so the mantissa field can be tested below.
union{ double d; uint64_t u; }u; u.d = reference;
// Note: This function presumes that someone has already tested whether the result is
// correctly rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
// Widen the test value once so the subtraction below is done in double.
double testVal = test;
// Any set mantissa bit means the reference is neither zero, infinity, nor an exact power of two.
if( u.u & 0x000fffffffffffffULL )
{ // Non-power of two and NaN
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// Infinite reference: an exact match is zero error, anything else reports the raw difference.
if( isinf( reference ) )
{
if( (double) test == reference )
return 0.0f;
return (float) (testVal - reference );
}
// reference is a normal power of two or a zero
// The exponent is reduced by one here compared with the branch above.
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// Taken from vLoadHalf test
// Expands a 16-bit half-precision bit pattern into the float with the same value.
// Zeros keep their sign, subnormal halves are renormalized, and an all-ones exponent
// maps to infinity (zero mantissa) or a NaN with the quiet bit forced on.
static inline float half2float( cl_ushort us )
{
    const uint32_t raw = us;
    const uint32_t sign = (raw << 16) & 0x80000000;
    int32_t exponent = (raw & 0x7c00) >> 10;
    uint32_t mantissa = (raw & 0x03ff) << 13;
    union{ unsigned int u; float f;} result;

    if( exponent == 31 )
    {
        // Infinity or NaN: keep the mantissa payload and set the float exponent to all ones.
        result.u = mantissa | sign;
        result.u |= mantissa ? 0x7fc00000 : 0x7f800000;
        return result.f;
    }

    if( exponent == 0 )
    {
        int shift;

        if( mantissa == 0 )
            return sign ? -0.0f : 0.0f;

        // Subnormal half: shift the leading one up into the implicit-bit position and
        // lower the exponent by the same amount.
        shift = __builtin_clz( mantissa ) - 8;
        exponent -= shift-1;
        mantissa <<= shift;
        mantissa &= 0x007fffff;
    }

    // Re-bias from half (15) to float (127) and assemble the final pattern.
    exponent += 127 - 15;
    exponent <<= 23;
    exponent |= mantissa;
    result.u = exponent | sign;
    return result.f;
}
// Ulp error of a half-precision result given as raw bits: the bits are expanded to float
// and compared against the reference by Ulp_Error_Half_Float.
float Ulp_Error_Half( cl_ushort test, float reference )
{
    const float testAsFloat = half2float( test );
    return Ulp_Error_Half_Float( testAsFloat, reference );
}
// Returns the signed error of the single-precision result `test` relative to the double
// `reference`, in float ulps: scalbn( test - reference, ulp_exp ).
// Returns 0 when both values are NaN or when an infinite reference is matched exactly.
float Ulp_Error( float test, double reference )
{
// Alias the reference's bits so the mantissa field can be tested below.
union{ double d; uint64_t u; }u; u.d = reference;
double testVal = test;
// Note: This function presumes that someone has already tested whether the result is
// correctly rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
// Infinite reference: an exact match is zero error, anything else reports the raw difference.
if( isinf( reference ) )
{
if( testVal == reference )
return 0.0f;
return (float) (testVal - reference );
}
if( isinf( testVal) )
{ // infinite test value, but finite (but possibly overflowing in float) reference.
//
// The function probably overflowed prematurely here. Formally, the spec says this is
// an infinite ulp error and should not be tolerated. Unfortunately, this would mean
// that the internal precision of some half_pow implementations would have to be 29+ bits
// at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
// is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
// after rounding to single is 4*32 = 128, which will ultimately result in premature
// overflow, even though a good faith representation would be correct to within 2**-29
// internally.
// In the interest of not requiring the implementation to go to extraordinary lengths to
// deliver a half precision function, we allow premature overflow within the limit
// of the allowed ulp error. Towards that end, we "pretend" the test value is actually
// 2**128, the next value that would appear in the number line if float had sufficient range.
testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );
// Note that the same hack may not work in long double, which is not guaranteed to have
// more range than double. It is not clear that premature overflow should be tolerated for
// double.
}
// Any set mantissa bit means the reference is not an exact power of two (or is NaN).
if( u.u & 0x000fffffffffffffULL )
{ // Non-power of two and NaN
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference), FLT_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
// (reduced by one compared with the branch above)
int ulp_exp = FLT_MANT_DIG - 1 - MAX( ilogb( reference) - 1, FLT_MIN_EXP-1 );
// Scale the exponent of the error
return (float) scalbn( testVal - reference, ulp_exp );
}
// Returns the signed error of the double-precision result `test` relative to the long double
// `reference`, in double ulps: scalbnl( test - reference, ulp_exp ), narrowed to float.
// When long double and double have the same size, half an ulp (with the sign of the result)
// is added to the finite results to account for rounding in the reference itself.
float Ulp_Error_Double( double test, long double reference )
{
// Deal with long double = double
// On most systems long double is a higher precision type than double. They provide either
// a 80-bit or greater floating point type, or they provide a head-tail double double format.
// That is sufficient to represent the accuracy of a floating point result to many more bits
// than double and we can calculate sub-ulp errors. This is the standard system for which this
// test suite is designed.
//
// On some systems double and long double are the same thing. Then we run into a problem,
// because our representation of the infinitely precise result (passed in as reference above)
// can be off by as much as a half double precision ulp itself. In this case, we inflate the
// reported error by half an ulp to take this into account. A more correct and permanent fix
// would be to undertake refactoring the reference code to return results in this format:
//
// typedef struct DoubleReference
// { // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult) (infinitely precise)
// double correctlyRoundedResult; // as best we can
// double ulps; // plus a fractional amount to account for the difference
// }DoubleReference; // between infinitely precise result and correctlyRoundedResult, in units of ulps.
//
// This would provide a useful higher-than-double precision format for everyone that we can use,
// and would solve a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
// that use a head to tail double double for long double.
// Note: This function presumes that someone has already tested whether the result is
// correctly rounded before calling this function. That test:
//
// if( (float) reference == test )
// return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
// results.
// NOTE(review): the snippet above is worded for the float variant; for this function the
// analogous check is presumably (double) reference == test against DBL_MAX -- confirm.
int x;
long double testVal = test;
// frexpl yields exactly 0.5 only for positive powers of two.
// NOTE(review): negative powers of two give -0.5 and therefore take this branch too,
// unlike Ulp_Error, which tests the mantissa bits -- confirm this is intended.
if( 0.5L != frexpl( reference, &x) )
{ // Non-power of two and NaN
if( isinf( reference ) )
{
if( testVal == reference )
return 0.0f;
return (float) ( testVal - reference );
}
if( isnan( reference ) && isnan( test ) )
return 0.0f; // if we are expecting a NaN, any NaN is fine
// The unbiased exponent of the ulp unit place
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference), DBL_MIN_EXP-1 );
// Scale the exponent of the error
float result = (float) scalbnl( testVal - reference, ulp_exp );
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
if( sizeof(long double) == sizeof( double ) )
result += copysignf( 0.5f, result);
return result;
}
// reference is a normal power of two or a zero
// The unbiased exponent of the ulp unit place
// (reduced by one compared with the branch above)
int ulp_exp = DBL_MANT_DIG - 1 - MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
// Scale the exponent of the error
float result = (float) scalbnl( testVal - reference, ulp_exp );
// account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
if( sizeof(long double) == sizeof( double ) )
result += copysignf( 0.5f, result);
return result;
}
// Logs the build status and build log of `program` for every device whose build did not
// succeed.
//
// program     - program to inspect; NULL is accepted and treated as "nothing to do"
// num_devices - number of entries in device_list, or 0 to query all devices of the
//               program's context
// device_list - devices to inspect; ignored when num_devices is 0
//
// Returns CL_SUCCESS when every query succeeded, CL_OUT_OF_HOST_MEMORY when an allocation
// failed, or the error code of the first failing OpenCL query. All memory allocated here is
// released on every path.
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
{
    cl_int error = CL_SUCCESS;
    size_t size_ret = 0;
    // Device list allocated by this function; stays NULL when the caller supplied the list.
    cl_device_id *queried_devices = NULL;
    char *build_log = NULL;
    unsigned int i;

    // Does the program object exist?
    if (program == NULL)
        return CL_SUCCESS;

    // Was the number of devices given
    if (num_devices == 0) {
        // If zero devices were specified then allocate and query the device list from the context
        cl_context context;

        error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query program's context" );
            goto cleanup;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query context's device size" );
            goto cleanup;
        }

        num_devices = (cl_uint)(size_ret / sizeof(cl_device_id));
        queried_devices = (cl_device_id *) malloc(size_ret);
        if (queried_devices == NULL) {
            error = CL_OUT_OF_HOST_MEMORY;
            print_error( error, "malloc failed" );
            goto cleanup;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, queried_devices, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query context's devices" );
            goto cleanup;
        }
        device_list = queried_devices;
    }

    // For each device in the device_list
    for (i = 0; i < num_devices; i++) {
        // Get the build status
        cl_build_status build_status;

        error = clGetProgramBuildInfo(program,
                                      device_list[i],
                                      CL_PROGRAM_BUILD_STATUS,
                                      sizeof(build_status),
                                      &build_status,
                                      &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build status" );
            goto cleanup;
        }

        if (build_status == CL_BUILD_SUCCESS)
            continue;

        // The build failed: log the status, then fetch, log and free the build log
        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);

        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log size" );
            goto cleanup;
        }

        // One extra byte so the log can always be NUL-terminated before printing with %s.
        build_log = (char *) malloc(size_ret + 1);
        if (build_log == NULL) {
            error = CL_OUT_OF_HOST_MEMORY;
            print_error( error, "malloc failed" );
            goto cleanup;
        }

        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, size_ret, build_log, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log" );
            goto cleanup;
        }
        build_log[size_ret] = '\0';

        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
        free(build_log);
        build_log = NULL;
    }

cleanup:
    // free(NULL) is a no-op, so both pointers can be released unconditionally.
    free(build_log);
    free(queried_devices);
    return error;
}

View File

@@ -0,0 +1,149 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _errorHelpers_h
#define _errorHelpers_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
#define LOWER_IS_BETTER 0
#define HIGHER_IS_BETTER 1
// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
// functions as described below. This is helpful for integration into an automated testing
// system.
#if USE_ATF
// export BUILD_WITH_ATF=1
#include <ATF/ATF.h>
#define test_start() ATFTestStart()
#define log_info ATFLogInfo
#define log_error ATFLogError
#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
#define test_finish() ATFTestFinish()
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
#define vlog ATFLogInfo
#define vlog_error ATFLogError
#else
#define test_start()
#define log_info printf
#define log_error printf
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
_higherBetter?"higher is better":"lower is better", _number )
#define test_finish()
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
_higherBetter?"higher is better":"lower is better" , _number)
#ifdef _WIN32
#ifdef __MINGW32__
// Use __mingw_printf since it supports "%a" format specifier
#define vlog __mingw_printf
#define vlog_error __mingw_printf
#else
// Use home-baked function that treats "%a" as "%f"
static int vlog_win32(const char *format, ...);
#define vlog vlog_win32
#define vlog_error vlog_win32
#endif
#else
#define vlog_error printf
#define vlog printf
#endif
#endif
// Compile-time assertion: declares an int array whose size is -1 (a compile error) when b is false.
// The two extra levels let __LINE__ expand before it is pasted into the array name.
#define ct_assert(b) ct_assert_i(b, __LINE__)
#define ct_assert_i(b, line) ct_assert_ii(b, line)
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
// test_error: if errCode is not CL_SUCCESS, log msg and return errCode from the ENCLOSING function.
// test_error_ret: same, but returns retValue instead.
// print_error: only logs. Note the expansion already ends with a semicolon.
// The arguments are expanded more than once, so pass plain variables rather than calls.
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
// expected error code vs. what we got
// test_failure_error: if errCode differs from expectedErrCode, log both and return from the
// enclosing function (the default return value is the comparison errCode != expectedErrCode).
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
// test_failure_warning: on mismatch, log a warning and increment a variable named `warnings`
// that must be visible at the point of use; it does NOT return (retValue is unused).
#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
extern const char *IGetErrorString( int clErrorCode );
extern float Ulp_Error_Half( cl_ushort test, float reference );
extern float Ulp_Error( float test, double reference );
extern float Ulp_Error_Double( double test, long double reference );
extern const char *GetChannelTypeName( cl_channel_type type );
extern int IsChannelTypeSupported( cl_channel_type type );
extern const char *GetChannelOrderName( cl_channel_order order );
extern int IsChannelOrderSupported( cl_channel_order order );
extern const char *GetAddressModeName( cl_addressing_mode mode );
extern const char *GetDeviceTypeName( cl_device_type type );
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
#if defined (_WIN32) && !defined(__MINGW32__)
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
// printf-style logger that prints every "a" conversion as "f".
//
// The format is scanned one conversion specification at a time: a literal "%%" is left
// alone, optional flags, width, precision and an l/L length modifier are skipped, and a
// following 'a' conversion character is replaced by 'f' in a private copy of the format.
//
// Returns 0 after printing, or -1 if the copy of the format could not be allocated
// (nothing is printed in that case apart from the failure message).
static int vlog_win32(const char *format, ...)
{
    char *patched = NULL;   // private copy of format, created only when a change is needed
    size_t i = 0;
    va_list args;

    while (format[i] != '\0') {
        if (format[i] != '%') {
            i++;
            continue;
        }
        i++;    // step over '%'
        if (format[i] == '%') {
            // "%%" prints a literal percent sign; whatever follows is ordinary text.
            i++;
            continue;
        }
        i += strspn(format + i, "-+ #0");           // flags
        i += strspn(format + i, "0123456789*");     // field width
        if (format[i] == '.') {
            i++;
            i += strspn(format + i, "0123456789*"); // precision
        }
        i += strspn(format + i, "lL");              // length modifiers valid before 'a'

        // replace %a with %f
        if (format[i] == 'a') {
            if (patched == NULL) {
                size_t length = strlen(format) + 1;
                patched = (char *) malloc(length);
                if (patched == NULL) {
                    printf("vlog_win32: Failed to allocate memory for strdup\n");
                    return -1;
                }
                memcpy(patched, format, length);
            }
            patched[i] = 'f';
        }
        // Any other conversion character is consumed as ordinary text by the next iteration.
    }

    va_start(args, format);
    vprintf(patched != NULL ? patched : format, args);
    va_end(args);

    free(patched);
    return 0;
}
#endif
#ifdef __cplusplus
}
#endif
#endif // _errorHelpers_h

View File

@@ -0,0 +1,104 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _fpcontrol_h
#define _fpcontrol_h
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
// in integer code, and have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
typedef int FPU_mode_type;
#if defined( __i386__ ) || defined( __x86_64__ )
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
extern __thread fpu_control_t fpu_control;
#endif
// Set the reference hardware floating point unit to FTZ mode
// The current floating-point control word is stored in *mode first, so the caller can hand it to
// RestoreFPState later; the control word is then rewritten with the flush-to-zero bit(s) set.
// mode must point to writable storage. Compilation fails with #error on architectures not listed below.
static inline void ForceFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
// x86 / MSVC / MinGW: save the value returned by _mm_getcsr, then OR in 0x8040 (bits 15 and 6).
// NOTE(review): presumably the MXCSR flush-to-zero and denormals-are-zero bits -- confirm.
*mode = _mm_getcsr();
_mm_setcsr( *mode | 0x8040);
#elif defined( __PPC__ )
// PowerPC: save fpu_control (the extern thread-local declared earlier in this header) and set _FPU_MASK_NI in it.
*mode = fpu_control;
fpu_control |= _FPU_MASK_NI;
#elif defined ( __arm__ )
// 32-bit ARM: read FPSCR, save it, and write it back with bit 24 set.
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
// Add 64 bit support
#elif defined (__aarch64__)
// AArch64: same sequence as 32-bit ARM, but on the FPCR register via mrs/msr.
unsigned fpscr;
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
#else
#error ForceFTZ needs an implentation
#endif
}
// Disable the denorm flush to zero
// Mirror image of ForceFTZ: the current control word is stored in *mode (for a later RestoreFPState),
// then written back with the same bit(s) that ForceFTZ sets cleared instead.
// mode must point to writable storage. Compilation fails with #error on architectures not listed below.
static inline void DisableFTZ( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
// x86 / MSVC / MinGW: save the value returned by _mm_getcsr, then clear the bits in mask 0x8040 (bits 15 and 6).
*mode = _mm_getcsr();
_mm_setcsr( *mode & ~0x8040);
#elif defined( __PPC__ )
// PowerPC: save fpu_control and clear _FPU_MASK_NI in it.
*mode = fpu_control;
fpu_control &= ~_FPU_MASK_NI;
#elif defined ( __arm__ )
// 32-bit ARM: read FPSCR, save it, and write it back with bit 24 cleared.
unsigned fpscr;
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
// Add 64 bit support
#elif defined (__aarch64__)
// AArch64: same sequence on the FPCR register via mrs/msr.
unsigned fpscr;
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
*mode = fpscr;
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
#else
#error DisableFTZ needs an implentation
#endif
}
// Restore the reference hardware to floating point state indicated by *mode
// *mode is written back verbatim to the same control register/variable that ForceFTZ and DisableFTZ
// read on this architecture, so it is expected to hold a value previously saved by one of them.
// *mode itself is only read, never modified.
static inline void RestoreFPState( FPU_mode_type *mode )
{
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
// x86 / MSVC / MinGW: hand the saved word back to _mm_setcsr.
_mm_setcsr( *mode );
#elif defined( __PPC__)
// PowerPC: assign the saved word to the thread-local fpu_control variable.
fpu_control = *mode;
#elif defined (__arm__)
// 32-bit ARM: write the saved word to FPSCR.
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
// Add 64 bit support
#elif defined (__aarch64__)
// AArch64: write the saved word to FPCR.
__asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
#else
#error RestoreFPState needs an implementation
#endif
}
#else
#error ForceFTZ and RestoreFPState need implentations
#endif
#endif

View File

@@ -0,0 +1,53 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "genericThread.h"
#if defined(_WIN32)
#include <windows.h>
#else // !_WIN32
#include <pthread.h>
#endif
void * genericThread::IStaticReflector( void * data )
{
genericThread *t = (genericThread *)data;
return t->IRun();
}
bool genericThread::Start( void )
{
#if defined(_WIN32)
mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
return ( mHandle != NULL );
#else // !_WIN32
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
return ( error == 0 );
#endif // !_WIN32
}
// Blocks until the thread started by Start() has finished.
//
// Windows : waits on the thread handle, then closes it and resets mHandle to NULL so the kernel
//           object is released (it was previously leaked). Always returns NULL.
// pthreads: returns the value the thread function returned, or NULL when pthread_join fails.
void * genericThread::Join( void )
{
#if defined(_WIN32)
    if( mHandle != NULL )
    {
        WaitForSingleObject( (HANDLE)mHandle, INFINITE );
        // The handle is not needed once the thread has been waited on; release it exactly once.
        CloseHandle( (HANDLE)mHandle );
        mHandle = NULL;
    }
    return NULL;
#else // !_WIN32
    void * retVal = NULL;
    if( pthread_join( (pthread_t)mHandle, &retVal ) != 0 )
        retVal = NULL;
    return retVal;
#endif // !_WIN32
}

View File

@@ -0,0 +1,42 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _genericThread_h
#define _genericThread_h
#include <stdio.h>
// Minimal thread wrapper. Derive from it, implement IRun(), then call Start() to run IRun() on a
// new thread and Join() to wait for it.
class genericThread
{
public:
    // mHandle starts out as NULL so it never holds an indeterminate value before Start() runs.
    genericThread() : mHandle( NULL ) {}
    virtual ~genericThread() {}

    // Creates the thread; returns true on success.
    bool Start( void );
    // Waits for the thread to finish; see the implementation for the returned value.
    void * Join( void );

protected:
    // Thread body supplied by the derived class.
    virtual void * IRun( void ) = 0;

private:
    // Platform thread handle/id, stored as an opaque pointer-sized value.
    void* mHandle;

    // Entry point given to the platform thread API; `data` is the genericThread instance.
    static void * IStaticReflector( void * data );
};
#endif // _genericThread_h

View File

@@ -0,0 +1,249 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "imageHelpers.h"
// Returns what get_channel_data_type_size() reports for the channel data type of *format.
size_t get_format_type_size( const cl_image_format *format )
{
    const cl_channel_type dataType = format->image_channel_data_type;
    return get_channel_data_type_size( dataType );
}
// Maps a channel data type to a size in bytes. Channel data types that are not listed yield 0.
size_t get_channel_data_type_size( cl_channel_type channelType )
{
    size_t bytes = 0;

    switch( channelType )
    {
        // 32-bit float
        case CL_FLOAT:
            bytes = sizeof( cl_float );
            break;

        // 32-bit integers
        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
            bytes = sizeof( cl_int );
            break;

        // INT_101010 (and, when enabled, the obsolete 8888 / 101010_REV types): 4 bytes
        case CL_UNORM_INT_101010:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_INT_101010_REV:
        case CL_UNORM_INT_8888:
        case CL_UNORM_INT_8888_REV:
#endif
            bytes = 4;
            break;

        // SHORT_565 / SHORT_555 (and, when enabled, their obsolete _REV forms): 2 bytes
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_SHORT_565_REV:
        case CL_UNORM_SHORT_555_REV:
#endif
            bytes = 2;
            break;

        // 16-bit integers and half float
        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_SIGNED_INT16:
        case CL_UNSIGNED_INT16:
        case CL_HALF_FLOAT:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            bytes = sizeof( cl_short );
            break;

        // 8-bit integers
        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SIGNED_INT8:
        case CL_UNSIGNED_INT8:
            bytes = 1;
            break;

        default:
            bytes = 0;
            break;
    }

    return bytes;
}
// Returns what get_channel_order_channel_count() reports for the channel order of *format.
size_t get_format_channel_count( const cl_image_format *format )
{
    const cl_channel_order order = format->image_channel_order;
    return get_channel_order_channel_count( order );
}
// Maps a channel order to its number of channels (1 to 4). Channel orders that are not listed yield 0.
size_t get_channel_order_channel_count( cl_channel_order order )
{
    size_t channels = 0;

    switch( order )
    {
        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            channels = 4;
            break;

        case CL_RGB:
        case CL_RGBx:
            channels = 3;
            break;

        case CL_RG:
        case CL_RA:
        case CL_RGx:
            channels = 2;
            break;

        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_INTENSITY:
        case CL_LUMINANCE:
            channels = 1;
            break;

        default:
            channels = 0;
            break;
    }

    return channels;
}
// Returns 1 when the channel data type of *format is one of the signed types listed below
// (signed/snorm integers, half float, float, and CL_SFIXED14_APPLE when defined); 0 otherwise.
int is_format_signed( const cl_image_format *format )
{
    int isSigned = 0;

    switch( format->image_channel_data_type )
    {
        case CL_FLOAT:
        case CL_HALF_FLOAT:
        case CL_SIGNED_INT32:
        case CL_SIGNED_INT16:
        case CL_SNORM_INT16:
        case CL_SIGNED_INT8:
        case CL_SNORM_INT8:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            isSigned = 1;
            break;

        default:
            break;
    }

    return isSigned;
}
// Returns the size in bytes of one pixel of *format, or 0 for an unrecognised channel data type.
//
// For the SHORT_565/555 and INT_101010 types (plus the obsolete _REV/8888 types when enabled) the
// result is the fixed size reported by get_channel_data_type_size(), independent of the channel
// order. For every other type it is channel count * per-channel size; an unknown data type has a
// per-channel size of 0 and therefore yields 0.
size_t get_pixel_size( cl_image_format *format )
{
    const size_t typeSize = get_channel_data_type_size( format->image_channel_data_type );

    switch( format->image_channel_data_type )
    {
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
        case CL_UNORM_INT_101010:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_SHORT_565_REV:
        case CL_UNORM_SHORT_555_REV:
        case CL_UNORM_INT_8888:
        case CL_UNORM_INT_8888_REV:
        case CL_UNORM_INT_101010_REV:
#endif
            return typeSize;

        default:
            return get_format_channel_count( format ) * typeSize;
    }
}
// Finds a supported image format whose channel data type is one of the 8-bit types
// (CL_SNORM_INT8, CL_UNORM_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT8).
//
// context / objType / flags : forwarded to clGetSupportedImageFormats.
// channelCount              : required number of channels, or 0 to accept any channel count.
// outFormat                 : receives the first matching format.
// Returns 0 on success, the OpenCL error code if the query fails, or -1 when nothing matches.
int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
{
    enum { kMaxFormats = 128 };
    cl_image_format formatList[ kMaxFormats ];
    cl_uint reportedCount = 0;
    cl_uint usableCount, i;
    int error;

    /* Ask the implementation which formats it supports */
    error = clGetSupportedImageFormats( context, flags, objType, kMaxFormats, formatList, &reportedCount );
    if( error )
        return error;

    /* The reported count is the total number supported, which may exceed the number of entries
       actually written; never read past the end of formatList. */
    usableCount = ( reportedCount < (cl_uint)kMaxFormats ) ? reportedCount : (cl_uint)kMaxFormats;

    /* Look for one that is an 8-bit format */
    for( i = 0; i < usableCount; i++ )
    {
        const cl_channel_type dataType = formatList[ i ].image_channel_data_type;

        if( dataType != CL_SNORM_INT8 && dataType != CL_UNORM_INT8 &&
            dataType != CL_SIGNED_INT8 && dataType != CL_UNSIGNED_INT8 )
            continue;

        if( channelCount == 0 || get_format_channel_count( &formatList[ i ] ) == channelCount )
        {
            *outFormat = formatList[ i ];
            return 0;
        }
    }
    return -1;
}
// Finds a supported image format whose channel data type is one of the 32-bit types
// (CL_UNORM_INT_101010, CL_FLOAT, CL_SIGNED_INT32, CL_UNSIGNED_INT32).
//
// context / objType / flags : forwarded to clGetSupportedImageFormats.
// channelCount              : required number of channels, or 0 to accept any channel count.
// outFormat                 : receives the first matching format.
// Returns 0 on success, the OpenCL error code if the query fails, or -1 when nothing matches.
int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
{
    enum { kMaxFormats = 128 };
    cl_image_format formatList[ kMaxFormats ];
    cl_uint reportedCount = 0;
    cl_uint usableCount, i;
    int error;

    /* Ask the implementation which formats it supports */
    error = clGetSupportedImageFormats( context, flags, objType, kMaxFormats, formatList, &reportedCount );
    if( error )
        return error;

    /* The reported count is the total number supported, which may exceed the number of entries
       actually written; never read past the end of formatList. */
    usableCount = ( reportedCount < (cl_uint)kMaxFormats ) ? reportedCount : (cl_uint)kMaxFormats;

    /* Look for one that is a 32-bit format */
    for( i = 0; i < usableCount; i++ )
    {
        const cl_channel_type dataType = formatList[ i ].image_channel_data_type;

        if( dataType != CL_UNORM_INT_101010 && dataType != CL_FLOAT &&
            dataType != CL_SIGNED_INT32 && dataType != CL_UNSIGNED_INT32 )
            continue;

        if( channelCount == 0 || get_format_channel_count( &formatList[ i ] ) == channelCount )
        {
            *outFormat = formatList[ i ];
            return 0;
        }
    }
    return -1;
}

View File

@@ -0,0 +1,37 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _imageHelpers_h
#define _imageHelpers_h
#include "errorHelpers.h"
/* Size in bytes reported by get_channel_data_type_size() for the format's channel data type */
extern size_t get_format_type_size( const cl_image_format *format );
/* Size in bytes associated with a channel data type; 0 for types the helper does not recognise */
extern size_t get_channel_data_type_size( cl_channel_type channelType );
/* Number of channels reported by get_channel_order_channel_count() for the format's channel order */
extern size_t get_format_channel_count( const cl_image_format *format );
/* Number of channels (1-4) implied by a channel order; 0 for orders the helper does not recognise */
extern size_t get_channel_order_channel_count( cl_channel_order order );
/* 1 when the format's channel data type is in the helper's list of signed types, 0 otherwise */
extern int is_format_signed( const cl_image_format *format );
/* Size in bytes of one pixel of the format; 0 for channel data types the helper does not recognise */
extern size_t get_pixel_size( cl_image_format *format );
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
/* channelCount of 0 accepts any channel count. Returns 0 on success, a CL error code if the query fails, -1 if no format matches */
extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
/* channelCount of 0 accepts any channel count. Returns 0 on success, a CL error code if the query fails, -1 if no format matches */
extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
#endif // _imageHelpers_h

View File

@@ -0,0 +1,684 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "kernelHelpers.h"
#include "errorHelpers.h"
#include "imageHelpers.h"
#if defined(__MINGW32__)
#include "mingw_compat.h"
#endif
int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName )
{
int error = CL_SUCCESS;
/* Create the program object from source */
*outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
if( *outProgram == NULL || error != CL_SUCCESS)
{
print_error( error, "clCreateProgramWithSource failed" );
return error;
}
/* Compile the program */
int buildProgramFailed = 0;
int printedSource = 0;
error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL );
if (error != CL_SUCCESS)
{
unsigned int i;
print_error(error, "clBuildProgram failed");
buildProgramFailed = 1;
printedSource = 1;
log_error( "Original source is: ------------\n" );
for( i = 0; i < numKernelLines; i++ )
log_error( "%s", kernelProgram[ i ] );
}
// Verify the build status on all devices
cl_uint deviceCount = 0;
error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
if (error != CL_SUCCESS) {
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
return error;
}
if (deviceCount == 0) {
log_error("No devices found for program.\n");
return -1;
}
cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
if( NULL == devices )
return -1;
memset( devices, 0, deviceCount * sizeof( cl_device_id ));
error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
if (error != CL_SUCCESS) {
print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
free( devices );
return error;
}
cl_uint z;
for( z = 0; z < deviceCount; z++ )
{
char deviceName[4096] = "";
error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
if (error != CL_SUCCESS || deviceName[0] == '\0') {
log_error("Device \"%d\" failed to return a name\n", z);
print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
}
cl_build_status buildStatus;
error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
if (error != CL_SUCCESS) {
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
free( devices );
return error;
}
if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
char log[10240] = "";
if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
char statusString[64] = "";
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
sprintf(statusString, "CL_BUILD_SUCCESS");
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
sprintf(statusString, "CL_BUILD_NONE");
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
sprintf(statusString, "CL_BUILD_ERROR");
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
sprintf(statusString, "CL_BUILD_IN_PROGRESS");
else
sprintf(statusString, "UNKNOWN (%d)", buildStatus);
if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
if (error != CL_SUCCESS || log[0]=='\0'){
log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
if (error) {
print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
free( devices );
return error;
} else {
log_error("clGetProgramBuildInfo returned an empty log.\n");
free( devices );
return -1;
}
}
// In this case we've already printed out the code above.
if (!printedSource)
{
unsigned int i;
log_error( "Original source is: ------------\n" );
for( i = 0; i < numKernelLines; i++ )
log_error( "%s", kernelProgram[ i ] );
printedSource = 1;
}
log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
log_error( "%s\n", log );
log_error( "\n----------\n" );
free( devices );
return -1;
}
}
/* And create a kernel from it */
*outKernel = clCreateKernel( *outProgram, kernelName, &error );
if( *outKernel == NULL || error != CL_SUCCESS)
{
print_error( error, "Unable to create kernel" );
free( devices );
return error;
}
free( devices );
return 0;
}
/* Parses the numeric version out of the device's CL_DEVICE_VERSION string.

   id    : device to query.
   major : receives the number before the decimal point.
   minor : receives the number after the decimal point.

   Returns 0 on success and a non-zero value when the query fails or the string does not have the
   form "OpenCL <major>.<minor>...". The "OpenCL " prefix and the presence of a major number are
   verified instead of being assumed. */
int get_device_version( cl_device_id id, size_t* major, size_t* minor)
{
    static const char prefix[] = "OpenCL ";
    const size_t prefixLen = sizeof( prefix ) - 1;
    char buffer[ 4098 ] = "";
    char *majorStart;
    char *majorEnd;

    // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
    cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, NULL );
    test_error( error, "Unable to get device version string" );
    buffer[ sizeof( buffer ) - 1 ] = '\0';

    if( strncmp( buffer, prefix, prefixLen ) != 0 )
    {
        log_error( "ERROR: Device version string \"%s\" does not start with \"OpenCL \"!\n", buffer );
        return -1;
    }

    majorStart = buffer + prefixLen;
    while( *majorStart == ' ' )
        majorStart++;

    *major = strtol( majorStart, &majorEnd, 10 );
    if( majorEnd == majorStart )
    {
        log_error( "ERROR: Device version string \"%s\" has no major version number!\n", buffer );
        return -1;
    }

    error = *majorEnd != '.';
    test_error(error, "ERROR: Version number must contain a decimal point!");

    *minor = strtol( majorEnd + 1, NULL, 10 );
    return error;
}
/* Computes the largest work-group size that is acceptable to every device of `context` for
   `kernel`, and optionally the per-dimension work-item limits common to all devices.

   context    : context whose devices are examined.
   kernel     : kernel whose CL_KERNEL_WORK_GROUP_SIZE is taken into account.
   outMaxSize : receives the smallest of all device and kernel work-group size limits.
   outLimits  : optional (may be NULL); array of 3 entries receiving, per dimension, the smallest
                CL_DEVICE_MAX_WORK_ITEM_SIZES value over all devices (1 for dimensions the first
                device does not report).

   Returns 0 on success, the failing OpenCL error code, or -1 when the context reports no devices
   or memory cannot be allocated. All temporary allocations are released on every path, and the
   per-device limit buffer is sized from CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS so devices reporting
   more than three dimensions cannot overflow it. */
int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
{
    cl_device_id *devices = NULL;
    size_t *dimLimits = NULL;
    size_t size = 0, maxCommonSize = 0, outSize = 0;
    size_t numDevices, i;
    cl_uint numDims, slots, sharedDims, j;
    int error;

    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to obtain list of devices size for context" );
        return error;
    }

    numDevices = outSize / sizeof( cl_device_id );
    if( numDevices == 0 )
    {
        log_error( "ERROR: Context does not report any devices\n" );
        return -1;
    }

    devices = (cl_device_id *)malloc(outSize);
    if( NULL == devices )
    {
        log_error( "ERROR: Unable to allocate device list for context\n" );
        return -1;
    }

    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to obtain list of devices for context" );
        goto cleanup;
    }

    for( i = 0; i < numDevices; i++ )
    {
        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "Unable to obtain max work group size for device" );
            goto cleanup;
        }
        if( size < maxCommonSize || maxCommonSize == 0)
            maxCommonSize = size;

        error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "Unable to obtain max work group size for device and kernel combo" );
            goto cleanup;
        }
        if( size < maxCommonSize || maxCommonSize == 0)
            maxCommonSize = size;

        numDims = 0;
        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
        if( error != CL_SUCCESS )
        {
            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
            goto cleanup;
        }

        /* At least three slots (outLimits always has three), more if the device reports more. */
        slots = ( numDims > 3 ) ? numDims : 3;
        dimLimits = (size_t *)malloc( slots * sizeof( size_t ) );
        if( NULL == dimLimits )
        {
            log_error( "ERROR: Unable to allocate work item size list\n" );
            error = -1;
            goto cleanup;
        }
        for( j = 0; j < slots; j++ )
            dimLimits[j] = 1;

        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), dimLimits, NULL);
        if( error != CL_SUCCESS )
        {
            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
            goto cleanup;
        }

        if (outLimits != NULL)
        {
            if (i == 0) {
                for (j=0; j<3; j++)
                    outLimits[j] = dimLimits[j];
            } else {
                /* Only the dimensions this device reports can tighten the common limits. */
                sharedDims = ( numDims < 3 ) ? numDims : 3;
                for (j=0; j<sharedDims; j++) {
                    if (dimLimits[j] < outLimits[j])
                        outLimits[j] = dimLimits[j];
                }
            }
        }

        free( dimLimits );
        dimLimits = NULL;
    }

    *outMaxSize = maxCommonSize;
    error = 0;

cleanup:
    free( dimLimits );
    free( devices );
    return error;
}
/* Finds the largest 1D work-group size that divides globalThreadSize evenly and does not exceed
   either the common work-group size limit or the common limit for dimension 0.

   Returns 0 on success (result in *outMaxSize), the error from
   get_max_allowed_work_group_size(), or -1 when the reported limits are zero (which would
   otherwise lead to a division by zero in the search below). */
int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
                                    size_t globalThreadSize, size_t *outMaxSize )
{
    size_t sizeLimit[3];
    size_t candidate;
    int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
    if( error != 0 )
        return error;

    if( *outMaxSize == 0 || sizeLimit[0] == 0 )
    {
        log_error( "ERROR: Device reported a zero work group size limit\n" );
        return -1;
    }

    /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
    /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
       the modulo test will succeed and break the loop anyway */
    candidate = *outMaxSize;
    while( ( globalThreadSize % candidate ) != 0 || candidate > sizeLimit[0] )
        candidate--;

    *outMaxSize = candidate;
    return 0;
}
/* Chooses a 2D work-group shape: each outMaxSizes[i] divides globalThreadSizes[i] evenly, stays
   within the common per-dimension limit, and the product stays within the common work-group
   size limit.

   Returns 0 on success, the error from get_max_allowed_work_group_size(), or -1 when a zero
   limit or zero global size would make the factor search divide by zero. */
int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
                                       size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t sizeLimit[3];
    size_t maxSize;
    size_t remainingSize, sizeForThisOne;
    int i;
    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
    if( error != 0 )
        return error;

    /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
       sizes */

    /* Simple case */
    if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize &&
        globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] )
    {
        outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
        outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
        return 0;
    }

    /* The search below takes remainders by candidate sizes that count down towards the limits;
       reject inputs for which a candidate of zero would be reached. */
    if( maxSize == 0 )
    {
        log_error( "ERROR: Device reported a zero work group size limit\n" );
        return -1;
    }
    for (i=0 ; i<2; i++) {
        if( sizeLimit[i] == 0 || globalThreadSizes[i] == 0 )
        {
            log_error( "ERROR: Zero work item limit or global size in dimension %d\n", i );
            return -1;
        }
    }

    remainingSize = maxSize;
    for (i=0 ; i<2; i++) {
        sizeForThisOne = ( globalThreadSizes[i] > remainingSize ) ? remainingSize : globalThreadSizes[i];
        while( (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]) )
            sizeForThisOne--;
        outMaxSizes[i] = sizeForThisOne;
        /* Budget left for the following dimensions: maxSize divided by every size chosen so far. */
        remainingSize /= sizeForThisOne;
    }
    return 0;
}
/* Chooses a 3D work-group shape: each outMaxSizes[i] divides globalThreadSizes[i] evenly, stays
   within the common per-dimension limit, and the product stays within the common work-group
   size limit.

   Returns 0 on success, the error from get_max_allowed_work_group_size(), or -1 when a zero
   limit or zero global size would make the factor search divide by zero. */
int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
                                       size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t sizeLimit[3];
    size_t maxSize;
    size_t remainingSize, sizeForThisOne;
    int i;
    int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
    if( error != 0 )
        return error;

    /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
       sizes */

    /* Simple case */
    if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize &&
        globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] &&
        globalThreadSizes[ 2 ] <= sizeLimit[2] )
    {
        outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
        outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
        outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
        return 0;
    }

    /* The search below takes remainders by candidate sizes that count down towards the limits;
       reject inputs for which a candidate of zero would be reached. */
    if( maxSize == 0 )
    {
        log_error( "ERROR: Device reported a zero work group size limit\n" );
        return -1;
    }
    for (i=0 ; i<3; i++) {
        if( sizeLimit[i] == 0 || globalThreadSizes[i] == 0 )
        {
            log_error( "ERROR: Zero work item limit or global size in dimension %d\n", i );
            return -1;
        }
    }

    remainingSize = maxSize;
    for (i=0 ; i<3; i++) {
        sizeForThisOne = ( globalThreadSizes[i] > remainingSize ) ? remainingSize : globalThreadSizes[i];
        while( (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]) )
            sizeForThisOne--;
        outMaxSizes[i] = sizeForThisOne;
        /* Budget left for the following dimensions: maxSize divided by every size chosen so far. */
        remainingSize /= sizeForThisOne;
    }
    return 0;
}
/* Helper to determine if an extension is supported by a device */
/* Helper to determine if an extension is supported by a device.

   device        : device whose CL_DEVICE_EXTENSIONS string is examined.
   extensionName : exact extension name to look for.

   Returns 1 when extensionName appears as a complete space-delimited entry of the extension
   string, 0 otherwise (including on any query/allocation failure and for a NULL or empty name).
   Whole-entry matching prevents a name that is merely a prefix or substring of another
   extension's name from being reported as available. */
int is_extension_available( cl_device_id device, const char *extensionName )
{
    char *extString;
    const char *cursor;
    size_t size = 0;
    size_t nameLen;
    int err;
    int result = 0;

    if( NULL == extensionName || '\0' == extensionName[ 0 ] )
        return 0;
    nameLen = strlen( extensionName );

    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) ))
    {
        log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        return 0;
    }

    if( 0 == size )
        return 0;

    /* One extra byte so the buffer is NUL-terminated no matter what the query writes. */
    extString = (char*) malloc( size + 1 );
    if( NULL == extString )
    {
        log_error( "Error: unable to allocate %lu byte buffer for extension string at %s:%d (err = %d)\n", (unsigned long)size, __FILE__, __LINE__, err );
        return 0;
    }
    extString[ 0 ] = '\0';

    if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) ))
    {
        log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        free( extString );
        return 0;
    }
    extString[ size ] = '\0';

    /* Visit every occurrence of the name and accept only one delimited by spaces / string ends. */
    for( cursor = strstr( extString, extensionName ); cursor != NULL; cursor = strstr( cursor + nameLen, extensionName ) )
    {
        const int startsEntry = ( cursor == extString ) || ( cursor[ -1 ] == ' ' );
        const int endsEntry = ( cursor[ nameLen ] == ' ' ) || ( cursor[ nameLen ] == '\0' );
        if( startsEntry && endsEntry )
        {
            result = 1;
            break;
        }
    }

    free( extString );
    return result;
}
/* Helper to determine if a device supports an image format */
/* Helper to determine if a device supports an image format.

   context    : context to query.
   flags      : memory flags the image would be created with.
   image_type : image object type.
   fmt        : format to look for (channel order and channel data type must both match).

   Returns 1 when the format is in the list reported by clGetSupportedImageFormats, 0 when it
   is not or when the list cannot be obtained. */
int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
{
    cl_image_format *list;
    cl_uint count = 0;
    cl_uint i;
    int found = 0;

    /* First call only asks how many formats there are. */
    cl_int err = clGetSupportedImageFormats( context, flags, image_type, 0, NULL, &count );
    if( err != CL_SUCCESS || count == 0 )
        return 0;

    list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
    if( NULL == list )
    {
        log_error( "Error: unable to allocate %lu byte buffer for image format list at %s:%d (err = %d)\n", (unsigned long)( count * sizeof( cl_image_format ) ), __FILE__, __LINE__, err );
        return 0;
    }

    err = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
    if( err != CL_SUCCESS )
    {
        /* Report the code of the call that actually failed. */
        log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
        free( list );
        return 0;
    }

    // iterate looking for a match.
    for( i = 0; i < count; i++ )
    {
        if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
            fmt->image_channel_order == list[ i ].image_channel_order )
        {
            found = 1;
            break;
        }
    }

    free( list );
    return found;
}
size_t get_pixel_bytes( const cl_image_format *fmt );

/* Returns the size in bytes of one pixel of *fmt.
   The channel order is validated first, then the channel data type; an unknown value in either
   field is logged and the process is aborted. SHORT_565/555 and INT_101010 have a fixed pixel
   size; every other type is channel count * bytes per channel. */
size_t get_pixel_bytes( const cl_image_format *fmt )
{
    size_t channels;
    size_t bytesPerChannel;

    switch( fmt->image_channel_order )
    {
        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            channels = 4;
            break;

        case CL_RGB:
        case CL_RGBx:
            channels = 3;
            break;

        case CL_RG:
        case CL_RA:
        case CL_RGx:
            channels = 2;
            break;

        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_INTENSITY:
        case CL_LUMINANCE:
            channels = 1;
            break;

        default:
            log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
            abort();
            break;
    }

    switch( fmt->image_channel_data_type )
    {
        /* Fixed-size pixels: the channel count does not enter the result. */
        case CL_UNORM_INT_101010:
            return 4;
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
            return 2;

        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
        case CL_FLOAT:
            bytesPerChannel = 4;
            break;

        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_HALF_FLOAT:
        case CL_SIGNED_INT16:
        case CL_UNSIGNED_INT16:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            bytesPerChannel = 2;
            break;

        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SIGNED_INT8:
        case CL_UNSIGNED_INT8:
            bytesPerChannel = 1;
            break;

        default:
            log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
            abort();
            return 0;
    }

    return channels * bytesPerChannel;
}
/* Returns 0 when checkForImageSupport() returns 0 for the device; for any non-zero result it
   logs an error and returns CL_IMAGE_FORMAT_NOT_SUPPORTED. */
int verifyImageSupport( cl_device_id device )
{
    const int checkResult = checkForImageSupport( device );

    if( checkResult == 0 )
        return 0;

    log_error( "ERROR: Device does not supported images as required by this test!\n" );
    return CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
/* Queries CL_DEVICE_IMAGE_SUPPORT. Returns 0 when the device reports a non-zero value and
   CL_IMAGE_FORMAT_NOT_SUPPORTED when it reports 0. A failed query is handled by test_error. */
int checkForImageSupport( cl_device_id device )
{
    cl_uint imageSupport;
    int error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( imageSupport ), &imageSupport, NULL );
    test_error( error, "Unable to query device for image support" );

    return ( imageSupport != 0 ) ? 0 : CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
/* Returns 0 when the device supports 3D images, CL_IMAGE_FORMAT_NOT_SUPPORTED otherwise.
   A device without any image support is rejected outright. A device whose CL_DEVICE_PROFILE is
   "EMBEDDED_PROFILE" is additionally rejected when its maximum 3D image width, height and depth
   are all zero. Failed queries are handled by test_error. */
int checkFor3DImageSupport( cl_device_id device )
{
    cl_uint imageSupport;
    char profile[128];
    int error;

    /* Check the device props to see if images are supported at all first */
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( imageSupport ), &imageSupport, NULL );
    test_error( error, "Unable to query device for image support" );
    if( imageSupport == 0 )
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
    test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );

    /* Only embedded-profile devices get the extra dimension check. */
    if( strcmp( profile, "EMBEDDED_PROFILE" ) != 0 )
        return 0;

    size_t width = -1L;
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );

    size_t height = -1L;
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );

    size_t depth = -1L;
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );

    if( width == 0 && height == 0 && depth == 0 )
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    /* So our support is good */
    return 0;
}
/* Allocates `size` bytes aligned to at least `alignment` bytes.

   size      : number of bytes to allocate.
   alignment : requested alignment; expected to be a power of two. Values smaller than
               sizeof(void *) (including 0) are raised to sizeof(void *), because posix_memalign
               rejects smaller alignments; the stricter alignment still satisfies the request.

   Returns the block, or NULL on failure. Release it with align_free(). */
void * align_malloc(size_t size, size_t alignment)
{
    if (alignment < sizeof(void *))
        alignment = sizeof(void *);

#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    void * ptr = NULL;
    if (0 == posix_memalign(&ptr, alignment, size))
        return ptr;
    return NULL;
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif
}
/* Releases a block obtained from align_malloc(), using the deallocator that matches the
   allocator selected for this platform. The calls are plain statements rather than
   `return <void expression>;`, which ISO C does not allow in a function returning void. */
void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
/* Returns, in bytes, the largest CL_DEVICE_MEM_BASE_ADDR_ALIGN among the devices of `context`
   (the device value is reported in bits and converted here).

   The result is cached in a function-local static: once a non-zero value has been computed it is
   returned for every later call, whatever context is passed. Returns 0 when the device list
   cannot be obtained. The temporary device list is released on every path. */
size_t get_min_alignment(cl_context context)
{
    static cl_uint align_size = 0;

    if( 0 == align_size )
    {
        cl_device_id * devices;
        size_t devices_size = 0;
        size_t device_count, i;
        cl_uint result = 0;
        cl_int error;

        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  0,
                                  NULL,
                                  &devices_size);
        test_error_ret(error, "clGetContextInfo failed", 0);

        devices = (cl_device_id*)malloc(devices_size);
        if (devices == NULL) {
            print_error( error, "malloc failed" );
            return 0;
        }

        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  devices_size,
                                  (void*)devices,
                                  NULL);
        if (error != CL_SUCCESS) {
            /* Handled by hand instead of test_error_ret so the device list is freed first. */
            print_error( error, "clGetContextInfo failed" );
            free(devices);
            return 0;
        }

        device_count = devices_size / sizeof(cl_device_id);
        for (i = 0; i < device_count; i++)
        {
            cl_uint alignment = 0;

            error = clGetDeviceInfo (devices[i],
                                     CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                                     sizeof(cl_uint),
                                     (void*)&alignment,
                                     NULL);

            if (error == CL_SUCCESS)
            {
                alignment >>= 3;    // convert bits to bytes
                result = (alignment > result) ? alignment : result;
            }
            else
                print_error( error, "clGetDeviceInfo failed" );
        }

        align_size = result;
        free(devices);
    }

    return align_size;
}
/* Determines the default single-precision rounding mode of a device.

   Returns CL_FP_ROUND_TO_NEAREST when the device's CL_DEVICE_SINGLE_FP_CONFIG includes it.
   Otherwise returns CL_FP_ROUND_TO_ZERO, but only for devices whose CL_DEVICE_PROFILE is
   "EMBEDDED_PROFILE". Every failure (query error, neither rounding mode supported, or a
   non-embedded device without round-to-nearest) goes through test_error_ret with a return
   value of 0.

   NOTE(review): the non-embedded check passes -1 explicitly, like the rounding-mode check above
   it. `error` is 0 at that point, so passing it would presumably never trip the macro -- confirm
   against the test_error_ret definition. */
cl_device_fp_config get_default_rounding_mode( cl_device_id device )
{
    char profileStr[128] = "";
    cl_device_fp_config single = 0;
    int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
    if( error )
        test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );

    if( single & CL_FP_ROUND_TO_NEAREST )
        return CL_FP_ROUND_TO_NEAREST;

    if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
        test_error_ret( -1, "FAILURE: device must support either CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST", 0 );

    // Make sure we are an embedded device before allowing a pass
    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), profileStr, NULL ) ))
        test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );

    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
        test_error_ret( -1, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );

    return CL_FP_ROUND_TO_ZERO;
}
/* Returns 1 when any bit of `prop` is set in the device's CL_DEVICE_QUEUE_PROPERTIES and 0
   otherwise. A failed query is handled by test_error_ret with a return value of 0. */
int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
{
    cl_command_queue_properties supported;
    cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( supported ), &supported, NULL );
    test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );

    if( ( supported & prop ) != 0 )
        return 1;
    return 0;
}
/* Logs a one-line summary of the device: name, vendor, device version and, when the last query
   succeeded, the OpenCL C version. Each query failure is handled by test_error. Returns
   CL_SUCCESS once the line has been logged. */
int printDeviceHeader( cl_device_id device )
{
    char name[ 512 ];
    char vendor[ 512 ];
    char version[ 512 ];
    char clCVersion[ 512 ];
    const char *clCLabel;
    const char *clCValue;
    int error;

    error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( name ), name, NULL );
    test_error( error, "Unable to get CL_DEVICE_NAME for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( vendor ), vendor, NULL );
    test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( version ), version, NULL );
    test_error( error, "Unable to get CL_DEVICE_VERSION for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( clCVersion ), clCVersion, NULL );
    test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );

    /* The C-version suffix is only appended when the last query reported success. */
    if( error == CL_SUCCESS )
    {
        clCLabel = ", CL C Version = ";
        clCValue = clCVersion;
    }
    else
    {
        clCLabel = "";
        clCValue = "";
    }

    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
             name, vendor, version, clCLabel, clCValue );

    return CL_SUCCESS;
}

View File

@@ -0,0 +1,128 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _kernelHelpers_h
#define _kernelHelpers_h
#include "compat.h"
#include <stdio.h>
#include <stdlib.h>
#if defined (__MINGW32__)
#include <malloc.h>
#endif
#include <string.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
/*
* The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
*
* const char *source = {
* INIT_OPENCL_DEBUG_INFO
* "__kernel void foo( int x )\n"
* "{\n"
* " ...\n"
* "}\n"
* };
*/
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
#ifndef STRINGIFY_VALUE
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
#endif
#ifndef STRINGIFY
#define STRINGIFY(_x) #_x
#endif
/* Helper that creates a single program and kernel from a single-kernel program source */
extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName );
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
/* 2D variant of get_max_common_work_group_size: globalThreadSize and outSizes are arrays (presumably two entries each; confirm against the implementation) */
extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
/* 3D variant of get_max_common_work_group_size: globalThreadSize and outSizes are arrays (presumably three entries each; confirm against the implementation) */
extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
/* Helper to get major/minor number for a device */
extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
/* Helper to determine if an extension is supported by a device */
extern int is_extension_available( cl_device_id device, const char *extensionName );
/* Helper to determine if a device supports an image format */
extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
/* Helper to get pixel size for a pixel format */
size_t get_pixel_bytes( const cl_image_format *fmt );
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
extern int verifyImageSupport( cl_device_id device );
/* Checks that the given device supports images. Same as verify, but doesn't print an error */
extern int checkForImageSupport( cl_device_id device );
/* Same check for 3D images; PASSIVE_REQUIRE_3D_IMAGE_SUPPORT treats a non-zero result as "not supported" */
extern int checkFor3DImageSupport( cl_device_id device );
/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
/* Helper for aligned memory allocation; memory from align_malloc is presumably released with align_free (confirm in the implementation) */
void * align_malloc(size_t size, size_t alignment);
void align_free(void *);
/* Helper to obtain the min alignment for a given context, i.e. the max of all min alignments for devices attached to the context */
size_t get_min_alignment(cl_context context);
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
/*
 * Skips the calling test (logs a note and returns 0 from the enclosing
 * function) when checkForImageSupport reports a non-zero result.
 * NOTE(review): the expansion is a bare if-block, not do { } while (0), so it
 * must not be followed by an else; left as-is because call sites may omit the
 * trailing semicolon.
 */
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
if( checkForImageSupport( device ) ) \
{ \
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
return 0; \
}
/* Same as PASSIVE_REQUIRE_IMAGE_SUPPORT, but keyed on checkFor3DImageSupport. */
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
if( checkFor3DImageSupport( device ) ) \
{ \
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
return 0; \
}
/* Prints out the standard device header for all tests given the device to print for */
extern int printDeviceHeader( cl_device_id device );
#ifdef __cplusplus
}
#endif // __cplusplus
#endif // _kernelHelpers_h

View File

@@ -0,0 +1,59 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#if defined(__MINGW32__)
#include "mingw_compat.h"
#include <stdio.h>
#include <string.h>
//This function is unavailable on various mingw compilers,
//especially 64 bit so implementing it here
const char *basename_dot=".";

/*
 * Minimal basename() replacement for MinGW.
 *
 * Returns "." for a NULL or empty path. A path without a drive specifier
 * (second character is not ':') is returned unchanged. Otherwise trailing
 * path separators are trimmed in place and a pointer to the component after
 * the last '\\' or '/' is returned (the whole path if it has no separator).
 *
 * The input buffer may be modified, and the result points into it.
 */
char*
basename(char *path)
{
    char *p;
    char *base;
    size_t len;

    // Check for NULL/empty before touching the string at all
    if (path == NULL || path[0] == '\0') {
        return (char*)basename_dot;
    }

    // Not absolute path on windows
    if (path[1] != ':') {
        return path;
    }

    // Trim trailing path separators, but never eat into the drive specifier
    len = strlen(path);
    while (len > 2 && (path[len - 1] == '\\' || path[len - 1] == '/')) {
        len--;
        path[len] = '\0';
    }

    // The base name starts right after the last remaining separator
    base = path;
    for (p = path; *p != '\0'; p++) {
        if (*p == '\\' || *p == '/') {
            base = p + 1;
        }
    }
    return base;
}
#endif

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef MINGW_COMPAT_H
#define MINGW_COMPAT_H
/* Everything below is only declared when building with a MinGW compiler. */
#if defined(__MINGW32__)
/* Replacement for the POSIX basename(); implemented in the MinGW compatibility source. */
char *basename(char *path);
#include <malloc.h>
#if defined(__MINGW64__)
// mingw-w64 does not have __mingw_aligned_malloc; it provides _aligned_malloc instead,
// so the __mingw_* names are mapped onto the _aligned_* functions.
#define __mingw_aligned_malloc _aligned_malloc
#define __mingw_aligned_free _aligned_free
#include <stddef.h>
#endif //(__MINGW64__)
#endif //(__MINGW32__)
#endif // MINGW_COMPAT_H

View File

@@ -0,0 +1,749 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#if defined(_WIN32) && defined (_MSC_VER)
#include "compat.h"
#include <math.h>
#include <float.h>
#include <assert.h>
#include <CL/cl_platform.h>
///////////////////////////////////////////////////////////////////
//
// rint, rintf
//
///////////////////////////////////////////////////////////////////
/* Returns x with its sign bit replaced by the sign bit of y (single precision). */
float copysignf( float x, float y )
{
    union{ cl_uint u; float f; } mag, sgn;
    cl_uint magnitudeBits;
    cl_uint signBit;

    mag.f = x;
    sgn.f = y;
    magnitudeBits = mag.u & 0x7fffffffU;   // everything but the sign of x
    signBit = sgn.u & 0x80000000U;         // only the sign of y
    mag.u = magnitudeBits | signBit;
    return mag.f;
}
/* Returns x with its sign bit replaced by the sign bit of y (double precision). */
double copysign( double x, double y )
{
    union{ cl_ulong u; double f; } mag, sgn;
    cl_ulong magnitudeBits;
    cl_ulong signBit;

    mag.f = x;
    sgn.f = y;
    magnitudeBits = mag.u & 0x7fffffffffffffffULL;   // everything but the sign of x
    signBit = sgn.u & 0x8000000000000000ULL;         // only the sign of y
    mag.u = magnitudeBits | signBit;
    return mag.f;
}
/*
 * Returns x with the sign of y (long double).
 * The union overlays a 64-bit mantissa followed by a 16-bit sign/exponent
 * word, i.e. it presumes the 80-bit extended layout (NOTE(review): confirm
 * this matches the long double format of the compiler in use).
 */
long double copysignl( long double x, long double y )
{
    union
    {
        long double f;
        struct{ cl_ulong m; cl_ushort sexp; }u;
    } mag, sgn;

    mag.f = x;
    sgn.f = y;
    // keep the 15 exponent bits of x, take the top (sign) bit from y
    mag.u.sexp = (mag.u.sexp & 0x7fff) | (sgn.u.sexp & 0x8000);
    return mag.f;
}
/*
 * Rounds x to an integral value using the current rounding mode.
 * Values with magnitude >= 2^23 (and NaN) are already integral or are passed
 * through unchanged. Smaller values are rounded by adding and subtracting
 * 2^23 with the sign of x, and the sign of x is restored on the result.
 */
float rintf(float x)
{
    float magic;
    float rounded;

    if( !( fabsf(x) < 8388608.0f /* 0x1.0p23f */ ) )
        return x;

    magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
    rounded = x + magic;
    rounded -= magic;
    return copysignf( rounded, x );
}
/*
 * Rounds x to an integral value using the current rounding mode.
 * Values with magnitude >= 2^52 (and NaN) are returned unchanged. Smaller
 * values are rounded by adding and subtracting 2^52 with the sign of x, and
 * the sign of x is restored on the result.
 */
double rint(double x)
{
    double magic;
    double rounded;

    if( !( fabs(x) < 4503599627370496.0 /* 0x1.0p52 */ ) )
        return x;

    magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
    rounded = x + magic;
    rounded -= magic;
    return copysign( rounded, x );
}
/*
 * Rounds x to an integral value using the current rounding mode.
 * Values with magnitude >= 2^63 (and NaN) are returned unchanged. Smaller
 * values are rounded by adding and subtracting 2^63 with the sign of x.
 * The 2^63 constant presumes a 64-bit significand (80-bit extended format).
 *
 * The magnitude is computed with fabsl() in long double: narrowing it to
 * double first would round values just below 2^63 up to 2^63 and wrongly
 * skip rounding them.
 */
long double rintl(long double x)
{
    long double absx = fabsl(x);
    if( absx < 9223372036854775808.0L /* 0x1.0p63L */ )
    {
        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
        long double rounded = x + magic;
        rounded -= magic;
        x = copysignl( rounded, x );
    }
    return x;
}
///////////////////////////////////////////////////////////////////
//
// ilogb, ilogbf, ilogbl
//
///////////////////////////////////////////////////////////////////
#ifndef FP_ILOGB0
#define FP_ILOGB0 INT_MIN
#endif
#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN INT_MIN
#endif
/*
 * Returns the unbiased binary exponent of x as an int.
 * Zero yields FP_ILOGB0, infinity yields INT_MAX, NaN yields FP_ILOGBNAN.
 * Subnormals are renormalised so that their true exponent is returned.
 */
int ilogb (double x)
{
    union{ double f; cl_ulong u;} u;
    u.f = x;
    cl_ulong absx = u.u & CL_LONG_MAX;
    // 0x0010000000000000 is the bit pattern of the smallest normal double.
    // The unsigned subtraction wraps for smaller patterns, so this single
    // comparison catches zero, subnormals, infinity and NaN.
    if( absx - 0x0010000000000000ULL >= 0x7ff0000000000000ULL - 0x0010000000000000ULL)
    {
        switch( absx )
        {
            case 0:
                return FP_ILOGB0;
            case 0x7ff0000000000000ULL:
                return INT_MAX;
            default:
                if( absx > 0x7ff0000000000000ULL )
                    return FP_ILOGBNAN;
                // subnormal: graft the mantissa onto exponent 0, subtract the
                // implicit 1.0 and read the exponent of the normalised result
                u.u = absx | 0x3ff0000000000000ULL;
                u.f -= 1.0;
                return (u.u >> 52) - (1023 + 1022);
        }
    }
    // normal number: remove the exponent bias
    return (absx >> 52) - 1023;
}
/*
 * Returns the unbiased binary exponent of x as an int.
 * Zero yields FP_ILOGB0, infinity yields INT_MAX, NaN yields FP_ILOGBNAN.
 * Subnormals are renormalised so that their true exponent is returned.
 */
int ilogbf (float x)
{
    union{ float f; cl_uint u;} pun;
    cl_uint magnitude;

    pun.f = x;
    magnitude = pun.u & 0x7fffffff;

    // Normal numbers: the unsigned subtraction wraps for anything below the
    // smallest normal pattern, so one comparison selects the normal range.
    if( magnitude - 0x00800000U < 0x7f800000U - 0x00800000U )
        return (magnitude >> 23) - 127;

    if( 0 == magnitude )
        return FP_ILOGB0;
    if( 0x7f800000U == magnitude )
        return INT_MAX;
    if( magnitude > 0x7f800000 )
        return FP_ILOGBNAN;

    // subnormal: graft the mantissa onto exponent 0, subtract the implicit
    // 1.0f and read the exponent of the normalised result
    pun.u = magnitude | 0x3f800000U;
    pun.f -= 1.0f;
    return (pun.u >> 23) - (127 + 126);
}
/*
 * Returns the unbiased binary exponent of a long double.
 * The union overlays a 64-bit mantissa word and a 16-bit sign/exponent word,
 * so this presumes the 80-bit extended layout (NOTE(review): confirm for the
 * compiler in use).
 */
int ilogbl (long double x)
{
union
{
long double f;
struct{ cl_ulong m; cl_ushort sexp; }u;
} u;
u.f = x;
// exponent field with the sign bit masked off
int exp = u.u.sexp & 0x7fff;
if( 0 == exp )
{
// exponent field 0 with a zero mantissa is +/-0
if( 0 == u.u.m )
return FP_ILOGB0;
//subnormal
// give the mantissa the exponent of 1.0, subtract 1.0 and read back the
// exponent of the renormalised difference
u.u.sexp = 0x3fff;
u.f -= 1.0f;
exp = u.u.sexp & 0x7fff;
return exp - (0x3fff + 0x3ffe);
}
else if( 0x7fff == exp )
{
// all-ones exponent: any mantissa bit below the top one means NaN
if( u.u.m & CL_LONG_MAX )
return FP_ILOGBNAN;
return INT_MAX;
}
// normal number: remove the exponent bias
return exp - 0x3fff;
}
///////////////////////////////////////////////////////////////////
//
// fmax, fmin, fmaxf, fminf
//
///////////////////////////////////////////////////////////////////
/* Stores the raw bit pattern of the float fx into *ux. */
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
    volatile union {float f; unsigned int u;} pun;

    pun.f = fx;
    ux[0] = pun.u;
}
/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
/* { */
/* volatile union {float f; unsigned int i;} _bitsy; */
/* _bitsy.f = (fx); */
/* *ux = _bitsy.i; */
/* } */
/* Stores into *fx the float whose raw bit pattern is ux. */
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
    volatile union {float f; unsigned int u;} pun;

    pun.u = ux;
    fx[0] = pun.f;
}
/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
/* { */
/* volatile union {float f; unsigned int i;} _bitsy; */
/* _bitsy.i = (ux); */
/* *fx = _bitsy.f; */
/* } */
/* Stores the raw bit pattern of the double dx into *lx. */
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
    volatile union {double d; unsigned __int64 l;} pun;

    pun.d = dx;
    lx[0] = pun.l;
}
/* Stores into *dx the double whose raw bit pattern is lx. */
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
    volatile union {double d; unsigned __int64 l;} pun;

    pun.l = lx;
    dx[0] = pun.d;
}
#if 0
int SIGNBIT_DP64(double x )
{
int hx;
_GET_HIGH_WORD(hx,x);
return((hx>>31));
}
#endif
/* fmax(x, y) returns the larger (more positive) of x and y.
   NaNs are treated as missing values: if one argument is NaN,
   the other argument is returned. If both arguments are NaN,
   the first argument is returned. */
double fmax(double x, double y)
{
    /* y wins only when it is a number and x is not >= y; the comparison is
       false when x is NaN, which hands the result to y. */
    if( !isnan(y) && !( x >= y ) )
        return y;
    return x;
}
/* fmin(x, y) returns the smaller (more negative) of x and y.
   NaNs are treated as missing values: if one argument is NaN,
   the other argument is returned. If both arguments are NaN,
   the first argument is returned. */
double fmin(double x, double y)
{
    /* y wins only when it is a number and x is not <= y; the comparison is
       false when x is NaN, which hands the result to y. */
    if( !isnan(y) && !( x <= y ) )
        return y;
    return x;
}
/* fmaxf(x, y) returns the larger (more positive) of x and y.
   If exactly one argument is NaN the other is returned; if both are NaN
   the first argument is returned. */
float fmaxf( float x, float y )
{
    /* y wins only when it is a number and x is not >= y (false for NaN x). */
    if( !isnan(y) && !( x >= y ) )
        return y;
    return x;
}
/* fminf(x, y) returns the smaller (more negative) of x and y.
   NaNs are treated as missing values: if one argument is NaN,
   the other argument is returned. If both arguments are NaN,
   the first argument is returned. */
float fminf(float x, float y)
{
    /* y wins only when it is a number and x is not <= y (false for NaN x). */
    if( !isnan(y) && !( x <= y ) )
        return y;
    return x;
}
/*
 * Returns x * 2^n, computed by multiplying with powers of two whose
 * sign/exponent word is written directly into the union (this presumes the
 * 80-bit extended layout; NOTE(review): confirm for the compiler in use).
 *
 * NaN, zero and infinity are returned unchanged. For |n| > 2200 the result
 * is clamped to a zero or an infinity carrying the sign of x; the original
 * code returned +INFINITY even for negative x.
 */
long double scalblnl(long double x, long n)
{
    union
    {
        long double d;
        struct{ cl_ulong m; cl_ushort sexp;}u;
    }u;
    // mantissa with only the top (integer) bit set, i.e. exactly 1.0 * 2^exp
    u.u.m = CL_LONG_MIN;

    // NaN (x != x), zero and infinity (x + x == x) are unaffected by scaling
    if( x != x || x + x == x )
        return x;

    if( n < -2200 )
        return copysignl( 0.0L, x );
    if( n > 2200 )
        return copysignl( (long double) INFINITY, x );

    if( n < 0 )
    {
        // scale down in steps of 2^-1022, then apply the remainder
        u.u.sexp = 0x3fff - 1022;
        while( n <= -1022 )
        {
            x *= u.d;
            n += 1022;
        }
        u.u.sexp = 0x3fff + n;
        x *= u.d;
        return x;
    }
    if( n > 0 )
    {
        // scale up in steps of 2^1023, then apply the remainder
        u.u.sexp = 0x3fff + 1023;
        while( n >= 1023 )
        {
            x *= u.d;
            n -= 1023;
        }
        u.u.sexp = 0x3fff + n;
        x *= u.d;
        return x;
    }
    return x;
}
///////////////////////////////////////////////////////////////////
//
// log2
//
///////////////////////////////////////////////////////////////////
/* log2(e) and log2(10); not referenced by the functions visible in this section. */
const static cl_double log_e_base2 = 1.4426950408889634074;
const static cl_double log_10_base2 = 3.3219280948873623478;
//double log10(double x);

/* Base-2 logarithm, computed as log2(e) * ln(x). */
double log2(double x)
{
    const double naturalLog = log(x);
    return 1.44269504088896340735992468100189214 * naturalLog;
}
long double log2l(long double x)
{
return 1.44269504088896340735992468100189214L * log(x);
}
///////////////////////////////////////////////////////////////////
//
// misc functions
//
///////////////////////////////////////////////////////////////////
/*
// This function is commented out because the Windows implementation should never call munmap.
// If it is calling it, we have a bug. Please file a bugzilla.
int munmap(void *addr, size_t len)
{
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
return (int)VirtualAlloc( (LPVOID)addr, len,
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
}
*/
uint64_t ReadTime( void )
{
LARGE_INTEGER current;
QueryPerformanceCounter(&current);
return (uint64_t)current.QuadPart;
}
/*
 * Converts the difference between two ReadTime-style tick counts into
 * nanoseconds. The counter frequency is queried on the first call and cached
 * in a function-local static.
 */
double SubtractTime( uint64_t endTime, uint64_t startTime )
{
    static double ticksPerSecond = 0.0;
    uint64_t elapsedTicks;

    if (ticksPerSecond == 0.0) {
        LARGE_INTEGER frequency;
        QueryPerformanceFrequency(&frequency);
        ticksPerSecond = (double) frequency.QuadPart;
    }

    elapsedTicks = endTime - startTime;
    return (double) elapsedTicks / ticksPerSecond * 1e9;
}
/*
 * Returns a single-precision quiet NaN with bit pattern 0x7fc00000
 * (sign 0, exponent all ones, top mantissa bit set).
 *
 * The bits are reinterpreted through a union, the idiom used by the other
 * helpers in this file, instead of dereferencing an int32_t object through a
 * float pointer.
 */
float make_nan()
{
    union { int32_t i; float f; } bits;

    bits.i = 0x7fc00000;
    return bits.f;
}
/*
 * Returns a single-precision quiet NaN whose payload is taken from the
 * decimal number in str (parsed with atoi). The quiet-NaN bits 0x7fc00000
 * are always OR-ed in, so the result is a NaN for any payload.
 *
 * The bits are reinterpreted through a union rather than a pointer cast.
 */
float nanf( const char* str)
{
    union { cl_uint u; float f; } bits;

    bits.u = atoi( str );
    bits.u |= 0x7fc00000U;
    return bits.f;
}
/*
 * Returns a double-precision quiet NaN whose payload is taken from the
 * decimal number in str (parsed with atoi). The quiet-NaN bits
 * 0x7ff8000000000000 are always OR-ed in, so the result is a NaN for any
 * payload.
 *
 * The bits are reinterpreted through a union rather than a pointer cast.
 */
double nan( const char* str)
{
    union { cl_ulong u; double f; } bits;

    bits.u = atoi( str );
    bits.u |= 0x7ff8000000000000ULL;
    return bits.f;
}
/*
 * Returns a long double quiet NaN whose payload is taken from the decimal
 * number in str (parsed with atoi).
 *
 * The union presumes the 80-bit extended layout (64-bit mantissa followed by
 * a 16-bit sign/exponent word). In that format an all-ones exponent with
 * mantissa 0x8000000000000000 encodes infinity, so the quiet bit (bit 62)
 * must be set together with the integer bit (bit 63) to obtain a NaN for
 * every payload, including 0.
 */
long double nanl( const char* str)
{
    union
    {
        long double f;
        struct { cl_ulong m; cl_ushort sexp; }u;
    }u;
    u.u.sexp = 0x7fff;
    u.u.m = 0xC000000000000000ULL | atoi( str );
    return u.f;
}
double trunc(double x)
{
double absx = fabs(x);
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
{
cl_long rounded = x;
x = copysign( (double) rounded, x );
}
return x;
}
/*
 * Rounds x toward zero to an integral value.
 * Magnitudes >= 2^23 (and NaN) are returned unchanged. Smaller values go
 * through an integer conversion, and the sign of x is restored so that
 * results such as -0.0f keep their sign.
 */
float truncf(float x)
{
    if( fabsf(x) < 8388608.0f /* 0x1.0p23f */ )
    {
        const cl_int whole = x;
        return copysignf( (float) whole, x );
    }
    return x;
}
/*
 * Rounds x to the nearest integer, halfway cases away from zero, and returns
 * it as a long. Magnitudes below 0.5 give 0; results at or above LONG_MAX are
 * clamped to LONG_MAX.
 */
long lround(double x)
{
    const double magnitude = fabs(x);

    if( magnitude < 0.5 )
        return 0;

    if( magnitude < 4503599627370496.0 /* 0x1.0p52 */)
    {
        // add one half and chop the fraction, then put the sign back
        double shifted = magnitude + 0.5;
        cl_long whole = shifted;
        shifted = whole;
        x = copysign( shifted, x );
    }

    return ( x >= (double) LONG_MAX ) ? LONG_MAX : (long) x;
}
/*
 * Rounds x to the nearest integer, halfway cases away from zero, and returns
 * it as a long. Magnitudes below 0.5 give 0; results at or above LONG_MAX are
 * clamped to LONG_MAX.
 */
long lroundf(float x)
{
    const float magnitude = fabsf(x);

    if( magnitude < 0.5f )
        return 0;

    if( magnitude < 8388608.0f )
    {
        // add one half and chop the fraction, then put the sign back
        float shifted = magnitude + 0.5f;
        cl_int whole = shifted;
        shifted = whole;
        x = copysignf( shifted, x );
    }

    return ( x >= (float) LONG_MAX ) ? LONG_MAX : (long) x;
}
/*
 * Rounds x to the nearest integral value, halfway cases away from zero.
 * Magnitudes below 0.5 give a zero with the sign of x; magnitudes >= 2^52
 * (and NaN) are returned unchanged.
 */
double round(double x)
{
    double magnitude = fabs(x);
    cl_long whole;

    if( magnitude < 0.5 )
        return copysign( 0.0, x);
    if( !( magnitude < 4503599627370496.0 /* 0x1.0p52 */ ) )
        return x;

    // add one half and chop the fraction, then put the sign back
    magnitude += 0.5;
    whole = magnitude;
    magnitude = whole;
    return copysign( magnitude, x );
}
/*
 * Rounds x to the nearest integral value, halfway cases away from zero.
 * Magnitudes below 0.5 give a zero with the sign of x; magnitudes >= 2^23
 * (and NaN) are returned unchanged.
 */
float roundf(float x)
{
    float magnitude = fabsf(x);
    cl_int whole;

    if( magnitude < 0.5f )
        return copysignf( 0.0f, x);
    if( !( magnitude < 8388608.0f ) )
        return x;

    // add one half and chop the fraction, then put the sign back
    magnitude += 0.5f;
    whole = magnitude;
    magnitude = whole;
    return copysignf( magnitude, x );
}
/*
 * Rounds x to the nearest integral value, halfway cases away from zero.
 * Magnitudes below 0.5 give a zero with the sign of x; magnitudes >= 2^63
 * (and NaN) are returned unchanged.
 */
long double roundl(long double x)
{
    long double magnitude = fabsl(x);
    cl_ulong whole;

    if( magnitude < 0.5L )
        return copysignl( 0.0L, x);
    if( !( magnitude < 9223372036854775808.0L /*0x1.0p63L*/ ) )
        return x;

    // add one half and chop the fraction, then put the sign back
    magnitude += 0.5L;
    whole = magnitude;
    magnitude = whole;
    return copysignl( magnitude, x );
}
/* Returns 1 when the sign bit of x is set (including -0.0 and negative NaN), else 0. */
int signbit(double x)
{
    union
    {
        double f;
        cl_ulong u;
    } pun;
    cl_ulong topBit;

    pun.f = x;
    topBit = pun.u >> 63;
    return topBit;
}
/* Returns 1 when the sign bit of x is set (including -0.0f and negative NaN), else 0. */
int signbitf(float x)
{
    union
    {
        float f;
        cl_uint u;
    } pun;
    cl_uint topBit;

    pun.f = x;
    topBit = pun.u >> 31;
    return topBit;
}
/*
 * Cube root of x: computed in double as pow(|x|, 1/3), narrowed to float,
 * then given the sign of x.
 */
float cbrtf( float x )
{
    const double magnitude = fabs((double) x);
    float root = pow( magnitude, 1.0 / 3.0 );

    return copysignf( root, x );
}
/* Cube root of x: pow(|x|, 1/3) with the sign of x applied to the result. */
double cbrt( double x )
{
    const double root = pow( fabs( x ), 1.0 / 3.0 );

    return copysign( root, x );
}
/* Reinterprets the 32-bit pattern ix as a float (no numeric conversion). */
float int2float (int32_t ix)
{
    union {
        float f;
        int32_t i;
    } pun;

    pun.i = ix;
    return pun.f;
}
/* Returns the raw 32-bit pattern of the float fx (no numeric conversion). */
int32_t float2int (float fx)
{
    union {
        float f;
        int32_t i;
    } pun;

    pun.f = fx;
    return pun.i;
}
#if defined(_MSC_VER) && !defined(_WIN64)
/** Returns the number of leading 0-bits in x,
starting at the most significant bit position.
If x is 0, the result is undefined.
*/
int __builtin_clz(unsigned int pattern)
{
    // _BitScanReverse reports the index of the highest set bit and returns
    // zero when no bit is set at all.
    unsigned long highestSetBit;

    if( !_BitScanReverse( &highestSetBit, pattern) )
        return 8*sizeof(int);

    return 8*sizeof(int) - 1 - highestSetBit;
}
#else
/*
 * Counts the leading zero bits of a 32-bit pattern; returns 32 for 0.
 * Binary search: each step tests whether the value reaches into the upper
 * part of the remaining window and, if so, shifts it down.
 */
int __builtin_clz(unsigned int pattern)
{
    static const unsigned int steps[4] = { 16u, 8u, 4u, 2u };
    int zeros = 31;
    unsigned int i;

    if( 0u == pattern )
        return 32;

    for( i = 0; i < 4; i++ )
    {
        if( pattern >= ( 1u << steps[i] ) )
        {
            pattern >>= steps[i];
            zeros -= (int) steps[i];
        }
    }
    if( pattern >= 2u )
        zeros -= 1;
    return zeros;
}
#endif //defined(_MSC_VER) && !defined(_WIN64)
#include <intrin.h>
#include <emmintrin.h>
/*
 * Rounds x to an integer using the current rounding mode and returns it as a
 * long. Values at or above LONG_MAX are clamped to LONG_MAX. Magnitudes below
 * 2^52 are rounded by adding and subtracting 2^52 with the sign of x; larger
 * values are converted directly.
 */
long int lrint (double x)
{
    double magic;
    double rounded;

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    if( !( fabs(x) < 4503599627370496.0 /* 0x1.0p52 */ ) )
        return (long int) x;

    magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
    rounded = x + magic;
    rounded -= magic;
    return (long int) rounded;
}
/*
 * Rounds x to an integer using the current rounding mode and returns it as a
 * long. Values at or above LONG_MAX are clamped to LONG_MAX. Magnitudes below
 * 2^23 are rounded by adding and subtracting 2^23 with the sign of x; larger
 * values are converted directly.
 */
long int lrintf (float x)
{
    float magic;
    float rounded;

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    if( !( fabsf(x) < 8388608.0f /* 0x1.0p23f */ ) )
        return (long int) x;

    magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
    rounded = x + magic;
    rounded -= magic;
    return (long int) rounded;
}
/*
 * Sleeps for usec microseconds, rounded up to whole milliseconds because
 * Sleep() takes milliseconds. Always returns 0.
 */
int usleep(int usec)
{
    const int milliseconds = (usec + 999) / 1000;

    Sleep(milliseconds);
    return 0;
}
/*
 * Returns the subset of the FE_* flags in excepts that are currently raised,
 * translating the _SW_* bits reported by _statusfp() into FE_* values.
 */
int fetestexcept(int excepts)
{
    const unsigned int status = _statusfp();
    int raised = 0;

    if( status & _SW_INEXACT )
        raised |= FE_INEXACT;
    if( status & _SW_UNDERFLOW )
        raised |= FE_UNDERFLOW;
    if( status & _SW_OVERFLOW )
        raised |= FE_OVERFLOW;
    if( status & _SW_ZERODIVIDE )
        raised |= FE_DIVBYZERO;
    if( status & _SW_INVALID )
        raised |= FE_INVALID;

    return excepts & raised;
}
/*
 * Clears the floating-point status via _clearfp() and returns 0.
 * The excepts mask is not consulted: _clearfp() is called unconditionally.
 */
int feclearexcept(int excepts)
{
    (void) excepts;
    _clearfp();
    return 0;
}
#endif //defined(_WIN32)

View File

@@ -0,0 +1,274 @@
/*
A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.
Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
*/
#include <stdio.h>
#include <stdlib.h>
#include "mt19937.h"
#include "mingw_compat.h"
#ifdef __SSE2__
#include <emmintrin.h>
#endif
/*
 * Allocates size bytes aligned to alignment using the platform's aligned
 * allocator. Returns NULL on failure. Release the memory with align_free.
 */
static void * align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    void * block = NULL;
    // posix_memalign reports failure through a non-zero return value
    if (posix_memalign(&block, alignment, size) != 0)
        return NULL;
    return block;
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif
}
/*
 * Releases memory obtained from align_malloc with the matching platform
 * deallocator.
 *
 * The calls are plain statements: a return statement with an expression is
 * not permitted in a function returning void in C.
 */
static void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
/* Period parameters */
#define N 624 /* vector code requires multiple of 4 here */
#define M 397
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
/* Generator state; allocated by init_genrand with 16-byte alignment. */
typedef struct _MTdata
{
/* the N-word state vector */
cl_uint mt[N];
#ifdef __SSE2__
/* tempered outputs, precomputed by the vector code whenever mt[] is regenerated */
cl_uint cache[N];
#endif
/* index of the next word to hand out; mti == N means mt[] must be regenerated first */
cl_int mti;
}_MTdata;
/*
 * Allocates a generator state (16-byte aligned) and seeds mt[N] from s.
 * Returns NULL when the allocation fails. The caller releases the state
 * with free_mtdata.
 */
MTdata init_genrand(cl_uint s)
{
    MTdata state = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
    cl_uint *mt;
    int i;

    if( NULL == state )
        return state;

    mt = state->mt;
    mt[0] = s;
    for( i = 1; i < N; ++i )
    {
        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
        /* The conversion to cl_uint keeps the low 32 bits on machines */
        /* where unsigned long is wider than 32 bits. */
        const cl_uint previous = mt[i-1];
        mt[i] = (cl_uint)
            (1812433253UL * (previous ^ (previous >> 30)) + i);
    }

    /* i == N here, which makes the first genrand_int32 call regenerate mt[] */
    state->mti = i;
    return state;
}
/* Releases a generator state created by init_genrand; a NULL state is ignored. */
void free_mtdata( MTdata d )
{
    if( NULL == d )
        return;

    align_free(d);
}
/* generates a random number on [0,0xffffffff]-interval */
/*
 * When d->mti reaches N the whole state vector mt[] is regenerated in one
 * pass before the next word is handed out. With __SSE2__ the regeneration is
 * done four words at a time where possible, and the tempered outputs for the
 * whole block are precomputed into d->cache.
 */
cl_uint genrand_int32( MTdata d)
{
/* mag01[x] = x * MATRIX_A for x=0,1 */
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
#ifdef __SSE2__
/* Vector constants, filled in on the first regeneration and shared by all states. */
/* NOTE(review): the init flag is a plain volatile int with no locking; */
/* concurrent first calls would each write the same constant values. */
static volatile int init = 0;
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
#endif
cl_uint *mt = d->mt;
cl_uint y;
if (d->mti == N)
{ /* generate N words at one time */
int kk;
#ifdef __SSE2__
if( 0 == init )
{
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
init = 1;
}
#endif
kk = 0;
#ifdef __SSE2__
// vector loop
// first part of the state (kk < N-M): each new word uses mt[kk+M]
for( ; kk + 4 <= N-M; kk += 4 )
{
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
_mm_store_si128( (__m128i*) (mt + kk ), vr );
}
#endif
// scalar loop: the whole first part without SSE2, or the words left over by the vector loop
for ( ;kk<N-M;kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
#ifdef __SSE2__
// advance to next aligned location
for (;kk<N-1 && (kk & 3);kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
// vector loop
// second part of the state (N-M <= kk < N-1): each new word uses the already regenerated mt[kk+M-N]
for( ; kk + 4 <= N-1; kk += 4 )
{
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M-N] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
_mm_store_si128( (__m128i*) (mt + kk ), vr );
}
#endif
// scalar loop for the rest of the second part
for (;kk<N-1;kk++) {
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
}
// the last word wraps around and combines mt[N-1] with mt[0]
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
#ifdef __SSE2__
// Do the tempering ahead of time in vector code
for( kk = 0; kk + 4 <= N; kk += 4 )
{
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
}
#endif
d->mti = 0;
}
#ifdef __SSE2__
// the output was already tempered when the block was regenerated
y = d->cache[d->mti++];
#else
y = mt[d->mti++];
/* Tempering */
y ^= (y >> 11);
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
y ^= (y >> 18);
#endif
return y;
}
/*
 * Generates a 64-bit random number from two consecutive 32-bit outputs.
 *
 * The two draws are made in separate statements so that the first output is
 * always the high word and the second the low word. As operands of a single
 * '|' expression the calls could be evaluated in either order, which would
 * make the sequence for a given seed depend on the compiler.
 */
cl_ulong genrand_int64( MTdata d)
{
    const cl_ulong high = genrand_int32(d);
    const cl_ulong low = genrand_int32(d);

    return (high << 32) | low;
}
/* generates a random number on [0,1]-real-interval */
double genrand_real1(MTdata d)
{
    const cl_uint draw = genrand_int32(d);

    /* divided by 2^32-1 */
    return draw*(1.0/4294967295.0);
}
/* generates a random number on [0,1)-real-interval */
double genrand_real2(MTdata d)
{
    const cl_uint draw = genrand_int32(d);

    /* divided by 2^32 */
    return draw*(1.0/4294967296.0);
}
/* generates a random number on (0,1)-real-interval */
double genrand_real3(MTdata d)
{
    const double draw = (double)genrand_int32(d);

    /* offset by one half, then divided by 2^32 */
    return (draw + 0.5)*(1.0/4294967296.0);
}
/* generates a random number on [0,1) with 53-bit resolution*/
double genrand_res53(MTdata d)
{
    /* the first draw supplies the top 27 bits, the second draw the low 26 bits */
    unsigned long a;
    unsigned long b;

    a = genrand_int32(d)>>5;
    b = genrand_int32(d)>>6;
    return(a*67108864.0+b)*(1.0/9007199254740992.0);
}

View File

@@ -0,0 +1,99 @@
/*
* mt19937.h
*
* Mersenne Twister.
*
A C-program for MT19937, with initialization improved 2002/1/26.
Coded by Takuji Nishimura and Makoto Matsumoto.
Before using, initialize the state by using init_genrand(seed)
or init_by_array(init_key, key_length).
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The names of its contributors may not be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Any feedback is very welcome.
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
*/
#ifndef MT19937_H
#define MT19937_H 1
#if defined( __APPLE__ )
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* Interfaces here have been modified from original sources so that they
* are safe to call reentrantly, so long as a different MTdata is used
* on each thread.
*/
/* Opaque handle to a generator state; the struct is defined in the implementation file. */
typedef struct _MTdata *MTdata;
/* Create the random number generator with seed. Returns NULL if the state cannot be allocated. */
MTdata init_genrand( cl_uint /*seed*/ );
/* release memory used by a MTdata private data; passing NULL is allowed and does nothing */
void free_mtdata( MTdata /*data*/ );
/* generates a random number on [0,0xffffffff]-interval */
cl_uint genrand_int32( MTdata /*data*/);
/* generates a random number on [0,0xffffffffffffffffULL]-interval (built from two 32-bit draws) */
cl_ulong genrand_int64( MTdata /*data*/);
/* generates a random number on [0,1]-real-interval */
double genrand_real1( MTdata /*data*/);
/* generates a random number on [0,1)-real-interval */
double genrand_real2( MTdata /*data*/);
/* generates a random number on (0,1)-real-interval */
double genrand_real3( MTdata /*data*/);
/* generates a random number on [0,1) with 53-bit resolution (built from two 32-bit draws) */
double genrand_res53( MTdata /*data*/ );
#ifdef __cplusplus
}
#endif
#endif /* MT19937_H */

View File

@@ -0,0 +1,49 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _ref_counting_h
#define _ref_counting_h
// Records the current reference count of object `c`.
//   c       - the OpenCL object; also used as the prefix of the variable declared here
//   type    - middle part of the query function name, i.e. clGet<type>Info
//   bigType - middle part of the query token, i.e. CL_<bigType>_REFERENCE_COUNT
// Declares `cl_uint <c>_refCount` in the enclosing scope, so it can be used only once
// per object per scope. Assigns to a variable named `error` that the caller must
// already have declared, and hands it to test_error().
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
cl_uint c##_refCount; \
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
test_error( error, "Unable to check reference count for " #type );
// Re-reads the reference count of object `c` and compares it with the value stored
// in `<c>_refCount` (declared by the matching MARK_REF_COUNT_* macro in the same scope).
//   c       - the OpenCL object; also used as the prefix of the variables involved
//   type    - middle part of the query function name, i.e. clGet<type>Info
//   bigType - middle part of the query token, i.e. CL_<bigType>_REFERENCE_COUNT
// Declares `cl_uint <c>_refCount_new` in the enclosing scope and assigns to a variable
// named `error` that the caller must already have declared. When the two counts
// differ it logs the mismatch and makes the *enclosing function* return -1.
// The counts are cl_uint, so they are printed with %u.
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
    cl_uint c##_refCount_new; \
    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
    test_error( error, "Unable to check reference count for " #type ); \
    if( c##_refCount != c##_refCount_new ) \
    { \
        log_error( "ERROR: Reference count for " #type " changed! (was %u, now %u)\n", c##_refCount, c##_refCount_new ); \
        return -1; \
    }
// Per-object-kind shorthands for the two *_BASE macros. Each pair supplies the
// clGet<type>Info function name fragment and the CL_<bigType>_REFERENCE_COUNT
// token fragment for one kind of OpenCL object.
#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
// Command queues: clGetCommandQueueInfo with CL_QUEUE_REFERENCE_COUNT
#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
// Memory objects: clGetMemObjectInfo with CL_MEM_REFERENCE_COUNT
#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
#endif // _ref_counting_h

View File

@@ -0,0 +1,175 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "rounding_mode.h"
#if !(defined(_WIN32) && defined(_MSC_VER))
// Switches the host floating-point rounding direction and reports the one that was
// active before the call.
//   r       - requested mode; used as an index into the tables below
//   outType - destination type of the conversion being modelled. kfloat/kdouble use the
//             floating-point table, every other type uses the integer table.
// kDefaultRoundingMode (index 0) therefore means round-to-nearest for floating-point
// outputs and round-toward-zero for integer outputs.
// Aborts if fegetround() reports a direction that is none of the four standard ones.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int fpDirections[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
    static const int intDirections[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };

    const int *table = ( outType == kfloat || outType == kdouble ) ? fpDirections : intDirections;
    const int previous = fegetround();

    fesetround( table[r] );

    if( previous == FE_TONEAREST )
        return kRoundToNearestEven;
    if( previous == FE_UPWARD )
        return kRoundUp;
    if( previous == FE_DOWNWARD )
        return kRoundDown;
    if( previous == FE_TOWARDZERO )
        return kRoundTowardZero;

    abort(); // ??!
    return kDefaultRoundingMode; //never happens
}
// Reports the host's current floating-point rounding direction as a RoundingMode.
// Returns kDefaultRoundingMode when fegetround() yields a value that is none of the
// four standard directions.
RoundingMode get_round( void )
{
    const int current = fegetround();

    if( current == FE_TONEAREST )
        return kRoundToNearestEven;
    if( current == FE_UPWARD )
        return kRoundUp;
    if( current == FE_DOWNWARD )
        return kRoundDown;
    if( current == FE_TOWARDZERO )
        return kRoundTowardZero;

    return kDefaultRoundingMode;
}
#else
// MSVC variant: switches the rounding-control bits of the floating-point control word
// and reports the mode that was active before the call.
//   r       - requested mode; used as an index into the tables below
//   outType - kfloat/kdouble select the floating-point table, everything else the
//             integer table (whose default entry is chop, i.e. toward zero)
// If the control word cannot be read, an error is logged and kDefaultRoundingMode is
// returned without changing the mode.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    const int *table = int_rounds;
    unsigned int controlWord;
    RoundingMode previous;

    if( outType == kfloat || outType == kdouble )
        table = flt_rounds;

    // A zero mask only reads the control word
    if( _controlfp_s( &controlWord, 0, 0 ) )
    {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode; //what else never happens
    }

    // Keep only the rounding-control field before decoding it
    switch( controlWord & _MCW_RC )
    {
        case _RC_NEAR:  previous = kRoundToNearestEven;  break;
        case _RC_UP:    previous = kRoundUp;             break;
        case _RC_DOWN:  previous = kRoundDown;           break;
        case _RC_CHOP:  previous = kRoundTowardZero;     break;
        default:        previous = kDefaultRoundingMode; break;
    }

    // Install the requested mode; only the rounding-control bits are touched
    _controlfp_s( &controlWord, table[r], _MCW_RC );
    return previous;
}
// MSVC variant: reports the rounding mode currently selected in the floating-point
// control word.
// Returns kDefaultRoundingMode (after logging an error) when the control word cannot
// be read, instead of decoding an uninitialised value.
RoundingMode get_round( void )
{
    unsigned int oldRound = 0;
    int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
    if (err) {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }
    oldRound &= _MCW_RC; // keep only the rounding-control field
    return
        (oldRound == _RC_NEAR)? kRoundToNearestEven :
        (oldRound == _RC_UP)? kRoundUp :
        (oldRound == _RC_DOWN)? kRoundDown :
        (oldRound == _RC_CHOP)? kRoundTowardZero:
        kDefaultRoundingMode;
}
#endif
//
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
// software by testing against FLT_MIN or DBL_MIN in that file.
//
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
// operators do (e.g. add, subtract, multiply, divide, etc.)
//
// Configuring hardware to FTZ mode varies by platform.
// CAUTION: Some C implementations may also fail to behave properly in this mode.
//
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
// in which case, these functions are at liberty to do nothing.
//
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
#include <xmmintrin.h>
#elif defined( __PPC__ )
#include <fpu_control.h>
#endif
// Puts the host into flush-to-zero mode.
// Returns an opaque token to hand back to UnFlushToZero(): on x86 it carries the
// MXCSR value read before the change; on ARM and PowerPC it is NULL.
// Compilation fails with #error on operating systems / architectures not listed here.
void *FlushToZero( void )
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
// The union smuggles the int MXCSR value out through the void* return type
union{ int i; void *p; }u = { _mm_getcsr() };
// 0x8040 = FZ (0x8000) | DAZ (0x0040)
_mm_setcsr( u.i | 0x8040 );
return u.p;
#elif defined( __arm__ ) || defined(__aarch64__)
// processor is already in FTZ mode -- do nothing
return NULL;
#elif defined( __PPC__ )
// Set the NI (non-IEEE) bit in the FP control word
fpu_control_t flags = 0;
_FPU_GETCW(flags);
flags |= _FPU_MASK_NI;
_FPU_SETCW(flags);
return NULL;
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
//   p - the token returned by FlushToZero(). Only the x86 branch reads it; there it is
//       written back to MXCSR unchanged. The PowerPC branch clears the NI bit regardless
//       of p, and the ARM branch does nothing.
void UnFlushToZero( void *p)
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
// Reverse of the union trick in FlushToZero: recover the int MXCSR value from the pointer
union{ void *p; int i; }u = { p };
_mm_setcsr( u.i );
#elif defined( __arm__ ) || defined(__aarch64__)
// processor is already in FTZ mode -- do nothing
#elif defined( __PPC__)
// Clear the NI (non-IEEE) bit again
fpu_control_t flags = 0;
_FPU_GETCW(flags);
flags &= ~_FPU_MASK_NI;
_FPU_SETCW(flags);
#else
#error Unknown arch
#endif
#else
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}

View File

@@ -0,0 +1,71 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __ROUNDING_MODE_H__
#define __ROUNDING_MODE_H__
#include "compat.h"
#include <stdlib.h>
#if (defined(_WIN32) && defined (_MSC_VER))
#include "errorHelpers.h"
#include "testHarness.h"
#endif
// Rounding modes understood by set_round()/get_round(). The values double as indices
// into set_round()'s lookup tables, so the order must not change.
typedef enum
{
// In set_round(): nearest for float/double outputs, toward zero for integer outputs
kDefaultRoundingMode = 0,
kRoundToNearestEven,
kRoundUp,
kRoundDown,
kRoundTowardZero,
// Number of modes above; used as the size of the lookup tables. Not a mode itself.
kRoundingModeCount
}RoundingMode;
// Scalar type identifiers. set_round() only distinguishes kfloat/kdouble (floating-point
// rounding table) from all the others (integer rounding table).
typedef enum
{
kuchar = 0,
kchar = 1,
kushort = 2,
kshort = 3,
kuint = 4,
kint = 5,
kfloat = 6,
kdouble = 7,
kulong = 8,
klong = 9,
//This goes last
kTypeCount
}Type;
#ifdef __cplusplus
extern "C" {
#endif
// Select rounding mode r for conversions producing outType; returns the mode that was
// active before the call.
extern RoundingMode set_round( RoundingMode r, Type outType );
// Return the rounding mode currently active on the host.
extern RoundingMode get_round( void );
// Put the host into flush-to-zero mode; the returned token must be passed to UnFlushToZero().
extern void *FlushToZero( void );
// Undo FlushToZero(); p is the token it returned.
extern void UnFlushToZero( void *p);
#ifdef __cplusplus
}
#endif
#endif /* __ROUNDING_MODE_H__ */

View File

@@ -0,0 +1,842 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testHarness.h"
#include "compat.h"
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <stdbool.h>
#endif
#include <string.h>
#include "threadTesting.h"
#include "errorHelpers.h"
#include "kernelHelpers.h"
#include "fpcontrol.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <time.h>
#if !defined (__APPLE__)
#include <CL/cl.h>
#endif
// Running totals, incremented by callSingleTestFunction() after each test.
int gTestsPassed = 0;
int gTestsFailed = 0;
// Seed for the random number generators; stays 0 unless "randomize" is the last argument,
// in which case it is taken from clock() and gReSeed is set to 1.
cl_uint gRandomSeed = 0;
cl_uint gReSeed = 0;
// 1 when the device does not report CL_FP_DENORM in CL_DEVICE_SINGLE_FP_CONFIG.
int gFlushDenormsToZero = 0;
// Cleared for embedded-profile devices that do not report CL_FP_INF_NAN.
int gInfNanSupport = 1;
// 1 when CL_DEVICE_PROFILE contains "EMBEDDED_PROFILE".
int gIsEmbedded = 0;
// 1 when OPENCL_1_0_DEVICE is set and CL_DEVICE_OPENCL_C_VERSION starts with "OpenCL C 1.0 ".
int gIsOpenCL_C_1_0_Device = 0;
// 1 when the OPENCL_1_0_DEVICE environment variable is set.
int gIsOpenCL_1_0_Device = 0;
// Cleared for embedded-profile devices whose extension string lacks "cles_khr_int64".
int gHasLong = 1;
// Element count handed to tests when none (or a non-positive one) is given on the command line.
#define DEFAULT_NUM_ELEMENTS 0x4000
// Convenience entry point: forwards everything to runTestHarnessWithCheck(), installing
// verifyImageSupport as the device check when imageSupportRequired is non-zero and no
// device check otherwise. Returns whatever runTestHarnessWithCheck() returns.
int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
                    basefn fnList[], const char *fnNames[],
                    int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
{
    if( imageSupportRequired )
    {
        return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired,
                                        forceNoContextCreation, queueProps, verifyImageSupport );
    }

    return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired,
                                    forceNoContextCreation, queueProps, NULL );
}
int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
DeviceCheckFn deviceCheckFn )
{
test_start();
log_info("*** Compatibility with Previous Versions test ***\n");
cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
cl_uint num_platforms = 0;
cl_platform_id *platforms;
cl_device_id device;
int num_elements = DEFAULT_NUM_ELEMENTS;
cl_uint num_devices = 0;
cl_device_id *devices = NULL;
cl_uint choosen_device_index = 0;
cl_uint choosen_platform_index = 0;
int err, ret;
char *endPtr;
unsigned int i;
int based_on_env_var = 0;
/* Check for environment variable to set device type */
char *env_mode = getenv( "CL_DEVICE_TYPE" );
if( env_mode != NULL )
{
based_on_env_var = 1;
if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
device_type = CL_DEVICE_TYPE_GPU;
else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
device_type = CL_DEVICE_TYPE_CPU;
else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
device_type = CL_DEVICE_TYPE_ACCELERATOR;
else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
device_type = CL_DEVICE_TYPE_DEFAULT;
else
{
log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
abort();
}
}
#if defined( __APPLE__ )
{
// report on any unusual library search path indirection
char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
if( libSearchPath )
log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
// report on any unusual framework search path indirection
char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
if( libSearchPath )
log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
}
#endif
env_mode = getenv( "CL_DEVICE_INDEX" );
if( env_mode != NULL )
{
choosen_device_index = atoi(env_mode);
}
env_mode = getenv( "CL_PLATFORM_INDEX" );
if( env_mode != NULL )
{
choosen_platform_index = atoi(env_mode);
}
/* Process the command line arguments */
/* Special case: just list the tests */
if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
{
log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
for( i = 0; i < num_fns - 1; i++ )
{
log_info( "\t\t%s\n", fnNames[ i ] );
}
test_finish();
return 0;
}
/* How are we supposed to seed the random # generators? */
if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
{
log_info(" Initializing random seed based on the clock.\n");
gRandomSeed = (unsigned)clock();
gReSeed = 1;
argc--;
}
else
{
log_info(" Initializing random seed to 0.\n");
}
/* Do we have an integer to specify the number of elements to pass to tests? */
if( argc > 1 )
{
ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
{
/* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
/* (hence why we stored the result in ret first) */
num_elements = ret;
log_info( "Testing with num_elements of %d\n", num_elements );
argc--;
}
}
/* Do we have a CPU/GPU specification? */
if( argc > 1 )
{
if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
{
device_type = CL_DEVICE_TYPE_GPU;
argc--;
}
else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
{
device_type = CL_DEVICE_TYPE_CPU;
argc--;
}
else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
{
device_type = CL_DEVICE_TYPE_ACCELERATOR;
argc--;
}
else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
{
device_type = CL_DEVICE_TYPE_DEFAULT;
argc--;
}
}
/* Did we choose a specific device index? */
if( argc > 1 )
{
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
{
choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
argc--;
}
}
/* Did we choose a specific platform index? */
if( argc > 1 )
{
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
{
choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
argc--;
}
}
switch( device_type )
{
case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
default: log_error( "Requesting unknown device "); return -1;
}
log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
#if defined( __APPLE__ )
#if defined( __i386__ ) || defined( __x86_64__ )
#define kHasSSE3 0x00000008
#define kHasSupplementalSSE3 0x00000100
#define kHasSSE4_1 0x00000400
#define kHasSSE4_2 0x00000800
/* check our environment for a hint to disable SSE variants */
{
const char *env = getenv( "CL_MAX_SSE" );
if( env )
{
extern int _cpu_capabilities;
int mask = 0;
if( 0 == strcasecmp( env, "SSE4.1" ) )
mask = kHasSSE4_2;
else if( 0 == strcasecmp( env, "SSSE3" ) )
mask = kHasSSE4_2 | kHasSSE4_1;
else if( 0 == strcasecmp( env, "SSE3" ) )
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
else if( 0 == strcasecmp( env, "SSE2" ) )
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
else
{
log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
return -2;
}
log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
_cpu_capabilities &= ~mask;
}
}
#endif
#endif
/* Get the platform */
err = clGetPlatformIDs(0, NULL, &num_platforms);
if (err) {
print_error(err, "clGetPlatformIDs failed");
test_finish();
return -1;
}
platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
if (!platforms || choosen_platform_index >= num_platforms) {
log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
test_finish();
return -1;
}
err = clGetPlatformIDs(num_platforms, platforms, NULL);
if (err) {
print_error(err, "clGetPlatformIDs failed");
test_finish();
return -1;
}
/* Get the number of requested devices */
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices );
if (err) {
print_error(err, "clGetDeviceIDs failed");
test_finish();
return -1;
}
devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
if (!devices || choosen_device_index >= num_devices) {
log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
test_finish();
return -1;
}
/* Get the requested device */
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL );
if (err) {
print_error(err, "clGetDeviceIDs failed");
test_finish();
return -1;
}
device = devices[choosen_device_index];
free(devices);
devices = NULL;
free(platforms);
platforms = NULL;
if( printDeviceHeader( device ) != CL_SUCCESS )
{
test_finish();
return -1;
}
cl_device_fp_config fpconfig = 0;
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
if (err) {
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
test_finish();
return -1;
}
gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
log_info( "sizeof( void*) = %d (host)\n", (int) sizeof( void* ) );
//detect whether profile of the device is embedded
char profile[1024] = "";
err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
if (err)
{
print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
test_finish();
return -1;
}
gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
//detect the floating point capabilities
cl_device_fp_config floatCapabilities = 0;
err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
if (err)
{
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
test_finish();
return -1;
}
// Check for problems that only embedded will have
if( gIsEmbedded )
{
//If the device is embedded, we need to detect if the device supports Infinity and NaN
if ((floatCapabilities & CL_FP_INF_NAN) == 0)
gInfNanSupport = 0;
// check the extensions list to see if ulong and long are supported
size_t extensionsStringSize = 0;
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
{
print_error( err, "Unable to get extensions string size for embedded device" );
test_finish();
return -1;
}
char *extensions_string = (char*) malloc(extensionsStringSize);
if( NULL == extensions_string )
{
print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
test_finish();
return -1;
}
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
{
print_error( err, "Unable to get extensions string for embedded device" );
test_finish();
return -1;
}
if( extensions_string[extensionsStringSize-1] != '\0' )
{
log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
test_finish();
return -1;
}
if( NULL == strstr( extensions_string, "cles_khr_int64" ))
gHasLong = 0;
free(extensions_string);
}
if( getenv( "OPENCL_1_0_DEVICE" ) )
{
char c_version[1024];
gIsOpenCL_1_0_Device = 1;
memset( c_version, 0, sizeof( c_version ) );
if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
{
log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
test_finish();
return -1;
}
if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
{
gIsOpenCL_C_1_0_Device = 1;
log_info( "Device is a OpenCL C 1.0 device\n" );
}
else
log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
}
cl_uint device_address_bits = 0;
if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
{
print_error( err, "Unable to obtain device address bits" );
test_finish();
return -1;
}
if( device_address_bits )
log_info( "sizeof( void*) = %d (device)\n", device_address_bits/8 );
else
{
log_error("Invalid device address bit size returned by device.\n");
test_finish();
return -1;
}
/* If we have a device checking function, run it */
if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
{
test_finish();
return -1;
}
if (num_elements <= 0)
num_elements = DEFAULT_NUM_ELEMENTS;
// On most platforms which support denorm, default is FTZ off. However,
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
// This creates issues in result verification. Since spec allows the implementation to either flush or
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
// where reference is being computed to make sure we get non-flushed reference result. If implementation
// returns flushed result, we correctly take care of that in verification code.
#if defined(__APPLE__) && defined(__arm__)
FPU_mode_type oldMode;
DisableFTZ( &oldMode );
#endif
int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
#if defined(__APPLE__) && defined(__arm__)
// Restore the old FP mode before leaving.
RestoreFPState( &oldMode );
#endif
return error;
}
/* Marks every test whose name starts with the text of `wildcard` minus its last
 * character (the pattern is taken to end in the asterisk).
 *   fnNames   - test names, num_fns entries
 *   fnsToCall - selection flags, num_fns entries; matching entries are set to 1
 *   wildcard  - the pattern from the command line
 * Returns EXIT_SUCCESS when at least one test was newly selected. Returns EXIT_FAILURE
 * (after logging) when a matching test was already selected or nothing matched. */
static int find_wildcard_matching_functions( const char *fnNames[], unsigned char fnsToCall[], unsigned int num_fns,
                                             const char *wildcard )
{
    const size_t prefixLength = strlen( wildcard ) - 1; /* -1 for the asterisk */
    int anyMatch = 0;
    unsigned int idx;

    for( idx = 0; idx < num_fns; idx++ )
    {
        const char *candidate = fnNames[ idx ];

        if( strncmp( candidate, wildcard, prefixLength ) != 0 )
            continue;

        if( fnsToCall[ idx ] )
        {
            log_error( "ERROR: Test '%s' has already been selected.\n", candidate );
            return EXIT_FAILURE;
        }

        fnsToCall[ idx ] = 1;
        anyMatch = 1;
    }

    if( anyMatch )
        return EXIT_SUCCESS;

    log_error( "ERROR: The wildcard '%s' did not match any test names.\n", wildcard );
    return EXIT_FAILURE;
}
/* Marks the first test whose name equals `argument` exactly.
 *   fnNames   - test names, num_fns entries
 *   fnsToCall - selection flags, num_fns entries; the matching entry is set to 1
 *   argument  - the test name from the command line
 * Returns EXIT_SUCCESS when the test was newly selected. Returns EXIT_FAILURE (after
 * logging) when it was already selected or no name matched. */
static int find_argument_matching_function( const char *fnNames[], unsigned char *fnsToCall, unsigned int num_fns,
                                            const char *argument )
{
    unsigned int idx;

    for( idx = 0; idx < num_fns; idx++ )
    {
        if( strcmp( argument, fnNames[ idx ] ) != 0 )
            continue;

        if( fnsToCall[ idx ] )
        {
            log_error( "ERROR: Test '%s' has already been selected.\n", fnNames[ idx ] );
            return EXIT_FAILURE;
        }

        /* Only the first matching name is considered */
        fnsToCall[ idx ] = 1;
        return EXIT_SUCCESS;
    }

    log_error( "ERROR: The argument '%s' did not match any test names.\n", argument );
    return EXIT_FAILURE;
}
/* Selects tests from the remaining command-line arguments, runs them and prints the
 * pass/fail summary.
 *
 *   argc/argv               - arguments left after runTestHarnessWithCheck() consumed its
 *                             options. argv[1..argc-1] are test names, "<prefix>*"
 *                             wildcards, or "all". With no arguments every test runs.
 *   device                  - device to run on
 *   num_fns/fnList/fnNames  - the test table
 *   forceNoContextCreation, queueProps, num_elements - forwarded to callTestFunctions()
 *
 * Returns EXIT_FAILURE when an argument cannot be resolved or the selection table
 * cannot be allocated, otherwise the value returned by callTestFunctions().
 * test_finish() is called on every path. */
int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
                                  basefn fnList[], const char *fnNames[], int forceNoContextCreation,
                                  cl_command_queue_properties queueProps, int num_elements )
{
    int ret = EXIT_SUCCESS;

    /* One selection flag per test. calloc( 0, 1 ) may legitimately return NULL, so at
     * least one byte is always requested to keep the NULL check unambiguous. */
    unsigned char *fnsToCall = ( unsigned char* ) calloc( num_fns ? num_fns : 1, 1 );
    if( fnsToCall == NULL )
    {
        log_error( "ERROR: Unable to allocate the selection table for %u tests.\n", num_fns );
        test_finish();
        return EXIT_FAILURE;
    }

    if( argc == 1 )
    {
        /* No actual arguments, all tests will be run. */
        memset( fnsToCall, 1, num_fns );
    }
    else
    {
        for( int argIndex = 1; argIndex < argc; argIndex++ )
        {
            if( strchr( argv[ argIndex ], '*' ) != NULL )
            {
                ret = find_wildcard_matching_functions( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
            }
            else
            {
                if( strcmp( argv[ argIndex ], "all" ) == 0 )
                {
                    /* "all" selects everything and ends argument processing */
                    memset( fnsToCall, 1, num_fns );
                    break;
                }
                else
                {
                    ret = find_argument_matching_function( fnNames, fnsToCall, num_fns, argv[ argIndex ] );
                }
            }

            if( ret == EXIT_FAILURE )
            {
                break;
            }
        }
    }

    if( ret == EXIT_SUCCESS )
    {
        ret = callTestFunctions( fnList, fnNames, fnsToCall, num_fns, device, forceNoContextCreation, num_elements, queueProps );

        if( gTestsFailed == 0 )
        {
            if( gTestsPassed > 1 )
            {
                log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
            }
            else if( gTestsPassed > 0 )
            {
                log_info("PASSED test.\n");
            }
        }
        else if( gTestsFailed > 0 )
        {
            if( gTestsFailed+gTestsPassed > 1 )
            {
                log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
            }
            else
            {
                log_error("FAILED test.\n");
            }
        }
    }

    test_finish();

    free( fnsToCall );

    return ret;
}
/* Runs every selected test in table order.
 *   functionList/functionNames - the test table, numFunctions entries each
 *   functionsToCall            - non-zero entries mark the tests to run
 *   deviceToUse, forceNoContextCreation, numElementsToUse, queueProps
 *                              - forwarded to callSingleTestFunction()
 * Selected entries with a NULL function pointer are reported as not implemented and
 * skipped. Returns the sum of the values returned by callSingleTestFunction(). */
int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
                       int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
                       int numElementsToUse, cl_command_queue_properties queueProps )
{
    int errorTotal = 0;
    int idx;

    for( idx = 0; idx < numFunctions; ++idx )
    {
        if( !functionsToCall[ idx ] )
            continue;

        /* Skip any unimplemented tests. */
        if( functionList[ idx ] == NULL )
        {
            log_info( "%s test currently not implemented\n", functionNames[ idx ] );
            continue;
        }

        errorTotal += callSingleTestFunction( functionList[ idx ], functionNames[ idx ], deviceToUse,
                                              forceNoContextCreation, numElementsToUse, queueProps );
    }

    return errorTotal;
}
/* Context error callback passed to clCreateContext(): writes the error text to the
 * info log. The remaining arguments are ignored. */
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    (void) private_info;
    (void) cb;
    (void) user_data;

    log_info( "%s\n", errinfo );
}
/* Runs one test and updates the global pass/fail counters.
 *
 *   functionToCall          - the test entry point
 *   functionName            - name used in log output
 *   deviceToUse             - device handed to the test
 *   forceNoContextCreation  - non-zero: the test receives a NULL context and queue.
 *                             Zero: a fresh context and command queue are created
 *                             for the test and released afterwards.
 *   numElementsToUse        - element count handed to the test
 *   queueProps              - properties for the command queue
 *
 * Returns the number of errors: 1 when the context or queue cannot be created, plus 1
 * when the test fails, plus 1 when clFinish() on the queue fails. A test returning
 * TEST_NOT_IMPLEMENTED counts as neither passed nor failed. */
int callSingleTestFunction( basefn functionToCall, const char *functionName,
                            cl_device_id deviceToUse, int forceNoContextCreation,
                            int numElementsToUse, cl_command_queue_properties queueProps )
{
    int numErrors = 0, ret;
    cl_int error;
    cl_context context = NULL;
    cl_command_queue queue = NULL;

    /* Create a context to work with, unless we're told not to */
    if( !forceNoContextCreation )
    {
        context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
        if (!context)
        {
            print_error( error, "Unable to create testing context" );
            return 1;
        }

        queue = clCreateCommandQueue( context, deviceToUse, queueProps, &error );
        if( queue == NULL )
        {
            print_error( error, "Unable to create testing command queue" );
            /* Do not leak the context that was just created */
            clReleaseContext( context );
            return 1;
        }
    }

    /* Run the test and print the result */
    log_info( "%s...\n", functionName );
    fflush( stdout );

    ret = functionToCall( deviceToUse, context, queue, numElementsToUse);
    if( ret == TEST_NOT_IMPLEMENTED )
    {
        /* Tests can also let us know they're not implemented yet */
        log_info("%s test currently not implemented\n\n", functionName);
    }
    else
    {
        /* Print result */
        if( ret == 0 ) {
            log_info( "%s passed\n", functionName );
            gTestsPassed++;
        }
        else
        {
            numErrors++;
            log_error( "%s FAILED\n", functionName );
            gTestsFailed++;
        }
    }

    /* Release the context */
    if( !forceNoContextCreation )
    {
        /* Drain the queue first so failures in still-pending work are reported */
        error = clFinish(queue);
        if (error) {
            log_error("clFinish failed: %d\n", error);
            numErrors++;
        }
        clReleaseCommandQueue( queue );
        clReleaseContext( context );
    }

    return numErrors;
}
/* Lets the CL_DEVICE_TYPE environment variable override the requested device type and
 * logs the resulting choice.
 *   inOutType - in: the type requested by the caller; out: the type to use. It is
 *               replaced only when CL_DEVICE_TYPE holds one of the recognised values
 *               ("gpu", "cpu", "accelerator" or a CL_DEVICE_TYPE_* name listed below);
 *               any other value leaves it untouched.
 * On Apple platforms the DYLD_LIBRARY_PATH / DYLD_FRAMEWORK_PATH settings are logged
 * too, each only when it is set. */
void checkDeviceTypeOverride( cl_device_type *inOutType )
{
    /* Check if we are forced to CPU mode */
    char *force_cpu = getenv( "CL_DEVICE_TYPE" );
    if( force_cpu != NULL )
    {
        if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_GPU;
        else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_CPU;
        else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_ACCELERATOR;
        else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_DEFAULT;
    }

    switch( *inOutType )
    {
        case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
        case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
        case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
        case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
        default: break;
    }
    log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );

#if defined( __APPLE__ )
    {
        // report on any unusual library search path indirection
        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
        if( libSearchPath )
            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );

        // report on any unusual framework search path indirection
        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
        // Test the variable that is actually printed, so NULL never reaches %s
        if( frameworkSearchPath )
            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
    }
#endif
}
#if ! defined( __APPLE__ )
/* Fills `bytes` bytes at dest with repetitions of the 4-byte pattern at src_pattern.
 * A trailing partial repetition (bytes % 4 != 0) receives the leading bytes of the
 * pattern.
 *   dest        - destination buffer of at least `bytes` bytes; any alignment
 *   src_pattern - at least 4 readable bytes; any alignment
 *   bytes       - number of bytes to write
 * All accesses go through memcpy, so neither pointer needs to be 4-byte aligned and
 * no object is accessed through an incompatible pointer type. */
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
{
    unsigned char pattern[4];
    unsigned char *out = (unsigned char *) dest;
    size_t whole = bytes / 4;
    size_t tail = bytes & 3;
    size_t i;

    /* Snapshot the pattern before the first write */
    memcpy( pattern, src_pattern, sizeof( pattern ) );

    for( i = 0; i < whole; i++ )
        memcpy( out + 4 * i, pattern, sizeof( pattern ) );

    if( tail )
        memcpy( out + 4 * whole, pattern, tail );
}
#endif
/* Returns the CL_DEVICE_TYPE of device d. When the query fails an error is logged and
 * the initial value of the result, -1 converted to cl_device_type, is returned. */
extern cl_device_type GetDeviceType( cl_device_id d )
{
    cl_device_type type = -1;

    if( clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( type ), &type, NULL ) != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to get device type for device %p\n", d );
    }

    return type;
}
/* Finds a device on the same platform as `device` that is not `device` itself.
 *
 * Returns:
 *   - the first other device in the platform's CL_DEVICE_TYPE_ALL list, or
 *   - `device` itself when the platform reports exactly one device, or
 *   - NULL on any error (query failure, allocation failure, or no other device found).
 * The temporary device list is released on every path. */
cl_device_id GetOpposingDevice( cl_device_id device )
{
    cl_int error;
    cl_device_id *otherDevices;
    cl_device_id result = NULL;
    cl_uint actualCount;
    cl_platform_id plat;
    cl_uint i;

    // Get the platform of the device to use for getting a list of devices
    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get device's platform" );
        return NULL;
    }

    // Get a list of all devices
    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices size" );
        return NULL;
    }

    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
    if( otherDevices == NULL )
    {
        print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for list of devices" );
        return NULL;
    }

    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices" );
        free(otherDevices);
        return NULL;
    }

    if( actualCount == 1 )
    {
        free(otherDevices);
        return device; // NULL means error, returning self means we couldn't find another one
    }

    // Loop and just find one that isn't the one we were given
    for( i = 0; i < actualCount; i++ )
    {
        if( otherDevices[ i ] != device )
        {
            // The type itself is not used; the query only confirms the candidate
            // answers device queries before it is returned.
            cl_device_type newType;
            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
            if( error != CL_SUCCESS )
            {
                print_error( error, "Unable to get device type for other device" );
                break;
            }
            result = otherDevices[ i ];
            break;
        }
    }

    // result is still NULL here if the query failed or no other device was found
    free(otherDevices);
    return result;
}

View File

@@ -0,0 +1,100 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testHarness_h
#define _testHarness_h
#include "threadTesting.h"
#include "clImageHelper.h"
#ifdef __cplusplus
extern "C" {
#endif
// When gReSeed is non-zero, RandomSeed's destructor (typeWrappers.h) stores a freshly generated value into gRandomSeed.
extern cl_uint gReSeed;
extern cl_uint gRandomSeed;
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
// setup work, and then call each function in turn as dictated by the passed arguments.
extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
typedef int (*DeviceCheckFn)( cl_device_id device );
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
basefn fnList[], const char *fnNames[],
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
basefn *fnList, const char *fnNames[],
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
// Call this function if you need to do all the setup work yourself, and just need the function list called/
// managed.
// functionList is the actual array of functions
// functionNames is an array of strings representing the name of each function
// functionsToCall is an array of integers (treated as bools) which tell which function is to be called,
// each element at index i, corresponds to the element in functionList at index i
// numFunctions is the number of elements in the arrays
// forceNoContextCreation and queueProps presumably control the context/queue set up for each test
// (NOTE(review): this comment used to mention "contextProps", which is not a parameter - confirm intent)
// deviceToUse and numElementsToUse are all just passed to each test function
extern int callTestFunctions( basefn functionList[], const char *functionNames[], unsigned char functionsToCall[],
int numFunctions, cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse, cl_command_queue_properties queueProps );
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
cl_device_id deviceToUse, int forceNoContextCreation,
int numElementsToUse, cl_command_queue_properties queueProps );
///// Miscellaneous steps
// Given a pre-existing device type choice, check the environment for an override, then print what
// choice was made and how. The function returns void; an overridden choice is written back through inOutType.
extern void checkDeviceTypeOverride( cl_device_type *inOutType );
// standard callback function for context pfn_notify
extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
// Returns the CL_DEVICE_TYPE of the given device.
extern cl_device_type GetDeviceType( cl_device_id );
// Given a device (most likely passed in by the harness, but not required), will attempt to find
// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
// is the only device available, the SAME device is returned, so check!
extern cl_device_id GetOpposingDevice( cl_device_id device );
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
extern int gHasLong; // This is set to 1 if the device supports long and ulong types in OpenCL C.
extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
// Harness-provided memset_pattern4 for non-Apple builds: fills a buffer with a repeated 4-byte pattern.
#if ! defined( __APPLE__ )
void memset_pattern4(void *, const void *, size_t);
#endif
#ifdef __cplusplus
}
#endif
#endif // _testHarness_h

View File

@@ -0,0 +1,51 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "mt19937.h"
#include <stdio.h>
// Self-test for the mt19937 generator.
// Seeds the generator with 42, draws 65536 values and compares every 4096th
// draw (indices 0, 4096, ..., 61440) against a table of 16 reference values.
// Prints one line per mismatch plus a pass/fail summary.
// Returns 0 when every checked value matches and 1 otherwise, so scripts and
// build systems running this program can detect a failure from the exit code.
int main( void )
{
    MTdata d = init_genrand(42);
    int i;
    const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
                                    0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
                                    0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
                                    0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
    int errcount = 0;

    for( i = 0; i < 65536; i++ )
    {
        cl_uint u = genrand_int32( d );

        // Only every 4096th draw has a reference value; i >> 12 is its table index.
        if( 0 == (i & 4095) )
        {
            if( u != reference[i>>12] )
            {
                printf("ERROR: expected *0x%8.8x at %d.  Got 0x%8.8x\n", reference[i>>12], i, u );
                errcount++;
            }
        }
    }

    free_mtdata(d);

    if( errcount )
        printf("mt19937 test failed.\n");
    else
        printf("mt19937 test passed.\n");

    // Report failure through the exit status instead of always returning 0.
    return errcount ? 1 : 0;
}

View File

@@ -0,0 +1,106 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "threadTesting.h"
#include "errorHelpers.h"
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <stdbool.h>
#endif
#include <math.h>
#include <string.h>
#if !defined(_WIN32)
#include <pthread.h>
#endif
#if 0 // Disabled for now
// Bundle of values handed to test_thread_wrapper through pthread_create's single void * argument.
// NOTE(review): the disabled functions below also read and write an mDeviceGroup member that is
// not declared in this struct.
typedef struct
{
basefn mFunction;
cl_device_id mDevice;
cl_context mContext;
int mNumElements;
} TestFnArgs;
////////////////////////////////////////////////////////////////////////////////
// Thread-based testing. Spawns a new thread to run the given test function,
// then waits for it to complete. The entire idea is that, if the thread crashes,
// we can catch it and report it as a failure instead of crashing the entire suite
////////////////////////////////////////////////////////////////////////////////
// Thread entry point: unpacks the TestFnArgs passed as `data`, creates a context, calls
// args->mFunction, releases the context and returns the test's int result cast to void *.
// Returns (void *)(-1) when context creation fails.
// NOTE(review): this code is compiled out (#if 0) and does not match the surrounding
// declarations: it uses args->mDeviceGroup, which TestFnArgs does not declare, and the
// arguments passed to mFunction do not line up with the basefn typedef in threadTesting.h
// (device, context, queue, num_elements).
void *test_thread_wrapper( void *data )
{
TestFnArgs *args;
int retVal;
cl_context context;
args = (TestFnArgs *)data;
/* Create a new context to use (contexts can't cross threads) */
context = clCreateContext(NULL, args->mDeviceGroup);
if( context == NULL )
{
log_error("clCreateContext failed for new thread\n");
return (void *)(-1);
}
/* Call function */
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
clReleaseContext( context );
// The int result travels back through the thread's void * return value;
// test_threaded_function converts it back to int.
return (void *)retVal;
}
// Runs fnToTest on a separate pthread (via test_thread_wrapper) and waits for it to finish.
// Returns the value the thread produced, or -1 if the thread could not be created or joined.
// NOTE(review): compiled out (#if 0). As written it assigns args.mDeviceGroup from
// `deviceGroup`, neither of which is declared in the visible code, and the `queue`
// parameter is never used.
int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
int error;
pthread_t threadHdl;
void *retVal;
TestFnArgs args;
args.mFunction = fnToTest;
args.mDeviceGroup = deviceGroup;
args.mDevice = device;
args.mContext = context;
args.mNumElements = numElements;
// args lives on this stack frame; the thread is joined below before the function returns.
error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
if( error != 0 )
{
log_error( "ERROR: Unable to create thread for testing!\n" );
return -1;
}
/* Thread has been started, now just wait for it to complete (or crash) */
error = pthread_join( threadHdl, &retVal );
if( error != 0 )
{
log_error( "ERROR: Unable to join testing thread!\n" );
return -1;
}
// Undo the int -> void * conversion done by test_thread_wrapper.
return (int)((intptr_t)retVal);
}
#endif

View File

@@ -0,0 +1,32 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _threadTesting_h
#define _threadTesting_h
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
// Special result code; presumably returned by a test that is not implemented - confirm against callers.
#define TEST_NOT_IMPLEMENTED -99
// Signature of a test entry point: receives the device, context, command queue and element count, returns an int status.
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
// Runs fnToTest on a separate thread and returns its result.
// NOTE(review): the only definition visible alongside this header is compiled out with #if 0 - confirm a live definition exists before calling.
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
#endif // _threadTesting_h

View File

@@ -0,0 +1,481 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "typeWrappers.h"
#include "kernelHelpers.h"
#include "errorHelpers.h"
#include <stdlib.h>
#include "clImageHelper.h"
// Rounds _size up to the next multiple of _align. The "& -_align" mask only gives a
// correct result when _align is a power of two.
#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
// kPageSize is the granularity used for the guard-page arithmetic below: a fixed 4096 on
// Apple builds, getpagesize() on Linux. It is not defined for any other platform.
// NOTE(review): the Apple value is hard-coded - confirm it matches the page size of the targeted hardware.
#if defined( __APPLE__ )
#define kPageSize 4096
#include <sys/mman.h>
#include <stdlib.h>
#elif defined(__linux__)
#include <unistd.h>
#define kPageSize (getpagesize())
#endif
// Constructs a 1D protected image by forwarding to Create(). The status returned by
// Create() is stored in *errcode_ret when errcode_ret is not NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
{
    // Create() can return before it assigns these members (its early error returns on the
    // Apple path), and the destructor inspects them, so give them defined values first.
    image = NULL;
    backingStore = NULL;
    backingStoreSize = 0;

    cl_int err = Create( context, mem_flags, fmt, width );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
// Creates a 1D image of `width` pixels and stores it in `image`.
//
// On Apple builds, when no device in the context is a GPU, the image is backed by an
// mmap'ed region (CL_MEM_USE_HOST_PTR) laid out as 4 inaccessible rows, the image row
// followed by one inaccessible page, and 4 more inaccessible rows, so out-of-bounds host
// accesses fault. With CL_ALIGN_RIGHT set in the environment the row is shifted so its
// last pixel ends at the guard page. In every other case a plain image is created and
// backingStore is NULL.
//
// Returns the OpenCL status of the failing query or of the image creation;
// CL_OUT_OF_HOST_MEMORY if the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
{
    cl_int error;
#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;

    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // The query reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        // Guard pages are skipped as soon as one GPU device is found.
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store: 1 image row + 8 guard rows
        backingStoreSize = rowStride + 8 * rowStride;
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never keep MAP_FAILED around: the destructor would pass it to munmap.
            log_error( "ERROR: mmap failed in clProtectedImage::Create.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *p = (char*) backingStore;
        char *imagePtr = (char*) backingStore + 4 * rowStride;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }
        // Skip the usable part of the image row and protect the page that follows it.
        p += rowBytes;
        mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
        p -= rowBytes;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }

        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
    } else {
        backingStore = NULL;
        image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
    }
#else
    backingStore = NULL;
    image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
#endif
    return error;
}
// Constructs a 2D protected image by forwarding to Create(). The status returned by
// Create() is stored in *errcode_ret when errcode_ret is not NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
{
    // Create() can return before it assigns these members (its early error returns on the
    // Apple path), and the destructor inspects them, so give them defined values first.
    image = NULL;
    backingStore = NULL;
    backingStoreSize = 0;

    cl_int err = Create( context, mem_flags, fmt, width, height );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
// Creates a width x height 2D image and stores it in `image`.
//
// On Apple builds, when no device in the context is a GPU, the image is backed by an
// mmap'ed region (CL_MEM_USE_HOST_PTR): 4 inaccessible rows, then `height` image rows each
// followed by one inaccessible page, then 4 more inaccessible rows. With CL_ALIGN_RIGHT
// set in the environment each row is shifted so its last pixel ends at the guard page.
// In every other case a plain image is created and backingStore is NULL.
//
// Returns the OpenCL status of the failing query or of the image creation;
// CL_OUT_OF_HOST_MEMORY if the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
{
    cl_int error;
#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;

    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // The query reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        // Guard pages are skipped as soon as one GPU device is found.
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store: `height` image rows + 8 guard rows
        backingStoreSize = height * rowStride + 8 * rowStride;
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never keep MAP_FAILED around: the destructor would pass it to munmap.
            log_error( "ERROR: mmap failed in clProtectedImage::Create.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *p = (char*) backingStore;
        char *imagePtr = (char*) backingStore + 4 * rowStride;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }
        // Protect the page that trails the usable bytes of every image row.
        p += rowBytes;
        for( row = 0; row < height; row++ )
        {
            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
        }
        p -= rowBytes;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }

        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
    } else {
        backingStore = NULL;
        image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
    }
#else
    backingStore = NULL;
    image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
#endif
    return error;
}
// Constructs a 3D protected image by forwarding to Create(). The status returned by
// Create() is stored in *errcode_ret when errcode_ret is not NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
{
    // Create() can return before it assigns these members (its early error returns on the
    // Apple path), and the destructor inspects them, so give them defined values first.
    image = NULL;
    backingStore = NULL;
    backingStoreSize = 0;

    cl_int err = Create( context, mem_flags, fmt, width, height, depth );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
// Creates a width x height x depth 3D image and stores it in `image`.
//
// On Apple builds, when no device in the context is a GPU, the image is backed by an
// mmap'ed region (CL_MEM_USE_HOST_PTR): 4 inaccessible rows, then height*depth image rows
// each followed by one inaccessible page, then 4 more inaccessible rows. With
// CL_ALIGN_RIGHT set in the environment each row is shifted so its last pixel ends at the
// guard page. In every other case a plain image is created and backingStore is NULL.
//
// Returns the OpenCL status of the failing query or of the image creation;
// CL_OUT_OF_HOST_MEMORY if the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
{
    cl_int error;
#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;

    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // The query reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        // Guard pages are skipped as soon as one GPU device is found.
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store: height*depth image rows + 8 guard rows
        backingStoreSize = height * depth * rowStride + 8 * rowStride;
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never keep MAP_FAILED around: the destructor would pass it to munmap.
            log_error( "ERROR: mmap failed in clProtectedImage::Create.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *p = (char*) backingStore;
        char *imagePtr = (char*) backingStore + 4 * rowStride;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }
        // Protect the page that trails the usable bytes of every image row.
        p += rowBytes;
        for( row = 0; row < height*depth; row++ )
        {
            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
        }
        p -= rowBytes;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }

        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
    } else {
        backingStore = NULL;
        image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
    }
#else
    backingStore = NULL;
    image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
#endif
    return error;
}
// Constructs a protected image of the given imageType by forwarding to Create(). The
// status returned by Create() is stored in *errcode_ret when errcode_ret is not NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
{
    // Create() can return before it assigns these members (its early error returns on the
    // Apple path), and the destructor inspects them, so give them defined values first.
    image = NULL;
    backingStore = NULL;
    backingStoreSize = 0;

    cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
// Creates an image of the requested imageType and stores it in `image`.
//
// Supported types: CL_MEM_OBJECT_IMAGE1D, IMAGE2D, IMAGE3D, IMAGE1D_ARRAY and
// IMAGE2D_ARRAY. Any other type is rejected with CL_INVALID_VALUE (previously the
// function fell through every switch and returned an uninitialized status).
//
// On Apple builds, when no device in the context is a GPU, the image is backed by an
// mmap'ed region (CL_MEM_USE_HOST_PTR): 4 inaccessible rows, the image rows each followed
// by one inaccessible page, and 4 more inaccessible rows. With CL_ALIGN_RIGHT set in the
// environment each row is shifted so its last pixel ends at the guard page. In every other
// case a plain image is created and backingStore is NULL.
//
// Returns the OpenCL status of the failing query or of the image creation;
// CL_OUT_OF_HOST_MEMORY if the backing store cannot be mapped.
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
{
    cl_int error;

    // Validate the type once so the switches below never leave `error`/`image` unset.
    switch (imageType)
    {
        case CL_MEM_OBJECT_IMAGE1D:
        case CL_MEM_OBJECT_IMAGE2D:
        case CL_MEM_OBJECT_IMAGE3D:
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
            break;
        default:
            log_error( "ERROR: unsupported image type 0x%x in clProtectedImage::Create\n", (unsigned int)imageType );
            backingStore = NULL;
            image = NULL;
            return CL_INVALID_VALUE;
    }

#if defined( __APPLE__ )
    int protect_pages = 1;
    cl_device_id devices[16];
    size_t number_of_devices;

    error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");

    // The query reports a byte count; convert it to a device count.
    number_of_devices /= sizeof(cl_device_id);
    for (int i=0; i<(int)number_of_devices; i++) {
        cl_device_type type;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
        test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
        // Guard pages are skipped as soon as one GPU device is found.
        if (type == CL_DEVICE_TYPE_GPU) {
            protect_pages = 0;
            break;
        }
    }

    if (protect_pages) {
        size_t pixelBytes = get_pixel_bytes(fmt);
        size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
        size_t rowStride = rowBytes + kPageSize;

        // create backing store: image rows for the requested type + 8 guard rows
        switch (imageType)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                backingStoreSize = rowStride + 8 * rowStride;
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                backingStoreSize = height * rowStride + 8 * rowStride;
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                backingStoreSize = height * depth * rowStride + 8 * rowStride;
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                backingStoreSize = arraySize * rowStride + 8 * rowStride;
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
                break;
            default:
                break; // not reachable: imageType was validated above
        }
        backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if( backingStore == MAP_FAILED )
        {
            // Never keep MAP_FAILED around: the destructor would pass it to munmap.
            log_error( "ERROR: mmap failed in clProtectedImage::Create.\n" );
            backingStore = NULL;
            backingStoreSize = 0;
            image = NULL;
            return CL_OUT_OF_HOST_MEMORY;
        }

        // add guard pages
        size_t row;
        char *p = (char*) backingStore;
        char *imagePtr = (char*) backingStore + 4 * rowStride;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }
        // Protect the page that trails the usable bytes of every image row.
        // Dimensions passed as 0 count as 1 when computing the number of rows.
        p += rowBytes;
        size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
        for( row = 0; row < sz; row++ )
        {
            mprotect( p, kPageSize, PROT_NONE );    p += rowStride;
        }
        p -= rowBytes;
        for( row = 0; row < 4; row++ )
        {
            mprotect( p, rowStride, PROT_NONE );    p += rowStride;
        }

        if( getenv( "CL_ALIGN_RIGHT" ) )
        {
            static int spewEnv = 1;
            if(spewEnv)
            {
                log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
                spewEnv = 0;
            }
            imagePtr += rowBytes - pixelBytes * width;
        }

        switch (imageType)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
                break;
            default:
                break; // not reachable: imageType was validated above
        }
    } else {
        backingStore = NULL;
        switch (imageType)
        {
            case CL_MEM_OBJECT_IMAGE1D:
                image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
                break;
            case CL_MEM_OBJECT_IMAGE2D:
                image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
                break;
            case CL_MEM_OBJECT_IMAGE3D:
                image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
                break;
            case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
                break;
            case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
                break;
            default:
                break; // not reachable: imageType was validated above
        }
    }
#else
    backingStore = NULL;
    switch (imageType)
    {
        case CL_MEM_OBJECT_IMAGE1D:
            image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE2D:
            image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE3D:
            image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE1D_ARRAY:
            image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
            break;
        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
            image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
            break;
        default:
            break; // not reachable: imageType was validated above
    }
#endif
    return error;
}
/*******
* clProtectedArray implementation
*******/
// Default constructor: starts with no buffer; both pointers are NULL until Allocate() is called.
clProtectedArray::clProtectedArray()
    : mBuffer( NULL ), mValidBuffer( NULL )
{
}
// Sized constructor: clears both pointers, then immediately allocates sizeInBytes via Allocate().
clProtectedArray::clProtectedArray( size_t sizeInBytes )
    : mBuffer( NULL ), mValidBuffer( NULL )
{
    Allocate( sizeInBytes );
}
// Releases the allocation made by Allocate(), if any: munmap on Apple builds (a failure is
// only logged), free() everywhere else.
clProtectedArray::~clProtectedArray()
{
    if( mBuffer == NULL )
        return;

#if defined( __APPLE__ )
    if( munmap( mBuffer, mRealSize ) )
        log_error("WARNING: munmap failed in clProtectedArray.\n");
#else
    free( mBuffer );
#endif
}
// Allocates the buffer exposed through the void * conversion operators.
//
// Apple builds: the size is rounded up to whole pages and one inaccessible guard page is
// placed directly before and directly after the usable region, so accesses just outside
// the buffer fault. If the mapping cannot be created an error is logged and the object is
// left empty (both pointers NULL).
// Other builds: a zero-initialised calloc block of exactly sizeInBytes, no guard pages;
// the pointers are NULL if calloc fails.
//
// Note: a buffer from an earlier Allocate() call is not released here.
void clProtectedArray::Allocate( size_t sizeInBytes )
{
#if defined( __APPLE__ )
    // Allocate enough space to: round up our actual allocation to an even number of pages
    // and allocate one guard page on either side
    mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
    mRealSize = mRoundedSize + kPageSize * 2;

    // Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
    void *mapping = mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
    if( mapping == MAP_FAILED )
    {
        // Never store MAP_FAILED: the destructor would hand it to munmap.
        log_error( "ERROR: mmap failed in clProtectedArray::Allocate.\n" );
        mBuffer = mValidBuffer = NULL;
        mRealSize = mRoundedSize = 0;
        return;
    }
    mBuffer = (char *)mapping;
    mValidBuffer = mBuffer + kPageSize;

    // Protect guard area from access
    mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
    mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
#else
    mRoundedSize = mRealSize = sizeInBytes;
    mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
#endif
}

View File

@@ -0,0 +1,333 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _typeWrappers_h
#define _typeWrappers_h
#include <stdio.h>
#include <stdlib.h>
#if !defined(_WIN32)
#include <sys/mman.h>
#endif
#include "compat.h"
#include <stdio.h>
#include "mt19937.h"
#include "errorHelpers.h"
#include "kernelHelpers.h"
extern "C" cl_uint gReSeed;
extern "C" cl_uint gRandomSeed;
/* cl_context wrapper */
class clContextWrapper
{
public:
clContextWrapper() { mContext = NULL; }
clContextWrapper( cl_context program ) { mContext = program; }
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
operator cl_context() { return mContext; }
cl_context * operator&() { return &mContext; }
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
protected:
cl_context mContext;
};
/* cl_program wrapper */
class clProgramWrapper
{
public:
clProgramWrapper() { mProgram = NULL; }
clProgramWrapper( cl_program program ) { mProgram = program; }
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
operator cl_program() { return mProgram; }
cl_program * operator&() { return &mProgram; }
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
protected:
cl_program mProgram;
};
/* cl_kernel wrapper */
class clKernelWrapper
{
public:
clKernelWrapper() { mKernel = NULL; }
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
operator cl_kernel() { return mKernel; }
cl_kernel * operator&() { return &mKernel; }
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
protected:
cl_kernel mKernel;
};
/* cl_mem (stream) wrapper */
class clMemWrapper
{
public:
clMemWrapper() { mMem = NULL; }
clMemWrapper( cl_mem mem ) { mMem = mem; }
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
operator cl_mem() { return mMem; }
cl_mem * operator&() { return &mMem; }
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
protected:
cl_mem mMem;
};
class clProtectedImage
{
public:
clProtectedImage() { image = NULL; backingStore = NULL; }
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
~clProtectedImage()
{
if( image != NULL )
clReleaseMemObject( image );
#if defined( __APPLE__ )
if(backingStore)
munmap(backingStore, backingStoreSize);
#endif
}
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
operator cl_mem() { return image; }
cl_mem * operator&() { return &image; }
bool operator==( const cl_mem &rhs ) { return image == rhs; }
protected:
void *backingStore;
size_t backingStoreSize;
cl_mem image;
};
/* cl_command_queue wrapper */
class clCommandQueueWrapper
{
public:
clCommandQueueWrapper() { mMem = NULL; }
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
~clCommandQueueWrapper() { if( mMem != NULL ) {int error = clFinish(mMem); if (error) print_error(error, "clFinish failed"); clReleaseCommandQueue( mMem );} }
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
operator cl_command_queue() { return mMem; }
cl_command_queue * operator&() { return &mMem; }
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
protected:
cl_command_queue mMem;
};
/* cl_sampler wrapper */
class clSamplerWrapper
{
public:
clSamplerWrapper() { mMem = NULL; }
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
operator cl_sampler() { return mMem; }
cl_sampler * operator&() { return &mMem; }
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
protected:
cl_sampler mMem;
};
/* cl_event wrapper */
class clEventWrapper
{
public:
clEventWrapper() { mMem = NULL; }
clEventWrapper( cl_event mem ) { mMem = mem; }
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
operator cl_event() { return mMem; }
cl_event * operator&() { return &mMem; }
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
protected:
cl_event mMem;
};
/* Generic host buffer used to verify that accesses stay within bounds.
 * Converts implicitly to a pointer to the usable region (mValidBuffer). */
class clProtectedArray
{
    public:
        clProtectedArray();
        clProtectedArray( size_t sizeInBytes );
        virtual ~clProtectedArray();

        // (Re)points the object at a new allocation of sizeInBytes.
        void Allocate( size_t sizeInBytes );

        operator void *() { return static_cast<void *>( mValidBuffer ); }
        operator const void *() const { return static_cast<const void *>( mValidBuffer ); }

    protected:
        char *mBuffer;        // start of the whole allocation (what the destructor releases)
        char *mValidBuffer;   // start of the region handed out to callers
        size_t mRealSize;     // size of the whole allocation
        size_t mRoundedSize;  // size of the usable region
};
/*
 * Scoped owner of an MTdata random generator state.
 *
 * The constructor logs a non-zero seed and creates the state with
 * init_genrand(). The destructor, when gReSeed is set, stores one more value
 * drawn from the generator into gRandomSeed, and then always frees the state.
 */
class RandomSeed
{
public:
    RandomSeed( cl_uint seed )
    {
        if( seed )
        {
            log_info( "(seed = %10.10u) ", seed );
        }
        mtData = init_genrand( seed );
    }

    ~RandomSeed()
    {
        if( gReSeed )
        {
            gRandomSeed = genrand_int32( mtData );
        }
        free_mtdata( mtData );
    }

    // Lets the object be passed directly where an MTdata is expected.
    operator MTdata() { return mtData; }

protected:
    MTdata mtData;
};
/*
 * Non-copyable owner of a host buffer, viewed as T*.
 *
 * Two ownership modes are tracked:
 *   - map != 0: the storage is a mapping of mapsize bytes that is released
 *     with munmap(). This path exists only on __APPLE__; elsewhere a mapped
 *     buffer handed to the three-argument constructor or to reset() aborts.
 *   - map == 0: ptr is released with align_free() when `aligned` is set and
 *     with free() otherwise.
 */
template <typename T> class BufferOwningPtr
{
    BufferOwningPtr(BufferOwningPtr const &); // do not implement
    void operator=(BufferOwningPtr const &);  // do not implement

    void *ptr;
    void *map;
    size_t mapsize;   // Bytes allocated total, pointed to by map.
    size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
    bool aligned;

public:
    explicit BufferOwningPtr(void *p = 0)
        : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false)
    {
    }

    explicit BufferOwningPtr(void *p, void *m, size_t s)
        : ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
    {
#if ! defined( __APPLE__ )
        if (m)
        {
            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
            abort();
        }
#endif
    }

    ~BufferOwningPtr()
    {
        if (!map)
        {
            // Plain heap storage: pick the matching deallocator.
            if (aligned)
                align_free(ptr);
            else
                free(ptr);
            return;
        }
#if defined( __APPLE__ )
        if (munmap(map, mapsize))
            log_error("WARNING: munmap failed in BufferOwningPtr.\n");
#endif
    }

    /* Releases the currently owned storage, then takes ownership of the new one. */
    void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false)
    {
        if (!map)
        {
            if (aligned)
                align_free(ptr);
            else
                free(ptr);
        }
        else
        {
#if defined( __APPLE__ )
            if (munmap(map, mapsize))
                log_error("WARNING: munmap failed in BufferOwningPtr.\n");
#else
            log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
            abort();
#endif
        }

        ptr = p;
        map = m;
        mapsize = mapsize_;
        allocsize = allocsize_;
        aligned = aligned_;

#if ! defined( __APPLE__ )
        if (m)
        {
            log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
            abort();
        }
#endif
    }

    operator T*() { return static_cast<T *>(ptr); }

    size_t getSize() const { return allocsize; }
};
#endif // _typeWrappers_h

View File

@@ -0,0 +1,2 @@
# Descend into the two test suites that live below this directory.
add_subdirectory(api)
add_subdirectory(basic)

View File

@@ -0,0 +1,34 @@
# Build description for the COMPATIBILITY_API test module.
set(MODULE_NAME COMPATIBILITY_API)

# ${MODULE_NAME}_SOURCES: the module's own test sources followed by the shared
# harness sources, which are listed here by relative path.
set(${MODULE_NAME}_SOURCES
main.c
test_bool.c
test_retain.cpp
test_retain_program.c
test_queries.cpp
test_create_kernels.c
test_kernels.c
test_api_min_max.c
test_kernel_arg_changes.cpp
test_kernel_arg_multi_setup.cpp
test_binary.cpp
test_native_kernel.cpp
test_mem_objects.cpp
test_create_context_from_type.cpp
test_device_min_data_type_align_size_alignment.cpp
test_platform.cpp
test_mem_object_info.cpp
test_null_buffer_arg.c
test_kernel_arg_info.c
../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c
../../test_common/harness/testHarness.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/typeWrappers.cpp
../../test_common/harness/conversions.c
../../test_common/harness/mt19937.c
../../test_common/harness/msvc9.c
../../test_common/harness/imageHelpers.cpp
)

# Shared build logic. NOTE(review): presumably consumes MODULE_NAME and
# ${MODULE_NAME}_SOURCES to define the target -- confirm in CMakeCommon.txt.
include(../../../CMakeCommon.txt)

View File

@@ -0,0 +1,27 @@
# Boost.Build description of the test_api executable.
#
# Every source, including the .c files, is compiled as C++ (-xc++ for gcc,
# /TP for msvc).
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# main.c registers tests from every file below. The list previously omitted
# test_bool.c, test_device_min_data_type_align_size_alignment.cpp,
# test_kernel_arg_info.c, test_mem_object_info.cpp, test_mem_objects.cpp,
# test_null_buffer_arg.c and test_retain.cpp, which the CMake and Makefile
# builds of this directory both compile.
# NOTE(review): the shared harness sources are not listed here; presumably
# they are supplied by an enclosing project -- confirm.
exe test_api
    : main.c
      test_api_min_max.c
      test_binary.cpp
      test_bool.c
      test_create_kernels.c
      test_create_context_from_type.cpp
      test_device_min_data_type_align_size_alignment.cpp
      test_kernel_arg_changes.cpp
      test_kernel_arg_info.c
      test_kernel_arg_multi_setup.cpp
      test_kernels.c
      test_mem_object_info.cpp
      test_mem_objects.cpp
      test_native_kernel.cpp
      test_null_buffer_arg.c
      test_queries.cpp
      test_retain.cpp
      test_retain_program.c
      test_platform.cpp
    ;

install dist
    : test_api #test.lst
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/api
      <variant>release:<location>$(DIST)/release/tests/test_conformance/api
    ;

View File

@@ -0,0 +1,61 @@
# Makefile for the test_api executable (links against the OpenCL, OpenGL, GLUT
# and AppKit frameworks).

# Optional ATF support: defining BUILD_WITH_ATF links the ATF framework and
# adds -DUSE_ATF to the compile flags.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Test sources of this directory followed by the shared harness sources.
SRCS = main.c \
test_retain_program.c \
test_queries.cpp \
test_create_kernels.c \
test_kernels.c \
test_kernel_arg_info.c \
test_api_min_max.c \
test_kernel_arg_changes.cpp \
test_kernel_arg_multi_setup.cpp \
test_binary.cpp \
test_native_kernel.cpp \
test_create_context_from_type.cpp \
test_platform.cpp \
test_retain.cpp \
test_device_min_data_type_align_size_alignment.cpp \
test_mem_objects.cpp \
test_bool.c \
test_null_buffer_arg.c \
test_mem_object_info.cpp \
../../test_common/harness/errorHelpers.c \
../../test_common/harness/threadTesting.c \
../../test_common/harness/testHarness.c \
../../test_common/harness/imageHelpers.cpp \
../../test_common/harness/kernelHelpers.c \
../../test_common/harness/typeWrappers.cpp \
../../test_common/harness/mt19937.c \
../../test_common/harness/conversions.c
# Each word of DEFINES is turned into a -D option in CFLAGS/CXXFLAGS below.
DEFINES = DONT_TEST_GARBAGE_POINTERS
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
HEADERS =
TARGET = test_api
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# CC is the C++ driver, so the .c sources are built with it as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Object names: first map .c to .o, then .cpp to .o.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =
all: $(TARGET)
$(TARGET): $(OBJECTS)
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
clean:
rm -f $(TARGET) $(OBJECTS)
# Fallback for any goal that has no rule: print a message.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,214 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include "procs.h"
#include "../../test_common/harness/testHarness.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
// FIXME: ../../test_common/harness/imageHelpers.h requires every test
// executable that uses certain of its functions (for example,
// generate_random_image_data()) to define the following two globals.
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
bool gTestRounding = false;
// Table of test entry points handed to runTestHarness() in main().
// Entry i is paired with basefn_names[i], so both tables must be kept in the
// same order; a ct_assert below checks only that their lengths match.
basefn basefn_list[] = {
test_get_platform_info,
test_get_sampler_info,
test_get_command_queue_info,
test_get_context_info,
test_get_device_info,
test_enqueue_task,
test_binary_get,
test_program_binary_create,
test_kernel_required_group_size,
test_release_kernel_order,
test_release_during_execute,
test_load_single_kernel,
test_load_two_kernels,
test_load_two_kernels_in_one,
test_load_two_kernels_manually,
test_get_program_info_kernel_names,
test_get_kernel_arg_info,
test_create_kernels_in_program,
test_get_kernel_info,
test_execute_kernel_local_sizes,
test_set_kernel_arg_by_index,
test_set_kernel_arg_constant,
test_set_kernel_arg_struct_array,
test_kernel_global_constant,
test_min_max_thread_dimensions,
test_min_max_work_items_sizes,
test_min_max_work_group_size,
test_min_max_read_image_args,
test_min_max_write_image_args,
test_min_max_mem_alloc_size,
test_min_max_image_2d_width,
test_min_max_image_2d_height,
test_min_max_image_3d_width,
test_min_max_image_3d_height,
test_min_max_image_3d_depth,
test_min_max_image_array_size,
test_min_max_image_buffer_size,
test_min_max_parameter_size,
test_min_max_samplers,
test_min_max_constant_buffer_size,
test_min_max_constant_args,
test_min_max_compute_units,
test_min_max_address_bits,
test_min_max_single_fp_config,
test_min_max_double_fp_config,
test_min_max_local_mem_size,
test_min_max_kernel_preferred_work_group_size_multiple,
test_min_max_execution_capabilities,
test_min_max_queue_properties,
test_min_max_device_version,
test_min_max_language_version,
test_kernel_arg_changes,
test_kernel_arg_multi_setup_random,
test_native_kernel,
test_create_context_from_type,
test_platform_extensions,
test_get_platform_ids,
test_for_bool_type,
test_repeated_setup_cleanup,
test_retain_queue_single,
test_retain_queue_multiple,
test_retain_mem_object_single,
test_retain_mem_object_multiple,
test_min_data_type_align_size_alignment,
test_mem_object_destructor_callback,
test_null_buffer_arg,
test_get_buffer_info,
test_get_image2d_info,
test_get_image3d_info,
test_get_image1d_info,
test_get_image1d_array_info,
test_get_image2d_array_info,
};
// Test names handed to runTestHarness() in main(), one per entry of
// basefn_list and in the same order. Most names are the function name without
// its "test_" prefix; "binary_create" (test_program_binary_create) and
// "bool_type" (test_for_bool_type) are the exceptions.
const char *basefn_names[] = {
"get_platform_info",
"get_sampler_info",
"get_command_queue_info",
"get_context_info",
"get_device_info",
"enqueue_task",
"binary_get",
"binary_create",
"kernel_required_group_size",
"release_kernel_order",
"release_during_execute",
"load_single_kernel",
"load_two_kernels",
"load_two_kernels_in_one",
"load_two_kernels_manually",
"get_program_info_kernel_names",
"get_kernel_arg_info",
"create_kernels_in_program",
"get_kernel_info",
"execute_kernel_local_sizes",
"set_kernel_arg_by_index",
"set_kernel_arg_constant",
"set_kernel_arg_struct_array",
"kernel_global_constant",
"min_max_thread_dimensions",
"min_max_work_items_sizes",
"min_max_work_group_size",
"min_max_read_image_args",
"min_max_write_image_args",
"min_max_mem_alloc_size",
"min_max_image_2d_width",
"min_max_image_2d_height",
"min_max_image_3d_width",
"min_max_image_3d_height",
"min_max_image_3d_depth",
"min_max_image_array_size",
"min_max_image_buffer_size",
"min_max_parameter_size",
"min_max_samplers",
"min_max_constant_buffer_size",
"min_max_constant_args",
"min_max_compute_units",
"min_max_address_bits",
"min_max_single_fp_config",
"min_max_double_fp_config",
"min_max_local_mem_size",
"min_max_kernel_preferred_work_group_size_multiple",
"min_max_execution_capabilities",
"min_max_queue_properties",
"min_max_device_version",
"min_max_language_version",
"kernel_arg_changes",
"kernel_arg_multi_setup_random",
"native_kernel",
"create_context_from_type",
"platform_extensions",
"get_platform_ids",
"bool_type",
"repeated_setup_cleanup",
"retain_queue_single",
"retain_queue_multiple",
"retain_mem_object_single",
"retain_mem_object_multiple",
"min_data_type_align_size_alignment",
"mem_object_destructor_callback",
"null_buffer_arg",
"get_buffer_info",
"get_image2d_info",
"get_image3d_info",
"get_image1d_info",
"get_image1d_array_info",
"get_image2d_array_info",
};
// Compile-time check that the function table and the name table have the same
// number of entries.
ct_assert((sizeof(basefn_list) / sizeof(basefn_list[0])) == (sizeof(basefn_names) / sizeof(basefn_names[0])));

// Number of registered tests, derived from the name table.
int num_fns = sizeof(basefn_names) / sizeof(basefn_names[0]);
// Program entry point: hands the command line and the test tables to the
// shared harness and returns the harness result as the exit status.
int main(int argc, const char *argv[])
{
    // NOTE(review): the meaning of the trailing (false, false, 0) arguments is
    // defined by runTestHarness in testHarness.h -- confirm there.
    const int harnessResult = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
    return harnessResult;
}

View File

@@ -0,0 +1,108 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/clImageHelper.h"
#include "../../test_common/harness/imageHelpers.h"
// Prototypes shared by the API test sources. Apart from calculate_ulperror,
// every function below has the same shape: it takes a device, a context, a
// command queue and an element count, and returns an int.
// NOTE(review): test_set_kernel_arg_struct is declared here but is not
// registered in basefn_list in main.c -- confirm whether that is intentional.
extern float calculate_ulperror(float a, float b);
extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );

View File

@@ -0,0 +1,31 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef _testBase_h
#define _testBase_h
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#endif // _testBase_h

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,226 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
// OpenCL C source shared by the binary tests in this file. The adjacent string
// literals concatenate into a single source string (one array element): each
// work-item converts src[tid] to int, adds one and stores it in dst[tid].
static const char *sample_binary_kernel_source[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid] + 1;\n"
"\n"
"}\n" };
/*
 * Builds sample_binary_kernel_source for deviceID and retrieves the resulting
 * device binary through clGetProgramInfo.
 *
 * Checks that the reported binary size is non-zero and that the size reported
 * for CL_PROGRAM_BINARIES equals the size of a one-entry pointer array. The
 * contents of the binary are not validated.
 *
 * queue and num_elements are unused. Returns 0 on success and a non-zero
 * value on failure.
 */
int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    size_t binarySize;

    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer for the actual binary. The owning pointer frees it on
    // every exit path, including the early returns taken by test_error.
    BufferOwningPtr<unsigned char> binaryOwner( malloc( sizeof(unsigned char)*binarySize ) );
    unsigned char *binary = binaryOwner;
    if( binary == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the program binary\n", (int)binarySize );
        return -1;
    }
    unsigned char *buffers[ 1 ] = { binary };

    // Do another sanity check here first
    size_t size;
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
    test_error( error, "Unable to get expected size of binaries array" );
    if( size != sizeof( buffers ) )
    {
        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
        return -1;
    }

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );
    test_error( error, "Unable to get program binary" );

    // No way to verify the binary is correct, so just be good with that
    return 0;
}
/*
 * Round-trips a program through its device binary.
 *
 * Steps:
 *   1. Build sample_binary_kernel_source and fetch its binary.
 *   2. Create and build a second program from that binary (with a
 *      binary_status array) and fetch its binary.
 *   3. Create and build a third program from the second binary with a NULL
 *      binary_status pointer and fetch its binary.
 *   4. Run "sample_test" from the source program and from the first binary
 *      program on the same input and require identical output.
 *
 * All host allocations are held by BufferOwningPtr so they are freed on every
 * exit path, including the early returns taken by test_error.
 *
 * num_elements is unused. Returns 0 on success and a non-zero value on
 * failure.
 */
int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    /* To test this in a self-contained fashion, we have to create a program with
       source, then get the binary, then use that binary to reload the program, and then verify */
    int error;
    clProgramWrapper program, program_from_binary;
    size_t binarySize;

    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer and get the actual binary
    BufferOwningPtr<unsigned char> binaryOwner( malloc( binarySize ) );
    unsigned char *binary = binaryOwner;
    if( binary == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the program binary\n", (int)binarySize );
        return -1;
    }
    const unsigned char *buffers[ 1 ] = { binary };

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );
    test_error( error, "Unable to get program binary" );

    cl_int loadErrors[ 1 ];
    program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
    test_error( error, "Unable to load valid program binary" );
    test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );

    error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program" );

    // Get the size of the binary built from the first binary
    size_t binary2Size;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARY_SIZES, sizeof( binary2Size ), &binary2Size, NULL );
    test_error( error, "Unable to get size for the binary program" );

    // Now get the binary one more time
    BufferOwningPtr<unsigned char> binary2Owner( malloc( binary2Size ) );
    unsigned char *binary2 = binary2Owner;
    if( binary2 == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the second program binary\n", (int)binary2Size );
        return -1;
    }
    buffers[ 0 ] = binary2;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );
    test_error( error, "Unable to get program binary second time" );

    // Try again, this time without passing the status ptr in, to make sure we still
    // get a valid binary
    clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binary2Size, buffers, NULL, &error );
    test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );

    error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program created without binary_status" );

    // Get the size of the binary created without passing binary_status
    size_t binary3Size;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARY_SIZES, sizeof( binary3Size ), &binary3Size, NULL );
    test_error( error, "Unable to get size for the binary program created without binary_status" );

    // Now get the binary one more time
    BufferOwningPtr<unsigned char> binary3Owner( malloc( binary3Size ) );
    unsigned char *binary3 = binary3Owner;
    if( binary3 == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the third program binary\n", (int)binary3Size );
        return -1;
    }
    buffers[ 0 ] = binary3;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), buffers, NULL );
    test_error( error, "Unable to get program binary from the program created without binary_status" );

    // Now execute the source program and the first binary program to see that
    // they both do the same thing.
    clMemWrapper in, out, out_binary;
    clKernelWrapper kernel, kernel_binary;
    size_t size_to_run = 1000;

    // Allocate some data
    BufferOwningPtr<cl_float> inOwner( malloc( sizeof(cl_float)*size_to_run ) );
    BufferOwningPtr<cl_int> outOwner( malloc( sizeof(cl_int)*size_to_run ) );
    BufferOwningPtr<cl_int> outBinaryOwner( malloc( sizeof(cl_int)*size_to_run ) );
    cl_float *in_data = inOwner;
    cl_int *out_data = outOwner;
    cl_int *out_data_binary = outBinaryOwner;
    if( in_data == NULL || out_data == NULL || out_data_binary == NULL )
    {
        log_error( "ERROR: Unable to allocate host buffers for the kernel comparison\n" );
        return -1;
    }
    memset(out_data, 0, sizeof(cl_int)*size_to_run);
    memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
    for (size_t i=0; i<size_to_run; i++)
        in_data[i] = (cl_float)i;

    // Create the buffers
    in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
    test_error( error, "clCreateBuffer failed");
    out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
    test_error( error, "clCreateBuffer failed");
    out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
    test_error( error, "clCreateBuffer failed");

    // Create the kernels
    kernel = clCreateKernel(program, "sample_test", &error);
    test_error( error, "clCreateKernel failed");
    kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
    test_error( error, "clCreateKernel from binary failed");

    // Set the arguments
    error = clSetKernelArg(kernel, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel, 1, sizeof(out), &out);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
    test_error( error, "clSetKernelArg failed");

    // Execute the kernels
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed");
    error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");

    // Finish up
    error = clFinish(queue);
    test_error( error, "clFinish failed");

    // Get the results back
    error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");
    error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");

    // Compare the results
    if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
    {
        log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
        return -1;
    }

    // All done! Host buffers are released by their owners.
    return 0;
}

View File

@@ -0,0 +1,52 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
// OpenCL C source for test_for_bool_type. The adjacent string literals
// concatenate into a single source string (one array element). The kernel
// declares a local variable of type bool and branches on it.
const char *kernel_with_bool[] = {
"__kernel void kernel_with_bool(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" bool myBool = (src[tid] < 0.5f) && (src[tid] > -0.5f);\n"
" if(myBool)\n"
" {\n"
" dst[tid] = (int)src[tid];\n"
" }\n"
" else\n"
" {\n"
" dst[tid] = 0;\n"
" }\n"
"\n"
"}\n"
};
/*
 * Checks that a kernel using the OpenCL C `bool` type can be turned into a
 * program and kernel by create_single_kernel_helper. The kernel is never
 * executed.
 *
 * deviceID, queue and num_elements are unused. Returns the helper's result.
 */
int test_for_bool_type(cl_device_id deviceID, cl_context context,
                       cl_command_queue queue, int num_elements)
{
    cl_program program = NULL;
    cl_kernel kernel = NULL;

    int err = create_single_kernel_helper( context,
                                           &program,
                                           &kernel,
                                           1, kernel_with_bool,
                                           "kernel_with_bool" );

    // Release the objects the helper created; previously both were leaked.
    // Only done on success because the state of the out-parameters after a
    // failed helper call is not visible from here.
    // NOTE(review): assumes a zero return means both handles are valid -- confirm.
    if( err == 0 )
    {
        clReleaseKernel( kernel );
        clReleaseProgram( program );
    }

    return err;
}

View File

@@ -0,0 +1,130 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
/*
 * Creates a new context with clCreateContextFromType, using the device type
 * and platform reported for deviceID, then proves the context is usable by
 * creating a queue, building a float->int conversion kernel, running it over
 * ten random values and comparing against the host-side conversion.
 *
 * The `context`, `queue` and `num_elements` arguments belong to the common
 * test signature and are not used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    clContextWrapper context_to_test;
    clCommandQueueWrapper queue_to_test;
    size_t threads[1], localThreads[1];
    cl_float inputData[10];
    cl_int outputData[10];
    int i;
    RandomSeed seed( gRandomSeed );

    const char *sample_single_test_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };

    /* Query the type and platform of the device under test */
    cl_device_type type;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");

    cl_platform_id platform;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");

    /* The property list is an array of integers terminated by 0; NULL is a
       pointer constant and must not be used as the terminator. */
    cl_context_properties properties[3] = {
        (cl_context_properties)CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };

    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
    test_error(error, "clCreateContextFromType failed");
    if (context_to_test == NULL) {
        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    /* Queue properties are a bitfield, so "no properties" is 0, not NULL */
    queue_to_test = clCreateCommandQueue(context_to_test, deviceID, 0, &error);
    test_error(error, "clCreateCommandQueue failed");
    if (queue_to_test == NULL) {
        log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Write some test data */
    memset( outputData, 0, sizeof( outputData ) );

    for (i=0; i<10; i++)
        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);

    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
    test_error( error, "Unable to set testing kernel data" );

    /* Test setting the arguments by index manually (deliberately out of order) */
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    /* The device result must match the host-side float -> int conversion */
    for (i=0; i<10; i++)
    {
        if (outputData[i] != (int)inputData[i])
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }

    return 0;
}

View File

@@ -0,0 +1,643 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
// One source string containing a single kernel, sample_test, which converts
// each float in src to an int in dst.
const char *sample_single_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Length of each string in sample_single_kernel; filled in by the test with
// strlen() before the program is created.
size_t sample_single_kernel_lengths[1];
// Two separate source strings, one kernel each: sample_test (float -> int)
// and sample_test2 (int -> float).
const char *sample_two_kernels[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Length of each string in sample_two_kernels; filled in by the test.
size_t sample_two_kernel_lengths[2];
// The same two kernels as sample_two_kernels, but concatenated into a single
// source string (note: no comma between the two kernel bodies).
const char *sample_two_kernels_in_1[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Length of the single string in sample_two_kernels_in_1; filled in by the test.
size_t sample_two_kernels_in_1_lengths[1];
// Kernel used by test_repeated_setup_cleanup: writes src[i] + 1 to dst[i].
const char *repeate_test_kernel =
"__kernel void test_kernel(__global int *src, __global int *dst)\n"
"{\n"
" dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
"}\n";
/*
 * Builds a program that contains exactly one kernel, obtains that kernel via
 * clCreateKernelsInProgram and validates what clGetKernelInfo reports for it:
 * the owning program, the owning context, the argument count and the function
 * name, including the sizes returned for each query.
 *
 * `queue` and `num_elements` belong to the common test signature and are not
 * used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    cl_program testProgram;
    clKernelWrapper kernel;
    cl_context testContext;
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;
    size_t realSize;

    /* Preprocess: calc the length of each source file line */
    sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] );

    /* Create a program */
    program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create single kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build single kernel program" );

    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
    test_error( error, "Unable to create single kernel program" );

    /* The source defines one kernel, so exactly one must be reported
       (the multi-kernel tests perform the equivalent check). */
    if( numKernels != 1 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check program and context pointers */
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &testProgram, &realSize );
    test_error( error, "Unable to get kernel's program" );
    if( (cl_program)testProgram != (cl_program)program )
    {
        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)testProgram, (cl_program)program );
        return -1;
    }
    if( realSize != sizeof( cl_program ) )
    {
        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &testContext, &realSize );
    test_error( error, "Unable to get kernel's context" );
    if( (cl_context)testContext != (cl_context)context )
    {
        log_error( "ERROR: Returned kernel's context does not match context used to create it! (Got %p, expected %p)\n", (cl_context)testContext, (cl_context)context );
        return -1;
    }
    if( realSize != sizeof( cl_context ) )
    {
        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)realSize );
        return -1;
    }

    /* Test arg count: first query only the size, then the value itself */
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &realSize );
    test_error( error, "Unable to get size of arg count info from kernel" );
    if( realSize != sizeof( testArgCount ) )
    {
        log_error( "ERROR: size of arg count not valid! %d\n", (int)realSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: Kernel arg count does not match!\n" );
        return -1;
    }

    /* Test function name */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, &realSize );
    test_error( error, "Unable to get name from kernel" );
    if( strcmp( (char *)testName, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel names do not match!\n" );
        return -1;
    }
    /* The reported size must count the terminating NUL */
    if( realSize != strlen( (char *)testName ) + 1 )
    {
        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)testName ) + 1, (int)realSize );
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds a program from two separate source strings (one kernel each),
 * creates both kernels with clCreateKernelsInProgram and checks that the two
 * returned kernels are "sample_test" and "sample_test2" in either order, each
 * exactly once. Also checks the argument count of the first returned kernel.
 *
 * `queue` and `num_elements` belong to the common test signature and are not
 * used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[ 512 ];
    cl_uint testArgCount;
    int found_kernel1 = 0, found_kernel2 = 0;

    /* Preprocess: calc the length of each source file line */
    sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] );
    sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program!" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel: it may be either of the two names */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );
    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    /* Check second kernel: it must be the name not seen yet */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from second kernel" );
    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Report the kernel that is actually missing (was "sample_test") */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    /* Both kernels take two arguments, so this holds for either ordering */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds a program from a single source string that defines two kernels,
 * creates both with clCreateKernelsInProgram and checks that the returned
 * kernels are "sample_test" and "sample_test2" in either order, each exactly
 * once. Also checks the argument count of the first returned kernel.
 *
 * `queue` and `num_elements` belong to the common test signature and are not
 * used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;
    int found_kernel1 = 0, found_kernel2 = 0;

    /* Preprocess: calc the length of each source file line */
    sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );

    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel: it may be either of the two names */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );
    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    /* Both kernels take two arguments, so this holds for either ordering */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* Check second kernel: it must be the name not seen yet */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );
    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", (char *)testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Report the kernel that is actually missing (was "sample_test") */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    /* All done */
    return 0;
}
/*
 * Builds the two-kernels-in-one-string program and creates each kernel by
 * name with clCreateKernel, requesting them in the opposite order from the
 * one in which they appear in the source.
 *
 * `queue` and `num_elements` belong to the common test signature and are not
 * used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;

    /* Now create a test program (NULL lengths: the source is NUL-terminated) */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Try manually creating kernels (backwards just in case) */
    kernel1 = clCreateKernel( program, "sample_test2", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    /* Check the status code as well as the handle, as for kernel 1 */
    kernel2 = clCreateKernel( program, "sample_test", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
/*
 * Builds the two-kernels-in-one-string program and validates the program
 * queries CL_PROGRAM_NUM_KERNELS and CL_PROGRAM_KERNEL_NAMES:
 *   - the kernel count must be 2;
 *   - the reported name-list size must equal the expected string length + 1;
 *   - the returned list must be NUL-terminated and equal to
 *     "sample_test;sample_test2" or "sample_test2;sample_test".
 * Finally both kernels are created by name.
 *
 * `queue` and `num_elements` belong to the common test signature and are not
 * used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;
    size_t i;

    /* Either ordering of the two names is accepted; both have the same length. */
    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
    const size_t num_actual_names = sizeof( actual_names ) / sizeof( actual_names[0] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Lookup the number of kernels in the program. */
    size_t total_kernels = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL);
    test_error( error, "Unable to get program info num kernels");

    if (total_kernels != 2)
    {
        /* The query itself succeeded, so report the value, not the status */
        log_error( "ERROR: Program did not contain two kernels (got %d)\n", (int)total_kernels );
        return -1;
    }

    /* Lookup the kernel names: first the required size, then the data. */
    size_t kernel_names_len = 0;
    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len);
    test_error( error, "Unable to get length of kernel names list." );

    if (kernel_names_len != (strlen(actual_names[0])+1))
    {
        log_error( "ERROR: Kernel names length did not match (expected %d, got %d)\n",
                   (int)(strlen(actual_names[0])+1), (int)kernel_names_len );
        return -1;
    }

    const size_t len = (kernel_names_len+1)*sizeof(char);
    char* kernel_names = (char*)malloc(len);
    if (kernel_names == NULL)
    {
        log_error( "ERROR: Unable to allocate %d bytes for the kernel names list\n", (int)len );
        return -1;
    }

    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len);
    if (error != CL_SUCCESS)
    {
        /* test_error below returns immediately, so release the buffer first */
        free(kernel_names);
    }
    test_error( error, "Unable to get kernel names list." );

    /* Check to see if the kernel name array is null terminated. The size is
       validated first so the index below can neither underflow nor overrun. */
    if (kernel_names_len == 0 || kernel_names_len > len || kernel_names[kernel_names_len-1] != '\0')
    {
        free(kernel_names);
        log_error( "ERROR: Kernel name list was not null terminated\n" );
        return -1;
    }

    /* Check to see if the correct kernel name string was returned. */
    for( i = 0; i < num_actual_names; i++ )
        if( 0 == strcmp(actual_names[i],kernel_names) )
            break;

    if (i == num_actual_names)
    {
        /* Log before freeing: the message reads kernel_names */
        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
        for( i = 0; i < num_actual_names; i++ )
            log_error( "\t\t\"%s\"\n", actual_names[i] );
        free(kernel_names);
        return -1;
    }
    free(kernel_names);

    /* Try manually creating both kernels by name */
    kernel1 = clCreateKernel( program, "sample_test", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    kernel2 = clCreateKernel( program, "sample_test2", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
// Kernel used by test_enqueue_task. Every work-item writes tid + i into
// dst[i] for i in [0, count), so a work-item with tid == 0 leaves dst[i] == i.
static const char *single_task_kernel[] = {
"__kernel void sample_test(__global int *dst, int count)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" for( int i = 0; i < count; i++ )\n"
" dst[i] = tid + i;\n"
"\n"
"}\n" };
/*
 * Runs single_task_kernel through clEnqueueTask and verifies that the output
 * buffer holds 0, 1, 2, ... count-1, i.e. the values produced by one
 * work-item whose global id is 0.
 *
 * `deviceID` and `num_elements` belong to the common test signature and are
 * not used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    /* Number of elements written by the task. Being a compile-time constant
       lets the host-side result buffer live on the stack, so no error path
       can leak it (the heap buffer used previously was unchecked and leaked
       when the read-back failed). */
    enum { kTaskElementCount = 100 };

    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper output;
    cl_int count = kTaskElementCount;
    cl_int results[ kTaskElementCount ];

    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
        return -1;

    // Create args
    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
    test_error( error, "Unable to set kernel argument" );

    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
    test_error( error, "Unable to set kernel argument" );

    // Run task
    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
    test_error( error, "Unable to run task" );

    // Read results (blocking read, so the task has completed afterwards)
    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Validate
    for( cl_int i = 0; i < count; i++ )
    {
        if( results[ i ] != i )
        {
            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
            return -1;
        }
    }

    /* All done */
    return 0;
}
#define TEST_SIZE 1000
int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
cl_context local_context;
cl_command_queue local_queue;
cl_program local_program;
cl_kernel local_kernel;
cl_mem local_mem_in, local_mem_out;
cl_event local_event;
size_t global_dim[3];
int i, j, error;
global_dim[0] = TEST_SIZE;
global_dim[1] = 1; global_dim[2] = 1;
cl_int *inData, *outData;
cl_int status;
inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
for (i=0; i<TEST_SIZE; i++) {
inData[i] = i;
}
for (i=0; i<100; i++) {
memset(outData, 0, sizeof(cl_int)*TEST_SIZE);
local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
test_error( error, "clCreateContext failed");
local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
test_error( error, "clCreateCommandQueue failed");
local_program = clCreateProgramWithSource(local_context, 1, &repeate_test_kernel, NULL, &error);
test_error( error, "clCreateProgramWithSource failed");
error = clBuildProgram(local_program, 0, NULL, NULL, NULL, NULL);
test_error( error, "clBuildProgram failed");
local_kernel = clCreateKernel(local_program, "test_kernel", &error);
test_error( error, "clCreateKernel failed");
local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
test_error( error, "clCreateBuffer failed");
local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
test_error( error, "clCreateBuffer failed");
error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
test_error( error, "clSetKernelArg failed");
error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
test_error( error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
test_error( error, "clEnqueueNDRangeKernel failed");
error = clWaitForEvents(1, &local_event);
test_error( error, "clWaitForEvents failed");
error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
test_error( error, "clGetEventInfo failed");
if (status != CL_COMPLETE) {
log_error( "Kernel execution not complete: status %d.\n", status);
free(inData);
free(outData);
return -1;
}
error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
test_error( error, "clEnqueueReadBuffer failed");
clReleaseEvent(local_event);
clReleaseMemObject(local_mem_in);
clReleaseMemObject(local_mem_out);
clReleaseKernel(local_kernel);
clReleaseProgram(local_program);
clReleaseCommandQueue(local_queue);
clReleaseContext(local_context);
for (j=0; j<TEST_SIZE; j++) {
if (outData[j] != inData[j] + 1) {
log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
free(inData);
free(outData);
return -1;
}
}
}
free(inData);
free(outData);
return 0;
}

View File

@@ -0,0 +1,60 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/testHarness.h"
#ifndef _WIN32
#include <unistd.h>
#endif
/*
 * Returns non-zero when x is an exact power of two (1, 2, 4, ...), and 0
 * otherwise. Zero is not a power of two: the bit trick x & (x - 1) alone
 * yields 0 for x == 0, so that case is rejected explicitly.
 */
int IsAPowerOfTwo( unsigned long x )
{
    return x != 0 && 0 == (x & (x-1));
}
/*
 * Checks the value the device reports for CL_DEVICE_MEM_BASE_ADDR_ALIGN
 * (logged here in bits): it must be at least the size of a long16 (or of an
 * int16 when gHasLong is false) and it must be a power of two.
 *
 * `context`, `queue` and `n_elems` belong to the common test signature and
 * are not used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    /* Size in bytes of the largest 16-element vector type being considered */
    cl_uint min_alignment;
    if (gHasLong)
        min_alignment = sizeof(cl_long)*16;
    else
        min_alignment = sizeof(cl_int)*16;

    int error = 0;
    cl_uint alignment;

    error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed");
    log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment);

    // Verify the size is large enough (min_alignment is in bytes, hence *8)
    if (alignment < min_alignment*8) {
        log_error("ERROR: alignment too small. Minimum alignment for %s16 is %lu bits, device reported %lu bits.\n",
                  (gHasLong) ? "long" : "int",
                  (unsigned long)(min_alignment*8), (unsigned long)alignment);
        return -1;
    }

    // Verify the size is a power of two
    if (!IsAPowerOfTwo((unsigned long)alignment)) {
        log_error("ERROR: alignment is not a power of two.\n");
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,141 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
extern "C" { extern cl_uint gRandomSeed;}
// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus
// potentially overlapping) to make sure each kernel gets the right arguments
// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes
// Kernel source: after an empty 100000-iteration loop, each work-item writes
// get_image_width(src) * tid and get_image_height(src) * tid into
// outDimensions[tid * 2] and outDimensions[tid * 2 + 1] respectively.
const char *inspect_image_kernel_source[] = {
"__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n"
"{\n"
" int tid = get_global_id(0), i;\n"
" for( i = 0; i < 100000; i++ ); \n"
" outDimensions[tid * 2] = get_image_width(src) * tid;\n"
" outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n"
"\n"
"}\n" };
#define NUM_TRIES 100
#define NUM_THREADS 2048

/*
 * Enqueues the same kernel NUM_TRIES times back to back, changing both of its
 * arguments (a differently sized image and a distinct result buffer) before
 * each enqueue, and then checks that every result buffer reflects the
 * dimensions of the image that was set for that particular enqueue.
 *
 * `num_elements` belongs to the common test signature and is not used.
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error, i;
    clMemWrapper images[ NUM_TRIES ];
    size_t sizes[ NUM_TRIES ][ 2 ];
    clMemWrapper results[ NUM_TRIES ];
    cl_image_format imageFormat;
    size_t maxWidth, maxHeight;
    size_t threads[1], localThreads[1];
    cl_int resultArray[ NUM_THREADS * 2 ];
    char errStr[ 128 ];
    RandomSeed seed( gRandomSeed );

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Just get any ol format to test with
    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat );
    test_error( error, "Unable to obtain suitable image format to test with!" );

    // Create our testing kernel
    error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
    test_error( error, "Unable to create testing kernel" );

    // Get max dimensions for each of our images
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    test_error( error, "Unable to get max image dimensions for device" );

    // Get the number of threads we'll be able to run
    threads[0] = NUM_THREADS;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size for kernel" );

    // Create a variety of images and output arrays
    for( i = 0; i < NUM_TRIES; i++ )
    {
        // Random dimensions in [1, max/32]
        sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
        sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;

        images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
                                       &imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
        // Check the status code as well as the handle
        if( images[i] == NULL || error != CL_SUCCESS )
        {
            log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
            return -1;
        }

        results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * threads[0] * 2, NULL, &error );
        if( results[i] == NULL || error != CL_SUCCESS )
        {
            log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);
            return -1;
        }
    }

    // Start setting arguments and executing kernels. Nothing waits between
    // enqueues, so each enqueue must keep the arguments that were set for it.
    for( i = 0; i < NUM_TRIES; i++ )
    {
        // Set the arguments for this try
        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] );
        sprintf( errStr, "Unable to set argument 0 for kernel try %d", i );
        test_error( error, errStr );

        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] );
        sprintf( errStr, "Unable to set argument 1 for kernel try %d", i );
        test_error( error, errStr );

        // Queue up execution
        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
        sprintf( errStr, "Unable to execute kernel try %d", i );
        test_error( error, errStr );
    }

    // Read the results back out, one at a time, and verify
    for( i = 0; i < NUM_TRIES; i++ )
    {
        error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL );
        sprintf( errStr, "Unable to read results for kernel try %d", i );
        test_error( error, errStr );

        // Verify. Each entry should be n * the (width/height) of image i
        for( int j = 0; j < NUM_THREADS; j++ )
        {
            if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j )
            {
                log_error( "ERROR: Verification for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
                           i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] );
                return -1;
            }
            if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j )
            {
                log_error( "ERROR: Verification for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
                           i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] );
                return -1;
            }
        }
    }

    // If we got here, everything verified successfully
    return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,277 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/conversions.h"
// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all
// printf-style template for the test kernel. It contains six %s placeholders,
// one per kernel argument type, in the order src1, src2, src3, dst1, dst2,
// dst3. The generated kernel copies srcN[tid] to dstN[tid] for N = 1..3.
const char *multi_arg_kernel_source_pattern =
"__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst1[tid] = src1[tid];\n"
" dst2[tid] = src2[tid];\n"
" dst3[tid] = src3[tid];\n"
"}\n";
extern cl_uint gRandomSeed;
#define MAX_ERROR_TOLERANCE 0.0005f
int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
ExplicitType vec1Type, int vec1Size,
ExplicitType vec2Type, int vec2Size,
ExplicitType vec3Type, int vec3Size, MTdata d)
{
clProgramWrapper program;
clKernelWrapper kernel;
int error, i, j;
clMemWrapper streams[ 6 ];
size_t threads[1], localThreads[1];
char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ];
char sizeNames[][ 4 ] = { "", "2", "3", "4", "", "", "", "8" };
const char *ptr;
void *initData[3], *resultData[3];
// Create the program source
sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] );
sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] );
sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] );
sprintf( programSrc, multi_arg_kernel_source_pattern,
vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name,
vec1Size, vec1Size, vec2Size, vec2Size, vec3Size, vec3Size );
ptr = programSrc;
// Create our testing kernel
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" );
test_error( error, "Unable to create testing kernel" );
// Get thread dimensions
threads[0] = 1024;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size for kernel" );
// Create input streams
initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
test_error( error, "Unable to create testing stream" );
initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
test_error( error, "Unable to create testing stream" );
streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
test_error( error, "Unable to create testing stream" );
// Set the arguments
error = 0;
for( i = 0; i < 6; i++ )
error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] );
test_error( error, "Unable to set arguments for kernel" );
// Execute!
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to execute kernel" );
// Read results
resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] );
resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] );
resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] );
error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL );
error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL );
test_error( error, "Unable to read result stream" );
// Verify
char *ptr1 = (char *)initData[ 0 ], *ptr2 = (char *)resultData[ 0 ];
size_t span = get_explicit_type_size( vec1Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec1Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 0!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec1Size;
ptr2 += span * vec1Size;
}
ptr1 = (char *)initData[ 1 ];
ptr2 = (char *)resultData[ 1 ];
span = get_explicit_type_size( vec2Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec2Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 1!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec2Size;
ptr2 += span * vec2Size;
}
ptr1 = (char *)initData[ 2 ];
ptr2 = (char *)resultData[ 2 ];
span = get_explicit_type_size( vec3Type );
for( i = 0; i < (int)threads[0]; i++ )
{
for( j = 0; j < vec3Size; j++ )
{
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
{
log_error( "ERROR: Value did not validate for component %d of item %d of stream 2!\n", j, i );
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return -1;
}
}
ptr1 += span * vec3Size;
ptr2 += span * vec3Size;
}
// If we got here, everything verified successfully
free( initData[ 0 ] );
free( initData[ 1 ] );
free( initData[ 2 ] );
free( resultData[ 0 ] );
free( resultData[ 1 ] );
free( resultData[ 2 ] );
return 0;
}
// Runs test_multi_arg_set for every ordered triple of element types drawn from
// { char, short, int, float } combined with every ordered triple of vector widths
// drawn from { 2, 4, 8 }. Stops and returns -1 at the first failing combination;
// returns 0 when all combinations pass.
int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    const ExplicitType typeList[] = { kChar, kShort, kInt, kFloat };
    const size_t typeCount = sizeof( typeList ) / sizeof( typeList[ 0 ] );
    const int widthList[] = { 2, 4, 8 };
    const size_t widthCount = sizeof( widthList ) / sizeof( widthList[ 0 ] );

    RandomSeed seed( gRandomSeed );

    log_info( "\n" ); // for formatting

    for( size_t t1 = 0; t1 < typeCount; t1++ )
    for( size_t t2 = 0; t2 < typeCount; t2++ )
    for( size_t t3 = 0; t3 < typeCount; t3++ )
    {
        log_info( "\n\ttesting %s, %s, %s...", get_explicit_type_name( typeList[ t1 ] ), get_explicit_type_name( typeList[ t2 ] ), get_explicit_type_name( typeList[ t3 ] ) );

        // Every combination of vector widths for this type triple
        for( size_t w1 = 0; w1 < widthCount; w1++ )
        for( size_t w2 = 0; w2 < widthCount; w2++ )
        for( size_t w3 = 0; w3 < widthCount; w3++ )
        {
            // One progress dot per configuration
            log_info(".");
            fflush( stdout);

            const int failed = test_multi_arg_set( device, context, queue,
                                                   typeList[ t1 ], widthList[ w1 ],
                                                   typeList[ t2 ], widthList[ w2 ],
                                                   typeList[ t3 ], widthList[ w3 ], seed );
            if( failed )
                return -1;
        }
    }

    log_info( "\n" );
    return 0;
}
int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
// Loop through a selection of combinations
ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes };
int type1, type2, type3;
int size1, size2, size3;
RandomSeed seed( gRandomSeed );
num_elements = 3*3*3*4;
log_info( "Testing %d random configurations\n", num_elements );
// Loop through every combination of vector size
for( size1 = 2; size1 <= 8; size1 <<= 1 )
{
for( size2 = 2; size2 <= 8; size2 <<= 1 )
{
for( size3 = 2; size3 <= 8; size3 <<= 1 )
{
// Loop through 4 type combinations for each size combination
int n;
for (n=0; n<4; n++) {
type1 = (int)get_random_float(0,4, seed);
type2 = (int)get_random_float(0,4, seed);
type3 = (int)get_random_float(0,4, seed);
log_info( "\ttesting %s%d, %s%d, %s%d...\n",
get_explicit_type_name( types[ type1 ] ), size1,
get_explicit_type_name( types[ type2 ] ), size2,
get_explicit_type_name( types[ type3 ] ), size3 );
if( test_multi_arg_set( device, context, queue,
types[ type1 ], size1,
types[ type2 ], size2,
types[ type3 ], size3, seed ) )
return -1;
}
}
}
}
return 0;
}

View File

@@ -0,0 +1,704 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
// Single-kernel program: each work-item converts src[tid] from float to int and stores it in dst[tid].
const char *sample_single_test_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
// Program whose kernel receives a struct holding two __global int pointers (A and B)
// and writes A[tid] + B[tid] to dst[tid].
const char *sample_struct_test_kernel[] = {
"typedef struct {\n"
"__global int *A;\n"
"__global int *B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src->A[tid] + src->B[tid];\n"
"\n"
"}\n" };
// Program whose kernel reads an array of { int A; int B; } structs and writes
// src[tid].A + src[tid].B to dst[tid].
const char *sample_struct_array_test_kernel[] = {
"typedef struct {\n"
"int A;\n"
"int B;\n"
"} input_pair_t;\n"
"\n"
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid].A + src[tid].B;\n"
"\n"
"}\n" };
// Program whose kernel takes two __constant int inputs and writes their element-wise sum to dst.
const char *sample_const_test_kernel[] = {
"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + src2[tid];\n"
"\n"
"}\n" };
// Program with a program-scope __constant (addFactor = 1024); the kernel writes
// src1[tid] + addFactor to dst[tid].
const char *sample_const_global_test_kernel[] = {
"__constant int addFactor = 1024;\n"
"__kernel void sample_test(__global int *src1, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src1[tid] + addFactor;\n"
"\n"
"}\n" };
// Two source strings forming one program with two kernels:
// sample_test converts float to int, sample_test2 converts int to float.
const char *sample_two_kernel_program[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n",
"__kernel void sample_test2(__global int *src, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (float)src[tid];\n"
"\n"
"}\n" };
// Checks the values and reported sizes returned by clGetKernelInfo for a freshly created
// kernel: function name, argument count, reference count (size only), owning program and
// owning context. Returns 0 on success, non-zero on the first failed check.
//
// The program and kernel are held in wrapper objects so they are released on every exit
// path, including the early returns performed by test_error.
int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_program testProgram;
    cl_context testContext;
    cl_char name[ 512 ];
    cl_uint numArgs, numInstances;
    size_t paramSize;

    /* Create reference */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }
    // Raw handle used for comparison and for printing with %p
    const cl_program expectedProgram = program;

    /* Function name: size query (param_value_size = 0, param_value = NULL), then value */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel function name param size" );
    if( paramSize != strlen( "sample_test" ) + 1 )
    {
        log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
    test_error( error, "Unable to get kernel function name" );
    if( strcmp( (char *)name, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
        return -1;
    }

    /* Argument count */
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel arg count param size" );
    if( paramSize != sizeof( numArgs ) )
    {
        log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
    test_error( error, "Unable to get kernel arg count" );
    if( numArgs != 2 )
    {
        log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, numArgs );
        return -1;
    }

    /* Reference count: only the size and the success of the query are checked */
    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel reference count param size" );
    if( paramSize != sizeof( numInstances ) )
    {
        log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
    test_error( error, "Unable to get kernel reference count" );

    /* Owning program */
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel program param size" );
    if( paramSize != sizeof( testProgram ) )
    {
        log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
    test_error( error, "Unable to get kernel program" );
    if( testProgram != expectedProgram )
    {
        log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", expectedProgram, testProgram );
        return -1;
    }

    /* Owning context */
    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
    test_error( error, "Unable to get kernel context" );
    if( testContext != context )
    {
        log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", context, testContext );
        return -1;
    }

    /* Kernel and program are released by their wrappers */
    return 0;
}
int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[2];
size_t threads[1], localThreads[1];
cl_float inputData[100];
cl_int outputData[100];
RandomSeed seed( gRandomSeed );
int i;
/* Create a kernel to test with */
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
{
return -1;
}
/* Create some I/O streams */
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error);
test_error( error, "Creating test array failed" );
/* Write some test data */
memset( outputData, 0, sizeof( outputData ) );
for (i=0; i<100; i++)
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
test_error( error, "Unable to set testing kernel data" );
/* Set the arguments */
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
test_error( error, "Unable to set kernel arguments" );
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
test_error( error, "Unable to set kernel arguments" );
/* Test running the kernel and verifying it */
threads[0] = (size_t)100;
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
test_error( error, "Unable to get work group size to use" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* Try again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* And again */
if( localThreads[0] > 1 )
localThreads[0] /= 2;
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
localThreads[0]--;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
/* One more time */
localThreads[0] = (unsigned int)1;
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Kernel execution failed" );
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
test_error( error, "Unable to get result data" );
for (i=0; i<100; i++)
{
if (outputData[i] != (int)inputData[i])
{
log_error( "ERROR: Data did not verify on first pass!\n" );
return -1;
}
}
return 0;
}
// Sets the two kernel arguments in reverse index order (1 first, then 0), runs the
// float-to-int sample kernel over 10 work-items and verifies the output against the
// host-side conversion. Returns 0 on success, non-zero on failure.
int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kItemCount = 10 };

    int status;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[2];      // [0] = float input, [1] = int output
    size_t globalSize[1], groupSize[1];
    cl_float hostInput[kItemCount];
    cl_int hostOutput[kItemCount];
    RandomSeed seed( gRandomSeed );
    int idx;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) )
        return -1;

    /* Device buffers for input and output */
    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * kItemCount, NULL, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * kItemCount, NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Fill the input with random floats and upload it */
    memset( hostOutput, 0, sizeof( hostOutput ) );
    idx = 0;
    while( idx < kItemCount )
    {
        hostInput[idx] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
        idx++;
    }
    status = clEnqueueWriteBuffer( queue, buffers[0], CL_TRUE, 0, sizeof(cl_float) * kItemCount, (void *)hostInput, 0, NULL, NULL );
    test_error( status, "Unable to set testing kernel data" );

    /* Arguments are deliberately set out of order: index 1 before index 0 */
    status = clSetKernelArg( kernel, 1, sizeof( buffers[1] ), &buffers[1] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 0, sizeof( buffers[0] ), &buffers[0] );
    test_error( status, "Unable to set indexed kernel arguments" );

    /* Run and read back */
    globalSize[0] = (size_t)kItemCount;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &groupSize[0] );
    test_error( status, "Unable to get work group size to use" );

    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, groupSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    status = clEnqueueReadBuffer( queue, buffers[1], CL_TRUE, 0, sizeof(cl_int) * kItemCount, (void *)hostOutput, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );

    /* Compare against the host-side conversion */
    for( idx = 0; idx < kItemCount; ++idx )
    {
        if( (int)hostInput[idx] == hostOutput[idx] )
            continue;
        log_error( "ERROR: Data did not verify on first pass!\n" );
        return -1;
    }
    return 0;
}
// Passes a host struct containing two buffer handles as kernel argument 0 and an output
// buffer as argument 1, runs the struct sample kernel over 10 work-items and checks that
// each output element equals the sum of the two inputs. Returns 0 on success, non-zero
// on failure.
//
// All OpenCL objects are held in wrapper objects so they are released on every exit
// path, including the early returns performed by test_error.
//
// NOTE(review): argument 0 is declared as a __global pointer in the kernel source, yet it
// is set here from a struct of two cl_mem handles with size sizeof( image_pair ). Confirm
// against the clSetKernelArg specification that this is a valid use.
int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streamA, streamB, outStream;
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    cl_int randomTestDataA[10], randomTestDataB[10];
    MTdata d;
    struct img_pair_t
    {
        cl_mem streamA;
        cl_mem streamB;
    } image_pair;

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        randomTestDataA[i] = (cl_int)genrand_int32(d);
        randomTestDataB[i] = (cl_int)genrand_int32(d);
    }
    free_mtdata(d); d = NULL;

    streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
    test_error( error, "Creating test array failed" );
    streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
    test_error( error, "Creating test array failed" );
    outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* The struct only borrows the handles; ownership stays with the wrappers */
    image_pair.streamA = streamA;
    image_pair.streamB = streamB;

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &outStream);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for( i = 0; i < 10; i++ )
    {
        // The inputs span the full 32-bit range, so the expected sum is formed with
        // unsigned arithmetic to avoid signed overflow on the host.
        const cl_int expected = (cl_int)( (cl_uint)randomTestDataA[i] + (cl_uint)randomTestDataB[i] );
        if( outputData[i] != expected )
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    return 0;
}
// Feeds two __constant int buffers to the constant sample kernel, runs it over 10
// work-items and checks that every output element is the sum of the matching inputs.
// Fails up front when the device reports a maximum constant buffer size smaller than
// one input buffer. Returns 0 on success, non-zero on failure.
int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kItemCount = 10 };

    int status;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[3];   // [0], [1] = constant inputs, [2] = output
    size_t globalSize[1], groupSize[1];
    cl_int results[kItemCount];
    int idx;
    cl_int inputA[kItemCount], inputB[kItemCount];
    cl_ulong constantLimit;
    MTdata rng;

    /* The inputs must fit in the device's constant buffer */
    status = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( constantLimit ), &constantLimit, NULL );
    test_error( status, "Unable to get max constant buffer size" );
    if( constantLimit < sizeof( cl_int ) * kItemCount )
    {
        log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)constantLimit );
        return -1;
    }

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) )
        return -1;

    /* Random inputs, masked to 24 bits so the sums stay positive and cannot overflow */
    rng = init_genrand( gRandomSeed );
    for( idx = 0; idx < kItemCount; ++idx )
    {
        inputA[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
        inputB[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
    }
    free_mtdata( rng );
    rng = NULL;

    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * kItemCount, inputA, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * kItemCount, inputB, &status );
    test_error( status, "Creating test array failed" );
    buffers[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * kItemCount, NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Bind the three buffers to arguments 0..2 */
    status = clSetKernelArg( kernel, 0, sizeof( buffers[0] ), &buffers[0] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 1, sizeof( buffers[1] ), &buffers[1] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 2, sizeof( buffers[2] ), &buffers[2] );
    test_error( status, "Unable to set indexed kernel arguments" );

    /* Run and read back */
    globalSize[0] = (size_t)kItemCount;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &groupSize[0] );
    test_error( status, "Unable to get work group size to use" );

    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, groupSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    status = clEnqueueReadBuffer( queue, buffers[2], CL_TRUE, 0, sizeof(cl_int) * kItemCount, (void *)results, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );

    /* Verify */
    for( idx = 0; idx < kItemCount; ++idx )
    {
        if( results[idx] == inputA[idx] + inputB[idx] )
            continue;
        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", idx, results[idx], inputA[idx], inputB[idx], ( inputA[idx] + inputB[idx] ) );
        return -1;
    }
    return 0;
}
// Uploads an array of 10 { A, B } integer pairs, runs the struct-array sample kernel and
// checks that each output element equals A + B of the matching pair. Returns 0 on
// success, non-zero on failure.
int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    MTdata d;
    // Host mirror of the kernel's input_pair_t (two 32-bit ints)
    typedef struct img_pair_type
    {
        cl_int A;
        cl_int B;
    } image_pair_t;
    image_pair_t image_pair[ 10 ];

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        // Both members are initialised. The values are masked to 24 bits, as
        // test_set_kernel_arg_constant does, so the sum cannot overflow on the host.
        image_pair[i].A = (cl_int)genrand_int32(d) & 0xffffff;
        image_pair[i].B = (cl_int)genrand_int32(d) & 0xffffff;
    }
    free_mtdata(d); d = NULL;

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for( i = 0; i < 10; i++ )
    {
        if( outputData[i] != image_pair[i].A + image_pair[i].B )
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    return 0;
}
// Builds a program from two source strings that each define one kernel, then checks
// clCreateKernelsInProgram twice: once as a count query (expecting 2) and once to
// actually create both kernels. Returns 0 on success, non-zero on failure.
//
// The program is held in a wrapper object so it is released on every exit path,
// including the early returns performed by test_error.
int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    cl_kernel kernel[2];
    cl_uint kernelCount;

    /* Create a test program */
    program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
    if( (cl_program)program == NULL || error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to create test program!\n" );
        return -1;
    }

    /* Build */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    /* Try getting the kernel count */
    error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
    test_error( error, "Unable to get kernel count for built program" );
    if( kernelCount != 2 )
    {
        log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", (int)kernelCount );
        return -1;
    }

    /* Try actually getting the kernels */
    error = clCreateKernelsInProgram( program, 2, kernel, NULL );
    test_error( error, "Unable to get kernels for built program" );

    clReleaseKernel( kernel[0] );
    clReleaseKernel( kernel[1] );

    /* The program is released by its wrapper */
    return 0;
}
// Runs the sample kernel that adds a program-scope __constant (1024) to every input
// element, over 10 work-items, and checks each output against input + 1024.
// Returns 0 on success, non-zero on failure.
int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kItemCount = 10 };

    int status;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper buffers[2];   // [0] = input, [1] = output
    size_t globalSize[1], groupSize[1];
    cl_int results[kItemCount];
    int idx;
    cl_int inputValues[kItemCount];
    MTdata rng;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) )
        return -1;

    /* Random inputs, masked to 16 bits so input + 1024 stays positive and small */
    rng = init_genrand( gRandomSeed );
    idx = 0;
    while( idx < kItemCount )
    {
        inputValues[idx] = (cl_int)genrand_int32(rng) & 0xffff;
        idx++;
    }
    free_mtdata( rng );
    rng = NULL;

    buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * kItemCount, inputValues, &status );
    test_error( status, "Creating test array failed" );
    buffers[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * kItemCount, NULL, &status );
    test_error( status, "Creating test array failed" );

    /* Bind the buffers */
    status = clSetKernelArg( kernel, 0, sizeof( buffers[0] ), &buffers[0] );
    test_error( status, "Unable to set indexed kernel arguments" );
    status = clSetKernelArg( kernel, 1, sizeof( buffers[1] ), &buffers[1] );
    test_error( status, "Unable to set indexed kernel arguments" );

    /* Run and read back */
    globalSize[0] = (size_t)kItemCount;
    status = get_max_common_work_group_size( context, kernel, globalSize[0], &groupSize[0] );
    test_error( status, "Unable to get work group size to use" );

    status = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, groupSize, 0, NULL, NULL );
    test_error( status, "Kernel execution failed" );

    status = clEnqueueReadBuffer( queue, buffers[1], CL_TRUE, 0, sizeof(cl_int) * kItemCount, (void *)results, 0, NULL, NULL );
    test_error( status, "Unable to get result data" );

    /* Verify */
    for( idx = 0; idx < kItemCount; ++idx )
    {
        if( results[idx] == inputValues[idx] + 1024 )
            continue;
        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", idx, results[idx], inputValues[idx], ( inputValues[idx] + 1024 ) );
        return -1;
    }
    return 0;
}

View File

@@ -0,0 +1,750 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/testHarness.h"
extern cl_uint gRandomSeed;
// Queries one clGetMemObjectInfo parameter into `val` and validates both the value and
// the size reported for it.
//   mem       - memory object to query
//   paramName - cl_mem_info selector passed to clGetMemObjectInfo
//   val       - lvalue that receives the result; its sizeof is the expected size
//   expected  - value `val` must compare equal to
//   name      - string literal naming the parameter in log messages
//   type      - string literal holding the printf conversion used to print the values
//   cast      - type `val` is cast to when printed
// Requires assignable variables `error` and `size` in the enclosing scope. On a mismatch
// the macro logs and returns -1 from the enclosing function; test_error is also invoked
// on the query result.
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get mem object " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type " from %s:%d)\n", \
expected, (cast)val, __FILE__, __LINE__ ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d from %s:%d)\n", \
(int)sizeof( val ), (int)size , __FILE__, __LINE__ ); \
return -1; \
}
// Destructor callback registered on mem objects created from a malloc'ed host allocation:
// releases that allocation. The mem object handle itself is not inspected.
static void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* memObject */, void * data )
{
    void * const hostAllocation = data;
    free( hostAllocation );
}
// Draws a random image dimension in the range [1, mod - 1].
//   d   - pointer to the random generator state to draw from
//   mod - exclusive upper bound; values are reduced modulo this number
// Zero results are rejected and redrawn, so mod must be at least 2
// (mod == 1 would never terminate, mod == 0 would divide by zero).
static unsigned int
get_image_dim(MTdata *d, unsigned int mod)
{
    for (;;)
    {
        const unsigned int candidate = (unsigned int)genrand_int32(*d) % mod;
        if (candidate != 0)
            return candidate;
    }
}
// Validates clGetMemObjectInfo results for buffers and for sub-buffers created from them.
//
// For every combination in bufferFlags a buffer of 4 * addressAlign bytes is created and its
// type, flags, size, host pointer (CL_MEM_USE_HOST_PTR only), context, associated mem object
// and offset are checked. Map count and reference count are only checked for the returned size.
// For each such buffer, every compatible entry of subBufferFlags is used to create a sub-buffer
// covering [addressAlign, 2 * addressAlign) and the same queries are validated against the
// values expected for the sub-buffer.
//
//   deviceID     - device whose CL_DEVICE_MEM_BASE_ADDR_ALIGN determines the sub-buffer origin
//   context      - context the buffers are created in
//   ignoreQueue  - unused
//   num_elements - unused
// Returns CL_SUCCESS when every query validates; otherwise the failing check returns from this
// function (TEST_MEM_OBJECT_PARAM and the explicit size checks return -1).
int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    int error;
    size_t size;
    void * buffer = NULL;

    clMemWrapper bufferObject;
    clMemWrapper subBufferObject;

    cl_mem_flags bufferFlags[] = {
        CL_MEM_READ_WRITE,
        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_READ_ONLY,
        CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_WRITE_ONLY,
        CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
    };

    // A zero entry (or a zero device-access part) means "inherit from the parent buffer".
    cl_mem_flags subBufferFlags[] = {
        CL_MEM_READ_WRITE,
        CL_MEM_READ_ONLY,
        CL_MEM_WRITE_ONLY,
        0,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_READ_ONLY | 0,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
        CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_WRITE_ONLY | 0,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
        CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
        CL_MEM_HOST_NO_ACCESS | 0,
    };

    // Get the address alignment, so we can make sure the sub-buffer test later works properly.
    cl_uint addressAlignBits;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(addressAlignBits), &addressAlignBits, NULL );
    // Without this check a failed query would leave addressAlignBits uninitialised.
    test_error( error, "Unable to get device base address alignment" );

    // The device reports the alignment in bits; work in bytes and never go below 128 bytes.
    size_t addressAlign = addressAlignBits/8;
    if ( addressAlign < 128 )
    {
        addressAlign = 128;
    }

    for ( unsigned int i = 0; i < sizeof(bufferFlags) / sizeof(cl_mem_flags); ++i )
    {
        //printf("@@@ bufferFlags[%u]=0x%x\n", i, bufferFlags[ i ]);
        if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                // No destructor callback is registered yet, so release the host memory here.
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_USE_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );

            void * ptr;
            TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_HOST_PTR, ptr, buffer, "host pointer", "%p", void * )
        }
        else if ( (bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR) && (bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR) )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );
        }
        else if ( bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR )
        {
            // Create a buffer object to test against.
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
            test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR) to test with" );
        }
        else if ( bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR )
        {
            // Create a buffer object to test against.
            buffer = malloc( addressAlign * 4 );
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
            if ( error )
            {
                free( buffer );
                test_error( error, "Unable to create buffer (CL_MEM_COPY_HOST_PTR) to test with" );
            }

            // Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
            error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
            test_error( error, "Unable to set mem object destructor callback" );
        }
        else
        {
            // Create a buffer object to test against.
            bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
            test_error( error, "Unable to create buffer to test with" );
        }

        // Perform buffer object queries.
        cl_mem_object_type type;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )

        cl_mem_flags flags;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_FLAGS, flags, (unsigned int)bufferFlags[ i ], "flags", "%d", unsigned int )

        size_t sz;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign * 4 ), "size", "%ld", size_t )

        // Map count: only the returned size is validated, not the value.
        cl_uint mapCount;
        error = clGetMemObjectInfo( bufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
        test_error( error, "Unable to get mem object map count" );
        if( size != sizeof( mapCount ) )
        {
            log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                      (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
            return -1;
        }

        // Reference count: only the returned size is validated, not the value.
        cl_uint refCount;
        error = clGetMemObjectInfo( bufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
        test_error( error, "Unable to get mem object reference count" );
        if( size != sizeof( refCount ) )
        {
            log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                      (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
            return -1;
        }

        cl_context otherCtx;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

        cl_mem origObj;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (void *)NULL, "associated mem object", "%p", void * )

        size_t offset;
        TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )

        // The sub-buffer covers the second addressAlign-sized quarter of the parent buffer.
        cl_buffer_region region;
        region.origin = addressAlign;
        region.size = addressAlign;

        // Loop over possible sub-buffer objects to create.
        for ( unsigned int j = 0; j < sizeof(subBufferFlags) / sizeof(cl_mem_flags); ++j )
        {
            if ( subBufferFlags[ j ] & CL_MEM_READ_WRITE )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) )
                    continue; // Buffer must be read_write for sub-buffer to be read_write.
            }
            if ( subBufferFlags[ j ] & CL_MEM_READ_ONLY )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_READ_ONLY) )
                    continue; // Buffer must be read_write or read_only for sub-buffer to be read_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_WRITE_ONLY )
            {
                if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_WRITE_ONLY) )
                    continue; // Buffer must be read_write or write_only for sub-buffer to be write_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_HOST_READ_ONLY )
            {
                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_WRITE_ONLY) )
                    continue; // Buffer must be host all access or host read_only for sub-buffer to be host read_only
            }
            if ( subBufferFlags[ j ] & CL_MEM_HOST_WRITE_ONLY )
            {
                if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_READ_ONLY) )
                    continue; // Buffer must be host all access or host write_only for sub-buffer to be host write_only
            }
            //printf("@@@ bufferFlags[%u]=0x%x subBufferFlags[%u]=0x%x\n", i, bufferFlags[ i ], j, subBufferFlags[ j ]);

            subBufferObject = clCreateSubBuffer( bufferObject, subBufferFlags[ j ], CL_BUFFER_CREATE_TYPE_REGION, &region, &error );
            test_error( error, "Unable to create sub-buffer to test against" );

            // Perform sub-buffer object queries.
            cl_mem_object_type type;
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )

            // Expected flags: the requested ones, plus whatever is inherited from the parent
            // (device access and host access only when the sub-buffer did not specify its own;
            // the host-pointer flags always).
            cl_mem_flags flags;
            cl_mem_flags inheritedFlags = subBufferFlags[ j ];
            if ( (subBufferFlags[ j ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) == 0 )
            {
                inheritedFlags |= bufferFlags[ i ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
            }
            inheritedFlags |= bufferFlags[ i ] & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR);
            if ( (subBufferFlags[ j ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0)
            {
                inheritedFlags |= bufferFlags[ i ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS);
            }
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_FLAGS, flags, (unsigned int)inheritedFlags, "flags", "%d", unsigned int )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign ), "size", "%ld", size_t )

            if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
            {
                // The sub-buffer's host pointer must be the parent's pointer advanced by the region origin.
                void * ptr;
                void * offsetInBuffer = (char *)buffer + addressAlign;

                TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_HOST_PTR, ptr, offsetInBuffer, "host pointer", "%p", void * )
            }

            cl_uint mapCount;
            error = clGetMemObjectInfo( subBufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
            test_error( error, "Unable to get mem object map count" );
            if( size != sizeof( mapCount ) )
            {
                log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                          (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
                return -1;
            }

            cl_uint refCount;
            error = clGetMemObjectInfo( subBufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
            test_error( error, "Unable to get mem object reference count" );
            if( size != sizeof( refCount ) )
            {
                log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                          (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
                return -1;
            }

            cl_context otherCtx;
            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * )

            TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t )

            // Released by hand; the wrapper is then cleared before it is reused.
            clReleaseMemObject( subBufferObject );
            subBufferObject = NULL;
        }

        clReleaseMemObject( bufferObject );
        bufferObject = NULL;
    }

    return CL_SUCCESS;
}
// Validates the clGetMemObjectInfo queries that are common to every image object.
//   image       - pointer to the image mem object to query
//   objectFlags - flags the image was created with; CL_MEM_FLAGS must report exactly these
//   imageInfo   - descriptor the image was created from; its image_type is the expected CL_MEM_TYPE
//   imageFormat - unused
//   pixelSize   - unused
//   context     - context the image was created in; CL_MEM_CONTEXT must report it
// CL_MEM_SIZE is only required to be queryable, and map count / reference count are only checked
// for the returned size. Returns CL_SUCCESS when everything validates; failing checks return
// from this function (TEST_MEM_OBJECT_PARAM and the explicit size checks return -1).
int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_image_desc *imageInfo, cl_image_format *imageFormat, size_t pixelSize, cl_context context )
{
    int error;
    size_t size;
    cl_mem_object_type type;
    cl_mem_flags flags;
    cl_uint mapCount;
    cl_uint refCount;
    cl_context otherCtx;
    size_t offset;
    size_t sz;

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_TYPE, type, imageInfo->image_type, "type", "%d", int )

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_FLAGS, flags, (unsigned int)objectFlags, "flags", "%d", unsigned int )

    error = clGetMemObjectInfo( *image, CL_MEM_SIZE, sizeof( sz ), &sz, NULL );
    test_error( error, "Unable to get mem size" );

    // The size returned is not constrained by the spec.

    error = clGetMemObjectInfo( *image, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
    test_error( error, "Unable to get mem object map count" );
    if( size != sizeof( mapCount ) )
    {
        log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                  (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    error = clGetMemObjectInfo( *image, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get mem object reference count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                  (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )

    return CL_SUCCESS;
}
int test_get_image_info( cl_device_id deviceID, cl_context context, cl_mem_object_type type )
{
int error;
size_t size;
void * image = NULL;
cl_mem imageObject;
cl_image_desc imageInfo;
cl_mem_flags imageFlags[] = {
CL_MEM_READ_WRITE,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_WRITE_ONLY,
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
};
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
cl_image_format imageFormat;
size_t pixelSize = 4;
imageFormat.image_channel_order = CL_RGBA;
imageFormat.image_channel_data_type = CL_UNORM_INT8;
imageInfo.image_width = imageInfo.image_height = imageInfo.image_depth = 1;
imageInfo.image_array_size = 0;
imageInfo.num_mip_levels = imageInfo.num_samples = 0;
imageInfo.buffer = NULL;
d = init_genrand( gRandomSeed );
for ( unsigned int i = 0; i < sizeof(imageFlags) / sizeof(cl_mem_flags); ++i )
{
imageInfo.image_row_pitch = 0;
imageInfo.image_slice_pitch = 0;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D;
break;
case CL_MEM_OBJECT_IMAGE2D:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_height = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D;
break;
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_width = get_image_dim(&d, 127);
imageInfo.image_height = get_image_dim(&d, 127);
imageInfo.image_depth = get_image_dim(&d, 127);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE3D;
break;
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
imageInfo.image_width = get_image_dim(&d, 1023);
imageInfo.image_array_size = get_image_dim(&d, 1023);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
break;
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
imageInfo.image_width = get_image_dim(&d, 255);
imageInfo.image_height = get_image_dim(&d, 255);
imageInfo.image_array_size = get_image_dim(&d, 255);
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
break;
}
if ( imageFlags[i] & CL_MEM_USE_HOST_PTR )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_USE_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
void * ptr;
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
// release image object
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image2d (CL_MEM_USE_HOST_PTR) to test with" );
}
// Make sure image2d is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( (imageFlags[i] & CL_MEM_ALLOC_HOST_PTR) && (imageFlags[i] & CL_MEM_COPY_HOST_PTR) )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[ i ], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
// release image object
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( imageFlags[i] & CL_MEM_ALLOC_HOST_PTR )
{
// Create an image object to test against.
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR) to test with" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else if ( imageFlags[i] & CL_MEM_COPY_HOST_PTR )
{
// Create an image object to test against.
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
clReleaseMemObject(imageObject);
// Try again with non-zero rowPitch.
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
switch (type)
{
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
case CL_MEM_OBJECT_IMAGE3D:
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
break;
}
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
if ( error )
{
free( image );
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
}
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
test_error( error, "Unable to set mem object destructor callback" );
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
else
{
// Create an image object to test against.
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
test_error( error, "Unable to create image to test with" );
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
if (ret)
return ret;
}
clReleaseMemObject( imageObject );
}
return CL_SUCCESS;
}
// Harness entry point: runs the shared image-info test for 2D images.
// ignoreQueue and num_elements are not used.
int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D;
    return test_get_image_info( deviceID, context, imageType );
}
// Harness entry point: runs the shared image-info test for 3D images.
// ignoreQueue and num_elements are not used.
int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE3D;
    return test_get_image_info( deviceID, context, imageType );
}
// Harness entry point: runs the shared image-info test for 1D images.
// ignoreQueue and num_elements are not used.
int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D;
    return test_get_image_info( deviceID, context, imageType );
}
// Harness entry point: runs the shared image-info test for 1D image arrays.
// ignoreQueue and num_elements are not used.
int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
    return test_get_image_info( deviceID, context, imageType );
}
// Harness entry point: runs the shared image-info test for 2D image arrays.
// ignoreQueue and num_elements are not used.
int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
    return test_get_image_info( deviceID, context, imageType );
}

View File

@@ -0,0 +1,108 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
static volatile cl_int sDestructorIndex;
// Destructor callback used to record invocation order.
//   memObject - mem object being destroyed (not inspected)
//   userData  - points at the int slot that receives this callback's sequence number
// Each invocation bumps the file-level counter sDestructorIndex and stores the new value.
void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
{
    int * const orderSlot = (int *)userData;

    // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
    *orderSlot = ++sDestructorIndex;
}
// Absolute value of x. The argument is fully parenthesised so that compound expressions such
// as ABS( a - b ) expand correctly; note that x is still evaluated twice.
#ifndef ABS
#define ABS( x ) ( ( ( x ) < 0 ) ? -( x ) : ( x ) )
#endif
// Registers three destructor callbacks on memObject, releases it, and verifies the callbacks
// ran in reverse order of registration.
//   memObject - wrapper holding the object to test; it is released here and reset to NULL
// Returns 0 when all three callbacks report the expected order, -1 otherwise.
// NOTE: the verification loop spins until each callback has written its slot, so it does not
// return if a callback is never invoked.
int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
{
    cl_int error;
    int i;

    // Set up some variables to catch the order in which callbacks are called
    volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
    sDestructorIndex = 0;

    // Set up the callbacks
    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 0 ] );
    test_error( error, "Unable to set destructor callback" );

    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 1 ] );
    test_error( error, "Unable to set destructor callback" );

    error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ 2 ] );
    test_error( error, "Unable to set destructor callback" );

    // Now release the buffer, which SHOULD call the callbacks
    error = clReleaseMemObject( memObject );
    test_error( error, "Unable to release test buffer" );

    // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
    memObject = NULL;

    // The callbacks may still be in flight here, so each slot is polled before it is checked.
    int numErrors = 0;
    for( i = 0; i < 3; i++ )
    {
        // Spin waiting for the release to finish.  If you don't call the mem_destructor_callback, you will not
        // pass the test.  bugzilla 6316
        while( 0 == callbackOrders[i] )
        {}

        // The callback registered i-th (0-based) must be the (3 - i)-th one to run.
        const int expectedOrder = 3 - i;
        if( ABS( callbackOrders[ i ] ) != expectedOrder )
        {
            log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
                      i+1, ABS( callbackOrders[ i ] ), expectedOrder );
            numErrors++;
        }
    }

    return ( numErrors > 0 ) ? -1 : 0;
}
// Harness entry point: checks destructor-callback ordering on a buffer and, when
// checkForImageSupport() returns 0 for the device, on a 2D image as well.
// queue and num_elements are not used. Returns 0 on success, -1 when either object fails.
int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clMemWrapper testBuffer, testImage;
    cl_int error;

    // Buffer half of the test.
    testBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
    test_error( error, "Unable to create testing buffer" );

    const int bufferResult = test_mem_object_destructor_callback_single( testBuffer );
    if( bufferResult != 0 )
    {
        log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
        return -1;
    }

    // Image half of the test; skipped when the image-support check is non-zero.
    if( checkForImageSupport( deviceID ) != 0 )
        return 0;

    cl_image_format imageFormat = { CL_RGBA, CL_SIGNED_INT8 };
    testImage = create_image_2d( context, CL_MEM_READ_ONLY, &imageFormat, 16, 16, 0, NULL, &error );
    test_error( error, "Unable to create testing image" );

    const int imageResult = test_mem_object_destructor_callback_single( testImage );
    if( imageResult != 0 )
    {
        log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,121 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#ifndef _WIN32
#include <unistd.h>
#endif
#include "../../test_common/harness/conversions.h"
extern cl_uint gRandomSeed;
// Native-kernel body: treats userData as { source pointer, dest pointer, count }
// and copies `count` cl_int values from source to dest, element by element.
static void CL_CALLBACK test_native_kernel_fn( void *userData )
{
    struct arg_struct {
        cl_int * source;
        cl_int * dest;
        cl_int count;
    };
    arg_struct *params = (arg_struct *)userData;

    cl_int idx = 0;
    while( idx < params->count )
    {
        params->dest[ idx ] = params->source[ idx ];
        ++idx;
    }
}
// Verifies clEnqueueNativeKernel: random cl_int data is placed in an input
// buffer, a native (host) kernel copies it to an output buffer, and the output
// is read back and compared with the input.
//
// Returns 0 on success or when the device does not advertise
// CL_EXEC_NATIVE_KERNEL, 1 on a data mismatch; CL failures are handed to
// test_error (presumably an early return with the error code).
int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    int error;
    int result = 0;
    RandomSeed seed( gRandomSeed );

    // Check if we support native kernels. The query result must be validated
    // before `capabilities` is inspected, otherwise a failed query would leave
    // it indeterminate.
    cl_device_exec_capabilities capabilities = 0;
    error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL);
    test_error( error, "Unable to get device execution capabilities" );
    if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) {
        log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n");
        return 0;
    }

    clMemWrapper streams[ 2 ];
#if !(defined (_WIN32) && defined (_MSC_VER))
    cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
#else
    cl_int* inBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
    cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
#endif
    clEventWrapper finishEvent;

    // Argument block handed to the native kernel. The two cl_mem slots are the
    // locations listed in memLocs below.
    struct arg_struct
    {
        cl_mem inputStream;
        cl_mem outputStream;
        cl_int count;
    } args;

    // Create some input values
    generate_random_data( kInt, n_elems, seed, inBuffer );

    // Create I/O streams
    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
    test_error( error, "Unable to create I/O stream" );
    streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
    test_error( error, "Unable to create I/O stream" );

    // Set up the arrays to call with
    args.inputStream = streams[ 0 ];
    args.outputStream = streams[ 1 ];
    args.count = n_elems;
    void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };

    // Run the kernel
    error = clEnqueueNativeKernel( queue, test_native_kernel_fn,
                                   &args, sizeof( args ),
                                   2, &streams[ 0 ],
                                   (const void **)memLocs,
                                   0, NULL, &finishEvent );
    test_error( error, "Unable to queue native kernel" );

    // Finish and wait for the kernel to complete
    error = clFinish( queue );
    test_error(error, "clFinish failed");
    error = clWaitForEvents( 1, &finishEvent );
    test_error(error, "clWaitForEvents failed");

    // Now read the results and verify
    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
    test_error( error, "Unable to read results" );
    for( int i = 0; i < n_elems; i++ )
    {
        if( inBuffer[ i ] != outBuffer[ i ] )
        {
            log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
                       i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
            result = 1;
            break;  // only the first mismatch is reported
        }
    }

#if defined (_WIN32) && defined (_MSC_VER)
    // _malloca may fall back to the heap, so its blocks have to be handed to
    // _freea. NOTE(review): the early exits taken through test_error above
    // still skip this release.
    _freea( inBuffer );
    _freea( outBuffer );
#endif
    return result;
}

View File

@@ -0,0 +1,162 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <stdio.h>
#if defined(__APPLE__)
#include <OpenCL/opencl.h>
#include <OpenCL/cl_platform.h>
#else
#include <CL/opencl.h>
#include <CL/cl_platform.h>
#endif
#include "procs.h"
/* Result codes used by the helpers in this file: SUCCESS is 0, FAILURE is 1. */
enum { SUCCESS, FAILURE };
/* Selects how kernel argument 0 is set: a real buffer, the address of a NULL
 * cl_mem, or a NULL arg_value pointer. */
typedef enum { NON_NULL_PATH, ADDROF_NULL_PATH, NULL_PATH } test_type;
/* Number of work-items launched and number of elements in each buffer. */
#define NITEMS 4096
/* Kernel source: every work-item stores the value of the `src` pointer, cast
 * to long, into its own slot of `dst`, so the host can check whether the
 * kernel actually received a NULL pointer. */
const char *kernel_string =
"kernel void test_kernel(global float *src, global long *dst)\n"
"{\n"
" uint tid = get_global_id(0);\n"
" dst[tid] = (long)src;\n"
"}\n";
/*
 * The guts of the test:
 * set kernel argument 0 with a regular buffer, &NULL, or NULL depending on
 * the value of 'type', run the kernel over NITEMS work-items and inspect the
 * pointer values it wrote to 'result_buf'.
 *
 *   NON_NULL_PATH     every result element must be non-zero
 *   ADDROF_NULL_PATH  every result element must be zero
 *   NULL_PATH         every result element must be zero
 *
 * Returns SUCCESS or FAILURE; failures of the enqueue/read calls are handed to
 * test_error (presumably an early return with the CL error code). An
 * unrecognised 'type' is rejected with FAILURE.
 */
static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
                                      cl_mem test_buf, cl_mem result_buf, test_type type)
{
    unsigned int test_success = SUCCESS;
    unsigned int i;
    cl_int status = CL_SUCCESS;
    const char *typestr = NULL;
    cl_long *host_result = NULL;
    size_t global = NITEMS;

    if (type == NON_NULL_PATH) {
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "non-NULL";
    } else if (type == ADDROF_NULL_PATH) {
        test_buf = NULL;  /* local copy only; the caller's handle is untouched */
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "&NULL";
    } else if (type == NULL_PATH) {
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
        typestr = "NULL";
    } else {
        /* Without this branch status and typestr would be read uninitialised. */
        log_error("Unknown test type: %d\n", (int)type);
        return FAILURE;
    }

    log_info("Testing setKernelArgs with %s buffer.\n", typestr);
    if (status != CL_SUCCESS) {
        log_error("clSetKernelArg failed with status: %d\n", status);
        return FAILURE; // no point in continuing *this* test
    }

    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
                                    NULL, 0, NULL, NULL);
    test_error(status, "NDRangeKernel failed.");

    /* The cast is kept so the file also builds when compiled as C++. */
    host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
    if (host_result == NULL) {
        log_error("Unable to allocate host result buffer.\n");
        return FAILURE;
    }

    status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
                                 sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
    if (status != CL_SUCCESS) {
        /* Release the buffer before test_error leaves the function. */
        free(host_result);
    }
    test_error(status, "ReadBuffer failed.");

    if (type == NON_NULL_PATH) {
        // in the non-null case, we expect NONZERO values:
        for (i=0; i<NITEMS; i++) {
            if (host_result[i] == 0) {
                log_error("failure: item %u in the result buffer was unexpectedly NULL.\n", i);
                test_success = FAILURE;
                break;
            }
        }
    } else {
        // both NULL flavours must deliver a zero pointer to the kernel:
        for (i=0; i<NITEMS; i++) {
            if (host_result[i] != 0) {
                log_error("failure: item %u in the result buffer was unexpectedly non-NULL.\n", i);
                test_success = FAILURE;
                break;
            }
        }
    }

    free(host_result);

    if (test_success == SUCCESS) {
        log_info("\t%s ok.\n", typestr);
    }
    return test_success;
}
/*
 * Builds test_kernel from kernel_string, creates a source and a destination
 * buffer of NITEMS elements, and runs test_setargs_and_execution for each
 * supported way of setting the source argument.
 *
 * Returns 0 (SUCCESS) when every case passes. A failing CL setup call returns
 * that CL error code; otherwise the OR of the per-case results is returned.
 * All CL objects created here are released on every exit path.
 */
int test_null_buffer_arg(cl_device_id device, cl_context context,
                         cl_command_queue queue, int num_elements)
{
    /* Everything is declared before the first goto so the jumps never bypass
     * an initialisation (required if this file is compiled as C++). */
    int result = FAILURE;
    cl_int status = CL_SUCCESS;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_mem dev_src = NULL;
    cl_mem dev_dst = NULL;

    // prep kernel:
    program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
    if (status != CL_SUCCESS) {
        log_error("CreateProgramWithSource failed with status: %d\n", status);
        result = status;
        goto cleanup;
    }

    status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (status != CL_SUCCESS) {
        log_error("BuildProgram failed with status: %d\n", status);
        result = status;
        goto cleanup;
    }

    kernel = clCreateKernel(program, "test_kernel", &status);
    if (status != CL_SUCCESS) {
        log_error("CreateKernel failed with status: %d\n", status);
        result = status;
        goto cleanup;
    }

    // buffer creation is checked: a NULL handle would otherwise reach the kernel
    dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
                             NULL, &status);
    if (status != CL_SUCCESS) {
        log_error("CreateBuffer (source) failed with status: %d\n", status);
        result = status;
        goto cleanup;
    }

    dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long),
                             NULL, &status);
    if (status != CL_SUCCESS) {
        log_error("CreateBuffer (destination) failed with status: %d\n", status);
        result = status;
        goto cleanup;
    }

    // set the destination buffer normally:
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
    if (status != CL_SUCCESS) {
        log_error("SetKernelArg failed with status: %d\n", status);
        result = status;
        goto cleanup;
    }

    //
    // we test three cases:
    //
    // - typical case, used everyday: non-null buffer
    // - the case of src as &NULL (the spec-compliance test)
    // - the case of src as NULL (the backwards-compatibility test, Apple only)
    //
    result = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
    result |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
#ifdef __APPLE__
    result |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
#endif

cleanup:
    // release only what was actually created
    if (dev_src) clReleaseMemObject(dev_src);
    if (dev_dst) clReleaseMemObject(dev_dst);
    if (kernel) clReleaseKernel(kernel);
    if (program) clReleaseProgram(program);
    return result;
}

View File

@@ -0,0 +1,289 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include <string.h>
#define EXTENSION_NAME_BUF_SIZE 4096
#define PRINT_EXTENSION_INFO 0
// Returns true when `name` appears in the space-separated extension `list` as
// a complete token. A plain strstr() would also accept `name` as the prefix or
// the middle of a longer extension name.
static bool extension_list_contains( const char *list, const char *name )
{
    const size_t nameLen = strlen( name );
    if( nameLen == 0 )
        return false;

    for( const char *hit = strstr( list, name ); hit != NULL; hit = strstr( hit + nameLen, name ) )
    {
        const bool startsToken = ( hit == list ) || ( hit[ -1 ] == ' ' );
        const char following = hit[ nameLen ];
        if( startsToken && ( following == ' ' || following == '\0' ) )
            return true;
    }
    return false;
}

// Checks that every extension from a fixed list that is reported by the
// platform owning `deviceID` is also reported by the device itself.
//
// Returns 0 when the device covers all listed platform extensions, -1 when a
// query fails or a listed platform extension is missing from the device.
int test_platform_extensions(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements)
{
    // NULL-terminated list of the extensions that are cross-checked.
    const char * extensions[] = {
        "cl_khr_byte_addressable_store",
        "cl_khr_global_int32_base_atomics",
        "cl_khr_global_int32_extended_atomics",
        "cl_khr_local_int32_base_atomics",
        "cl_khr_local_int32_extended_atomics",
        "cl_khr_int64_base_atomics",
        "cl_khr_int64_extended_atomics",
        "cl_khr_3d_image_writes",
        "cl_khr_fp16",
        "cl_khr_fp64",
        NULL
    };
    // One flag per entry of `extensions`; sized from that array so the two can
    // never drift apart. All entries start out false.
    bool extensionsSupported[ sizeof( extensions ) / sizeof( extensions[ 0 ] ) ] = { false };
    int extensionIndex;
    cl_platform_id platformID;
    cl_int err;
    char platform_extensions[EXTENSION_NAME_BUF_SIZE];
    char device_extensions[EXTENSION_NAME_BUF_SIZE];

    // Check the device indicated by deviceID against the platform that
    // includes this device: CL_DEVICE_PLATFORM yields a cl_platform_id.
    err = clGetDeviceInfo(deviceID,
                          CL_DEVICE_PLATFORM,
                          sizeof(cl_platform_id),
                          (void *)(&platformID),
                          NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get platformID from device\n");
        return -1;
    }

    // now we grab the set of extensions specified by the platform
    err = clGetPlatformInfo(platformID,
                            CL_PLATFORM_EXTENSIONS,
                            sizeof(platform_extensions),
                            (void *)(&platform_extensions[0]),
                            NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get extension string from platform\n");
        return -1;
    }
#if PRINT_EXTENSION_INFO
    log_info("Platform extensions include \"%s\"\n\n", platform_extensions);
#endif

    // here we parse the platform extensions, to look for the "important" ones
    for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
    {
        if(extension_list_contains(platform_extensions, extensions[extensionIndex]))
        {
            // we found it
#if PRINT_EXTENSION_INFO
            log_info("Found \"%s\" in platform extensions\n",
                     extensions[extensionIndex]);
#endif
            extensionsSupported[extensionIndex] = true;
        }
    }

    // and then we grab the set of extensions specified by the device
    // (this can be turned into a "loop over all devices in this platform")
    err = clGetDeviceInfo(deviceID,
                          CL_DEVICE_EXTENSIONS,
                          sizeof(device_extensions),
                          (void *)(&device_extensions[0]),
                          NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get extension string from device\n");
        return -1;
    }
#if PRINT_EXTENSION_INFO
    log_info("Device extensions include \"%s\"\n\n", device_extensions);
#endif

    for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
    {
        if(!extensionsSupported[extensionIndex])
        {
            continue; // platform does not report it, nothing to compare
        }
        if(!extension_list_contains(device_extensions, extensions[extensionIndex]))
        {
            // device does not support it
            vlog_error("Platform supports extension \"%s\" but device does not\n",
                       extensions[extensionIndex]);
            return -1;
        }
    }
    return 0;
}
int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
cl_platform_id platforms[16];
cl_uint num_platforms;
char *string_returned;
string_returned = (char*)malloc(8192);
int total_errors = 0;
int err = CL_SUCCESS;
err = clGetPlatformIDs(16, platforms, &num_platforms);
test_error(err, "clGetPlatformIDs failed");
if (num_platforms <= 16) {
// Try with NULL
err = clGetPlatformIDs(num_platforms, platforms, NULL);
test_error(err, "clGetPlatformIDs failed with NULL for return size");
}
if (num_platforms < 1) {
log_error("Found 0 platforms.\n");
return -1;
}
log_info("Found %d platforms.\n", num_platforms);
for (int p=0; p<(int)num_platforms; p++) {
cl_device_id *devices;
cl_uint num_devices;
size_t size;
log_info("Platform %d (%p):\n", p, platforms[p]);
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_PROFILE, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed");
log_info("\tCL_PLATFORM_PROFILE: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VERSION, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed");
log_info("\tCL_PLATFORM_VERSION: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_NAME failed");
log_info("\tCL_PLATFORM_NAME: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VENDOR, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed");
log_info("\tCL_PLATFORM_VENDOR: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_EXTENSIONS, 8192, string_returned, &size);
test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed");
log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned);
if (strlen(string_returned)+1 != size) {
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
total_errors++;
}
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
test_error(err, "clGetDeviceIDs size failed.\n");
devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id));
memset(devices, 0, sizeof(cl_device_id)*num_devices);
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
test_error(err, "clGetDeviceIDs failed.\n");
log_info("\tPlatform has %d devices.\n", (int)num_devices);
for (int d=0; d<(int)num_devices; d++) {
size_t returned_size;
cl_platform_id returned_platform;
cl_context context;
cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[p], 0 };
err = clGetDeviceInfo(devices[d], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, &returned_size);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n");
if (returned_size != sizeof(cl_platform_id)) {
log_error("Reported return size (%ld) does not match expected size (%ld).\n", returned_size, sizeof(cl_platform_id));
total_errors++;
}
memset(string_returned, 0, 8192);
err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 8192, string_returned, NULL);
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n");
log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform);
log_info("\t\t\tTesting clCreateContext for the platform/device...\n");
// Try creating a context for the platform
context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err);
test_error(err, "\t\tclCreateContext failed for device with platform properties\n");
memset(properties, 0, sizeof(cl_context_properties)*3);
err = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size);
test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed");
if (returned_size != sizeof(cl_context_properties)*3) {
log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %ld, expected %ld.\n",
returned_size, sizeof(cl_context_properties)*3);
total_errors++;
}
if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platforms[p]) {
log_error("Wrong properties returned. Expected: [%p %p], got [%p %p]\n",
(void*)CL_CONTEXT_PLATFORM, platforms[p], (void*)properties[0], (void*)properties[1]);
total_errors++;
}
err = clReleaseContext(context);
test_error(err, "clReleaseContext failed");
}
free(devices);
}
free(string_returned);
return total_errors;
}

View File

@@ -0,0 +1,635 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "../../test_common/harness/imageHelpers.h"
#include <stdlib.h>
#include <ctype.h>
// Validates CL_PLATFORM_PROFILE and CL_PLATFORM_VERSION of the first platform
// returned by clGetPlatformIDs:
//  - the profile must be "FULL_PROFILE" or "EMBEDDED_PROFILE";
//  - the version must look like "OpenCL <major>.<minor> ..." and be >= 1.2;
//  - the reported sizes must equal strlen + 1, both for the full query and for
//    the size-only query.
//
// Returns 0 on success and -1 on a validation failure; CL call failures are
// handed to test_error (presumably an early return with the error code).
int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_platform_id platform;
    cl_int error;
    char buffer[ 4098 ];
    size_t length;

    // Get the platform to use
    error = clGetPlatformIDs(1, &platform, NULL);
    test_error( error, "Unable to get platform" );

    // Platform profile should either be FULL_PROFILE or EMBEDDED_PROFILE
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform profile string" );
    log_info("Returned CL_PLATFORM_PROFILE %s.\n", buffer);
    if( strcmp( buffer, "FULL_PROFILE" ) != 0 && strcmp( buffer, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned platform profile string is not a valid string by OpenCL 1.2! (Returned: %s)\n", buffer );
        return -1;
    }
    if( strlen( buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Check just length return
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &length );
    test_error( error, "Unable to get platform profile length" );
    if( strlen( (char *)buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( (char *)buffer )+1, (int)length );
        return -1;
    }

    // Platform version should fit the regex "OpenCL *[0-9]+\.[0-9]+"
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform version string" );
    log_info("Returned CL_PLATFORM_VERSION %s.\n", buffer);
    if( memcmp( buffer, "OpenCL ", strlen( "OpenCL " ) ) != 0 )
    {
        log_error( "ERROR: Initial part of platform version string does not match required format! (returned: %s)\n", (char *)buffer );
        return -1;
    }

    // p1 -> first digit of the major number, p2 -> the '.', p3 -> just past
    // the minor number. isdigit() needs an unsigned char value: a negative
    // plain char would be undefined behaviour.
    char *p1 = (char *)buffer + strlen( "OpenCL " );
    while( *p1 == ' ' )
        p1++;
    char *p2 = p1;
    while( isdigit( (unsigned char)*p2 ) )
        p2++;
    if( *p2 != '.' )
    {
        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", (char *)buffer );
        return -1;
    }
    char *p3 = p2 + 1;
    while( isdigit( (unsigned char)*p3 ) )
        p3++;
    if( *p3 != ' ' )
    {
        log_error( "ERROR: space expected after minor version number! (returned: %s)\n", (char *)buffer );
        return -1;
    }
    *p2 = ' '; // Put in a space for atoi below.
    p2++;

    // make sure it is null terminated
    for( ; p3 != buffer + length; p3++ )
        if( *p3 == '\0' )
            break;
    if( p3 == buffer + length )
    {
        log_error( "ERROR: platform version string is not NUL terminated!\n" );
        return -1;
    }

    int major = atoi( p1 );
    int minor = atoi( p2 );
    int minor_revision = 2;
    // Compare major and minor separately: folding them into major*10+minor
    // would accept e.g. "0.12" and mis-rank two-digit minor versions.
    if( major < 1 || ( major == 1 && minor < minor_revision ) )
    {
        log_error( "ERROR: OpenCL profile version returned is less than 1.%d!\n", minor_revision );
        return -1;
    }

    // Sanity checks on the returned values
    if( length != strlen( (char *)buffer ) + 1)
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer )+1, (int)length );
        return -1;
    }

    // Check just length
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &length );
    test_error( error, "Unable to get platform version length" );
    if( length != strlen( (char *)buffer )+1 )
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
        return -1;
    }
    return 0;
}
// Creates a sampler (normalized coords, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR)
// and checks that clGetSamplerInfo reports the creation parameters back with
// the expected sizes. The reference count is only checked for its size.
//
// Returns 0 on success and -1 on the first mismatch; CL call failures are
// handed to test_error. PASSIVE_REQUIRE_IMAGE_SUPPORT presumably skips the
// test on devices without image support -- confirm against the harness.
int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t size;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )

    clSamplerWrapper testSampler = clCreateSampler( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error );
    test_error( error, "Unable to create sampler to test with" );

    // --- reference count: size only ---
    cl_uint referenceCount;
    error = clGetSamplerInfo( testSampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( referenceCount ), &referenceCount, &size );
    test_error( error, "Unable to get sampler ref count" );
    if( sizeof( referenceCount ) != size )
    {
        log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( referenceCount ), (int)size );
        return -1;
    }

    // --- owning context ---
    cl_context samplerContext;
    error = clGetSamplerInfo( testSampler, CL_SAMPLER_CONTEXT, sizeof( samplerContext ), &samplerContext, &size );
    test_error( error, "Unable to get sampler context" );
    if( context != samplerContext )
    {
        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, samplerContext );
        return -1;
    }
    if( sizeof( samplerContext ) != size )
    {
        log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( samplerContext ), (int)size );
        return -1;
    }

    // --- addressing mode ---
    cl_addressing_mode addressing;
    error = clGetSamplerInfo( testSampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( addressing ), &addressing, &size );
    test_error( error, "Unable to get sampler addressing mode" );
    if( CL_ADDRESS_CLAMP != addressing )
    {
        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)addressing );
        return -1;
    }
    if( sizeof( addressing ) != size )
    {
        log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( addressing ), (int)size );
        return -1;
    }

    // --- filter mode ---
    cl_filter_mode filtering;
    error = clGetSamplerInfo( testSampler, CL_SAMPLER_FILTER_MODE, sizeof( filtering ), &filtering, &size );
    test_error( error, "Unable to get sampler filter mode" );
    if( CL_FILTER_LINEAR != filtering )
    {
        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)filtering );
        return -1;
    }
    if( sizeof( filtering ) != size )
    {
        log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( filtering ), (int)size );
        return -1;
    }

    // --- normalized-coordinates flag ---
    cl_int normalized;
    error = clGetSamplerInfo( testSampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( normalized ), &normalized, &size );
    test_error( error, "Unable to get sampler normalized flag" );
    if( CL_TRUE != normalized )
    {
        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)normalized );
        return -1;
    }
    if( sizeof( normalized ) != size )
    {
        log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( normalized ), (int)size );
        return -1;
    }

    return 0;
}
// Queries `paramName` of `queue` into `val` via clGetCommandQueueInfo, then
// checks the value against `expected` and the reported size against
// sizeof( val ).
//
// Requirements on the call site:
//  - variables named `error` and `size` must be in scope (they are assigned);
//  - `name` and `type` must be string literals, since they are concatenated
//    into the messages (`type` is the printf conversion used with `cast`);
//  - the enclosing function must return an int: a mismatch returns -1 from it.
//
// The expansion is a plain statement sequence, not wrapped in do/while, so it
// must not be used as the unbraced body of an if/else or loop.
#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \
error = clGetCommandQueueInfo( queue, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get command queue " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)expected, (cast)val ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
}
// Creates a command queue with the device's supported queue properties and
// checks that clGetCommandQueueInfo reports the reference count (size only),
// context, device (compared by vendor ID) and properties back correctly.
// The queue passed in by the harness is ignored.
//
// Returns 0 on success and -1 on a mismatch; CL call failures are handed to
// test_error (presumably an early return with the error code).
int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    // `error` and `size` are also used by TEST_COMMAND_QUEUE_PARAM below.
    int error;
    size_t size;

    // The query result is checked so that device_props is never used
    // uninitialised when the call fails.
    cl_command_queue_properties device_props = 0;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
    test_error( error, "Unable to get device queue properties" );
    log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props);

    clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error );
    test_error( error, "Unable to create command queue to test with" );

    cl_uint refCount;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get command queue reference count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
        return -1;
    }

    cl_context otherCtx;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    cl_device_id otherDevice;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
    test_error(error, "clGetCommandQueue failed.");
    if (size != sizeof(cl_device_id)) {
        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
        return -1;
    }

    /* Since the device IDs are opaque types we compare the CL_DEVICE_VENDOR_ID of both handles instead. */
    cl_uint otherDevice_vid, deviceID_vid;
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
    if( otherDevice_vid != deviceID_vid )
    {
        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
        return -1;
    }

    cl_command_queue_properties props;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int )

    return 0;
}
// Queries CL_CONTEXT_PROPERTIES of `context` into a single
// cl_context_properties slot and accepts exactly two outcomes:
//  - a reported size of 0, or
//  - a reported size of one cl_context_properties whose value is 0.
//
// Returns 0 for an accepted outcome and -1 otherwise; a failing query is
// handed to test_error.
int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    int status;
    size_t reportedSize;
    cl_context_properties firstProperty;

    status = clGetContextInfo( context, CL_CONTEXT_PROPERTIES, sizeof( firstProperty ), &firstProperty, &reportedSize );
    test_error( status, "Unable to get context props" );

    // Any size other than "empty" or "one entry" is rejected outright.
    if( reportedSize != 0 && reportedSize != sizeof(cl_context_properties) )
    {
        log_error( "ERROR: Returned size of context props is not valid! (expected 0 or %d, got %d)\n",
                   (int)sizeof(cl_context_properties), (int)reportedSize );
        return -1;
    }

    // A single entry must be the list terminator.
    if( reportedSize == sizeof(cl_context_properties) && firstProperty != 0 )
    {
        log_error("ERROR: Returned properties is no NULL.\n");
        return -1;
    }

    return 0;
}
// Queries `paramName` of memory object `mem` into `val` via
// clGetMemObjectInfo, then checks the value against `expected` and the
// reported size against sizeof( val ).
//
// Requirements on the call site:
//  - variables named `error` and `size` must be in scope (they are assigned);
//  - `name` and `type` must be string literals, since they are concatenated
//    into the messages (`type` is the printf conversion used with `cast`);
//  - the enclosing function must return an int: a mismatch returns -1 from it.
//
// The expansion is a plain statement sequence, not wrapped in do/while, so it
// must not be used as the unbraced body of an if/else or loop.
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get mem object " name ); \
if( val != expected ) \
{ \
log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val ); \
return -1; \
} \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
}
// Destructor callback for memory objects: the memory object argument is
// ignored and the user data pointer is released with free().
void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* unused */, void *data )
{
    void *hostAllocation = data;
    free( hostAllocation );
}
// Table of 16 cl_mem_flags values to test with: no flags at all, the three
// access qualifiers on their own, and each access qualifier combined with
// COPY_HOST_PTR, ALLOC_HOST_PTR, ALLOC_HOST_PTR | COPY_HOST_PTR and
// USE_HOST_PTR.
static cl_mem_flags all_flags[16] = {
0,
CL_MEM_READ_WRITE,
CL_MEM_READ_ONLY,
CL_MEM_WRITE_ONLY,
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
};
// Queries `paramName` of `device` into `val` via clGetDeviceInfo, checks that
// the reported size equals sizeof( val ), and logs the value. The value itself
// is not compared against anything.
//
// Requirements on the call site:
//  - variables named `error` and `size` must be in scope (they are assigned);
//  - `name` and `type` must be string literals, since they are concatenated
//    into the messages (`type` is the printf conversion used with `cast`);
//  - the enclosing function must return an int: a size mismatch returns -1.
//
// The expansion is a plain statement sequence, not wrapped in do/while, so it
// must not be used as the unbraced body of an if/else or loop.
#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get device " name ); \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
} \
log_info( "\tReported device " name " : " type "\n", (cast)val );
// Variant of TEST_DEVICE_PARAM for memory sizes: queries `paramName` of
// `device` into `val`, checks the reported size against sizeof( val ), and
// logs val / div converted to int (so `type` must be an int conversion such
// as "%d KB").
//
// Requirements on the call site:
//  - variables named `error` and `size` must be in scope (they are assigned);
//  - `name` and `type` must be string literals, since they are concatenated
//    into the messages;
//  - the enclosing function must return an int: a size mismatch returns -1.
//
// The expansion is a plain statement sequence, not wrapped in do/while, so it
// must not be used as the unbraced body of an if/else or loop.
#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \
error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \
test_error( error, "Unable to get device " name ); \
if( size != sizeof( val ) ) \
{ \
log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
return -1; \
} \
log_info( "\tReported device " name " : " type "\n", (int)( val / div ) );
/* Queries a selection of clGetDeviceInfo parameters for deviceID, validates the
 * size reported for each one and logs the value.
 *
 * The context, queue and num_elements arguments are not used by this test.
 *
 * Returns 0 when every query succeeds and validates; otherwise the function
 * returns early with a nonzero value (-1 for validation failures, or whatever
 * test_error / TEST_DEVICE_PARAM return on a failed query).
 *
 * Note: the TEST_DEVICE_PARAM / TEST_DEVICE_PARAM_MEM invocations intentionally
 * carry no trailing semicolon; the macros supply their own and rely on the
 * local variables `error` and `size`.
 */
int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    int error;
    size_t size;

    cl_uint vendorID;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_VENDOR_ID, vendorID, "vendor ID", "0x%08x", int )

    /* String queries: the reported size must include the terminating NUL. */
    char extensions[ 10240 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_EXTENSIONS, sizeof( extensions ), &extensions, &size );
    test_error( error, "Unable to get device extensions" );
    if( size != strlen( extensions ) + 1 )
    {
        log_error( "ERROR: Returned size of device extensions does not validate! (expected %d, got %d)\n", (int)( strlen( extensions ) + 1 ), (int)size );
        return -1;
    }
    log_info( "\tReported device extensions: %s \n", extensions );

    cl_uint preferred;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferred, "preferred vector char width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferred, "preferred vector short width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferred, "preferred vector int width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferred, "preferred vector long width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferred, "preferred vector float width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferred, "preferred vector double width", "%d", int )
    // Note that even if cl_khr_fp64 is not reported, the preferred width for double can be non-zero.
    // For example, a vendor extension can support double without supporting cl_khr_fp64, which
    // additionally implies math library support. The value is therefore only logged, not validated.

    cl_uint baseAddrAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bytes", int )

    cl_uint minDataAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataAlign, "min data type alignment", "%d bytes", int )

    cl_device_mem_cache_type cacheType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof( cacheType ), &cacheType, &size );
    test_error( error, "Unable to get device global mem cache type" );
    if( size != sizeof( cacheType ) )
    {
        log_error( "ERROR: Returned size of device global mem cache type does not validate! (expected %d, got %d)\n", (int)sizeof( cacheType ), (int)size );
        return -1;
    }
    const char *cacheTypeName = ( cacheType == CL_NONE ) ? "CL_NONE" : ( cacheType == CL_READ_ONLY_CACHE ) ? "CL_READ_ONLY_CACHE" : ( cacheType == CL_READ_WRITE_CACHE ) ? "CL_READ_WRITE_CACHE" : "<unknown>";
    log_info( "\tReported device global mem cache type: %s \n", cacheTypeName );

    cl_uint cachelineSize;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cachelineSize, "global mem cacheline size", "%d bytes", int )

    cl_ulong cacheSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cacheSize, "global mem cache size", "%d KB", 1024 )

    cl_ulong memSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, memSize, "global mem size", "%d MB", ( 1024 * 1024 ) )

    cl_device_local_mem_type localMemType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_TYPE, sizeof( localMemType ), &localMemType, &size );
    test_error( error, "Unable to get device local mem type" );
    /* Validate against the type that was actually queried (previously this
       compared against sizeof( cacheType ) by copy-paste). */
    if( size != sizeof( localMemType ) )
    {
        log_error( "ERROR: Returned size of device local mem type does not validate! (expected %d, got %d)\n", (int)sizeof( localMemType ), (int)size );
        return -1;
    }
    /* Both comparisons must look at localMemType; the CL_GLOBAL branch used to
       test cacheType and could therefore never report the right name. */
    const char *localMemTypeName = ( localMemType == CL_LOCAL ) ? "CL_LOCAL" : ( localMemType == CL_GLOBAL ) ? "CL_GLOBAL" : "<unknown>";
    log_info( "\tReported device local mem type: %s \n", localMemTypeName );

    cl_bool errSupport;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ERROR_CORRECTION_SUPPORT, errSupport, "error correction support", "%d", int )

    size_t timerResolution;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, timerResolution, "profiling timer resolution", "%ld nanoseconds", long )

    cl_bool endian;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ENDIAN_LITTLE, endian, "little endian flag", "%d", int )

    cl_bool avail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_AVAILABLE, avail, "available flag", "%d", int )

    cl_bool compilerAvail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_COMPILER_AVAILABLE, compilerAvail, "compiler available flag", "%d", int )

    char profile[ 1024 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profile ), &profile, &size );
    test_error( error, "Unable to get device profile" );
    if( size != strlen( profile ) + 1 )
    {
        log_error( "ERROR: Returned size of device profile does not validate! (expected %d, got %d)\n", (int)( strlen( profile ) + 1 ), (int)size );
        return -1;
    }
    /* Only the two profile strings below are accepted. */
    if( strcmp( profile, "FULL_PROFILE" ) != 0 && strcmp( profile, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned profile of device not FULL or EMBEDDED as required by OpenCL 1.2! (Returned %s)\n", profile );
        return -1;
    }
    log_info( "\tReported device profile: %s \n", profile );

    return 0;
}
/* Kernel sources used by test_kernel_required_group_size:
 *   [0] a plain copy kernel with no work-group size attribute;
 *   [1] the same kernel carrying a reqd_work_group_size attribute. This entry
 *       is a printf-style format string: its three %d placeholders are filled
 *       in with the chosen local dimensions before the program is built.
 */
static const char *sample_compile_size[2] = {
"__kernel void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n",
"__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) void sample_test(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" dst[tid] = src[tid];\n"
"\n"
"}\n" };
int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
size_t realSize;
size_t kernel_max_workgroup_size;
size_t global[] = {64,14,10};
size_t local[] = {0,0,0};
cl_uint max_dimensions;
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL);
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
log_info("Device reported CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = %d.\n", (int)max_dimensions);
{
clProgramWrapper program;
clKernelWrapper kernel;
error = create_single_kernel_helper( context, &program, &kernel, 1, &sample_compile_size[ 0 ], "sample_test" );
if( error != 0 )
return error;
error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_max_workgroup_size), &kernel_max_workgroup_size, NULL);
test_error( error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
log_info("The CL_KERNEL_WORK_GROUP_SIZE for the kernel is %d.\n", (int)kernel_max_workgroup_size);
size_t size[ 3 ];
error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
test_error( error, "Unable to get work group info" );
if( size[ 0 ] != 0 || size[ 1 ] != 0 || size[ 2 ] != 0 )
{
log_error( "ERROR: Nonzero compile work group size returned for nonspecified size! (returned %d,%d,%d)\n", (int)size[0], (int)size[1], (int)size[2] );
return -1;
}
if( realSize != sizeof( size ) )
{
log_error( "ERROR: Returned size of compile work group size not valid! (Expected %d, got %d)\n", (int)sizeof( size ), (int)realSize );
return -1;
}
// Determine some local dimensions to use for the test.
if (max_dimensions == 1) {
error = get_max_common_work_group_size(context, kernel, global[0], &local[0]);
test_error( error, "get_max_common_work_group_size failed");
log_info("For global dimension %d, kernel will require local dimension %d.\n", (int)global[0], (int)local[0]);
} else if (max_dimensions == 2) {
error = get_max_common_2D_work_group_size(context, kernel, global, local);
test_error( error, "get_max_common_2D_work_group_size failed");
log_info("For global dimension %d x %d, kernel will require local dimension %d x %d.\n", (int)global[0], (int)global[1], (int)local[0], (int)local[1]);
} else {
error = get_max_common_3D_work_group_size(context, kernel, global, local);
test_error( error, "get_max_common_3D_work_group_size failed");
log_info("For global dimension %d x %d x %d, kernel will require local dimension %d x %d x %d.\n",
(int)global[0], (int)global[1], (int)global[2], (int)local[0], (int)local[1], (int)local[2]);
}
}
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper in, out;
//char source[1024];
char *source = (char*)malloc(1024);
source[0] = '\0';
sprintf(source, sample_compile_size[1], local[0], local[1], local[2]);
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&source, "sample_test" );
if( error != 0 )
return error;
size_t size[ 3 ];
error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
test_error( error, "Unable to get work group info" );
if( size[ 0 ] != local[0] || size[ 1 ] != local[1] || size[ 2 ] != local[2] )
{
log_error( "ERROR: Incorrect compile work group size returned for specified size! (returned %d,%d,%d, expected %d,%d,%d)\n",
(int)size[0], (int)size[1], (int)size[2], (int)local[0], (int)local[1], (int)local[2]);
return -1;
}
// Verify that the kernel will only execute with that size.
in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*global[0], NULL, &error);
test_error(error, "clCreateBuffer failed");
out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*global[0], NULL, &error);
test_error(error, "clCreateBuffer failed");
error = clSetKernelArg(kernel, 0, sizeof(in), &in);
test_error(error, "clSetKernelArg failed");
error = clSetKernelArg(kernel, 1, sizeof(out), &out);
test_error(error, "clSetKernelArg failed");
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
test_error(error, "clEnqueueNDRangeKernel failed");
error = clFinish(queue);
test_error(error, "clFinish failed");
log_info("kernel_required_group_size may report spurious ERRORS in the conformance log.\n");
local[0]++;
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
if (error != CL_INVALID_WORK_GROUP_SIZE) {
log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
(int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2] );
print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
return -1;
}
error = clFinish(queue);
test_error(error, "clFinish failed");
if (max_dimensions == 1) {
free(source);
return 0;
}
local[0]--; local[1]++;
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
if (error != CL_INVALID_WORK_GROUP_SIZE) {
log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
(int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]);
print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
return -1;
}
error = clFinish(queue);
test_error(error, "clFinish failed");
if (max_dimensions == 2) {
return 0;
free(source);
}
local[1]--; local[2]++;
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
if (error != CL_INVALID_WORK_GROUP_SIZE) {
log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
(int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]);
print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
return -1;
}
error = clFinish(queue);
test_error(error, "clFinish failed");
free(source);
}
return 0;
}

View File

@@ -0,0 +1,234 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif // !_WIN32
// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
// this define.
//#define VERIFY_AFTER_RELEASE 1
/* GET_QUEUE_INSTANCE_COUNT( p ) / GET_MEM_INSTANCE_COUNT( p )
 *
 * Read the reference count of command queue / memory object `p` into the
 * caller's `numInstances` variable and store the query status in the caller's
 * `err` variable. When the query does not return CL_SUCCESS, `numInstances`
 * is set to 0. Both `numInstances` and `err` must be declared by the caller.
 */
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo(p, CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo(p, CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
/* VERIFY_INSTANCE_COUNT( c, rightValue )
 *
 * Compares the reference count `c` against the expected `rightValue`; on a
 * mismatch it logs an error and executes `return -1` in the calling function.
 *
 * The body is wrapped in do { } while( 0 ) so that the macro behaves like a
 * single statement (safe inside an unbraced if/else) and must be followed by
 * a semicolon, as every call site in this file already does. Arguments are
 * parenthesized, and the logged values are cast to int to match the %d
 * conversions.
 */
#define VERIFY_INSTANCE_COUNT(c,rightValue) do { if( (c) != (rightValue) ) { \
log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", (int)(rightValue), (int)(c) ); \
return -1; } } while( 0 )
/* Creates a command queue, checks that its reference count is 1, and releases
 * it again. The queue passed in by the harness and num_elements are not used.
 *
 * Returns 0 on success; nonzero if creation, the reference-count query, the
 * count check or the release fails.
 */
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the queue; a failing release is a test failure and is no
       longer silently ignored */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
/* Creates a command queue and walks its reference count through a sequence of
 * retains and releases (1 -> 10 -> 5 -> 8 -> 1 -> released), verifying the
 * reported count after each step. The queue passed in by the harness and
 * num_elements are not used.
 *
 * Returns 0 on success; nonzero if any create/retain/release/query call fails
 * or a reported count does not match.
 */
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    unsigned int i;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        err = clRetainCommandQueue( queue );
        test_error( err, "Unable to retain command queue" );
    }

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        err = clReleaseCommandQueue( queue );
        test_error( err, "Unable to release command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        err = clRetainCommandQueue( queue );
        test_error( err, "Unable to retain command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        err = clReleaseCommandQueue( queue );
        test_error( err, "Unable to release command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
/* Creates a 32-byte read-only buffer, checks that its reference count is 1,
 * and releases it again. deviceID, queue and num_elements are not used.
 *
 * Returns 0 on success; nonzero if creation, the reference-count query, the
 * count check or the release fails.
 */
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the mem object; a failing release is a test failure and is
       no longer silently ignored */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}
/* Creates a 32-byte read-only buffer and walks its reference count through a
 * sequence of retains and releases (1 -> 10 -> 5 -> 8 -> 1 -> released),
 * verifying the reported count after each step. deviceID, queue and
 * num_elements are not used.
 *
 * Returns 0 on success; nonzero if any create/retain/release/query call fails
 * or a reported count does not match.
 */
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    unsigned int i;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        err = clRetainMemObject( object );
        test_error( err, "Unable to retain mem object" );
    }

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        err = clReleaseMemObject( object );
        test_error( err, "Unable to release mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        err = clRetainMemObject( object );
        test_error( err, "Unable to retain mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        err = clReleaseMemObject( object );
        test_error( err, "Unable to release mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}

View File

@@ -0,0 +1,109 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include "../../test_common/harness/compat.h"
/* Builds a trivial program, creates a kernel from it, then releases the
 * program BEFORE the kernel. With correct internal reference counting both
 * releases must succeed. context-independent arguments queue and num_elements
 * are not used.
 *
 * Returns 0 on success; nonzero if the program/kernel cannot be created or if
 * either release call reports an error.
 */
int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_program program;
    cl_kernel kernel;
    int error;
    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };

    /* Create a test program */
    program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
    test_error( error, "Unable to create program to test with" );

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build sample program to test with" );

    /* And create a kernel from it */
    kernel = clCreateKernel( program, "sample_test", &error );
    test_error( error, "Unable to create kernel" );

    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine.
       The status of each release is checked so that an implementation which rejects this order fails the
       test instead of passing silently. */
    error = clReleaseProgram( program );
    test_error( error, "Unable to release program before its kernel" );

    error = clReleaseKernel( kernel );
    test_error( error, "Unable to release kernel after its program" );

    /* If we got here, both releases reported success. A crash inside the implementation still cannot be
       reported as an error from here. */
    return 0;
}
/* Kernel source used by test_release_during_execute: each work-item runs an
 * empty one-million-iteration loop and then converts src[tid] to int and
 * stores it in dst[tid].
 * NOTE(review): the loop is presumably meant to keep the kernel busy while the
 * host releases its objects; a compiler may remove an empty loop - confirm.
 */
const char *sample_delay_kernel[] = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
" for( int i = 0; i < 1000000; i++ ); \n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n" };
/* Enqueues the delay kernel and then, without waiting for it, releases the
 * two buffers, the kernel and the program it uses. With correct internal
 * reference counting every release must succeed and the queue must still
 * finish cleanly. deviceID and num_elements are not used.
 *
 * Returns 0 on success; nonzero if setup, the enqueue, any release call or
 * the final clFinish fails.
 */
int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    cl_program program;
    cl_kernel kernel;
    cl_mem streams[2];
    size_t threads[1] = { 10 }, localThreadSize;

    /* Build the delay kernel that will be in flight while its objects are released */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
    {
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
    test_error( error, "Unable to set indexed kernel arguments" );

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
    test_error( error, "Unable to calc local thread size" );

    /* Execute the kernel */
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* The kernel should still be executing, but we should still be able to release it. It's not terribly
       useful, but we should be able to do it, if the internal refcounting is indeed correct. Each release
       status is checked so that a rejected release fails the test instead of going unnoticed. */
    error = clReleaseMemObject( streams[ 1 ] );
    test_error( error, "Unable to release mem object during execution" );
    error = clReleaseMemObject( streams[ 0 ] );
    test_error( error, "Unable to release mem object during execution" );
    error = clReleaseKernel( kernel );
    test_error( error, "Unable to release kernel during execution" );
    error = clReleaseProgram( program );
    test_error( error, "Unable to release program during execution" );

    /* Now make sure we're really finished before we go on. */
    error = clFinish(queue);
    test_error( error, "Unable to finish context.");

    return 0;
}

View File

@@ -0,0 +1,65 @@
# Build description for the COMPATIBILITY_BASIC test module.
# MODULE_NAME and ${MODULE_NAME}_SOURCES are set here and then
# ../../../CMakeCommon.txt is included (presumably it consumes both to define
# the actual target - see that file).
set(MODULE_NAME COMPATIBILITY_BASIC)
# Test sources of this module followed by the shared harness sources it is
# built with.
set(${MODULE_NAME}_SOURCES
main.c
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
test_hiloeo.c test_local.c test_pointercast.c
test_if.c test_loop.c
test_readimage.c test_readimage_int16.c test_readimage_fp32.c
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
test_multireadimageonefmt.c test_multireadimagemultifmt.c
test_imagedim.c
test_vloadstore.c
test_int2float.c test_float2int.c
test_createkernelsinprogram.c
test_hostptr.c
test_explicit_s2v.cpp
test_constant.c
test_image_multipass.c
test_imagereadwrite.c test_imagereadwrite3d.c
test_image_param.c
test_imagenpot.c
test_image_r8.c
test_barrier.c
test_basic_parameter_types.c
test_arrayreadwrite.c
test_arraycopy.c
test_imagearraycopy.c
test_imagearraycopy3d.c
test_imagecopy.c
test_imagerandomcopy.c
test_arrayimagecopy.c
test_arrayimagecopy3d.c
test_imagecopy3d.c
test_enqueue_map.cpp
test_work_item_functions.cpp
test_astype.cpp
test_async_copy.cpp
test_sizeof.c
test_vector_creation.cpp
test_vec_type_hint.c
test_numeric_constants.cpp
test_constant_source.cpp
test_bufferreadwriterect.c
test_async_strided_copy.cpp
test_preprocessors.cpp
test_kernel_memory_alignment.cpp
test_global_work_offsets.cpp
test_kernel_call_kernel_function.cpp
test_local_kernel_scope.cpp
../../test_common/harness/errorHelpers.c
../../test_common/harness/threadTesting.c
../../test_common/harness/testHarness.c
../../test_common/harness/kernelHelpers.c
../../test_common/harness/typeWrappers.cpp
../../test_common/harness/imageHelpers.cpp
../../test_common/harness/mt19937.c
../../test_common/harness/conversions.c
../../test_common/harness/rounding_mode.c
../../test_common/harness/msvc9.c
)
# Shared build logic for the test modules.
include(../../../CMakeCommon.txt)

View File

@@ -0,0 +1,75 @@
# Boost.Build description for the test_basic executable.
# All sources, including the .c files, are compiled as C++ (-xc++ for gcc,
# /TP for msvc).
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# Source list of the test executable. The last seven entries were missing from
# this list although main.c registers the tests they provide and the CMake and
# Makefile builds of the same directory already compile them.
exe test_basic
    : main.c
      test_arraycopy.c
      test_arrayimagecopy3d.c
      test_arrayimagecopy.c
      test_arrayreadwrite.c
      test_astype.cpp
      test_async_copy.cpp
      test_barrier.c
      test_basic_parameter_types.c
      test_constant.c
      test_createkernelsinprogram.c
      test_enqueue_map.cpp
      test_explicit_s2v.cpp
      test_float2int.c
      test_fpmath_float2.c
      test_fpmath_float4.c
      test_fpmath_float.c
      test_hiloeo.c
      test_hostptr.c
      test_if.c
      test_imagearraycopy3d.c
      test_imagearraycopy.c
      test_imagecopy3d.c
      test_imagecopy.c
      test_imagedim.c
      test_image_multipass.c
      test_imagenpot.c
      test_image_param.c
      test_image_r8.c
      test_imagerandomcopy.c
      test_imagereadwrite3d.c
      test_imagereadwrite.c
      test_int2float.c
      test_intmath_int2.c
      test_intmath_int4.c
      test_intmath_int.c
      test_intmath_long2.c
      test_intmath_long4.c
      test_intmath_long.c
      test_local.c
      test_loop.c
      test_multireadimagemultifmt.c
      test_multireadimageonefmt.c
      test_pointercast.c
      test_readimage3d.c
      test_readimage3d_fp32.c
      test_readimage3d_int16.c
      test_readimage.c
      test_readimage_fp32.c
      test_readimage_int16.c
      test_sizeof.c
      test_vec_type_hint.c
      test_vector_creation.cpp
      test_vloadstore.c
      test_work_item_functions.cpp
      test_writeimage.c
      test_writeimage_fp32.c
      test_writeimage_int16.c
      test_numeric_constants.cpp
      test_kernel_call_kernel_function.cpp
      test_local_kernel_scope.cpp
      test_constant_source.cpp
      test_bufferreadwriterect.c
      test_async_strided_copy.cpp
      test_preprocessors.cpp
      test_kernel_memory_alignment.cpp
      test_global_work_offsets.cpp
    ;

# Install the executable into the per-variant distribution directory.
install dist
    : test_basic
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/basic
      <variant>release:<location>$(DIST)/release/tests/test_conformance/basic
    ;

View File

@@ -0,0 +1,94 @@
# Makefile for the test_basic executable (links against the OpenCL, OpenGL,
# GLUT and AppKit frameworks).
# Defining BUILD_WITH_ATF additionally links the ATF framework and passes
# -DUSE_ATF to the compiler.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif
# Test sources of this directory followed by the shared harness sources.
# (No comments inside the list: a '#' on a continued line would swallow the
# remaining entries.)
SRCS = main.c \
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c \
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \
test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \
test_if.c test_sizeof.c test_loop.c \
test_readimage.c test_readimage_int16.c test_readimage_fp32.c \
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \
test_multireadimageonefmt.c test_multireadimagemultifmt.c \
test_imagedim.c \
test_vloadstore.c \
test_int2float.c test_float2int.c \
test_createkernelsinprogram.c \
test_hostptr.c \
test_explicit_s2v.cpp \
test_constant.c \
test_constant_source.cpp \
test_image_multipass.c \
test_imagereadwrite.c test_imagereadwrite3d.c \
test_bufferreadwriterect.c \
test_image_param.c \
test_imagenpot.c \
test_image_r8.c \
test_barrier.c \
test_arrayreadwrite.c \
test_arraycopy.c \
test_imagearraycopy.c \
test_imagearraycopy3d.c \
test_imagecopy.c \
test_imagerandomcopy.c \
test_arrayimagecopy.c \
test_arrayimagecopy3d.c\
test_imagecopy3d.c \
test_enqueue_map.cpp \
test_work_item_functions.cpp \
test_astype.cpp \
test_async_copy.cpp \
test_async_strided_copy.cpp \
test_numeric_constants.cpp \
test_kernel_call_kernel_function.cpp \
test_basic_parameter_types.c \
test_vector_creation.cpp \
test_vec_type_hint.c \
test_preprocessors.cpp \
test_kernel_memory_alignment.cpp \
test_global_work_offsets.cpp \
../../test_common/harness/errorHelpers.c \
../../test_common/harness/threadTesting.c \
../../test_common/harness/testHarness.c \
../../test_common/harness/rounding_mode.c \
../../test_common/harness/kernelHelpers.c \
../../test_common/harness/typeWrappers.cpp \
../../test_common/harness/imageHelpers.cpp \
../../test_common/harness/mt19937.c \
../../test_common/harness/conversions.c
# Entries of DEFINES are turned into -D flags in CFLAGS / CXXFLAGS below.
DEFINES =
# Absolute paths of all sources; the object list further down is derived
# from this.
SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_basic
INCLUDE =
# -c: compile only; -g -O0: debug build without optimization.
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
# The C++ driver is used for .c files as well.
CC = c++
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
# Map every .c and .cpp source to its .o object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}
TARGETOBJECT =
all: $(TARGET)
# Link step; the objects themselves are built by make's implicit rules using
# CC / CFLAGS / CXXFLAGS.
$(TARGET): $(OBJECTS)
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
clean:
rm -f $(TARGET) $(OBJECTS)
# Fallback for any target that has no rule in this Makefile.
.DEFAULT:
@echo The target \"$@\" does not exist in Makefile.

View File

@@ -0,0 +1,263 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#if !defined(_WIN32)
#include <unistd.h>
#endif
#include <stdio.h>
#include <string.h>
#include "../../test_common/harness/testHarness.h"
#include "procs.h"
// FIXME: To use certain functions in ../../test_common/harness/imageHelpers.h
// (for example, generate_random_image_data()), the tests are required to declare
// the following variables:
// Device type global expected by the image helpers; initialized to the default device type.
cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
// Rounding-test flag expected by the image helpers; initialized to false here.
bool gTestRounding = false;
// Table of test entry points handed to runTestHarness() in main().
// The order must match basefn_names[] entry for entry (index i here is named
// by basefn_names[i]); only the element counts are checked at compile time,
// so keep both tables in sync when adding or removing a test.
basefn basefn_list[] = {
test_hostptr,
test_fpmath_float,
test_fpmath_float2,
test_fpmath_float4,
test_intmath_int,
test_intmath_int2,
test_intmath_int4,
test_intmath_long,
test_intmath_long2,
test_intmath_long4,
test_hiloeo,
test_if,
test_sizeof,
test_loop,
test_pointer_cast,
test_local_arg_def,
test_local_kernel_def,
test_local_kernel_scope,
test_constant,
test_constant_source,
test_readimage,
test_readimage_int16,
test_readimage_fp32,
test_writeimage,
test_writeimage_int16,
test_writeimage_fp32,
test_multireadimageonefmt,
test_multireadimagemultifmt,
test_image_r8,
test_barrier,
test_int2float,
test_float2int,
test_imagereadwrite,
test_imagereadwrite3d,
test_readimage3d,
test_readimage3d_int16,
test_readimage3d_fp32,
test_bufferreadwriterect,
test_arrayreadwrite,
test_arraycopy,
test_imagearraycopy,
test_imagearraycopy3d,
test_imagecopy,
test_imagecopy3d,
test_imagerandomcopy,
test_arrayimagecopy,
test_arrayimagecopy3d,
test_imagenpot,
test_vload_global,
test_vload_local,
test_vload_constant,
test_vload_private,
test_vstore_global,
test_vstore_local,
test_vstore_private,
test_createkernelsinprogram,
test_imagedim_pow2,
test_imagedim_non_pow2,
test_image_param,
test_image_multipass_integer_coord,
test_image_multipass_float_coord,
test_explicit_s2v_bool,
test_explicit_s2v_char,
test_explicit_s2v_uchar,
test_explicit_s2v_short,
test_explicit_s2v_ushort,
test_explicit_s2v_int,
test_explicit_s2v_uint,
test_explicit_s2v_long,
test_explicit_s2v_ulong,
test_explicit_s2v_float,
test_explicit_s2v_double,
test_enqueue_map_buffer,
test_enqueue_map_image,
test_work_item_functions,
test_astype,
test_async_copy_global_to_local,
test_async_copy_local_to_global,
test_async_strided_copy_global_to_local,
test_async_strided_copy_local_to_global,
test_prefetch,
test_kernel_call_kernel_function,
test_host_numeric_constants,
test_kernel_numeric_constants,
test_kernel_limit_constants,
test_kernel_preprocessor_macros,
test_basic_parameter_types,
test_vector_creation,
test_vec_type_hint,
test_kernel_memory_alignment_local,
test_kernel_memory_alignment_global,
test_kernel_memory_alignment_constant,
test_kernel_memory_alignment_private,
test_global_work_offsets,
test_get_global_offset
};
// Names of the tests in basefn_list[], in the same order (entry i names
// basefn_list[i]). Passed to runTestHarness() alongside the function table.
// Note that a few names differ from their function names (e.g. "mri_one" for
// test_multireadimageonefmt, "parameter_types" for test_basic_parameter_types).
const char *basefn_names[] = {
"hostptr",
"fpmath_float",
"fpmath_float2",
"fpmath_float4",
"intmath_int",
"intmath_int2",
"intmath_int4",
"intmath_long",
"intmath_long2",
"intmath_long4",
"hiloeo",
"if",
"sizeof",
"loop",
"pointer_cast",
"local_arg_def",
"local_kernel_def",
"local_kernel_scope",
"constant",
"constant_source",
"readimage",
"readimage_int16",
"readimage_fp32",
"writeimage",
"writeimage_int16",
"writeimage_fp32",
"mri_one",
"mri_multiple",
"image_r8",
"barrier",
"int2float",
"float2int",
"imagereadwrite",
"imagereadwrite3d",
"readimage3d",
"readimage3d_int16",
"readimage3d_fp32",
"bufferreadwriterect",
"arrayreadwrite",
"arraycopy",
"imagearraycopy",
"imagearraycopy3d",
"imagecopy",
"imagecopy3d",
"imagerandomcopy",
"arrayimagecopy",
"arrayimagecopy3d",
"imagenpot",
"vload_global",
"vload_local",
"vload_constant",
"vload_private",
"vstore_global",
"vstore_local",
"vstore_private",
"createkernelsinprogram",
"imagedim_pow2",
"imagedim_non_pow2",
"image_param",
"image_multipass_integer_coord",
"image_multipass_float_coord",
"explicit_s2v_bool",
"explicit_s2v_char",
"explicit_s2v_uchar",
"explicit_s2v_short",
"explicit_s2v_ushort",
"explicit_s2v_int",
"explicit_s2v_uint",
"explicit_s2v_long",
"explicit_s2v_ulong",
"explicit_s2v_float",
"explicit_s2v_double",
"enqueue_map_buffer",
"enqueue_map_image",
"work_item_functions",
"astype",
"async_copy_global_to_local",
"async_copy_local_to_global",
"async_strided_copy_global_to_local",
"async_strided_copy_local_to_global",
"prefetch",
"kernel_call_kernel_function",
"host_numeric_constants",
"kernel_numeric_constants",
"kernel_limit_constants",
"kernel_preprocessor_macros",
"parameter_types",
"vector_creation",
"vec_type_hint",
"kernel_memory_alignment_local",
"kernel_memory_alignment_global",
"kernel_memory_alignment_constant",
"kernel_memory_alignment_private",
"global_work_offsets",
"get_global_offset",
};
// Compile-time guard: the function table and the name table must have the
// same number of entries.
ct_assert( ( sizeof( basefn_list ) / sizeof( basefn_list[0] ) ) == ( sizeof( basefn_names ) / sizeof( basefn_names[0] ) ) );

// Number of registered tests, derived from the name table.
int num_fns = sizeof( basefn_names ) / sizeof( basefn_names[0] );
int main(int argc, const char *argv[])
{
int err = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
return err;
}

View File

@@ -0,0 +1,142 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/testHarness.h"
#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/rounding_mode.h"
// Helper declared here and defined elsewhere.
// NOTE(review): presumably fills `bytes` bytes at `dest` with the 4-byte
// pattern at `src_pattern` -- confirm against the definition.
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );

// Prototypes of the test entry points. Every function below takes a device,
// a context, a command queue and an element count, and returns an int.
// NOTE(review): some prototypes (e.g. test_simplebarrier, test_sampler_float,
// test_sampler_int, test_single_large_allocation, test_multiple_max_allocation,
// test_native_kernel) have no same-named entry in the basefn_names table --
// confirm whether they are still used.
extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements);
extern int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
extern int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
extern int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Runs the array read/write, array copy and rectangular buffer read/write
# tests of test_basic. Extra command-line arguments are forwarded unchanged.
# Quoting keeps paths and arguments containing whitespace intact, and the
# script stops if it cannot enter its own directory.
cd "$(dirname "$0")" || exit 1
./test_basic arrayreadwrite arraycopy bufferreadwriterect "$@"

View File

@@ -0,0 +1,3 @@
#!/bin/sh
# Runs the array<->image copy tests of test_basic. Extra command-line
# arguments are forwarded unchanged; with no arguments the invocation is the
# same as before. The script stops if it cannot enter its own directory.
cd "$(dirname "$0")" || exit 1
./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy "$@"

View File

@@ -0,0 +1,17 @@
#!/bin/sh
# Runs the image-related subset of test_basic. Extra command-line arguments
# are forwarded unchanged. Quoting keeps paths and arguments containing
# whitespace intact, and the script stops if it cannot enter its own directory.
cd "$(dirname "$0")" || exit 1
./test_basic \
imagecopy imagerandomcopy \
imagearraycopy imagearraycopy3d \
image_r8 \
readimage readimage_int16 readimage_fp32 \
writeimage writeimage_int16 writeimage_fp32 \
imagenpot \
image_param \
image_multipass_integer_coord \
readimage3d \
readimage3d_int16 \
readimage3d_fp32 \
imagereadwrite3d \
imagereadwrite \
"$@"

View File

@@ -0,0 +1,4 @@
#!/bin/sh
# Runs the mri_one and mri_multiple tests of test_basic. Extra command-line
# arguments are forwarded unchanged; with no arguments the invocation is the
# same as before. The script stops if it cannot enter its own directory.
cd "$(dirname "$0")" || exit 1
./test_basic mri_one mri_multiple "$@"

View File

@@ -0,0 +1,201 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "test_copy" kernel used by test_arraycopy: each
// work-item copies the unsigned int at its global id from src to dst.
const char *copy_kernel_code =
"__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = src[tid];\n"
"}\n";
int
test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
cl_uint *input_ptr, *output_ptr;
cl_mem streams[4], results;
cl_program program;
cl_kernel kernel;
unsigned num_elements = 128 * 1024;
cl_uint num_copies = 1;
size_t delta_offset;
unsigned i;
cl_int err;
MTdata d;
int error_count = 0;
input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
// results
results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed");
/*****************************************************************************************************************************************/
#pragma mark client backing
log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
// randomize data
d = init_genrand( gRandomSeed );
for (i=0; i<num_elements; i++)
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
// client backing
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
test_error(err, "clCreateBuffer failed");
delta_offset = num_elements * sizeof(cl_uint) / num_copies;
for (i=0; i<num_copies; i++)
{
size_t offset = i * delta_offset;
err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
test_error(err, "clEnqueueCopyBuffer failed");
}
// Try upload from client backing
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (i=0; i<num_elements; i++)
{
if (input_ptr[i] != output_ptr[i])
{
err = -1;
error_count++;
}
}
if (err)
log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
else
log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");
#pragma mark framework backing (no client data)
log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
// randomize data
for (i=0; i<num_elements; i++)
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
// no backing
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed");
for (i=0; i<num_copies; i++)
{
size_t offset = i * delta_offset;
// Copy the array up from host ptr
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
test_error(err, "clEnqueueCopyBuffer failed");
}
err = clEnqueueReadBuffer( queue, results, true, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (i=0; i<num_elements; i++)
{
if (input_ptr[i] != output_ptr[i])
{
err = -1;
error_count++;
break;
}
}
if (err)
log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
else
log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");
/*****************************************************************************************************************************************/
#pragma mark kernel copy test
log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
// randomize data
for (i=0; i<num_elements; i++)
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
free_mtdata(d); d= NULL;
// client backing
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
test_error(err, "clCreateBuffer failed");
err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
test_error(err, "create_single_kernel_helper failed");
err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
test_error(err, "clSetKernelArg failed");
size_t threads[3] = {num_elements, 0, 0};
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
test_error(err, "clEnqueueNDRangeKernel failed");
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (i=0; i<num_elements; i++)
{
if (input_ptr[i] != output_ptr[i])
{
err = -1;
error_count++;
break;
}
}
// Keep track of multiple errors.
if (error_count != 0)
err = error_count;
if (err)
log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
else
log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseMemObject(results);
clReleaseMemObject(streams[0]);
clReleaseMemObject(streams[2]);
clReleaseMemObject(streams[3]);
free(input_ptr);
free(output_ptr);
return err;
}

View File

@@ -0,0 +1,143 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
cl_uchar *bufptr, *imgptr;
clMemWrapper buffer, image;
int img_width = 512;
int img_height = 512;
size_t elem_size;
size_t buffer_size;
int i;
cl_int err;
MTdata d;
cl_event copyevent;
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
image = create_image_2d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, 0, NULL, &err);
test_error(err, "create_image_2d failed");
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
test_error(err, "clGetImageInfo failed");
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
test_error(err, "clCreateBuffer failed");
bufptr = (cl_uchar*)malloc(buffer_size);
d = init_genrand( gRandomSeed );
bufptr = (cl_uchar*)malloc(buffer_size);
for (i=0; i<(int)buffer_size; i++) {
bufptr[i] = (cl_uchar)genrand_int32(d);
}
free_mtdata(d); d = NULL;
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1};
err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
test_error(err, "clEnqueueCopyImageToBuffer failed");
imgptr = (cl_uchar*)malloc(buffer_size);
err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
test_error(err, "clEnqueueReadBuffer failed");
if (memcmp(bufptr, imgptr, buffer_size) != 0) {
log_error( "ERROR: Results did not validate!\n" );
unsigned char * inchar = (unsigned char*)bufptr;
unsigned char * outchar = (unsigned char*)imgptr;
int failuresPrinted = 0;
int i;
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
int failed = 0;
int j;
for (j=0; j<(int)elem_size; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
int j;
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
sprintf(values + strlen(values), "] != expected [");
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
err = -1;
}
free(bufptr);
free(imgptr);
if (err)
log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
return err;
}
// Runs test_arrayimagecopy_single_format for every 2D image format that
// clGetSupportedImageFormats reports for CL_MEM_READ_WRITE.
// num_elements is unused. Returns 0 if every format passed, non-zero otherwise.
int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // First call only queries how many formats there are.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL && num_formats > 0) {
        log_error("ERROR: Unable to allocate the image format list\n");
        return -1;
    }

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
    if (err != CL_SUCCESS) {
        // test_error() returns immediately, so drop the list first.
        free(formats);
        formats = NULL;
    }
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
    }

    free(formats);

    if (err)
        log_error("ARRAY to IMAGE copy test failed\n");
    else
        log_info("ARRAY to IMAGE copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,144 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
{
cl_uchar *bufptr, *imgptr;
clMemWrapper buffer, image;
int img_width = 128;
int img_height = 128;
int img_depth = 32;
size_t elem_size;
size_t buffer_size;
int i;
cl_int err;
MTdata d;
cl_event copyevent;
log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
image = create_image_3d(context, (cl_mem_flags)(CL_MEM_READ_WRITE), format, img_width, img_height, img_depth, 0, 0, NULL, &err);
test_error(err, "create_image_3d failed");
err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
test_error(err, "clGetImageInfo failed");
buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
buffer = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), buffer_size, NULL, &err);
test_error(err, "clCreateBuffer failed");
bufptr = (cl_uchar*)malloc(buffer_size);
d = init_genrand( gRandomSeed );
bufptr = (cl_uchar*)malloc(buffer_size);
for (i=0; i<(int)buffer_size; i++) {
bufptr[i] = (cl_uchar)genrand_int32(d);
}
free_mtdata(d); d = NULL;
size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth};
err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
test_error(err, "clEnqueueCopyImageToBuffer failed");
imgptr = (cl_uchar*)malloc(buffer_size);
err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
test_error(err, "clEnqueueReadBuffer failed");
if (memcmp(bufptr, imgptr, buffer_size) != 0) {
log_error( "ERROR: Results did not validate!\n" );
unsigned char * inchar = (unsigned char*)bufptr;
unsigned char * outchar = (unsigned char*)imgptr;
int failuresPrinted = 0;
int i;
for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
int failed = 0;
int j;
for (j=0; j<(int)elem_size; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
int j;
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
sprintf(values + strlen(values), "] != expected [");
for (j=0; j<(int)elem_size; j++)
sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
err = -1;
}
free(bufptr);
free(imgptr);
if (err)
log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
(unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
return err;
}
// Runs test_arrayimagecopy3d_single_format for every 3D image format that
// clGetSupportedImageFormats reports for CL_MEM_READ_WRITE.
// num_elements is unused. Returns 0 if every format passed, non-zero otherwise.
int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int err;
    cl_image_format *formats;
    cl_uint num_formats;
    cl_uint i;

    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )

    // First call only queries how many formats there are.
    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
    test_error(err, "clGetSupportedImageFormats failed");

    formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
    if (formats == NULL && num_formats > 0) {
        log_error("ERROR: Unable to allocate the image format list\n");
        return -1;
    }

    err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE3D, num_formats, formats, NULL);
    if (err != CL_SUCCESS) {
        // test_error() returns immediately, so drop the list first.
        free(formats);
        formats = NULL;
    }
    test_error(err, "clGetSupportedImageFormats failed");

    for (i = 0; i < num_formats; i++) {
        err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
    }

    free(formats);

    if (err)
        log_error("ARRAY to IMAGE3D copy test failed\n");
    else
        log_info("ARRAY to IMAGE3D copy test passed\n");

    return err;
}

View File

@@ -0,0 +1,94 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
int
test_arrayreadwrite(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_uint *inptr, *outptr;
cl_mem streams[1];
int num_tries = 400;
num_elements = 1024 * 1024 * 4;
int i, j, err;
MTdata d;
inptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
outptr = (cl_uint*)malloc(num_elements*sizeof(cl_uint));
// randomize data
d = init_genrand( gRandomSeed );
for (i=0; i<num_elements; i++)
inptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
test_error(err, "clCreateBuffer failed");
for (i=0; i<num_tries; i++)
{
int offset;
int cb;
do {
offset = (int)(genrand_int32(d) & 0x7FFFFFFF);
if (offset > 0 && offset < num_elements)
break;
} while (1);
cb = (int)(genrand_int32(d) & 0x7FFFFFFF);
if (cb > (num_elements - offset))
cb = num_elements - offset;
err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), sizeof(cl_uint)*cb,&inptr[offset], 0, NULL, NULL);
test_error(err, "clEnqueueWriteBuffer failed");
err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, offset*sizeof(cl_uint), cb*sizeof(cl_uint), &outptr[offset], 0, NULL, NULL );
test_error(err, "clEnqueueReadBuffer failed");
for (j=offset; j<offset+cb; j++)
{
if (inptr[j] != outptr[j])
{
log_error("ARRAY read, write test failed\n");
err = -1;
break;
}
}
if (err)
break;
}
free_mtdata(d);
clReleaseMemObject(streams[0]);
free(inptr);
free(outptr);
if (!err)
log_info("ARRAY read, write test passed\n");
return err;
}

View File

@@ -0,0 +1,289 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// Kernel source templates for the as_type tests. Each one is a printf-style
// format string that test_astype_set() fills in with sprintf. In every
// template the first %s receives the optional
// "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" line (or an empty string).
//
// General case (neither side is a 3-component vector). After the pragma the
// arguments are: input type name, input size suffix, and then the output type
// name + output size suffix three times (dst pointer, tmp declaration, as_ call).
static const char *astype_kernel_pattern =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( src[ tid ] );\n"
" dst[ tid ] = tmp;\n"
"}\n";
// Both source and destination are 3-component vectors: src and dst are scalar
// pointers accessed through vload3/vstore3.
// Compared with the sprintf argument list used for astype_kernel_pattern, the
// third and fifth arguments (each of which would be "3") are not passed.
static const char *astype_kernel_pattern_V3srcV3dst =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// Only the destination is a 3-component vector: dst is a scalar pointer
// written with vstore3 and the "3" suffix of tmp / as_ is hard-coded in the
// template. Compared with the sprintf argument list used for
// astype_kernel_pattern, no output size suffix is passed at all (the input
// type name and size suffix are followed by the output type name three times).
static const char *astype_kernel_pattern_V3dst =
"%s\n"
"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s3 tmp = as_%s3( src[ tid ] );\n"
" vstore3(tmp,tid,dst);\n"
"}\n";
// Only the source is a 3-component vector: src is a scalar pointer read with
// vload3. Compared with the sprintf argument list used for
// astype_kernel_pattern, the third argument (the input size suffix, which
// would be "3") is not passed.
static const char *astype_kernel_pattern_V3src =
"%s\n"
"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
" dst[ tid ] = tmp;\n"
"}\n";
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
unsigned int vecSize, unsigned int outVecSize,
int numElements )
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
char programSrc[ 10240 ];
size_t threads[ 1 ], localThreads[ 1 ];
size_t typeSize = get_explicit_type_size( inVecType );
size_t outTypeSize = get_explicit_type_size(outVecType);
char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
MTdata d;
// Create program
if(outVecSize == 3 && vecSize == 3) {
// astype_kernel_pattern_V3srcV3dst
sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
} else if(outVecSize == 3) {
// astype_kernel_pattern_V3dst
sprintf( programSrc, astype_kernel_pattern_V3dst,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ),
get_explicit_type_name( outVecType ),
get_explicit_type_name( outVecType ));
} else if(vecSize == 3) {
// astype_kernel_pattern_V3src
sprintf( programSrc, astype_kernel_pattern_V3src,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
} else {
sprintf( programSrc, astype_kernel_pattern,
(outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
}
const char *ptr = programSrc;
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
// Create some input values
size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
char *inBuffer = (char*)malloc( inBufferSize );
size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
char *outBuffer = (char*)malloc( outBufferSize );
d = init_genrand( gRandomSeed );
generate_random_data( inVecType, numElements * vecSize,
d, inBuffer );
free_mtdata(d); d = NULL;
// Create I/O streams and set arguments
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
test_error( error, "Unable to create I/O stream" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
test_error( error, "Unable to create I/O stream" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
test_error( error, "Unable to set kernel argument" );
// Run the kernel
threads[ 0 ] = numElements;
error = get_max_common_work_group_size( context, kernel, threads[ 0 ], &localThreads[ 0 ] );
test_error( error, "Unable to get group size to run with" );
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
// Get the results and compare
// The beauty is that astype is supposed to return the bit pattern as a different type, which means
// the output should have the exact same bit pattern as the input. No interpretation necessary!
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
char *expected = inBuffer;
char *actual = outBuffer;
size_t compSize = typeSize*vecSize;
if(outTypeSize*outVecSize < compSize) {
compSize = outTypeSize*outVecSize;
}
if(outVecSize == 4 && vecSize == 3)
{
// as_type4(vec3) should compile but produce undefined results??
free(inBuffer);
free(outBuffer);
return 0;
}
if(outVecSize != 3 && vecSize != 3 && outVecSize != vecSize)
{
// as_typen(vecm) should compile and run but produce
// implementation-defined results for m != n
// and n*sizeof(type) = sizeof(vecm)
free(inBuffer);
free(outBuffer);
return 0;
}
for( int i = 0; i < numElements; i++ )
{
if( memcmp( expected, actual, compSize ) != 0 )
{
char expectedString[ 1024 ], actualString[ 1024 ];
log_error( "ERROR: Data sample %d of %d for as_%s%d( %s%d ) did not validate (expected {%s}, got {%s})\n",
(int)i, (int)numElements, get_explicit_type_name( outVecType ), vecSize, get_explicit_type_name( inVecType ), vecSize,
GetDataVectorString( expected, typeSize, vecSize, expectedString ),
GetDataVectorString( actual, typeSize, vecSize, actualString ) );
log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
programSrc, (int)threads[0],(int) localThreads[0]);
free(inBuffer);
free(outBuffer);
return 1;
}
expected += typeSize * vecSize;
actual += outTypeSize * outVecSize;
}
free(inBuffer);
free(outBuffer);
return 0;
}
// Runs test_astype_set() over every pair of distinct element types and every
// pair of vector sizes whose total byte size matches, and then additionally
// over the 3 <-> 4 component pairs for element types of equal size.
// Returns the sum of the values returned by the individual test_astype_set() calls.
int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    // Note: although casting to different vector element sizes that match the same size (i.e. short2 -> char4) is
    // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
    // for us to verify what is "valid". So the only thing we can test are types that match in size independent
    // of the element count (char -> uchar, etc)
    ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
    int failures = 0;

    for( unsigned int srcIdx = 0; vecTypes[ srcIdx ] != kNumExplicitTypes; ++srcIdx )
    {
        const ExplicitType srcType = vecTypes[ srcIdx ];
        const size_t srcTypeSize = get_explicit_type_size( srcType );

        // Skip source types the device cannot handle.
        if( ( srcType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) ||
            ( ( srcType == kLong || srcType == kULong ) && !gHasLong ) )
        {
            continue;
        }

        for( unsigned int dstIdx = 0; vecTypes[ dstIdx ] != kNumExplicitTypes; ++dstIdx )
        {
            const ExplicitType dstType = vecTypes[ dstIdx ];
            const size_t dstTypeSize = get_explicit_type_size( dstType );

            // Skip unsupported destination types, and identity conversions.
            if( ( dstType == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) ||
                ( ( dstType == kLong || dstType == kULong ) && !gHasLong ) ||
                // change this check
                srcIdx == dstIdx )
            {
                continue;
            }

            log_info( " (%s->%s)\n", get_explicit_type_name( srcType ), get_explicit_type_name( dstType ) );
            fflush( stdout );

            // Every size pairing whose total width in bytes is the same on both sides.
            for( unsigned int s = 0; vecSizes[ s ] != 0; ++s )
            {
                for( unsigned int o = 0; vecSizes[ o ] != 0; ++o )
                {
                    if( vecSizes[ s ] * srcTypeSize == vecSizes[ o ] * dstTypeSize )
                    {
                        failures += test_astype_set( device, context, queue, srcType, dstType, vecSizes[ s ], vecSizes[ o ], n_elems );
                    }
                }
            }

            if( get_explicit_type_size( vecTypes[ srcIdx ] ) == get_explicit_type_size( vecTypes[ dstIdx ] ) )
            {
                // as_type3(vec4) allowed, as_type4(vec3) not allowed
                failures += test_astype_set( device, context, queue, srcType, dstType, 3, 4, n_elems );
                failures += test_astype_set( device, context, queue, srcType, dstType, 4, 3, n_elems );
            }
        }
    }

    return failures;
}

View File

@@ -0,0 +1,276 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
// Kernel source templates for the async copy / prefetch tests. Each one is a
// printf-style format string that test_copy() fills in with the same eight
// arguments: the optional fp64 pragma, the vector type name four times, the
// scalar element type name, and the vector type name twice more.
//
// Global -> local: every work-item zeroes its slice of the local buffer, the
// work-group performs one async_work_group_copy from src into the local
// buffer, and each work-item then copies its slice of the local buffer to dst
// so the host can compare dst against src.
static const char *async_global_to_local_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );\n"
// Wait for the copy to complete, then verify by manually copying to the dest
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
// Local -> global: every work-item zeroes and then fills its slice of the
// local buffer from src, and the work-group performs one
// async_work_group_copy from the local buffer into its region of dst.
static const char *async_local_to_global_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
// Do this to verify all kernels are done copying to the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_copy((__global %s*)(dst+copiesPerWorkgroup*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
// Prefetch: each work-item issues a prefetch for its slice of src and then
// copies that slice to dst directly. The "Ignore this" kernel comment exists
// only to consume three of the eight format arguments, so this template can be
// filled in with the same argument list as the two templates above.
static const char *prefetch_kernel =
"%s\n" // optional pragma string
"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )\n"
"{\n"
" // Ignore this: %s%s%s\n"
" int i;\n"
" prefetch( (const __global %s*)(src+copiesPerWorkItem*get_global_id(0)), copiesPerWorkItem);\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode,
ExplicitType vecType, int vecSize
)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
size_t threads[ 1 ], localThreads[ 1 ];
void *inBuffer, *outBuffer;
MTdata d;
char vecNameString[64]; vecNameString[0] = 0;
if (vecSize == 1)
sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
else
sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);
size_t elementSize = get_explicit_type_size(vecType)*vecSize;
log_info("Testing %s\n", vecNameString);
cl_long max_local_mem_size;
error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
unsigned int num_of_compute_devices;
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
char programSource[4096]; programSource[0]=0;
char *programPtr;
sprintf(programSource, kernelCode,
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
//log_info("program: %s\n", programSource);
programPtr = programSource;
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
size_t max_workgroup_size;
error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
size_t max_local_workgroup_size[3];
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
// Pick the minimum of the device and the kernel
if (max_workgroup_size > max_local_workgroup_size[0])
max_workgroup_size = max_local_workgroup_size[0];
size_t numberOfCopiesPerWorkitem = 13;
size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem);
// Calculation can return 0 on embedded devices due to 1KB local mem limit
if(maxLocalWorkgroupSize == 0)
{
maxLocalWorkgroupSize = 1;
}
size_t localWorkgroupSize = maxLocalWorkgroupSize;
if (maxLocalWorkgroupSize > max_workgroup_size)
localWorkgroupSize = max_workgroup_size;
size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
size_t numberOfLocalWorkgroups = 1111;
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
inBuffer = (void*)malloc(globalBufferSize);
outBuffer = (void*)malloc(globalBufferSize);
memset(outBuffer, 0, globalBufferSize);
cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);
log_info("Global: %d, local %d, local buffer %db, global buffer %db, each work group will copy %d elements and each work item item will copy %d elements.\n",
(int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, copiesPerWorkgroup, copiesPerWorkItemInt);
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;
d = init_genrand( gRandomSeed );
generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
free_mtdata(d); d = NULL;
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
test_error( error, "Unable to create input buffer" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
test_error( error, "Unable to create output buffer" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
test_error( error, "Unable to set kernel argument" );
// Enqueue
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to queue kernel" );
// Read
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
// Verify
if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 )
{
log_error( "ERROR: Results of copy did not validate!\n" );
unsigned char * inchar = (unsigned char*)inBuffer;
unsigned char * outchar = (unsigned char*)outBuffer;
int failuresPrinted = 0;
for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
int failed = 0;
for (int j=0; j<(int)elementSize; j++)
if (inchar[i+j] != outchar[i+j])
failed = 1;
char values[4096];
values[0] = 0;
if (failed) {
sprintf(values + strlen( values), "%d -> [", i);
for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", inchar[i+j]);
sprintf(values + strlen(values), "] != [");
for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", outchar[i+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
failuresPrinted++;
}
if (failuresPrinted > 5) {
log_error("Not printing further failures...\n");
break;
}
}
return -1;
}
free(inBuffer);
free(outBuffer);
return 0;
}
// Runs test_copy() with the given kernel template for every supported element
// type and for vector sizes 1, 2, 4, 8 and 16. Double is skipped without
// cl_khr_fp64, and long/ulong are skipped when gHasLong is not set.
// Returns 0 if every combination passed, -1 if at least one failed.
int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
    int failedCombinations = 0;

    for( unsigned int t = 0; vecType[ t ] != kNumExplicitTypes; ++t )
    {
        const ExplicitType elemType = vecType[ t ];

        if( ( elemType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) ||
            ( ( elemType == kLong || elemType == kULong ) && !gHasLong ) )
        {
            continue;
        }

        // The size list is terminated by a 0 entry.
        for( const unsigned int *width = vecSizes; *width != 0; ++width )
        {
            if( test_copy( deviceID, context, queue, kernelCode, elemType, *width ) != 0 )
                ++failedCombinations;
        }
    }

    return ( failedCombinations != 0 ) ? -1 : 0;
}
// Test entry point: exercises async_global_to_local_kernel for all types.
// num_elements is not used.
int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_global_to_local_kernel;
    return test_copy_all_types( deviceID, context, queue, kernelTemplate );
}
// Test entry point: exercises async_local_to_global_kernel for all types.
// num_elements is not used.
int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_local_to_global_kernel;
    return test_copy_all_types( deviceID, context, queue, kernelTemplate );
}
// Test entry point: exercises prefetch_kernel for all types.
// num_elements is not used.
int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = prefetch_kernel;
    return test_copy_all_types( deviceID, context, queue, kernelTemplate );
}

View File

@@ -0,0 +1,267 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
// Kernel source templates for the strided async copy tests. Each one is a
// printf-style format string that test_strided_copy() fills in with nine
// arguments: the optional fp64 pragma, a prefix emitted directly before
// "__kernel" (test_strided_copy passes an empty string), the vector type name
// four times, the scalar element type name, and the vector type name twice more.
//
// Strided global -> local: every work-item zeroes its slice of the local
// buffer, the work-group gathers every stride-th element of its region of src
// into the local buffer with async_work_group_strided_copy, and each work-item
// then writes its slice back to the matching strided positions of dst.
static const char *async_strided_global_to_local_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy( (__local %s*)localBuffer, (__global const %s*)(src+copiesPerWorkgroup*stride*get_group_id(0)), (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
// Wait for the copy to complete, then verify by manually copying to the dest
" wait_group_events( 1, &event );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" dst[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];\n"
"}\n" ;
// Strided local -> global: every work-item zeroes its slice of the local
// buffer and fills it from the strided positions of src, and the work-group
// then scatters the local buffer to every stride-th element of its region of
// dst with async_work_group_strided_copy.
static const char *async_strided_local_to_global_kernel =
"%s\n" // optional pragma string
"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (%s)(%s)0;\n"
// Do this to verify all kernels are done zeroing the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" for(i=0; i<copiesPerWorkItem; i++)\n"
" localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = src[ get_global_id( 0 )*copiesPerWorkItem*stride+i*stride ];\n"
// Do this to verify all kernels are done copying to the local buffer before we try the copy
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" event_t event;\n"
" event = async_work_group_strided_copy((__global %s*)(dst+copiesPerWorkgroup*stride*get_group_id(0)), (__local const %s*)localBuffer, (size_t)copiesPerWorkgroup, (size_t)stride, 0 );\n"
" wait_group_events( 1, &event );\n"
"}\n" ;
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
size_t threads[ 1 ], localThreads[ 1 ];
void *inBuffer, *outBuffer;
MTdata d;
char vecNameString[64]; vecNameString[0] = 0;
if (vecSize == 1)
sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
else
sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), vecSize);
size_t elementSize = get_explicit_type_size(vecType)*vecSize;
log_info("Testing %s\n", vecNameString);
cl_long max_local_mem_size;
error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL);
test_error( error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
unsigned int num_of_compute_devices;
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(num_of_compute_devices), &num_of_compute_devices, NULL);
test_error( error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
char programSource[4096]; programSource[0]=0;
char *programPtr;
sprintf(programSource, kernelCode,
vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
"",
vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
//log_info("program: %s\n", programSource);
programPtr = programSource;
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
size_t max_workgroup_size;
error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size), &max_workgroup_size, NULL);
test_error (error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
size_t max_local_workgroup_size[3];
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
test_error (error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
// Pick the minimum of the device and the kernel
if (max_workgroup_size > max_local_workgroup_size[0])
max_workgroup_size = max_local_workgroup_size[0];
cl_ulong max_global_mem_size;
error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(max_global_mem_size), &max_global_mem_size, NULL);
test_error (error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
cl_bool unified_mem;
error = clGetDeviceInfo(deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(unified_mem), &unified_mem, NULL);
test_error (error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
int number_of_global_mem_buffers = (unified_mem) ? 4 : 2;
size_t numberOfCopiesPerWorkitem = 3;
size_t localStorageSpacePerWorkitem = numberOfCopiesPerWorkitem*elementSize;
size_t maxLocalWorkgroupSize = (((int)max_local_mem_size/2)/localStorageSpacePerWorkitem);
size_t localWorkgroupSize = maxLocalWorkgroupSize;
if (maxLocalWorkgroupSize > max_workgroup_size)
localWorkgroupSize = max_workgroup_size;
size_t localBufferSize = localWorkgroupSize*elementSize*numberOfCopiesPerWorkitem;
size_t numberOfLocalWorkgroups = 579;//1111;
// Reduce the numberOfLocalWorkgroups so that no more than 1/2 of CL_DEVICE_GLOBAL_MEM_SIZE is consumed
// by the allocated buffer. This is done to avoid resource errors resulting from address space fragmentation.
size_t numberOfLocalWorkgroupsLimit = max_global_mem_size / (2 * number_of_global_mem_buffers * localBufferSize * stride);
if (numberOfLocalWorkgroups > numberOfLocalWorkgroupsLimit) numberOfLocalWorkgroups = numberOfLocalWorkgroupsLimit;
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
inBuffer = (void*)malloc(globalBufferSize);
outBuffer = (void*)malloc(globalBufferSize);
memset(outBuffer, 0, globalBufferSize);
cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
copiesPerWorkgroup = (int)(numberOfCopiesPerWorkitem*localWorkgroupSize);
log_info("Global: %d, local %d, local buffer %db, global buffer %db, copy stride %d, each work group will copy %d elements and each work item item will copy %d elements.\n",
(int) globalWorkgroupSize, (int)localWorkgroupSize, (int)localBufferSize, (int)globalBufferSize, (int)stride, copiesPerWorkgroup, copiesPerWorkItemInt);
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;
d = init_genrand( gRandomSeed );
generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
free_mtdata(d); d = NULL;
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
test_error( error, "Unable to create input buffer" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
test_error( error, "Unable to create output buffer" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 2, localBufferSize, NULL );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 3, sizeof(copiesPerWorkgroup), &copiesPerWorkgroup );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 4, sizeof(copiesPerWorkItemInt), &copiesPerWorkItemInt );
test_error( error, "Unable to set kernel argument" );
error = clSetKernelArg( kernel, 5, sizeof(stride), &stride );
test_error( error, "Unable to set kernel argument" );
// Enqueue
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to queue kernel" );
// Read
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
// Verify
for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
{
if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, elementSize) != 0 )
{
unsigned char * inchar = (unsigned char*)inBuffer + i;
unsigned char * outchar = (unsigned char*)outBuffer + i;
char values[4096];
values[0] = 0;
log_error( "ERROR: Results of copy did not validate!\n" );
sprintf(values + strlen( values), "%d -> [", i);
for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]);
sprintf(values + strlen(values), "] != [");
for (int j=0; j<(int)elementSize; j++)
sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
return -1;
}
}
free(inBuffer);
free(outBuffer);
return 0;
}
// Runs test_strided_copy() with the given kernel template for every supported
// element type, for vector sizes 1, 2, 4, 8 and 16, and for strides 1, 3, 4
// and 5. Double is skipped without cl_khr_fp64, and long/ulong are skipped
// when gHasLong is not set.
// Returns 0 if every combination passed, -1 if at least one failed.
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
{
    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
    unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
    unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
    int failedCombinations = 0;

    for( unsigned int t = 0; vecType[ t ] != kNumExplicitTypes; ++t )
    {
        const ExplicitType elemType = vecType[ t ];

        if( ( elemType == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) ||
            ( ( elemType == kLong || elemType == kULong ) && !gHasLong ) )
        {
            continue;
        }

        // Both lists are terminated by a 0 entry.
        for( const unsigned int *width = vecSizes; *width != 0; ++width )
        {
            for( const unsigned int *step = strideSizes; *step != 0; ++step )
            {
                if( test_strided_copy( deviceID, context, queue, kernelCode, elemType, *width, *step ) != 0 )
                    ++failedCombinations;
            }
        }
    }

    return ( failedCombinations != 0 ) ? -1 : 0;
}
// Test entry point: exercises async_strided_global_to_local_kernel for all
// types, vector sizes and strides. num_elements is not used.
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_strided_global_to_local_kernel;
    return test_strided_copy_all_types( deviceID, context, queue, kernelTemplate );
}
// Test entry point: exercises async_strided_local_to_global_kernel for all
// types, vector sizes and strides. num_elements is not used.
int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    const char *kernelTemplate = async_strided_local_to_global_kernel;
    return test_strided_copy_all_types( deviceID, context, queue, kernelTemplate );
}

View File

@@ -0,0 +1,158 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the "compute_sum" kernel used by test_barrier.
//
// Every work-item first accumulates a partial sum of a[0..n) into
// tmp_sum[tid], stepping through the input by the work-group size. The
// partial sums are then folded together in rounds: each round starts with a
// barrier(CLK_GLOBAL_MEM_FENCE), adds tmp_sum[tid + i] into tmp_sum[tid] for
// the work-items with tid + i < lsize, and shrinks lsize to i, where i is
// (lsize + 1) / 2 computed with hadd(). Work-item 0 finally stores tmp_sum[0]
// into *sum.
//
// All indexing is by get_local_id(0) / get_local_size(0); test_barrier
// enqueues the kernel with global size == local size, i.e. one work-group.
const char *barrier_kernel_code =
"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
"{\n"
" int tid = get_local_id(0);\n"
" int lsize = get_local_size(0);\n"
" int i;\n"
"\n"
" tmp_sum[tid] = 0;\n"
" for (i=tid; i<n; i+=lsize)\n"
" tmp_sum[tid] += a[i];\n"
" \n"
" // updated to work for any workgroup size \n"
" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
" {\n"
" barrier(CLK_GLOBAL_MEM_FENCE);\n"
" if (tid + i < lsize)\n"
" tmp_sum[tid] += tmp_sum[tid + i];\n"
" lsize = i; \n"
" }\n"
"\n"
" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
" if (tid == 0)\n"
" *sum = tmp_sum[0];\n"
"}\n";
// Checks the sum produced by the device against a reference computed on the
// host.
//
//   inptr   - the n input values that were handed to the kernel
//   tmpptr  - unused; kept so the signature stays unchanged for callers
//   outptr  - outptr[0] is the sum read back from the device
//   n       - number of elements in inptr
//
// Returns 0 (and logs a pass message) when the sums agree, -1 (and logs a
// failure message) otherwise.
static int
verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
{
    // The reference is accumulated in unsigned arithmetic: the sum of many
    // 25-bit inputs can exceed the range of int, and signed overflow is
    // undefined behaviour on the host, whereas unsigned arithmetic wraps
    // modulo 2^N. The comparison is done on the same unsigned representation.
    unsigned int r = 0;
    int i;

    (void)tmpptr;

    for (i = 0; i < n; i++)
    {
        r += (unsigned int)inptr[i];
    }

    if (r != (unsigned int)outptr[0])
    {
        log_error("BARRIER test failed\n");
        return -1;
    }

    log_info("BARRIER test passed\n");
    return 0;
}
// Barrier conformance test.
//
// Builds the compute_sum kernel, fills an input buffer with num_elements
// random integers, runs the kernel as a single work-group (global size ==
// local size) and compares the sum written by the kernel with a sum computed
// on the host by verify_sum().
//
// The work-group size is the smaller of the kernel's CL_KERNEL_WORK_GROUP_SIZE
// and the device's first CL_DEVICE_MAX_WORK_ITEM_SIZES entry, reduced until it
// divides num_elements evenly.
//
// Returns 0 on success, a non-zero value on failure. Failures reported through
// test_error() return from that point directly.
int
test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[3];
    cl_int *input_ptr = NULL, *output_ptr = NULL;
    cl_program program;
    cl_kernel kernel;
    size_t global_threads[3];
    size_t local_threads[3];
    int err;
    int i;
    size_t max_local_workgroup_size[3];
    size_t max_threadgroup_size = 0;
    MTdata d;

    err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
    test_error(err, "Failed to build kernel/program.");

    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
                                   sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
    test_error(err, "clGetKernelWorkgroupInfo failed.");

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

    // Pick the minimum of the device and the kernel
    if (max_threadgroup_size > max_local_workgroup_size[0])
        max_threadgroup_size = max_local_workgroup_size[0];

    // work group size must divide evenly into the global size
    while( num_elements % max_threadgroup_size )
        max_threadgroup_size--;

    // Host-side staging buffers. Both allocations are checked before use so an
    // out-of-memory condition fails the test instead of writing through NULL.
    input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    output_ptr = (cl_int*)malloc(sizeof(cl_int));
    if (input_ptr == NULL || output_ptr == NULL)
    {
        log_error("Failed to allocate host buffers.\n");
        free(input_ptr);
        free(output_ptr);
        clReleaseKernel(kernel);
        clReleaseProgram(program);
        return -1;
    }

    // streams[0]: input values, streams[1]: final sum, streams[2]: per-work-item partial sums.
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, &err);
    test_error(err, "clCreateBuffer failed.");
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int), NULL, &err);
    test_error(err, "clCreateBuffer failed.");
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * max_threadgroup_size, NULL, &err);
    test_error(err, "clCreateBuffer failed.");

    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    test_error(err, "clEnqueueWriteBuffer failed.");

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
    test_error(err, "clSetKernelArg failed.");

    // One work-group: the kernel indexes its scratch buffer by local id.
    global_threads[0] = max_threadgroup_size;
    local_threads[0] = max_threadgroup_size;

    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
    test_error(err, "clEnqueueNDRangeKernel failed.");

    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
    test_error(err, "clEnqueueReadBuffer failed.");

    // The scratch buffer is not read back, so no temporary pointer is passed.
    err = verify_sum(input_ptr, NULL, output_ptr, num_elements);

    // cleanup
    clReleaseMemObject(streams[0]);
    clReleaseMemObject(streams[1]);
    clReleaseMemObject(streams[2]);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,302 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// printf-style template for the "test_kernel" source used by
// test_basic_parameter_types. It is expanded with sprintf():
//   - the first eight %s (the seven by-value parameters and the result
//     pointer) receive the vector-size suffix ("", "2", "4", "8" or "16");
//   - the six %s in the body receive the conversion applied to each integer
//     argument ("convert_float<N>" for vectors, a single space for scalars).
// The kernel stores each argument, converted to float, in result[0..6]; the
// float argument is stored unconverted.
const char *kernel_code =
"__kernel void test_kernel(\n"
"char%s c, uchar%s uc, short%s s, ushort%s us, int%s i, uint%s ui, float%s f,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(c);\n"
" result[1] = %s(uc);\n"
" result[2] = %s(s);\n"
" result[3] = %s(us);\n"
" result[4] = %s(i);\n"
" result[5] = %s(ui);\n"
" result[6] = f;\n"
"}\n";
// printf-style template for the "test_kernel_long" source used by
// test_basic_parameter_types_long. It is expanded with sprintf():
//   - the first three %s (both by-value parameters and the result pointer)
//     receive the vector-size suffix ("", "2", "4", "8" or "16");
//   - the two %s in the body receive the conversion applied to each argument
//     ("convert_float<N>" for vectors, a single space for scalars).
// The kernel stores the converted long in result[0] and the converted ulong
// in result[1].
const char *kernel_code_long =
"__kernel void test_kernel_long(\n"
"long%s l, ulong%s ul,\n"
"__global float%s *result)\n"
"{\n"
" result[0] = %s(l);\n"
" result[1] = %s(ul);\n"
"}\n";
// Checks that long and ulong values (scalar and 2/4/8/16-wide vectors) can be
// passed to a kernel by value.
//
// For each vector size the kernel_code_long template is specialised and built,
// both arguments are set from fixed host arrays, the kernel is run with a
// single work-item, and the floats it writes are compared with the host-side
// float conversion of the same inputs. Vector sizes whose arguments would
// exceed CL_DEVICE_MAX_PARAMETER_SIZE are skipped with an informational log.
//
// Returns the number of mismatching elements (0 on success); failures reported
// through test_error() return from that point directly.
int
test_basic_parameter_types_long(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // We don't really care about the contents since we're just testing that the types work.
    cl_long l[16]={-21,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ulong ul[16]={22,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

    // Per-argument tables, indexed by kernel argument number.
    const char* types[] = { "long", "ulong" };
    const void *arg_data[] = { &l, &ul };
    const size_t arg_elem_bytes[] = { sizeof(cl_long), sizeof(cl_ulong) };

    // Per-vector-size tables, indexed by size_to_test.
    int sizes[] = {1, 2, 4, 8, 16};
    const char* size_strings[] = {"", "2", "4", "8", "16"};

    clMemWrapper results;
    size_t global[3] = {1, 1, 1};
    float results_back[2*16];
    char kernel_string[8192];
    char convert_string[1024];
    size_t max_parameter_size;
    int total_errors = 0;
    int error;

    // Size in bytes of one scalar of every by-value argument.
    const size_t parameter_size = sizeof(cl_long) + sizeof(cl_ulong);

    // Start from empty strings.
    kernel_string[0] = '\0';
    convert_string[0] = '\0';

    // Ask the device how many bytes of kernel arguments it accepts.
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
    test_error( error, "Unable to get max parameter size from device" );

    // One result buffer, large enough for the widest vector size.
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*2*16, NULL, &error);
    test_error(error, "clCreateBuffer failed");

    for (int size_to_test = 0; size_to_test < 5; size_to_test++) {
        const int vec_size = sizes[size_to_test];
        const char *suffix = size_strings[size_to_test];
        clProgramWrapper program;
        clKernelWrapper kernel;

        const size_t total_parameter_size = parameter_size*vec_size + sizeof(cl_mem);
        if (total_parameter_size > max_parameter_size) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)vec_size, (int)total_parameter_size, (int)max_parameter_size);
            continue;
        }

        log_info("Testing vector size %d\n", vec_size);

        // Vectors need an explicit convert_float<N>() call; scalars convert implicitly.
        if (vec_size > 1)
            sprintf(convert_string, "convert_float%s", suffix);
        else
            sprintf(convert_string, " ");

        // Specialise the kernel template and build it.
        sprintf(kernel_string, kernel_code_long,
                suffix, suffix, suffix,
                convert_string, convert_string);

        const char *source = kernel_string;
        error = create_single_kernel_helper(context, &program, &kernel, 1, &source, "test_kernel_long");
        test_error(error, "create single kernel failed");

        // By-value arguments first, then the result buffer.
        for (int arg = 0; arg < 2; arg++) {
            error = clSetKernelArg(kernel, arg, arg_elem_bytes[arg]*vec_size, arg_data[arg]);
            if (error)
                log_error("Setting kernel arg %d %s%s: ", arg, types[arg], suffix);
            test_error(error, "clSetKernelArgs failed");
        }
        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &results);
        test_error(error, "clSetKernelArgs failed");

        // Run a single work-item.
        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Blocking read of everything the kernel may have written.
        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*2*16, results_back, 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        // result[arg] is a float<N>, so argument `arg` starts at float index arg*vec_size.
        for (int arg = 0; arg < 2; arg++) {
            const float *arg_results = results_back + arg*vec_size;
            for (int lane = 0; lane < vec_size; lane++) {
                const float expected = (arg == 0) ? (float)l[lane] : (float)ul[lane];
                if (arg_results[lane] != expected) {
                    total_errors++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[arg], suffix,
                              lane, arg_results[lane], expected);
                }
            }
        }
    }

    return total_errors;
}
// Checks that char, uchar, short, ushort, int, uint and float values (scalar
// and 2/4/8/16-wide vectors) can be passed to a kernel by value.
//
// For each vector size the kernel_code template is specialised and built, the
// seven arguments are set from fixed host arrays, the kernel is run with a
// single work-item, and the floats it writes are compared with the host-side
// float conversion of the same inputs. Vector sizes whose arguments would
// exceed CL_DEVICE_MAX_PARAMETER_SIZE are skipped with an informational log.
// When gHasLong is set, test_basic_parameter_types_long() is run afterwards
// and its error count is added to the result.
//
// Returns the number of mismatching elements (0 on success); failures reported
// through test_error() return from that point directly.
int
test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clMemWrapper results;
    int error;
    size_t global[3] = {1, 1, 1};
    float results_back[7*16];
    int count, index;
    const char* types[] = {"char", "uchar", "short", "ushort", "int", "uint", "float"};
    char kernel_string[8192];
    int sizes[] = {1, 2, 4, 8, 16};
    const char* size_strings[] = {"", "2", "4", "8", "16"};
    // Initialised so the comparison below never reads an indeterminate value,
    // even on the (unexpected) default branch of the switch.
    float expected = 0.0f;
    int total_errors = 0;
    int size_to_test;
    char *ptr;
    char convert_string[1024];
    size_t max_parameter_size;

    // We don't really care about the contents since we're just testing that the types work.
    cl_char c[16]={0,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uchar uc[16]={16,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_short s[16]={-17,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_ushort us[16]={18,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_int i[16]={-19,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};
    cl_uint ui[16]={20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    cl_float f[16]={-23,-1,2,-3,4,-5,6,-7,8,-9,10,-11,12,-13,14,-15};

    // Calculate how large our parameter size is to the kernel
    // (one scalar of every by-value argument).
    size_t parameter_size = sizeof(cl_char) + sizeof(cl_uchar) +
                            sizeof(cl_short) + sizeof(cl_ushort) +
                            sizeof(cl_int) + sizeof(cl_uint) +
                            sizeof(cl_float);

    // Init our strings.
    kernel_string[0] = '\0';
    convert_string[0] = '\0';

    // Get the maximum parameter size allowed
    error = clGetDeviceInfo( device, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( max_parameter_size ), &max_parameter_size, NULL );
    test_error( error, "Unable to get max parameter size from device" );

    // Create the results buffer
    results = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float)*7*16, NULL, &error);
    test_error(error, "clCreateBuffer failed");

    // Go over all the vector sizes
    for (size_to_test = 0; size_to_test < 5; size_to_test++) {
        clProgramWrapper program;
        clKernelWrapper kernel;

        size_t total_parameter_size = parameter_size*sizes[size_to_test] + sizeof(cl_mem);
        if (total_parameter_size > max_parameter_size) {
            log_info("Can not test with vector size %d because it would exceed the maximum allowed parameter size to the kernel. (%d > %d)\n",
                     (int)sizes[size_to_test], (int)total_parameter_size, (int)max_parameter_size);
            continue;
        }

        log_info("Testing vector size %d\n", sizes[size_to_test]);

        // If size is > 1, then we need a explicit convert call.
        if (sizes[size_to_test] > 1) {
            sprintf(convert_string, "convert_float%s", size_strings[size_to_test]);
        } else {
            sprintf(convert_string, " ");
        }

        // Build the kernel: eight vector-size suffixes, then six conversions.
        sprintf(kernel_string, kernel_code,
                size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
                size_strings[size_to_test], size_strings[size_to_test], size_strings[size_to_test],
                size_strings[size_to_test], size_strings[size_to_test],
                convert_string, convert_string, convert_string,
                convert_string, convert_string, convert_string
                );

        ptr = kernel_string;
        error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&ptr, "test_kernel");
        test_error(error, "create single kernel failed");

        // Set the arguments
        for (count = 0; count < 7; count++) {
            switch (count) {
                case 0: error = clSetKernelArg(kernel, count, sizeof(cl_char)*sizes[size_to_test], &c); break;
                case 1: error = clSetKernelArg(kernel, count, sizeof(cl_uchar)*sizes[size_to_test], &uc); break;
                case 2: error = clSetKernelArg(kernel, count, sizeof(cl_short)*sizes[size_to_test], &s); break;
                case 3: error = clSetKernelArg(kernel, count, sizeof(cl_ushort)*sizes[size_to_test], &us); break;
                case 4: error = clSetKernelArg(kernel, count, sizeof(cl_int)*sizes[size_to_test], &i); break;
                case 5: error = clSetKernelArg(kernel, count, sizeof(cl_uint)*sizes[size_to_test], &ui); break;
                case 6: error = clSetKernelArg(kernel, count, sizeof(cl_float)*sizes[size_to_test], &f); break;
                default: log_error("Test error\n"); break;
            }
            if (error)
                log_error("Setting kernel arg %d %s%s: ", count, types[count], size_strings[size_to_test]);
            test_error(error, "clSetKernelArgs failed");
        }
        error = clSetKernelArg(kernel, 7, sizeof(cl_mem), &results);
        test_error(error, "clSetKernelArgs failed");

        // Execute
        error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
        test_error(error, "clEnqueueNDRangeKernel failed");

        // Read back the results
        error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, sizeof(cl_float)*7*16, results_back, 0, NULL, NULL);
        test_error(error, "clEnqueueReadBuffer failed");

        // Verify the results. result[count] is a float<N>, so argument `count`
        // starts at float index count*N in the read-back array.
        for (count = 0; count < 7; count++) {
            for (index=0; index < sizes[size_to_test]; index++) {
                switch (count) {
                    case 0: expected = (float)c[index]; break;
                    case 1: expected = (float)uc[index]; break;
                    case 2: expected = (float)s[index]; break;
                    case 3: expected = (float)us[index]; break;
                    case 4: expected = (float)i[index]; break;
                    case 5: expected = (float)ui[index]; break;
                    case 6: expected = (float)f[index]; break;
                    default: log_error("Test error\n"); break;
                }

                if (results_back[count*sizes[size_to_test]+index] != expected) {
                    total_errors++;
                    log_error("Conversion from %s%s failed: index %d got %g, expected %g.\n", types[count], size_strings[size_to_test],
                              index, results_back[count*sizes[size_to_test]+index], expected);
                }
            }
        }
    }

    if (gHasLong) {
        log_info("Testing long types...\n");
        total_errors += test_basic_parameter_types_long( device, context, queue, num_elements );
    }
    else {
        // Newline-terminated so the next log line does not run into this one.
        log_info("Longs unsupported, skipping.\n");
    }

    return total_errors;
}

View File

@@ -0,0 +1,529 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// Evaluates `cmd` once and, when the result is not CL_SUCCESS, logs the source
// location followed by the printf-style message built from `format` and the
// remaining arguments. Despite the name it does not exit: the abort() call is
// disabled, so execution continues after the log and callers must check the
// status themselves if they need to stop.
//
// The body is wrapped in do { } while (0) so the macro expands to a single
// statement and can be used safely in unbraced if/else bodies.
#define CL_EXIT_ERROR(cmd,format,...)                               \
    do {                                                            \
        if ((cmd) != CL_SUCCESS) {                                  \
            log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);      \
            log_error(format,## __VA_ARGS__ );                      \
            log_error("\n");                                        \
            /*abort();*/                                            \
        }                                                           \
    } while (0)
// Element type of every test buffer.
typedef unsigned char BufferType;
// Globals for test
// Command queue used by all helper functions; assigned by test_bufferreadwriterect.
cl_command_queue queue;
// Width, height and depth (in elements) of each test buffer.
enum { TotalImages = 8 };
size_t width [TotalImages];
size_t height [TotalImages];
size_t depth [TotalImages];
// cl buffer and host buffer.
// buffer[i]  - the cl buffer under test
// verify[i]  - host-side mirror on which every operation is repeated for comparison
// backing[i] - host memory handed to clCreateBuffer (CL_MEM_USE_HOST_PTR) for buffer[i]
cl_mem buffer [TotalImages];
BufferType* verify[TotalImages];
BufferType* backing[TotalImages];
// Temporary buffer used for read and write operations.
// Allocated by test_bufferreadwriterect with the size of the largest test buffer.
BufferType* tmp_buffer;
size_t tmp_buffer_size;
size_t num_tries = 50; // Number of randomly selected operations to perform.
size_t alloc_scale = 2; // Scale term applied buffer allocation size.
// Random number generator state shared by the test and its helpers.
MTdata mt;
// Fills a host buffer of w*h*d elements with pseudo-random bytes drawn from
// the generator `mt`. Each byte is genrand_int32(mt) % 0xff, so the values
// written are in the range 0x00..0xfe.
static void initialize_image(BufferType* ptr, size_t w, size_t h, size_t d, MTdata mt)
{
    const size_t bytes_per_element = sizeof(BufferType) / sizeof(unsigned char);
    const size_t byte_count = w * h * d * bytes_per_element;
    unsigned char *cursor = (unsigned char *)ptr;
    unsigned char *const end = cursor + byte_count;

    // One generator draw per byte, in ascending address order.
    while (cursor != end) {
        *cursor++ = (unsigned char)(genrand_int32(mt) % 0xff);
    }
}
// Dumps a w x h x d buffer through log_error(): a size header, then for every
// slice a "Slice: k" line followed by one line of two-digit hex values per row
// and a blank line.
void print_buffer(BufferType* buf, size_t w, size_t h, size_t d) {
    const size_t slice_elems = w * h;

    log_error("Size = %lux%lux%lu (%lu total)\n",w,h,d,w*h*d);

    for (unsigned slice = 0; slice != d; ++slice) {
        const BufferType *slice_base = buf + slice * slice_elems;

        log_error("Slice: %u\n",slice);
        for (unsigned row = 0; row != h; ++row) {
            const BufferType *row_base = slice_base + row * w;

            for (unsigned col = 0; col != w; ++col) {
                log_error("%02x",row_base[col]);
            }
            log_error("\n");
        }
        log_error("\n");
    }
}
// Returns true when the box of extent `region` placed at `src_offset`
// intersects the box of the same extent placed at `dst_offset`.
// Two boxes intersect only if their half-open spans [offset, offset + region)
// intersect on every one of the three axes; boxes that merely touch, or whose
// extent is zero on some axis, do not overlap.
bool check_overlap(const size_t src_offset[3], const size_t dst_offset[3], const size_t region[3]) {
    for (unsigned axis = 0; axis < 3; ++axis) {
        const size_t src_begin = src_offset[axis];
        const size_t src_end   = src_begin + region[axis];
        const size_t dst_begin = dst_offset[axis];
        const size_t dst_end   = dst_begin + region[axis];

        // Disjoint on one axis is enough to rule out an overlap.
        if (src_begin >= dst_end || src_end <= dst_begin) {
            return false;
        }
    }
    return true;
}
// Copies the region `sregion` at `soffset` in buffer[src] to `doffset` in
// buffer[dst] with clEnqueueCopyBufferRect, then repeats the same copy on the
// host-side verify buffers so they keep mirroring the device contents.
//
// When check_overlap() reports that the source and destination regions
// overlap, the copy is skipped on both device and host and CL_SUCCESS is
// returned. `dregion` is not used (the destination region always has the
// source region's extent).
//
// Returns 0 on success, or the OpenCL error code when the enqueue fails; in
// that case the host mirror is left untouched so it does not diverge from the
// device buffer.
int copy_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
    // Copy between cl buffers.
    // A slice pitch of 0 is passed when a slice holds a single element.
    size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
    size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
    cl_int err;

    if (check_overlap(soffset,doffset,sregion)) {
        log_info( "Copy overlap reported, skipping copy buffer rect\n" );
        return CL_SUCCESS;
    }

    err = clEnqueueCopyBufferRect(queue,
                                  buffer[src],buffer[dst],
                                  soffset, doffset,
                                  sregion,/*dregion,*/
                                  width[src], src_slice_pitch,
                                  width[dst], dst_slice_pitch,
                                  0, NULL, NULL);
    if (err != CL_SUCCESS) {
        CL_EXIT_ERROR(err, "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
        // Report the failure to the caller instead of carrying on as if the
        // device copy had happened.
        return err;
    }

    // Copy between host buffers.
    size_t total = sregion[0] * sregion[1] * sregion[2];
    size_t spitch = width[src];
    size_t sslice = width[src]*height[src];
    size_t dpitch = width[dst];
    size_t dslice = width[dst]*height[dst];

    for (size_t i = 0; i != total; ++i) {
        // Compute the coordinates of the element within the source and destination regions.
        size_t rslice = sregion[0]*sregion[1];
        size_t sz = i / rslice;
        size_t sy = (i % rslice) / sregion[0];
        size_t sx = (i % rslice) % sregion[0];
        size_t dz = sz;
        size_t dy = sy;
        size_t dx = sx;

        // Compute the offset in bytes of the source and destination.
        size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
        size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx;

        verify[dst][d_idx] = verify[src][s_idx];
    }

    return 0;
}
// Compares a region of `device` with the matching region of verify[src].
//
// The region has extent `sregion`; it is read from `device` at `doffset`
// using the row/slice pitches of buffer `dst`, and from verify[src] at
// `soffset` using the pitches of buffer `src`. Elements are visited with x
// varying fastest, then y, then z.
//
// Returns 0 when every element matches. On the first mismatch the comparison
// number, region coordinates and both values are logged and -1 is returned.
int verify_region(BufferType* device, size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3]) {
    const size_t src_row_pitch   = width[src];
    const size_t src_slice_pitch = width[src]*height[src];
    const size_t dst_row_pitch   = width[dst];
    const size_t dst_slice_pitch = width[dst]*height[dst];

    // Running count of elements compared so far, reported on failure.
    size_t comparison = 0;

    for (size_t z = 0; z != sregion[2]; ++z) {
        for (size_t y = 0; y != sregion[1]; ++y) {
            for (size_t x = 0; x != sregion[0]; ++x, ++comparison) {
                // Linear element indices into the reference and device data.
                const size_t s_idx = (soffset[2]+z)*src_slice_pitch + (soffset[1]+y)*src_row_pitch + soffset[0]+x;
                const size_t d_idx = (doffset[2]+z)*dst_slice_pitch + (doffset[1]+y)*dst_row_pitch + doffset[0]+x;

                if (device[d_idx] == verify[src][s_idx]) {
                    continue;
                }

                log_error("Verify failed on comparsion %lu: coordinate (%lu, %lu, %lu) of region\n",comparison,x,y,z);
                log_error("0x%02x != 0x%02x\n", device[d_idx], verify[src][s_idx]);
#if 0
                // Uncomment this section to print buffers.
                log_error("Device (copy): [%lu]\n",dst);
                print_buffer(device,width[dst],height[dst],depth[dst]);
                log_error("\n");
                log_error("Verify: [%lu]\n",src);
                print_buffer(verify[src],width[src],height[src],depth[src]);
                log_error("\n");
                abort();
#endif
                return -1;
            }
        }
    }

    return 0;
}
// This function invokes ReadBufferRect to read a region from the
// specified source buffer into a temporary destination buffer. The
// contents of the temporary buffer are then compared to the source
// region of the corresponding verify buffer.
int read_verify_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
// Clear the temporary destination host buffer.
memset(tmp_buffer, 0xff, tmp_buffer_size);
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
CL_EXIT_ERROR(clEnqueueReadBufferRect(queue,
buffer[src],
CL_TRUE,
soffset,doffset,
sregion,
width[src], src_slice_pitch,
width[dst], dst_slice_pitch,
tmp_buffer,
0, NULL, NULL), "clEnqueueCopyBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
return verify_region(tmp_buffer,src,soffset,sregion,dst,doffset);
}
// This function performs the same verification check as
// read_verify_region, except a MapBuffer command is used to access the
// device buffer data instead of a ReadBufferRect, and the whole
// buffer is checked.
int map_verify_region(size_t src) {
size_t size_bytes = width[src]*height[src]*depth[src]*sizeof(BufferType);
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
cl_int err;
BufferType* mapped = (BufferType*)clEnqueueMapBuffer(queue,buffer[src],CL_TRUE,CL_MAP_READ,0,size_bytes,0,NULL,NULL,&err);
CL_EXIT_ERROR(err, "clEnqueueMapBuffer failed for buffer %u",(unsigned)src);
size_t soffset[] = { 0, 0, 0 };
size_t sregion[] = { width[src], height[src], depth[src] };
int ret = verify_region(mapped,src,soffset,sregion,src,soffset);
CL_EXIT_ERROR(clEnqueueUnmapMemObject(queue,buffer[src],mapped,0,NULL,NULL),
"clEnqueueUnmapMemObject failed for buffer %u",(unsigned)src);
return ret;
}
// This function generates a new temporary buffer and then writes a
// region of it to a region in the specified destination buffer.
int write_region(size_t src, size_t soffset[3], size_t sregion[3], size_t dst, size_t doffset[3], size_t dregion[3]) {
initialize_image(tmp_buffer, tmp_buffer_size, 1, 1, mt);
// memset(tmp_buffer, 0xf0, tmp_buffer_size);
size_t src_slice_pitch = (width[src]*height[src] != 1) ? width[src]*height[src] : 0;
size_t dst_slice_pitch = (width[dst]*height[dst] != 1) ? width[dst]*height[dst] : 0;
// Copy the source region of the cl buffer, to the destination region of the temporary buffer.
CL_EXIT_ERROR(clEnqueueWriteBufferRect(queue,
buffer[dst],
CL_TRUE,
doffset,soffset,
/*sregion,*/dregion,
width[dst], dst_slice_pitch,
width[src], src_slice_pitch,
tmp_buffer,
0, NULL, NULL), "clEnqueueWriteBufferRect failed between %u and %u",(unsigned)src,(unsigned)dst);
// Copy from the temporary buffer to the host buffer.
size_t spitch = width[src];
size_t sslice = width[src]*height[src];
size_t dpitch = width[dst];
size_t dslice = width[dst]*height[dst];
size_t total = sregion[0] * sregion[1] * sregion[2];
for (size_t i = 0; i != total; ++i) {
// Compute the coordinates of the element within the source and destination regions.
size_t rslice = sregion[0]*sregion[1];
size_t sz = i / rslice;
size_t sy = (i % rslice) / sregion[0];
size_t sx = (i % rslice) % sregion[0];
size_t dz = sz;
size_t dy = sy;
size_t dx = sx;
// Compute the offset in bytes of the source and destination.
size_t s_idx = (soffset[2]+sz)*sslice + (soffset[1]+sy)*spitch + soffset[0]+sx;
size_t d_idx = (doffset[2]+dz)*dslice + (doffset[1]+dy)*dpitch + doffset[0]+dx;
verify[dst][d_idx] = tmp_buffer[s_idx];
}
return 0;
}
// Memory-object destructor callback: frees the host allocation passed as
// `data`. test_bufferreadwriterect registers it on each cl buffer with that
// buffer's backing host memory as `data`. The cl_mem argument is ignored.
void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
{
free( data );
}
// This is the main test function for the conformance test.
int
test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements)
{
queue = queue_;
cl_int err;
// Initialize the random number generator.
mt = init_genrand( gRandomSeed );
// Compute a maximum buffer size based on the number of test images and the device maximum.
cl_ulong max_mem_alloc_size = 0;
CL_EXIT_ERROR(clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, NULL),"Could not get device info");
log_info("CL_DEVICE_MAX_MEM_ALLOC_SIZE = %llu bytes.\n", max_mem_alloc_size);
// Confirm that the maximum allocation size is not zero.
if (max_mem_alloc_size == 0) {
log_error("Error: CL_DEVICE_MAX_MEM_ALLOC_SIZE is zero bytes\n");
return -1;
}
// Guess at a reasonable maximum dimension.
size_t max_mem_alloc_dim = (size_t)cbrt((double)(max_mem_alloc_size/sizeof(BufferType)))/alloc_scale;
if (max_mem_alloc_dim == 0) {
max_mem_alloc_dim = max_mem_alloc_size;
}
log_info("Using maximum dimension = %lu.\n", max_mem_alloc_dim);
// Create pairs of cl buffers and host buffers on which operations will be mirrored.
log_info("Creating %u pairs of random sized host and cl buffers.\n", TotalImages);
size_t max_size = 0;
size_t total_bytes = 0;
for (unsigned i=0; i != TotalImages; ++i) {
// Determine a width and height for this buffer.
size_t size_bytes;
size_t tries = 0;
size_t max_tries = 1048576;
do {
width[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
height[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
depth[i] = get_random_size_t(1, max_mem_alloc_dim, mt);
++tries;
} while ((tries < max_tries) && (size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType)) > max_mem_alloc_size);
// Check to see if adequately sized buffers were found.
if (tries >= max_tries) {
log_error("Error: Could not find random buffer sized less than %llu bytes in %lu tries.\n",
max_mem_alloc_size, max_tries);
return -1;
}
// Keep track of the dimensions of the largest buffer.
max_size = (size_bytes > max_size) ? size_bytes : max_size;
total_bytes += size_bytes;
log_info("Buffer[%u] is (%lu,%lu,%lu) = %lu MB (truncated)\n",i,width[i],height[i],depth[i],(size_bytes)/1048576);
}
log_info( "Total size: %lu MB (truncated)\n", total_bytes/1048576 );
// Allocate a temporary buffer for read and write operations.
tmp_buffer_size = max_size;
tmp_buffer = (BufferType*)malloc(tmp_buffer_size);
// Initialize cl buffers
log_info( "Initializing buffers\n" );
for (unsigned i=0; i != TotalImages; ++i) {
size_t size_bytes = width[i]*height[i]*depth[i]*sizeof(BufferType);
// Allocate a host copy of the buffer for verification.
verify[i] = (BufferType*)malloc(size_bytes);
CL_EXIT_ERROR(verify[i] ? CL_SUCCESS : -1, "malloc of host buffer failed for buffer %u", i);
// Allocate the buffer in host memory.
backing[i] = (BufferType*)malloc(size_bytes);
CL_EXIT_ERROR(backing[i] ? CL_SUCCESS : -1, "malloc of backing buffer failed for buffer %u", i);
// Generate a random buffer.
log_info( "Initializing buffer %u\n", i );
initialize_image(verify[i], width[i], height[i], depth[i], mt);
// Copy the image into a buffer which will passed to CL.
memcpy(backing[i], verify[i], size_bytes);
// Create the CL buffer.
buffer[i] = clCreateBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, size_bytes, backing[i], &err);
CL_EXIT_ERROR(err,"clCreateBuffer failed for buffer %u", i);
// Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
err = clSetMemObjectDestructorCallback( buffer[i], mem_obj_destructor_callback, backing[i] );
CL_EXIT_ERROR(err, "Unable to set mem object destructor callback" );
}
// Main test loop, run num_tries times.
log_info( "Executing %u test operations selected at random.\n", (unsigned)num_tries );
for (size_t iter = 0; iter < num_tries; ++iter) {
// Determine a source and a destination.
size_t src = get_random_size_t(0,TotalImages,mt);
size_t dst = get_random_size_t(0,TotalImages,mt);
// Determine the minimum dimensions.
size_t min_width = width[src] < width[dst] ? width[src] : width[dst];
size_t min_height = height[src] < height[dst] ? height[src] : height[dst];
size_t min_depth = depth[src] < depth[dst] ? depth[src] : depth[dst];
// Generate a random source rectangle within the minimum dimensions.
size_t mx = get_random_size_t(0, min_width-1, mt);
size_t my = get_random_size_t(0, min_height-1, mt);
size_t mz = get_random_size_t(0, min_depth-1, mt);
size_t sw = get_random_size_t(1, (min_width - mx), mt);
size_t sh = get_random_size_t(1, (min_height - my), mt);
size_t sd = get_random_size_t(1, (min_depth - mz), mt);
size_t sx = get_random_size_t(0, width[src]-sw, mt);
size_t sy = get_random_size_t(0, height[src]-sh, mt);
size_t sz = get_random_size_t(0, depth[src]-sd, mt);
size_t soffset[] = { sx, sy, sz };
size_t sregion[] = { sw, sh, sd };
// Generate a destination rectangle of the same size.
size_t dw = sw;
size_t dh = sh;
size_t dd = sd;
// Generate a random destination offset within the buffer.
size_t dx = get_random_size_t(0, (width[dst] - dw), mt);
size_t dy = get_random_size_t(0, (height[dst] - dh), mt);
size_t dz = get_random_size_t(0, (depth[dst] - dd), mt);
size_t doffset[] = { dx, dy, dz };
size_t dregion[] = { dw, dh, dd };
// Execute one of three operations:
// - Copy: Copies between src and dst within each set of host, buffer, and images.
// - Read & verify: Reads src region from buffer and image, and compares to host.
// - Write: Generates new buffer with src dimensions, and writes to cl buffer and image.
enum { TotalOperations = 3 };
size_t operation = get_random_size_t(0,TotalOperations,mt);
switch (operation) {
case 0:
log_info("%lu Copy %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = copy_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
case 1:
log_info("%lu Read %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = read_verify_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
case 2:
log_info("%lu Write %lu offset (%lu,%lu,%lu) -> %lu offset (%lu,%lu,%lu) region (%lux%lux%lu = %lu)\n",
iter,
src, soffset[0], soffset[1], soffset[2],
dst, doffset[0], doffset[1], doffset[2],
sregion[0], sregion[1], sregion[2],
sregion[0]*sregion[1]*sregion[2]);
if ((err = write_region(src, soffset, sregion, dst, doffset, dregion)))
return err;
break;
}
#if 0
// Uncomment this section to verify each operation.
// If commented out, verification won't occur until the end of the
// test, and it will not be possible to determine which operation failed.
log_info("Verify src %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", src, 0, 0, 0, width[src], height[src], depth[src]);
if (err = map_verify_region(src))
return err;
log_info("Verify dst %lu offset (%u,%u,%u) region (%lux%lux%lu)\n", dst, 0, 0, 0, width[dst], height[dst], depth[dst]);
if (err = map_verify_region(dst))
return err;
#endif
} // end main for loop.
for (unsigned i=0;i<TotalImages;++i) {
log_info("Verify %u offset (%u,%u,%u) region (%lux%lux%lu)\n", i, 0, 0, 0, width[i], height[i], depth[i]);
if ((err = map_verify_region(i)))
return err;
}
// Clean-up.
free_mtdata(mt);
for (unsigned i=0;i<TotalImages;++i) {
free( verify[i] );
clReleaseMemObject( buffer[i] );
}
free( tmp_buffer );
if (!err) {
log_info("RECT read, write test passed\n");
}
return err;
}

View File

@@ -0,0 +1,275 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the element-wise __constant test: work-item `tid` reads
// one float from tmpF and one int from tmpI (both in the __constant address
// space), converts the int to float, and stores the product in out[tid].
const char *constant_kernel_code =
"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" float ftmp = tmpF[tid]; \n"
" float Itmp = tmpI[tid]; \n"
" out[tid] = ftmp * Itmp; \n"
"}\n";
// OpenCL C source for the looped __constant test: every work-item computes the
// same sum i_pos[0] + i_pos[3] + ... + i_pos[3*(num-1)] from the constant
// buffer and writes it to out[tid].
const char *loop_constant_kernel_code =
"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
"{\n"
" int tid = get_global_id(0);\n"
" float sum = 0;\n"
" for (int i = 0; i < num; i++) {\n"
" float pos = i_pos[i*3];\n"
" sum += pos;\n"
" }\n"
" out[tid] = sum;\n"
"}\n";
// Host-side reference check for constant_kernel.
//   tmpF, tmpI : the float and int inputs that were uploaded to the device
//   out        : the values read back from the device
//   n          : number of elements to compare
// Each out[k] must compare exactly equal to tmpF[k] * tmpI[k].
// Logs the outcome; returns 0 when all elements match, -1 at the first mismatch.
static int
verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
{
    int idx = 0;

    while (idx < n)
    {
        const float expected = tmpF[idx] * tmpI[idx];

        if (!(out[idx] == expected))
        {
            log_error("CONSTANT test failed\n");
            return -1;
        }
        ++idx;
    }

    log_info("CONSTANT test passed\n");
    return 0;
}
static int
verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
{
int i;
cl_int j;
for (i=0; i < n; i++)
{
float sum = 0;
for (j=0; j < l; ++j)
sum += tmp[j*3];
if( out[i] != sum )
{
log_error("loop CONSTANT test failed\n");
return -1;
}
}
log_info("loop CONSTANT test passed\n");
return 0;
}
// Tests kernels that read their inputs through the __constant address space.
//
// Phase 1 runs constant_kernel (out[i] = tmpF[i] * tmpI[i]) over two constant
// buffers and checks the result with verify(). Phase 2 runs
// loop_constant_kernel, which reads the float buffer inside a loop, and checks
// the result with verify_loop_constant().
//
// The buffer size is derived from CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, clamped
// to a quarter of CL_DEVICE_GLOBAL_MEM_SIZE and to CL_DEVICE_MAX_MEM_ALLOC_SIZE,
// and then divided by four.
//
// Returns 0 when both phases pass. Returns non-zero on any OpenCL/setup error
// or when either verification fails; a phase-1 failure is remembered and
// reported even if phase 2 passes. All OpenCL objects and host buffers created
// here are released on every exit path after the initial device queries.
// num_elements is unused.
int
test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared (and, where it is released in cleanup, initialised)
    // up front so that `goto cleanup` never jumps over an initialisation.
    cl_mem       streams[3] = { NULL, NULL, NULL };
    cl_int      *tmpI = NULL;
    cl_float    *tmpF = NULL, *out = NULL;
    cl_program   program = NULL, loop_program = NULL;
    cl_kernel    kernel = NULL, loop_kernel = NULL;
    cl_int       limit = 2;   // loop trip count passed to loop_constant_kernel
    size_t       global_threads[3];
    int          err;
    int          first_result;
    int          result = -1;
    size_t       i;
    cl_ulong     maxSize, maxGlobalSize, maxAllocSize;
    size_t       num_floats, num_ints, constant_values;
    MTdata       d;
    RoundingMode oldRoundMode;
    int          isRTZ = 0;

    /* Verify our test buffer won't be bigger than allowed */
    err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
    test_error( err, "Unable to get max constant buffer size" );
    log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", (unsigned long long)maxSize);

    // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
    test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
    if (maxSize > maxGlobalSize / 4)
        maxSize = maxGlobalSize / 4;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
    test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
    if (maxSize > maxAllocSize)
        maxSize = maxAllocSize;

    maxSize/=4;
    num_ints = (size_t)maxSize/sizeof(cl_int);
    num_floats = (size_t)maxSize/sizeof(cl_float);
    // Both buffers hold the same number of elements: the smaller of the two counts.
    constant_values = (num_ints >= num_floats) ? num_floats : num_ints;

    log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
             (unsigned long)(constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float)),
             (unsigned long)(constant_values*sizeof(cl_int)),
             (unsigned long)(constant_values*sizeof(cl_float)));

    tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
    tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    out  = (cl_float*)malloc(sizeof(cl_float) * constant_values);
    if (!tmpI || !tmpF || !out)
    {
        log_error("Failed to allocate host memory\n");
        goto cleanup;
    }

    // streams[0]: float output, streams[1]: float constant input, streams[2]: int constant input.
    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * constant_values, NULL, NULL);
    streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * constant_values, NULL, NULL);
    if (!streams[0] || !streams[1] || !streams[2])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    d = init_genrand( gRandomSeed );
    for (i=0; i<constant_values; i++) {
        tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
        tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
    }
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }
    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
    if (err) {
        log_error("Failed to create kernel and program: %d\n", err);
        // NOTE(review): the helper's failure contract is not visible here, so
        // its outputs are treated as unowned rather than risking a double release.
        program = NULL;
        kernel = NULL;
        goto cleanup;
    }

    err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    global_threads[0] = constant_values;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
    {
        oldRoundMode = set_round(kRoundTowardZero, kfloat);
        isRTZ = 1;
    }

    // Keep the phase-1 verdict in its own variable: `err` is reused below and
    // would otherwise discard a verification failure.
    first_result = verify(tmpF, tmpI, out, (int)constant_values);

    if (isRTZ)
        (void)set_round(oldRoundMode, kfloat);

    // Loop constant buffer test
    memset(out, 0, sizeof(cl_float) * constant_values);
    err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
                                      &loop_constant_kernel_code, "loop_constant_kernel" );
    if (err) {
        log_error("Failed to create loop kernel and program: %d\n", err);
        loop_program = NULL;   // see NOTE(review) above
        loop_kernel = NULL;
        goto cleanup;
    }

    err  = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
    err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
    if (err != CL_SUCCESS) {
        log_error("clSetKernelArgs for loop kernel failed\n");
        goto cleanup;
    }

    err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueNDRangeKernel failed: %d\n", err);
        goto cleanup;
    }
    err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
    if (err != CL_SUCCESS) {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    err = verify_loop_constant(tmpF, out, limit, (int)constant_values);

    // The test passes only if both verifications passed.
    result = (first_result != 0 || err != 0) ? -1 : 0;

cleanup:
    for (i = 0; i < 3; i++) {
        if (streams[i])
            clReleaseMemObject(streams[i]);
    }
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    if (loop_kernel)
        clReleaseKernel(loop_kernel);
    if (loop_program)
        clReleaseProgram(loop_program);
    free(tmpI);
    free(tmpF);
    free(out);
    return result;
}

View File

@@ -0,0 +1,100 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source declaring program-scope __constant data (a scalar, an index
// and a 16-entry array). Work-item 0 writes outVal to out[0] and
// outValues[outIndex] to out[1]; every other work-item `tid` writes
// outValues[tid] to out[tid + 1].
const char *constant_source_kernel_code[] = {
"__constant int outVal = 42;\n"
"__constant int outIndex = 7;\n"
"__constant int outValues[ 16 ] = { 17, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };\n"
"\n"
"__kernel void constant_kernel( __global int *out )\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" if( tid == 0 )\n"
" {\n"
" out[ 0 ] = outVal;\n"
" out[ 1 ] = outValues[ outIndex ];\n"
" }\n"
" else\n"
" {\n"
" out[ tid + 1 ] = outValues[ tid ];\n"
" }\n"
"}\n" };
// Runs constant_kernel from constant_source_kernel_code with 16 work-items and
// compares the 17 values it writes against a host-side table:
//   slot 0      - the program-scope constant scalar,
//   slot 1      - the constant array indexed by a constant,
//   slots 2..16 - the constant array indexed by the work-item id.
// Returns 0 on success, -1 on the first mismatching slot; OpenCL errors are
// returned through test_error. device and num_elements are unused.
int test_constant_source(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kResultCount = 17 };

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper outStream;
    cl_int outValues[ kResultCount ];
    cl_int expectedValues[ kResultCount ] = { 42, 1985, 01, 11, 12, 1955, 11, 5, 1985, 113, 1, 24, 1984, 7, 23, 1979, 97 };
    size_t threads[ 1 ] = { 16 };
    cl_int error;

    // Build the program and fetch the kernel.
    error = create_single_kernel_helper( context, &program, &kernel, 1, constant_source_kernel_code, "constant_kernel" );
    test_error( error, "Unable to create testing kernel" );

    // Device-side destination for the results.
    outStream = clCreateBuffer( context, CL_MEM_WRITE_ONLY, sizeof( outValues ), NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( outStream ), &outStream );
    test_error( error, "Unable to set kernel argument" );

    // Launch, then block on reading the results back.
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    test_error( error, "Unable to enqueue kernel" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof( outValues ), outValues, 0, NULL, NULL );
    test_error( error, "Unable to read results" );

    // Stop at the first slot that differs; the message names which kind of
    // constant access produced it.
    for( int slot = 0; slot < kResultCount; slot++ )
    {
        if( expectedValues[ slot ] == outValues[ slot ] )
            continue;

        switch( slot )
        {
            case 0:
                log_error( "ERROR: Output value %d from constant source global did not validate! (Expected %d, got %d)\n", slot, expectedValues[ slot ], outValues[ slot ] );
                break;
            case 1:
                log_error( "ERROR: Output value %d from constant-indexed constant array did not validate! (Expected %d, got %d)\n", slot, expectedValues[ slot ], outValues[ slot ] );
                break;
            default:
                log_error( "ERROR: Output value %d from variable-indexed constant array did not validate! (Expected %d, got %d)\n", slot, expectedValues[ slot ], outValues[ slot ] );
                break;
        }
        return -1;
    }

    return 0;
}

View File

@@ -0,0 +1,121 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C program containing exactly one kernel (sample_test), which converts
// each float in src to an int in dst.
const char *sample_single_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
// OpenCL C program containing exactly two kernels (sample_test and
// sample_test2) with identical bodies: each converts src[tid] to int and
// stores it in dst[tid].
const char *sample_double_kernel = {
"__kernel void sample_test(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"
"__kernel void sample_test2(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n"};
int
test_createkernelsinprogram(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
cl_program program;
cl_kernel kernel[2];
unsigned int num_kernels;
size_t lengths[2];
int err;
lengths[0] = strlen(sample_single_kernel);
program = clCreateProgramWithSource(context, 1, &sample_single_kernel, lengths, NULL);
if (!program)
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgramExecutable failed\n");
return -1;
}
err = clCreateKernelsInProgram(program, 1, kernel, &num_kernels);
if ( (err != CL_SUCCESS) || (num_kernels != 1) )
{
log_error("clCreateKernelsInProgram test failed for a single kernel\n");
return -1;
}
clReleaseKernel(kernel[0]);
clReleaseProgram(program);
lengths[0] = strlen(sample_double_kernel);
program = clCreateProgramWithSource(context, 1, &sample_double_kernel, lengths, NULL);
if (!program)
{
log_error("clCreateProgramWithSource failed\n");
return -1;
}
err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
if (err != CL_SUCCESS)
{
log_error("clBuildProgramExecutable failed\n");
return -1;
}
err = clCreateKernelsInProgram(program, 2, kernel, &num_kernels);
if ( (err != CL_SUCCESS) || (num_kernels != 2) )
{
log_error("clCreateKernelsInProgram test failed for two kernels\n");
return -1;
}
log_info("clCreateKernelsInProgram test passed\n");
clReleaseKernel(kernel[0]);
clReleaseKernel(kernel[1]);
clReleaseProgram(program);
return err;
}

View File

@@ -0,0 +1,253 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
// Memory-allocation flag combinations that the map tests iterate over.
// Entries are parallel to flag_set_names; the test loops in this file iterate
// over exactly 5 entries, so keep both tables and those loops in step.
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
CL_MEM_USE_HOST_PTR,
CL_MEM_COPY_HOST_PTR,
0
};
// Printable names for the entries of flag_set, in the same order; used only
// for the "Testing with cl_mem_flags" log lines.
const char* flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR",
"0"
};
int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
const size_t bufferSize = 256*256;
int src_flag_id;
MTdata d = init_genrand( gRandomSeed );
cl_char *initialData = (cl_char*)malloc(bufferSize);
cl_char *finalData = (cl_char*)malloc(bufferSize);
for (src_flag_id=0; src_flag_id < 5; src_flag_id++)
{
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kChar, (unsigned int)bufferSize, d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), initialData, &error);
else
memObject = clCreateBuffer(context, flag_set[src_flag_id], bufferSize * sizeof( cl_char ), NULL, &error);
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
{
error = clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize * sizeof( cl_char ), initialData, 0, NULL, NULL);
test_error( error, "clEnqueueWriteBuffer failed");
}
for( int i = 0; i < 128; i++ )
{
size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, length, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapBuffer call failed" );
log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
// Write into the region
for( size_t j = 0; j < length; j++ )
{
cl_char spin = (cl_char)genrand_int32( d );
// Test read AND write in one swipe
cl_char value = mappedRegion[ j ];
value = spin - value;
mappedRegion[ j ] = value;
// Also update the initial data array
value = initialData[ offset + j ];
value = spin - value;
initialData[ offset + j ] = value;
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, sizeof( cl_char ) * bufferSize, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < bufferSize; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d did not validate! Got %d, expected %d\n", (int)q, (int)finalData[ q ], (int)initialData[ q ] );
free( initialData );
free( finalData );
free_mtdata(d);
return -1;
}
}
} // cl_mem flags
free( initialData );
free( finalData );
free_mtdata(d);
return 0;
}
int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
int error;
cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
const size_t imageSize = 256;
int src_flag_id;
cl_uint *initialData;
cl_uint *finalData;
MTdata d;
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
initialData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
finalData = (cl_uint*)malloc(imageSize * imageSize * 4 *sizeof(cl_uint));
if( !is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &format ) )
{
log_error( "ERROR: Test requires basic OpenCL 1.0 format CL_RGBA:CL_UNSIGNED_INT32, which is unsupported by this device!\n" );
free(initialData);
free(finalData);
return -1;
}
d = init_genrand( gRandomSeed );
for (src_flag_id=0; src_flag_id < 5; src_flag_id++) {
clMemWrapper memObject;
log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
generate_random_data( kUInt, (unsigned int)( imageSize * imageSize ), d, initialData );
if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, initialData, &error );
else
memObject = create_image_2d( context, CL_MEM_READ_WRITE | flag_set[src_flag_id], &format,
imageSize, imageSize, 0, NULL, &error );
test_error( error, "Unable to create testing buffer" );
if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1};
error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, NULL, NULL, initialData, 0, NULL, NULL);
test_error( error, "Unable to write to testing buffer" );
}
for( int i = 0; i < 128; i++ )
{
size_t offset[3], region[3];
size_t rowPitch;
offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
offset[ 2 ] = 0;
region[ 2 ] = 1;
cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
if( error != CL_SUCCESS )
{
print_error( error, "clEnqueueMapImage call failed" );
log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
// Write into the region
cl_uint *mappedPtr = mappedRegion;
for( size_t y = 0; y < region[ 1 ]; y++ )
{
for( size_t x = 0; x < region[ 0 ] * 4; x++ )
{
cl_int spin = (cl_int)random_in_range( 16, 1024, d );
cl_int value;
// Test read AND write in one swipe
value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ];
value = spin - value;
mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value;
// Also update the initial data array
value = initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ];
value = spin - value;
initialData[ ( ( offset[ 1 ] + y ) * imageSize + offset[ 0 ] ) * 4 + x ] = value;
}
}
// Unmap
error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
test_error( error, "Unable to unmap buffer" );
}
// Final validation: read actual values of buffer and compare against our reference
size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
test_error( error, "Unable to read results" );
for( size_t q = 0; q < imageSize * imageSize * 4; q++ )
{
if( initialData[ q ] != finalData[ q ] )
{
log_error( "ERROR: Sample %d (coord %d,%d) did not validate! Got %d, expected %d\n", (int)q, (int)( ( q / 4 ) % imageSize ), (int)( ( q / 4 ) / imageSize ),
(int)finalData[ q ], (int)initialData[ q ] );
free(initialData);
free(finalData);
free_mtdata(d);
return -1;
}
}
} // cl_mem_flags
free(initialData);
free(finalData);
free_mtdata(d);
return 0;
}

View File

@@ -0,0 +1,384 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/conversions.h"
#include "../../test_common/harness/typeWrappers.h"
/* Expands to one string literal holding the source of a "test_conversion"
   kernel. The kernel loads a scalar of type `srctype` from sourceValues[tid]
   and stores it, explicitly cast to the vector type `dsttype` followed by the
   digits of `size`, into destValues[tid]. `srctype` and `dsttype` must already
   be string literals; `size` is stringized here. */
#define DECLARE_S2V_IDENT_KERNEL(srctype,dsttype,size) \
"__kernel void test_conversion(__global " srctype " *sourceValues, __global " dsttype #size " *destValues )\n" \
"{\n" \
" int tid = get_global_id(0);\n" \
" " srctype " src = sourceValues[tid];\n" \
"\n" \
" destValues[tid] = (" dsttype #size ")src;\n" \
"\n" \
"}\n"
/* Brace-enclosed list of the kernels for vector widths 2, 4, 8 and 16 for one
   (source, destination) pair. `srctype` is a string literal; `dsttype` is a
   bare token sequence that is stringized here. */
#define DECLARE_S2V_IDENT_KERNELS(srctype,dsttype) \
{ \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,2), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,4), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,8), \
DECLARE_S2V_IDENT_KERNEL(srctype,#dsttype,16) \
}
/* Placeholder entry for a (source, destination) pair that has no kernels. */
#define DECLARE_EMPTY { NULL, NULL, NULL, NULL, NULL }
/* Note: the next four arrays all must match in order and size to the ExplicitTypes enum in conversions.h!!! */
/* One row of the kernel table: the kernel lists for converting the scalar
   `srctype` (a bare token sequence, stringized here) to each destination
   type, followed by one empty entry. */
#define DECLARE_S2V_IDENT_KERNELS_SET(srctype) \
{ \
DECLARE_S2V_IDENT_KERNELS(#srctype,bool), \
DECLARE_S2V_IDENT_KERNELS(#srctype,char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uchar), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned char), \
DECLARE_S2V_IDENT_KERNELS(#srctype,short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ushort), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned short), \
DECLARE_S2V_IDENT_KERNELS(#srctype,int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,uint), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned int), \
DECLARE_S2V_IDENT_KERNELS(#srctype,long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,ulong), \
DECLARE_S2V_IDENT_KERNELS(#srctype,unsigned long), \
DECLARE_S2V_IDENT_KERNELS(#srctype,float), \
DECLARE_EMPTY \
}
/* One row of the kernel table for a source type that has no kernels at all:
   fifteen empty entries, the same number of entries as a populated row. */
#define DECLARE_EMPTY_SET \
{ \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY, \
DECLARE_EMPTY \
}
/* The overall kernel-source table, indexed as
   [source type][destination type][vector-size slot], where slots 0..3 hold the
   kernels for vector widths 2, 4, 8 and 16. Each populated row lists only four
   kernels per entry, so the fifth slot is left NULL by aggregate
   initialization. Rows appear in the order the macros above list the types;
   any elements not covered by an initializer here are likewise
   zero-initialized (NULL). */
const char * kernel_explicit_s2v_set[kNumExplicitTypes][kNumExplicitTypes][5] = {
DECLARE_S2V_IDENT_KERNELS_SET(bool),
DECLARE_S2V_IDENT_KERNELS_SET(char),
DECLARE_S2V_IDENT_KERNELS_SET(uchar),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned char),
DECLARE_S2V_IDENT_KERNELS_SET(short),
DECLARE_S2V_IDENT_KERNELS_SET(ushort),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned short),
DECLARE_S2V_IDENT_KERNELS_SET(int),
DECLARE_S2V_IDENT_KERNELS_SET(uint),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned int),
DECLARE_S2V_IDENT_KERNELS_SET(long),
DECLARE_S2V_IDENT_KERNELS_SET(ulong),
DECLARE_S2V_IDENT_KERNELS_SET(unsigned long),
DECLARE_S2V_IDENT_KERNELS_SET(float),
DECLARE_EMPTY_SET
};
// Worker for test_explicit_s2v_function: builds programSrc, converts `count`
// scalars of srcType from inputData into vectors of destType x vecSize on the
// device, reads them back into outData and checks every vector lane against a
// host-side conversion of the same scalar.
//
// outData must hold at least get_explicit_type_size(destType) * vecSize * count
// bytes and is owned by the caller, so this function may return early without
// leaking it. Returns 0 on success, non-zero on any OpenCL error or mismatch.
static int
run_explicit_s2v_conversion(cl_context context, cl_command_queue queue, const char *programSrc,
                            ExplicitType srcType, unsigned int count, ExplicitType destType,
                            unsigned int vecSize, void *inputData, void *outData)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    clMemWrapper streams[2];
    unsigned char convertedData[ 8 ]; /* Max type size is 8 bytes */
    size_t threadSize[3], groupSize[3];
    unsigned int i, s;
    unsigned char *inPtr, *outPtr;
    const size_t paramSize = get_explicit_type_size( srcType );
    const size_t destTypeSize = get_explicit_type_size( destType );
    const size_t destStride = destTypeSize * vecSize;
    const char* finalProgramSrc[2] = {
        "", // optional pragma
        programSrc
    };

    if (srcType == kDouble || destType == kDouble) {
        finalProgramSrc[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
    }

    if( create_single_kernel_helper( context, &program, &kernel, 2, finalProgramSrc, "test_conversion" ) )
    {
        log_info( "****** %s%s *******\n", finalProgramSrc[0], finalProgramSrc[1] );
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), paramSize * count, inputData, &error);
    test_error( error, "clCreateBuffer failed");
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), destStride * count, NULL, &error);
    test_error( error, "clCreateBuffer failed");

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Run the kernel */
    threadSize[0] = count;
    error = get_max_common_work_group_size( context, kernel, threadSize[0], &groupSize[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threadSize, groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* Now verify the results. Each value should have been duplicated into all
       vecSize lanes, and we can compare raw bytes instead of relying on the
       actual type of data */
    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, destStride * count, outData, 0, NULL, NULL );
    test_error( error, "Unable to read output values!" );

    inPtr = (unsigned char *)inputData;
    outPtr = (unsigned char *)outData;
    for( i = 0; i < count; i++ )
    {
        /* Convert the input data element to our output data type to compare against */
        convert_explicit_value( (void *)inPtr, (void *)convertedData, srcType, false, kDefaultRoundingType, destType );

        /* Now compare every element of the vector */
        for( s = 0; s < vecSize; s++ )
        {
            if( memcmp( convertedData, outPtr + destTypeSize * s, destTypeSize ) != 0 )
            {
                /* Copy the bytes for the diagnostics instead of dereferencing
                   cast pointers: that could read past the end of either buffer
                   and needed a shift count that is invalid for 8-byte types.
                   At most the first 4 input bytes and the first 16 output
                   bytes of this element are shown. */
                unsigned int inWord = 0;
                unsigned int outWords[ 4 ] = { 0, 0, 0, 0 };
                memcpy( &inWord, inPtr, paramSize < sizeof( inWord ) ? paramSize : sizeof( inWord ) );
                memcpy( outWords, outPtr, destStride < sizeof( outWords ) ? destStride : sizeof( outWords ) );

                log_error( "ERROR: Output value %d:%d does not validate for size %d:%d!\n", i, s, vecSize, (int)destTypeSize );
                log_error( " Input: 0x%0*x\n", (int)( paramSize * 2 ), inWord );
                log_error( " Actual: 0x%08x 0x%08x 0x%08x 0x%08x\n", outWords[ 0 ], outWords[ 1 ], outWords[ 2 ], outWords[ 3 ] );
                return -1;
            }
        }
        inPtr += paramSize;
        outPtr += destStride;
    }

    return 0;
}

// Tests the explicit cast of a scalar of srcType to a vector of destType with
// vecSize lanes, using the kernel source in programSrc and `count` input
// values from inputData.
//
// Returns 0 when programSrc is NULL (no kernel exists for this combination) or
// when every output lane matches the host-converted value; returns non-zero on
// allocation failure, OpenCL errors or a mismatch. The host output buffer is
// released on every path. deviceID is unused.
int test_explicit_s2v_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *programSrc,
                               ExplicitType srcType, unsigned int count, ExplicitType destType, unsigned int vecSize, void *inputData )
{
    if( programSrc == NULL )
        return 0;

    const size_t outBytes = get_explicit_type_size( destType ) * vecSize * count;
    void *outData = malloc( outBytes );
    if( outData == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for output values\n", (int)outBytes );
        return -1;
    }

    const int result = run_explicit_s2v_conversion( context, queue, programSrc, srcType, count,
                                                    destType, vecSize, inputData, outData );
    free( outData );
    return result;
}
// Runs the scalar-to-vector cast test for srcType against every vector width
// in {2, 4, 8, 16}. Destination types are walked in enum order, but only the
// destination equal to srcType is actually exercised. kDouble is skipped when
// cl_khr_fp64 is unavailable, kLong/kULong are skipped when gHasLong is false,
// and types whose name contains a space are skipped.
// After the first failing width the remaining widths for that destination are
// skipped. Returns 0 if nothing failed, -1 otherwise.
int test_explicit_s2v_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, ExplicitType srcType,
                                   unsigned int count, void *inputData )
{
    unsigned int sizes[] = { 2, 4, 8, 16, 0 }; // zero-terminated list of vector widths
    int status = 0;

    for( int dst = kBool; dst < kNumExplicitTypes; dst++ )
    {
        const ExplicitType destType = (ExplicitType)dst;

        if( dst == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
            continue;
        if( !gHasLong && ( dst == kLong || dst == kULong ) )
            continue;

        // Only identity conversions are tested; nothing to do for other pairs.
        if( dst != srcType )
            continue;

        for( int slot = 0; sizes[ slot ] != 0; slot++ )
        {
            // Multi-word type names (e.g. "unsigned char") are skipped.
            const int nameHasSpace =
                strchr( get_explicit_type_name( (ExplicitType)srcType ), ' ' ) != NULL ||
                strchr( get_explicit_type_name( destType ), ' ' ) != NULL;
            if( nameHasSpace )
                continue;

            if( test_explicit_s2v_function( deviceID, context, queue, kernel_explicit_s2v_set[ srcType ][ dst ][ slot ],
                                            srcType, count, destType, sizes[ slot ], inputData ) == 0 )
                continue;

            log_error( "ERROR: Explicit cast of scalar %s to vector %s%d FAILED; skipping other %s vector tests\n",
                       get_explicit_type_name(srcType), get_explicit_type_name(destType), sizes[ slot ], get_explicit_type_name(destType) );
            status = -1;
            break;
        }
    }

    return status;
}
// Placeholder for the bool scalar-to-vector cast test: it only logs that the
// test is skipped and reports success. All parameters are unused.
// If boolean vectors ever become testable, follow the pattern of the other
// test_explicit_s2v_* functions (seeded RandomSeed, generate_random_data, then
// test_explicit_s2v_function_set with kBool).
int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    log_info( "NOTE: Boolean vectors not defined in OpenCL 1.0. Skipping test.\n" );
    return 0;
}
// Scalar-to-vector cast test for char: fills 128 random values and runs the
// kChar test set. Returns that set's result. num_elements is unused.
int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kValueCount = 128 };
    char data[ kValueCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kChar, kValueCount, seed, data );

    const int status = test_explicit_s2v_function_set( deviceID, context, queue, kChar, kValueCount, data );
    return status;
}
// Scalar-to-vector cast test for unsigned char: fills 128 random values and
// runs the test set under both kUChar and kUnsignedChar, stopping at the first
// failure. Returns 0 on success, -1 on failure. num_elements is unused.
int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kValueCount = 128 };
    static const ExplicitType typeAliases[] = { kUChar, kUnsignedChar };
    unsigned char data[ kValueCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUChar, kValueCount, seed, data );

    for( size_t a = 0; a < sizeof( typeAliases ) / sizeof( typeAliases[ 0 ] ); a++ )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, typeAliases[ a ], kValueCount, data ) != 0 )
            return -1;
    }
    return 0;
}
// Scalar-to-vector cast test for short: fills 128 random values and runs the
// kShort test set. Returns 0 on success, -1 on failure. num_elements is unused.
int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kValueCount = 128 };
    short data[ kValueCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kShort, kValueCount, seed, data );

    return ( test_explicit_s2v_function_set( deviceID, context, queue, kShort, kValueCount, data ) != 0 ) ? -1 : 0;
}
// Scalar-to-vector cast test for unsigned short: fills 128 random values and
// runs the test set under both kUShort and kUnsignedShort, stopping at the
// first failure. Returns 0 on success, -1 on failure. num_elements is unused.
int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kValueCount = 128 };
    static const ExplicitType typeAliases[] = { kUShort, kUnsignedShort };
    unsigned short data[ kValueCount ];
    RandomSeed seed(gRandomSeed);

    generate_random_data( kUShort, kValueCount, seed, data );

    for( size_t a = 0; a < sizeof( typeAliases ) / sizeof( typeAliases[ 0 ] ); a++ )
    {
        if( test_explicit_s2v_function_set( deviceID, context, queue, typeAliases[ a ], kValueCount, data ) != 0 )
            return -1;
    }
    return 0;
}
// Scalar-to-vector explicit cast test for int, using 128 random inputs.
// Any non-zero result from the function set is reported as -1.
int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumValues = 128 };
    int inputValues[ kNumValues ];
    RandomSeed rng( gRandomSeed );

    generate_random_data( kInt, kNumValues, rng, inputValues );

    const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kInt, kNumValues, inputValues ) != 0;
    return failed ? -1 : 0;
}
// Scalar-to-vector explicit cast test for unsigned int. The same random
// input is run as kUInt and then as kUnsignedInt; the second run is skipped
// when the first one fails. Returns 0 on success, -1 on failure.
int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumValues = 128 };
    unsigned int inputValues[ kNumValues ];
    RandomSeed rng( gRandomSeed );

    generate_random_data( kUInt, kNumValues, rng, inputValues );

    // || short-circuits, so kUnsignedInt only runs after kUInt passed.
    if( test_explicit_s2v_function_set( deviceID, context, queue, kUInt, kNumValues, inputValues ) != 0
        || test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedInt, kNumValues, inputValues ) != 0 )
    {
        return -1;
    }
    return 0;
}
// Scalar-to-vector explicit cast test for long, using 128 random cl_long
// inputs. Any non-zero result from the function set is reported as -1.
int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumValues = 128 };
    cl_long inputValues[ kNumValues ];
    RandomSeed rng( gRandomSeed );

    generate_random_data( kLong, kNumValues, rng, inputValues );

    const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kLong, kNumValues, inputValues ) != 0;
    return failed ? -1 : 0;
}
// Scalar-to-vector explicit cast test for unsigned long. The same random
// cl_ulong input is run as kULong and then as kUnsignedLong; the second run
// is skipped when the first one fails. Returns 0 on success, -1 on failure.
int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumValues = 128 };
    cl_ulong inputValues[ kNumValues ];
    RandomSeed rng( gRandomSeed );

    generate_random_data( kULong, kNumValues, rng, inputValues );

    // || short-circuits, so kUnsignedLong only runs after kULong passed.
    if( test_explicit_s2v_function_set( deviceID, context, queue, kULong, kNumValues, inputValues ) != 0
        || test_explicit_s2v_function_set( deviceID, context, queue, kUnsignedLong, kNumValues, inputValues ) != 0 )
    {
        return -1;
    }
    return 0;
}
// Scalar-to-vector explicit cast test for float, using 128 random inputs.
// Any non-zero result from the function set is reported as -1.
int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumValues = 128 };
    float inputValues[ kNumValues ];
    RandomSeed rng( gRandomSeed );

    generate_random_data( kFloat, kNumValues, rng, inputValues );

    const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kFloat, kNumValues, inputValues ) != 0;
    return failed ? -1 : 0;
}
// Scalar-to-vector explicit cast test for double. When the device does not
// report cl_khr_fp64 the test logs a note and returns 0 (skipped); otherwise
// 128 random doubles are run through the function set and any non-zero result
// is reported as -1.
int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kNumValues = 128 };
    double inputValues[ kNumValues ];
    RandomSeed rng( gRandomSeed );

    if( is_extension_available( deviceID, "cl_khr_fp64" ) )
    {
        generate_random_data( kDouble, kNumValues, rng, inputValues );

        const bool failed = test_explicit_s2v_function_set( deviceID, context, queue, kDouble, kNumValues, inputValues ) != 0;
        return failed ? -1 : 0;
    }

    log_info("Extension cl_khr_fp64 not supported. Skipping test.\n");
    return 0;
}

View File

@@ -0,0 +1,160 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source of the kernel under test: each work-item reads one float
// from src at its global id, casts it to int and stores it at the same index
// in dst.
const char *float2int_kernel_code =
"__kernel void test_float2int(__global float *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = (int)src[tid];\n"
"\n"
"}\n";
/*
 * Host-side check for the float2int kernel.
 *
 * Walks the first n elements and compares each device result in outptr with
 * the host (int) cast of the matching input in inptr. Logs and returns -1 at
 * the first mismatch; logs a pass message and returns 0 otherwise.
 */
int
verify_float2int(cl_float *inptr, cl_int *outptr, int n)
{
    const cl_float *src = inptr;
    const cl_int *result = outptr;
    int remaining;

    for (remaining = n; remaining > 0; remaining--, src++, result++)
    {
        if (*result == (int)*src)
            continue;

        log_error("FLOAT2INT test failed\n");
        return -1;
    }

    log_info("FLOAT2INT test passed\n");
    return 0;
}
/*
 * Runs the float->int cast kernel over num_elements random floats and checks
 * the device output against the host cast (verify_float2int).
 *
 * Returns 0 on success and -1 on any failure. All host buffers and OpenCL
 * objects created here are released on every exit path through the single
 * cleanup label. The device parameter is not used.
 */
int
test_float2int(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem      streams[2] = { NULL, NULL };
    cl_float    *input_ptr = NULL;
    cl_int      *output_ptr = NULL;
    cl_program  program = NULL;
    cl_kernel   kernel = NULL;
    size_t      lengths[1];
    size_t      threads[1];
    int         result = -1;    /* pessimistic default; only verification can clear it */
    int         err;
    int         i;
    MTdata      d;

    input_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
    output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * num_elements, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    /* The generator is only needed while filling the input, so it is freed
       before anything else can fail. */
    d = init_genrand( gRandomSeed );
    for (i=0; i<num_elements; i++)
        input_ptr[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*num_elements, (void *)input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clWriteArray failed\n");
        goto cleanup;
    }

    lengths[0] = strlen(float2int_kernel_code);
    program = clCreateProgramWithSource(context, 1, &float2int_kernel_code, lengths, NULL);
    if (!program)
    {
        log_error("clCreateProgramWithSource failed\n");
        goto cleanup;
    }

    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clBuildProgramExecutable failed\n");
        goto cleanup;
    }

    kernel = clCreateKernel(program, "test_float2int", NULL);
    if (!kernel)
    {
        log_error("clCreateKernel failed\n");
        goto cleanup;
    }

    /* Accumulate both statuses so a failure on argument 0 is not overwritten
       by a successful call for argument 1. */
    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (size_t)num_elements;
    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    /* Blocking read: output_ptr is complete when the call returns. */
    err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueReadBuffer failed\n");
        goto cleanup;
    }

    result = verify_float2int(input_ptr, output_ptr, num_elements);

cleanup:
    /* Release only what was actually created; free(NULL) is a no-op. */
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -0,0 +1,270 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "../../test_common/harness/rounding_mode.h"
#include "procs.h"
// Kernel source: element-wise float addition, dst[tid] = srcA[tid] + srcB[tid].
const char *fpadd_kernel_code =
"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// Kernel source: element-wise float subtraction, dst[tid] = srcA[tid] - srcB[tid].
const char *fpsub_kernel_code =
"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// Kernel source: element-wise float multiplication, dst[tid] = srcA[tid] * srcB[tid].
const char *fpmul_kernel_code =
"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
// NOTE(review): no use of MAX_ERR is visible in the routines that follow (the
// verify_fp* functions compare with exact !=) - confirm whether it is still needed.
static const float MAX_ERR = 1e-5f;
/*
 * Host reference check for the float add kernel.
 *
 * For each of the first n elements the sum inptrA[i] + inptrB[i] is computed
 * on the host, stored in a float, and compared for exact equality with
 * outptr[i]. Returns -1 (after logging) at the first mismatch, 0 otherwise.
 */
int
verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        float expected;

        expected = inptrA[idx] + inptrB[idx];
        if (expected != outptr[idx])
        {
            log_error("FP_ADD float test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("FP_ADD float test passed\n");
    return 0;
}
/*
 * Host reference check for the float subtract kernel.
 *
 * For each of the first n elements the difference inptrA[i] - inptrB[i] is
 * computed on the host, stored in a float, and compared for exact equality
 * with outptr[i]. Returns -1 (after logging) at the first mismatch, 0 otherwise.
 */
int
verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        float expected;

        expected = inptrA[idx] - inptrB[idx];
        if (expected != outptr[idx])
        {
            log_error("FP_SUB float test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("FP_SUB float test passed\n");
    return 0;
}
/*
 * Host reference check for the float multiply kernel.
 *
 * For each of the first n elements the product inptrA[i] * inptrB[i] is
 * computed on the host, stored in a float, and compared for exact equality
 * with outptr[i]. Returns -1 (after logging) at the first mismatch, 0 otherwise.
 */
int
verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        float expected;

        expected = inptrA[idx] * inptrB[idx];
        if (expected != outptr[idx])
        {
            log_error("FP_MUL float test failed\n");
            return -1;
        }
        idx++;
    }

    log_info("FP_MUL float test passed\n");
    return 0;
}
/*
 * Runs the scalar float add / subtract / multiply kernels over num_elements
 * random inputs and compares each result with the host computation
 * (verify_fpadd / verify_fpsub / verify_fpmul).
 *
 * If the device reports round-to-zero but not round-to-nearest it must be an
 * EMBEDDED_PROFILE device; in that case the host rounding mode is switched to
 * round-toward-zero around each verification and restored afterwards.
 *
 * Returns 0 on success and a non-zero value on failure.
 *
 * The random generator is created only around the input fill loops, so the
 * early error returns no longer leak it, and the host allocations are checked
 * before use.
 * NOTE(review): test_error is a harness macro not shown here; it presumably
 * returns from this function on failure, in which case buffers and CL objects
 * created so far are still not released on those paths - confirm.
 */
int
test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[4];
    cl_program program[3];
    cl_kernel kernel[3];
    float *input_ptr[3], *output_ptr, *p;
    size_t threads[1];
    int err, i, j;
    MTdata d;
    size_t length = sizeof(cl_float) * num_elements;
    int isRTZ = 0;
    RoundingMode oldMode = kDefaultRoundingMode;

    // check for floating point capabilities
    cl_device_fp_config single_config = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
        test_finish();
        return -1;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
    {
        //Check to make sure we are an embedded device
        char profile[32];
        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
        if( err )
        {
            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
            test_finish();
            return -1;
        }
        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
        {
            log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
            test_finish();
            return -1;
        }
        isRTZ = 1;
        oldMode = get_round();
    }

    input_ptr[0] = (cl_float*)malloc(length);
    input_ptr[1] = (cl_float*)malloc(length);
    input_ptr[2] = (cl_float*)malloc(length);
    output_ptr = (cl_float*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        free(input_ptr[0]);
        free(input_ptr[1]);
        free(input_ptr[2]);
        free(output_ptr);
        return -1;
    }

    for (j=0; j<4; j++)
    {
        streams[j] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
        test_error( err, "clCreateBuffer failed.");
    }

    // Fill the three input arrays in order; the generator lives only for this step.
    d = init_genrand( gRandomSeed );
    for (j=0; j<3; j++)
    {
        p = input_ptr[j];
        for (i=0; i<num_elements; i++)
            p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
    }
    free_mtdata( d );
    d = NULL;

    for (j=0; j<3; j++)
    {
        err = clEnqueueWriteBuffer(queue, streams[j], CL_TRUE, 0, length, input_ptr[j], 0, NULL, NULL);
        test_error( err, "clEnqueueWriteBuffer failed.");
    }

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
    test_error( err, "create_single_kernel_helper failed");

    // Every kernel reads streams[0] and streams[1] and writes streams[3].
    for (j=0; j<3; j++)
    {
        err = clSetKernelArg(kernel[j], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[j], 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel[j], 2, sizeof streams[3], &streams[3]);
        test_error( err, "clSetKernelArgs failed.");
    }

    threads[0] = (unsigned int)num_elements;
    for (i=0; i<3; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        test_error( err, "clEnqueueNDRangeKernel failed.");

        // Blocking read, so output_ptr holds this kernel's results on return.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error( err, "clEnqueueReadBuffer failed.");

        if( isRTZ )
            set_round( kRoundTowardZero, kfloat );

        switch (i)
        {
            case 0:
                err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 1:
                err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
            case 2:
                err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
                break;
        }

        if( isRTZ )
            set_round( oldMode, kfloat );

        if (err)
            break;
    }

    // cleanup
    for (j=0; j<4; j++)
        clReleaseMemObject(streams[j]);
    for (i=0; i<3; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,268 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "../../test_common/harness/rounding_mode.h"
#include "procs.h"
// Kernel source: float2 addition, dst[tid] = srcA[tid] + srcB[tid].
const char *fpadd2_kernel_code =
"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// Kernel source: float2 subtraction, dst[tid] = srcA[tid] - srcB[tid].
const char *fpsub2_kernel_code =
"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// Kernel source: float2 multiplication, dst[tid] = srcA[tid] * srcB[tid].
const char *fpmul2_kernel_code =
"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
/*
 * Host reference check for the float2 add kernel. The arrays are treated as
 * n flat floats: each host sum inptrA[i] + inptrB[i] is stored in a float and
 * must equal outptr[i] exactly. Returns -1 (after logging) at the first
 * mismatch, 0 otherwise.
 */
int
verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
{
    const float *lhs = inptrA;
    const float *rhs = inptrB;
    const float *actual = outptr;
    const float *const end = outptr + n;

    for (; actual < end; lhs++, rhs++, actual++)
    {
        float expected;

        expected = *lhs + *rhs;
        if (expected != *actual)
        {
            log_error("FP_ADD float2 test failed\n");
            return -1;
        }
    }

    log_info("FP_ADD float2 test passed\n");
    return 0;
}
/*
 * Host reference check for the float2 subtract kernel. The arrays are treated
 * as n flat floats: each host difference inptrA[i] - inptrB[i] is stored in a
 * float and must equal outptr[i] exactly. Returns -1 (after logging) at the
 * first mismatch, 0 otherwise.
 */
int
verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
{
    const float *lhs = inptrA;
    const float *rhs = inptrB;
    const float *actual = outptr;
    const float *const end = outptr + n;

    for (; actual < end; lhs++, rhs++, actual++)
    {
        float expected;

        expected = *lhs - *rhs;
        if (expected != *actual)
        {
            log_error("FP_SUB float2 test failed\n");
            return -1;
        }
    }

    log_info("FP_SUB float2 test passed\n");
    return 0;
}
/*
 * Host reference check for the float2 multiply kernel. The arrays are treated
 * as n flat floats: each host product inptrA[i] * inptrB[i] is stored in a
 * float and must equal outptr[i] exactly. Returns -1 (after logging) at the
 * first mismatch, 0 otherwise.
 */
int
verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
{
    const float *lhs = inptrA;
    const float *rhs = inptrB;
    const float *actual = outptr;
    const float *const end = outptr + n;

    for (; actual < end; lhs++, rhs++, actual++)
    {
        float expected;

        expected = *lhs * *rhs;
        if (expected != *actual)
        {
            log_error("FP_MUL float2 test failed\n");
            return -1;
        }
    }

    log_info("FP_MUL float2 test passed\n");
    return 0;
}
/*
 * Runs the float2 add / subtract / multiply kernels over num_elements random
 * float2 inputs (2 * num_elements floats per buffer) and compares each result
 * with the host computation (verify_fpadd2 / verify_fpsub2 / verify_fpmul2).
 *
 * If the device reports round-to-zero but not round-to-nearest it must be an
 * EMBEDDED_PROFILE device; in that case the host rounding mode is switched to
 * round-toward-zero around each verification and restored afterwards.
 *
 * Returns 0 on success and a non-zero value on failure.
 *
 * The random generator is created only around the input fill loops, so the
 * early error returns no longer leak it, and the host allocations are checked
 * before use.
 * NOTE(review): test_error is a harness macro not shown here; it presumably
 * returns from this function on failure, in which case buffers and CL objects
 * created so far are still not released on those paths - confirm.
 */
int
test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[4];
    cl_program program[3];
    cl_kernel kernel[3];
    cl_float *input_ptr[3], *output_ptr, *p;
    size_t threads[1];
    int err, i, j;
    MTdata d;
    size_t length = sizeof(cl_float) * 2 * num_elements;
    int isRTZ = 0;
    RoundingMode oldMode = kDefaultRoundingMode;

    // check for floating point capabilities
    cl_device_fp_config single_config = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
        test_finish();
        return -1;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
    {
        //Check to make sure we are an embedded device
        char profile[32];
        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
        if( err )
        {
            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
            test_finish();
            return -1;
        }
        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
        {
            log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
            test_finish();
            return -1;
        }
        isRTZ = 1;
        oldMode = get_round();
    }

    input_ptr[0] = (cl_float*)malloc(length);
    input_ptr[1] = (cl_float*)malloc(length);
    input_ptr[2] = (cl_float*)malloc(length);
    output_ptr = (cl_float*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        free(input_ptr[0]);
        free(input_ptr[1]);
        free(input_ptr[2]);
        free(output_ptr);
        return -1;
    }

    for (j=0; j<4; j++)
    {
        streams[j] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
        test_error( err, "clCreateBuffer failed.");
    }

    // Fill the three input arrays in order; the generator lives only for this step.
    d = init_genrand( gRandomSeed );
    for (j=0; j<3; j++)
    {
        p = input_ptr[j];
        for (i=0; i<num_elements*2; i++)
            p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
    }
    free_mtdata(d);
    d = NULL;

    for (j=0; j<3; j++)
    {
        err = clEnqueueWriteBuffer(queue, streams[j], CL_TRUE, 0, length, input_ptr[j], 0, NULL, NULL);
        test_error(err, "clEnqueueWriteBuffer failed");
    }

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
    test_error( err, "create_single_kernel_helper failed");

    // Every kernel reads streams[0] and streams[1] and writes streams[3].
    for (j=0; j<3; j++)
    {
        err = clSetKernelArg(kernel[j], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[j], 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel[j], 2, sizeof streams[3], &streams[3]);
        test_error( err, "clSetKernelArgs failed.");
    }

    // One work-item per float2 element.
    threads[0] = (unsigned int)num_elements;
    for (i=0; i<3; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        test_error( err, "clEnqueueNDRangeKernel failed.");

        // Blocking read, so output_ptr holds this kernel's results on return.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error( err, "clEnqueueReadBuffer failed.");

        if( isRTZ )
            set_round( kRoundTowardZero, kfloat );

        // Verification works on the flat float view, hence num_elements*2.
        switch (i)
        {
            case 0:
                err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
                break;
            case 1:
                err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
                break;
            case 2:
                err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
                break;
        }

        if( isRTZ )
            set_round( oldMode, kfloat );

        if (err)
            break;
    }

    // cleanup
    for (j=0; j<4; j++)
        clReleaseMemObject(streams[j]);
    for (i=0; i<3; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,269 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/rounding_mode.h"
// Kernel source: float4 addition, dst[tid] = srcA[tid] + srcB[tid].
const char *fpadd4_kernel_code =
"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// Kernel source: float4 subtraction, dst[tid] = srcA[tid] - srcB[tid].
const char *fpsub4_kernel_code =
"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] - srcB[tid];\n"
"}\n";
// Kernel source: float4 multiplication, dst[tid] = srcA[tid] * srcB[tid].
const char *fpmul4_kernel_code =
"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] * srcB[tid];\n"
"}\n";
/*
 * Host reference check for the float4 add kernel. The arrays are treated as
 * n flat floats; every host sum is stored in a float and must equal the
 * device value exactly. Returns -1 (after logging) at the first mismatch,
 * 0 otherwise.
 */
int
verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int pos;

    for (pos = 0; pos != n && pos < n; ++pos)
    {
        float hostValue;

        hostValue = inptrA[pos] + inptrB[pos];
        if (hostValue == outptr[pos])
            continue;

        log_error("FP_ADD float4 test failed\n");
        return -1;
    }

    log_info("FP_ADD float4 test passed\n");
    return 0;
}
/*
 * Host reference check for the float4 subtract kernel. The arrays are treated
 * as n flat floats; every host difference is stored in a float and must equal
 * the device value exactly. Returns -1 (after logging) at the first mismatch,
 * 0 otherwise.
 */
int
verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int pos;

    for (pos = 0; pos != n && pos < n; ++pos)
    {
        float hostValue;

        hostValue = inptrA[pos] - inptrB[pos];
        if (hostValue == outptr[pos])
            continue;

        log_error("FP_SUB float4 test failed\n");
        return -1;
    }

    log_info("FP_SUB float4 test passed\n");
    return 0;
}
/*
 * Host reference check for the float4 multiply kernel. The arrays are treated
 * as n flat floats; every host product is stored in a float and must equal
 * the device value exactly. Returns -1 (after logging) at the first mismatch,
 * 0 otherwise.
 */
int
verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
{
    int pos;

    for (pos = 0; pos != n && pos < n; ++pos)
    {
        float hostValue;

        hostValue = inptrA[pos] * inptrB[pos];
        if (hostValue == outptr[pos])
            continue;

        log_error("FP_MUL float4 test failed\n");
        return -1;
    }

    log_info("FP_MUL float4 test passed\n");
    return 0;
}
/*
 * Runs the float4 add / subtract / multiply kernels over num_elements random
 * float4 inputs (4 * num_elements floats per buffer) and compares each result
 * with the host computation (verify_fpadd4 / verify_fpsub4 / verify_fpmul4).
 *
 * If the device reports round-to-zero but not round-to-nearest it must be an
 * EMBEDDED_PROFILE device; in that case the host rounding mode is switched to
 * round-toward-zero around each verification and restored afterwards.
 *
 * Returns 0 on success and a non-zero value on failure.
 *
 * The random generator is created only around the input fill loops, so the
 * early error returns no longer leak it, and the host allocations are checked
 * before use.
 * NOTE(review): test_error is a harness macro not shown here; it presumably
 * returns from this function on failure, in which case buffers and CL objects
 * created so far are still not released on those paths - confirm.
 */
int
test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem streams[4];
    cl_program program[3];
    cl_kernel kernel[3];
    cl_float *input_ptr[3], *output_ptr, *p;
    size_t threads[1];
    int err, i, j;
    MTdata d;
    size_t length = sizeof(cl_float) * 4 * num_elements;
    int isRTZ = 0;
    RoundingMode oldMode = kDefaultRoundingMode;

    // check for floating point capabilities
    cl_device_fp_config single_config = 0;
    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
    if (err) {
        log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
        test_finish();
        return -1;
    }

    //If we only support rtz mode
    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
    {
        //Check to make sure we are an embedded device
        char profile[32];
        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
        if( err )
        {
            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
            test_finish();
            return -1;
        }
        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
        {
            log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
            test_finish();
            return -1;
        }
        isRTZ = 1;
        oldMode = get_round();
    }

    input_ptr[0] = (cl_float*)malloc(length);
    input_ptr[1] = (cl_float*)malloc(length);
    input_ptr[2] = (cl_float*)malloc(length);
    output_ptr = (cl_float*)malloc(length);
    if (!input_ptr[0] || !input_ptr[1] || !input_ptr[2] || !output_ptr)
    {
        log_error("malloc failed\n");
        free(input_ptr[0]);
        free(input_ptr[1]);
        free(input_ptr[2]);
        free(output_ptr);
        return -1;
    }

    for (j=0; j<4; j++)
    {
        streams[j] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), length, NULL, &err);
        test_error( err, "clCreateBuffer failed.");
    }

    // Fill the three input arrays in order; the generator lives only for this step.
    d = init_genrand( gRandomSeed );
    for (j=0; j<3; j++)
    {
        p = input_ptr[j];
        for (i=0; i<num_elements*4; i++)
            p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
    }
    free_mtdata(d);
    d = NULL;

    for (j=0; j<3; j++)
    {
        err = clEnqueueWriteBuffer(queue, streams[j], CL_TRUE, 0, length, input_ptr[j], 0, NULL, NULL);
        test_error(err, "clEnqueueWriteBuffer failed");
    }

    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
    test_error( err, "create_single_kernel_helper failed");
    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
    test_error( err, "create_single_kernel_helper failed");

    // Every kernel reads streams[0] and streams[1] and writes streams[3].
    for (j=0; j<3; j++)
    {
        err = clSetKernelArg(kernel[j], 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel[j], 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel[j], 2, sizeof streams[3], &streams[3]);
        test_error( err, "clSetKernelArgs failed.");
    }

    // One work-item per float4 element.
    threads[0] = (unsigned int)num_elements;
    for (i=0; i<3; i++)
    {
        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
        test_error( err, "clEnqueueNDRangeKernel failed.");

        // Blocking read, so output_ptr holds this kernel's results on return.
        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
        test_error( err, "clEnqueueReadBuffer failed.");

        if( isRTZ )
            set_round( kRoundTowardZero, kfloat );

        // Verification works on the flat float view, hence num_elements*4.
        switch (i)
        {
            case 0:
                err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
                break;
            case 1:
                err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
                break;
            case 2:
                err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
                break;
        }

        if( isRTZ )
            set_round( oldMode, kfloat );

        if (err)
            break;
    }

    // cleanup
    for (j=0; j<4; j++)
        clReleaseMemObject(streams[j]);
    for (i=0; i<3; i++)
    {
        clReleaseKernel(kernel[i]);
        clReleaseProgram(program[i]);
    }
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(input_ptr[2]);
    free(output_ptr);

    return err;
}

View File

@@ -0,0 +1,284 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "procs.h"
#include <ctype.h>
// Kernel source for the global work offset test. Each work-item builds a
// linear index from its local id, group id and local size in all three
// dimensions (get_global_id is not used for the index), then stores
// get_global_id(0), (1) and (2) at that index in the three output buffers.
const char *work_offset_test[] = {
"__kernel void test( __global int * outputID_A, \n"
" __global int * outputID_B, __global int * outputID_C )\n"
"{\n"
" size_t id0 = get_local_id( 0 ) + get_group_id( 0 ) * get_local_size( 0 );\n"
" size_t id1 = get_local_id( 1 ) + get_group_id( 1 ) * get_local_size( 1 );\n"
" size_t id2 = get_local_id( 2 ) + get_group_id( 2 ) * get_local_size( 2 );\n"
" size_t id = ( id2 * get_global_size( 0 ) * get_global_size( 1 ) ) + ( id1 * get_global_size( 0 ) ) + id0;\n"
"\n"
" outputID_A[ id ] = get_global_id( 0 );\n"
" outputID_B[ id ] = get_global_id( 1 );\n"
" outputID_C[ id ] = get_global_id( 2 );\n"
"}\n"
};
// Element count of the host-side output arrays. Parenthesised so the macro
// expands as a single value inside larger expressions (e.g. x / MAX_TEST_ITEMS).
#define MAX_TEST_ITEMS ( 16 * 16 * 16 )
#define NUM_TESTS 16
// Base extent of each dimension of the hit-count table in check_results.
#define MAX_OFFSET 256
// Range check used inside check_results: logs and makes the enclosing function
// return -1 when v is negative or not below m. c is the letter of the output
// array for the message. The macro reads the loop variable `i` from the
// expansion site. It deliberately stays a bare if-block (not do/while) because
// the existing call sites do not put a semicolon after it.
// Arguments are parenthesised, and the values printed with %lu / %d are cast
// to the matching types so size_t arguments are safe where long is narrower.
#define CHECK_RANGE( v, m, c ) \
    if( ( (v) >= (cl_int)(m) ) || ( (v) < 0 ) ) \
    { \
        log_error( "ERROR: ouputID_%c[%lu]: %d is < 0 or >= %lu\n", (c), (unsigned long)(i), (int)(v), (unsigned long)(m) ); \
        return -1; \
    }
int check_results( size_t threads[], size_t offsets[], cl_int outputA[], cl_int outputB[], cl_int outputC[] )
{
size_t offsettedSizes[ 3 ] = { threads[ 0 ] + offsets[ 0 ], threads[ 1 ] + offsets[ 1 ], threads[ 2 ] + offsets[ 2 ] };
size_t limit = threads[ 0 ] * threads[ 1 ] * threads[ 2 ];
static char counts[ MAX_OFFSET + 32 ][ MAX_OFFSET + 16 ][ MAX_OFFSET + 16 ];
memset( counts, 0, sizeof( counts ) );
for( size_t i = 0; i < limit; i++ )
{
// Check ranges first
CHECK_RANGE( outputA[ i ], offsettedSizes[ 0 ], 'A' )
CHECK_RANGE( outputB[ i ], offsettedSizes[ 1 ], 'B' )
CHECK_RANGE( outputC[ i ], offsettedSizes[ 2 ], 'C' )
// Now set the value in the map
counts[ outputA[ i ] ][ outputB[ i ] ][ outputC[ i ] ]++;
}
// Now check the map
int missed = 0, multiple = 0, errored = 0, corrected = 0;
for( size_t x = 0; x < offsettedSizes[ 0 ]; x++ )
{
for( size_t y = 0; y < offsettedSizes[ 1 ]; y++ )
{
for( size_t z = 0; z < offsettedSizes[ 2 ]; z++ )
{
const char * limitMsg = " (further errors of this type suppressed)";
if( ( x >= offsets[ 0 ] ) && ( y >= offsets[ 1 ] ) && ( z >= offsets[ 2 ] ) )
{
if( counts[ x ][ y ][ z ] < 1 )
{
if( missed < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was missed%s\n", x, y, z, ( missed == 2 ) ? limitMsg : "" );
missed++;
}
else if( counts[ x ][ y ][ z ] > 1 )
{
if( multiple < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was returned multiple times%s\n", x, y, z, ( multiple == 2 ) ? limitMsg : "" );
multiple++;
}
}
else
{
if( counts[ x ][ y ][ z ] > 0 )
{
if( errored < 3 )
log_error( "ERROR: Map value (%ld,%ld,%ld) was erroneously returned%s\n", x, y, z, ( errored == 2 ) ? limitMsg : "" );
errored++;
}
}
}
}
}
if( missed || multiple || errored )
{
size_t diffs[3] = { ( offsets[ 0 ] > threads[ 0 ] ? 0 : threads[ 0 ] - offsets[ 0 ] ),
( offsets[ 1 ] > threads[ 1 ] ? 0 : threads[ 1 ] - offsets[ 1 ] ),
( offsets[ 2 ] > threads[ 2 ] ? 0 : threads[ 2 ] - offsets[ 2 ] ) };
int diff = (int)( ( threads[ 0 ] - diffs[ 0 ] ) * ( threads[ 1 ] - diffs[ 1 ] ) * ( threads[ 2 ] - diffs[ 2 ] ) );
if( ( multiple == 0 ) && ( missed == diff ) && ( errored == diff ) )
log_error( "ERROR: Global work offset values are not being respected by get_global_id()\n" );
else
log_error( "ERROR: Global work offset values did not function as expected (%d missed, %d reported multiple times, %d erroneously hit)\n",
missed, multiple, errored );
}
return ( missed | multiple | errored | corrected );
}
// Launches the work-offset kernel NUM_TESTS times with random 3D global sizes
// (at most MAX_TEST_ITEMS work-items in total) and random global offsets in
// [0, MAX_OFFSET], and checks with check_results() that get_global_id()
// reported every offsetted ID exactly once.
// Returns 0 on success and non-zero on the first failure.
int test_global_work_offsets(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Releases the random generator state on every exit path. The error
    // macros used below appear to return from this function on failure, so an
    // explicit free at the end alone would leak the state on those paths.
    struct SeedGuard
    {
        MTdata data;
        explicit SeedGuard( MTdata d ) : data( d ) {}
        ~SeedGuard() { free_mtdata( data ); }
    };

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 3 ];
    int error;
    size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
    cl_int outputA[ MAX_TEST_ITEMS ], outputB[ MAX_TEST_ITEMS ], outputC[ MAX_TEST_ITEMS ];

    // Create the kernel
    if( create_single_kernel_helper( context, &program, &kernel, 1, work_offset_test, "test" ) != 0 )
    {
        return -1;
    }

    //// Create some output streams
    // Use just one output array to init them all (no need to init every single stack storage here)
    memset( outputA, 0xff, sizeof( outputA ) );
    for( int i = 0; i < 3; i++ )
    {
        streams[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof(outputA), outputA, &error );
        test_error( error, "Unable to create output array" );
    }

    // Run a few different times
    MTdata seed = init_genrand( gRandomSeed );
    SeedGuard seedGuard( seed );
    for( int test = 0; test < NUM_TESTS; test++ )
    {
        // Choose a random combination of thread size, but in total no more than MAX_TEST_ITEMS
        threads[ 0 ] = random_in_range( 1, 32, seed );
        threads[ 1 ] = random_in_range( 1, 16, seed );
        threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );

        // Make sure we get the local thread count right
        error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
        test_error( error, "Unable to determine local work group sizes" );

        // Randomize some offsets
        for( int j = 0; j < 3; j++ )
            offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );

        log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
                  threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
                  offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );

        // Now set up and run
        for( int i = 0; i < 3; i++ )
        {
            error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
            test_error( error, "Unable to set indexed kernel arguments" );
        }
        error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        // Read our results back now
        cl_int * resultBuffers[] = { outputA, outputB, outputC };
        for( int i = 0; i < 3; i++ )
        {
            error = clEnqueueReadBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( outputA ), resultBuffers[ i ], 0, NULL, NULL );
            test_error( error, "Unable to get result data" );
        }

        // Now we need to check the results. The outputs should have one entry for each possible ID,
        // but they won't be in order, so we need to construct a count map to determine what we got
        if( check_results( threads, offsets, outputA, outputB, outputC ) )
        {
            log_error( "\t(Test failed for global dim %ld,%ld,%ld, local dim %ld,%ld,%ld, offsets %ld,%ld,%ld)\n",
                       threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
                       offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );
            return -1;
        }
    }

    // All done! (seedGuard frees the generator state)
    return 0;
}
// OpenCL C source for the get_global_offset() test kernel: every work-item
// stores get_global_offset( 0 / 1 / 2 ), cast to int, into outOffsets[ 0..2 ].
const char *get_offset_test[] = {
"__kernel void test( __global int * outOffsets )\n"
"{\n"
" // We use local ID here so we don't have to worry about offsets\n"
" // Also note that these should be the same for ALL threads, so we won't worry about contention\n"
" outOffsets[ 0 ] = (int)get_global_offset( 0 );\n"
" outOffsets[ 1 ] = (int)get_global_offset( 1 );\n"
" outOffsets[ 2 ] = (int)get_global_offset( 2 );\n"
"}\n"
};
// Launches the get_global_offset() kernel NUM_TESTS times with random 3D
// global sizes and random global offsets in [0, MAX_OFFSET], and checks that
// the three values written by the kernel equal the offsets passed to
// clEnqueueNDRangeKernel.
// Returns 0 on success; on a mismatch returns the number of mismatching
// dimensions; other failures return a non-zero error value.
int test_get_global_offset(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Releases the random generator state on every exit path. The error
    // macros used below appear to return from this function on failure, so an
    // explicit free at the end alone would leak the state on those paths.
    struct SeedGuard
    {
        MTdata data;
        explicit SeedGuard( MTdata d ) : data( d ) {}
        ~SeedGuard() { free_mtdata( data ); }
    };

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ 1 ];
    int error;
    size_t threads[] = {1,1,1}, localThreads[] = {1,1,1}, offsets[] = {0,0,0};
    cl_int outOffsets[ 3 ];

    // Create the kernel
    if( create_single_kernel_helper( context, &program, &kernel, 1, get_offset_test, "test" ) != 0 )
    {
        return -1;
    }

    // Create the output stream, pre-filled with 0xff bytes
    memset( outOffsets, 0xff, sizeof( outOffsets ) );
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR), sizeof( outOffsets ), outOffsets, &error );
    test_error( error, "Unable to create control ID buffer" );

    // Run a few different times
    MTdata seed = init_genrand( gRandomSeed );
    SeedGuard seedGuard( seed );
    for( int test = 0; test < NUM_TESTS; test++ )
    {
        // Choose a random combination of thread size, but in total no more than MAX_TEST_ITEMS
        threads[ 0 ] = random_in_range( 1, 32, seed );
        threads[ 1 ] = random_in_range( 1, 16, seed );
        threads[ 2 ] = random_in_range( 1, MAX_TEST_ITEMS / (int)( threads[ 0 ] * threads[ 1 ] ), seed );

        // Make sure we get the local thread count right
        error = get_max_common_3D_work_group_size( context, kernel, threads, localThreads );
        test_error( error, "Unable to determine local work group sizes" );

        // Randomize some offsets
        for( int j = 0; j < 3; j++ )
            offsets[ j ] = random_in_range( 0, MAX_OFFSET, seed );

        log_info( "\tTesting %ld,%ld,%ld (%ld,%ld,%ld) with offsets (%ld,%ld,%ld)...\n",
                  threads[ 0 ], threads[ 1 ], threads[ 2 ], localThreads[ 0 ], localThreads[ 1 ], localThreads[ 2 ],
                  offsets[ 0 ], offsets[ 1 ], offsets[ 2 ] );

        // Now set up and run
        error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
        test_error( error, "Unable to set indexed kernel arguments" );
        error = clEnqueueNDRangeKernel( queue, kernel, 3, offsets, threads, localThreads, 0, NULL, NULL );
        test_error( error, "Kernel execution failed" );

        // Read our results back now
        error = clEnqueueReadBuffer( queue, streams[ 0 ], CL_TRUE, 0, sizeof( outOffsets ), outOffsets, 0, NULL, NULL );
        test_error( error, "Unable to get result data" );

        // And check!
        int errors = 0;
        for( int j = 0; j < 3; j++ )
        {
            if( outOffsets[ j ] != (cl_int)offsets[ j ] )
            {
                log_error( "ERROR: get_global_offset( %d ) did not return expected value (expected %ld, got %d)\n", j, offsets[ j ], outOffsets[ j ] );
                errors++;
            }
        }
        if( errors > 0 )
            return errors;
    }

    // All done! (seedGuard frees the generator state)
    return 0;
}

View File

@@ -0,0 +1,421 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
/* Each helper maps an element position in the result of a .hi / .lo / .even /
 * .odd selection to the position of the source-vector element it comes from.
 * Only hi_offset() needs the source vector size. */

/* .hi: the upper half starts vectorSize / 2 elements in. */
int hi_offset( int index, int vectorSize )
{
    const int halfSize = vectorSize / 2;
    return index + halfSize;
}

/* .lo: the lower half maps straight through. */
int lo_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return index;
}

/* .even: elements 0, 2, 4, ... */
int even_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return index * 2;
}

/* .odd: elements 1, 3, 5, ... */
int odd_offset( int index, int vectorSize )
{
    (void)vectorSize;
    return index * 2 + 1;
}
// Signature shared by the four offset helpers above.
typedef int (*OffsetFunc)( int index, int vectorSize );
// Offset helper per operator, in the same order as operatorToUse_names.
static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
// Verification callback signature (not referenced in the visible part of this file).
typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
// Operator suffixes under test; also used to build kernel names and source.
static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
// Element type names under test; kSizes below is parallel to this table.
static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
// Parallel tables indexed by vector-size index:
//   vector_sizes   - number of elements in the input vector
//   vector_aligns  - element stride used when stepping through host buffers (3 -> 4)
//   out_vector_idx - vector-size index of the result type
static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16};
static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16};
static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4};
// if input is size vector_sizes[i], output is size
// vector_sizes[out_vector_idx[i]]
// input type name is strcat(gentype, vector_size_names[i]);
// and output type name is
// strcat(gentype, vector_size_names[out_vector_idx[i]]);
// Reverse lookup from an element count (0..16) to its vector-size index;
// -1 marks counts that are not a supported vector size.
static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
-1,-1,-1,-1,-1,-1,-1,5};
// Type-name suffix for each vector-size index ("" for scalar).
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
// Size in bytes of each element type, in the same order as test_str_names.
static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
// Forward declaration; defined after test_hiloeo().
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
// Tests the .hi / .lo / .even / .odd vector selectors.
//
// For every element type in test_str_names, every operator in
// operatorToUse_names, every non-scalar vector size and two expression modes
// (selector applied directly to srcA[tid], or to a vector rebuilt from its
// components), a kernel is generated, run over random input, and its output
// compared against the host-side expectation by CheckResults().
//
// double is skipped when cl_khr_fp64 is unavailable; long and ulong are
// skipped when gHasLong is false.
// Returns 0 on success, -1 on any failure.
int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    cl_int *input_ptr, *output_ptr, *p;
    int err;
    cl_uint i;
    int hasDouble = is_extension_available( device, "cl_khr_fp64" );
    cl_uint vectorSize, operatorToUse;
    cl_uint type;
    MTdata d;

    int expressionMode;
    int numExpressionModes = 2;

    size_t length = sizeof(cl_int) * 4 * n_elems;

    input_ptr = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if( !input_ptr || !output_ptr )
    {
        log_error( "Unable to allocate host buffers\n" );
        free( input_ptr );
        free( output_ptr );
        return -1;
    }

    p = input_ptr;
    d = init_genrand( gRandomSeed );
    for (i=0; i<4 * (cl_uint) n_elems; i++)
        p[i] = genrand_int32(d);
    free_mtdata(d); d = NULL;

    for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
    {
        // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
        size_t elementCount = length / kSizes[type];
        cl_mem streams[2];

        // skip double if unavailable
        if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
            continue;

        // skip both 64-bit integer types if unavailable (a name can only match
        // one of the two strings, so the tests must be OR-ed)
        if( !gHasLong &&
            (( 0 == strcmp( test_str_names[type], "long" )) ||
             ( 0 == strcmp( test_str_names[type], "ulong" ))))
            continue;

        log_info( "%s", test_str_names[type] );
        fflush( stdout );

        // Set up data streams for the type
        streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[0])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }
        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
        if (!streams[1])
        {
            log_error("clCreateBuffer failed\n");
            return -1;
        }

        err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            return -1;
        }

        for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
        {
            log_info( " %s", operatorToUse_names[ operatorToUse ] );
            fflush( stdout );
            for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
                for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {

                    cl_program program = NULL;
                    cl_kernel kernel = NULL;
                    cl_uint outVectorSize = out_vector_idx[vectorSize];
                    char expression[1024];

                    // The source array refers to `expression` by pointer; the
                    // buffer itself is filled in just below.
                    const char *source[] = {
                        "", // optional pragma string
                        "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
                        "(__global ", test_str_names[type], vector_size_names[vectorSize],
                        " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
                        " *dst)\n"
                        "{\n"
                        " int tid = get_global_id(0);\n"
                        "\n"
                        " ", test_str_names[type],
                        vector_size_names[out_vector_idx[vectorSize]],
                        " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
                        " dst[tid] = tmp;\n"
                        "}\n"
                    };

                    if(expressionMode == 0) {
                        sprintf(expression, "srcA[tid]");
                    } else if(expressionMode == 1) {
                        switch(vector_sizes[vectorSize]) {
                            case 16:
                                sprintf(expression,
                                        "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
                                        test_str_names[type]
                                        );
                                break;
                            case 8:
                                sprintf(expression,
                                        "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
                                        test_str_names[type]
                                        );
                                break;
                            case 4:
                                sprintf(expression,
                                        "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
                                        test_str_names[type]
                                        );
                                break;
                            case 3:
                                sprintf(expression,
                                        "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
                                        test_str_names[type]
                                        );
                                break;
                            case 2:
                                sprintf(expression,
                                        "((%s2)(srcA[tid].s0, srcA[tid].s1))",
                                        test_str_names[type]
                                        );
                                break;
                            default :
                                sprintf(expression, "srcA[tid]");
                                log_info("Default\n");
                        }
                    } else {
                        sprintf(expression, "srcA[tid]");
                    }

                    if (0 == strcmp( test_str_names[type], "double" ))
                        source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";

                    char kernelName[128];
                    snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
                    err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
                    if (err)
                        return -1;

                    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
                    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clSetKernelArgs failed\n");
                        return -1;
                    }

                    //Wipe the output buffer clean
                    uint32_t pattern = 0xdeadbeef;
                    memset_pattern4( output_ptr, &pattern, length );
                    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueWriteBuffer failed\n");
                        return -1;
                    }

                    // One work-item per input vector
                    size_t size = elementCount / (vector_aligns[vectorSize]);
                    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueNDRangeKernel failed\n");
                        return -1;
                    }

                    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
                    if (err != CL_SUCCESS)
                    {
                        log_error("clEnqueueReadBuffer failed\n");
                        return -1;
                    }

                    char *inP = (char *)input_ptr;
                    char *outP = (char *)output_ptr;
                    outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
                                             ( vector_sizes[ out_vector_idx[vectorSize] ] ) );
                    // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
                    // Check one input/output vector pair at a time
                    for( size_t e = 0; e < size; e++ )
                    {
                        if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
                            log_info("e is %d\n", (int)e);
                            fflush(stdout);
                            // break;
                            return -1;
                        }
                        inP += kSizes[type] * ( vector_aligns[vectorSize] );
                        outP += kSizes[type] * ( vector_aligns[outVectorSize] );
                    }

                    clReleaseKernel( kernel );
                    clReleaseProgram( program );
                    log_info( "." );
                    fflush( stdout );
                }
            }
        }

        clReleaseMemObject( streams[0] );
        clReleaseMemObject( streams[1] );
        log_info( "done\n" );
    }

    log_info("HiLoEO test passed\n");

    free(input_ptr);
    free(output_ptr);

    return err;
}
// Compares kernel output against the host-side expectation for one operator.
//
// in            - input vectors as raw bytes
// out           - corresponding output vectors as raw bytes
// elementCount  - number of input/output vector pairs to check
// type          - index into test_str_names / kSizes
// vectorSize    - vector-size index of the input vector
// operatorToUse - index into operatorToUse_names / offsetFuncs
//
// For each pair the expected result is built in a local scratch array by
// picking input elements through the operator's offset function, then compared
// with the actual output. Returns 0 if everything matches, -1 on the first
// mismatch (after logging the input and output vectors), and -2 if the element
// size is not 1, 2, 4 or 8 bytes.
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
{
// Scratch space for one expected output vector (8 cl_ulong slots)
cl_ulong array[8];
void *p = array;
// Number of elements in the result vector, and how many of them are compared
size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
// was 1 << (vectorSize-1);
OffsetFunc f = offsetFuncs[ operatorToUse ];
size_t elementSize = kSizes[type];
// 3-element inputs: for hi and odd only the first result element is compared
if(vector_size_names[vectorSize][0] == '3') {
if(operatorToUse_names[operatorToUse][0] == 'h' ||
operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
{
cmpVectorSize = 1; // special case for vec3 ignored values
}
}
// The four branches below are the same algorithm specialised by element width
switch( elementSize )
{
case 1:
{
char *i = (char*)in;
char *o = (char*)out;
size_t j;
cl_uint k;
// Shadows the outer f with the same value
OffsetFunc f = offsetFuncs[ operatorToUse ];
for( k = 0; k < elementCount; k++ )
{
char *o2 = (char*)p;
// Build the expected result from the input elements
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", %d", i[j] );
log_info( " } --> { %d", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", %d", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 2:
{
short *i = (short*)in;
short *o = (short*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
short *o2 = (short*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", %d", i[j] );
log_info( " } --> { %d", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", %d", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 4:
{
int *i = (int*)in;
int *o = (int*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
int *o2 = (int *)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
// Compared element by element so that float NaNs can be treated specially
for( j = 0; j < cmpVectorSize; j++ )
{
/* Allow float nans to be binary different */
if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
{
// j is reused for printing below; the function returns before the outer loop resumes
log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", 0x%8.8x", i[j] );
log_info( " } --> { 0x%8.8x", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", 0x%8.8x", o[j] );
log_info( " }\n" );
return -1;
}
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
case 8:
{
cl_ulong *i = (cl_ulong*)in;
cl_ulong *o = (cl_ulong*)out;
size_t j;
cl_uint k;
for( k = 0; k < elementCount; k++ )
{
cl_ulong *o2 = (cl_ulong*)p;
for( j = 0; j < halfVectorSize; j++ )
o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
if( memcmp( o, o2, elementSize * cmpVectorSize ) )
{
log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
for( j = 1; j < halfVectorSize * 2; j++ )
log_info( ", 0x%16.16llx", i[j] );
log_info( " } --> { 0x%16.16llx", o[0] );
for( j = 1; j < halfVectorSize; j++ )
log_info( ", 0x%16.16llx", o[j] );
log_info( " }\n" );
return -1;
}
i += 2 * halfVectorSize;
o += halfVectorSize;
}
}
break;
default:
log_info( "Internal error. Unknown data type\n" );
return -2;
}
return 0;
}

View File

@@ -0,0 +1,276 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the host-pointer test kernel: element-wise float add,
// dst[tid] = srcA[tid] + srcB[tid], one work-item per element.
const char *hostptr_kernel_code =
"__kernel void test_hostptr(__global float *srcA, __global float *srcB, __global float *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" dst[tid] = srcA[tid] + srcB[tid];\n"
"}\n";
// Error tolerance constant (not used by the exact comparison below).
static const float MAX_ERR = 1e-5f;

// Checks that outptr[k] equals inptrA[k] + inptrB[k], computed on the host in
// single precision, for every k in [0, n). The comparison is exact.
// Returns 0 if all n elements match, -1 at the first mismatch.
static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n)
{
    int idx = 0;

    while (idx < n)
    {
        cl_float expected = inptrA[idx] + inptrB[idx];
        if (expected != outptr[idx])
            return -1;
        ++idx;
    }

    return 0;
}
// Fills ptr[0 .. count-1] with random floats drawn from generator d, using
// get_random_float() with bounds -2^32 and +2^32.
static void make_random_data(unsigned count, float *ptr, MTdata d)
{
    float *const end = ptr + count;
    float *cursor;

    for (cursor = ptr; cursor != end; ++cursor)
        *cursor = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), MAKE_HEX_FLOAT( 0x1.0p32f, 0x1, 32), d);
}
// Allocates a w x h image with 4 bytes per pixel and fills every byte with
// random data from generator d.
// Returns the malloc'ed buffer (the caller frees it), or NULL if the
// allocation fails.
static unsigned char *
generate_rgba8_image(int w, int h, MTdata d)
{
    // Computed in size_t so large dimensions do not overflow int arithmetic
    const size_t numBytes = (size_t)w * (size_t)h * 4;
    unsigned char *ptr = (unsigned char*)malloc(numBytes);
    size_t i;

    if (ptr == NULL)
        return NULL;

    for (i=0; i<numBytes; i++)
        ptr[i] = (unsigned char)genrand_int32(d);

    return ptr;
}
// Overwrites an existing w x h, 4-bytes-per-pixel image with fresh random
// bytes from generator d, in ascending address order. Returns ptr.
static unsigned char *
randomize_rgba8_image(unsigned char *ptr, int w, int h, MTdata d)
{
    const int byteCount = w * h * 4;
    unsigned char *out = ptr;
    int remaining;

    for (remaining = byteCount; remaining > 0; --remaining)
        *out++ = (unsigned char)genrand_int32(d);

    return ptr;
}
// Byte-wise comparison of two w x h images with 4 bytes per pixel.
// Returns 0 if all w * h * 4 bytes are equal (or if that count is not
// positive), -1 otherwise.
static int
verify_rgba8_image(unsigned char *image, unsigned char *outptr, int w, int h)
{
    const int byteCount = w * h * 4;

    // Nothing to compare
    if (byteCount <= 0)
        return 0;

    return (memcmp(outptr, image, (size_t)byteCount) == 0) ? 0 : -1;
}
// Exercises buffers and 2D images created with CL_MEM_USE_HOST_PTR and
// CL_MEM_COPY_HOST_PTR:
//   1. runs the float-add kernel with one USE_HOST_PTR input, one
//      COPY_HOST_PTR input and a USE_HOST_PTR output, maps the output and
//      verifies it on the host;
//   2. copies a USE_HOST_PTR image into another USE_HOST_PTR image and
//      verifies the destination through clEnqueueMapImage;
//   3. repeats the image copy and verifies it through clEnqueueReadImage.
// Returns 0 on success and non-zero on failure.
int
test_hostptr(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_float *input_ptr[2], *output_ptr;
    cl_program program;
    cl_kernel kernel;
    size_t threads[3]={0,0,0};
    cl_image_format img_format;
    cl_uchar *rgba8_inptr, *rgba8_outptr;
    void *lock_buffer;
    int img_width = 512;
    int img_height = 512;
    cl_int err;
    MTdata d;
    RoundingMode oldRoundMode;
    int isRTZ = 0;
    // Set when the mapped buffer contents are wrong; kept separately because
    // err is reused by the calls that follow the check
    int bufferMismatch = 0;

    // Block to mark deletion of streams before deletion of host_ptr
    {
        clMemWrapper streams[7];

        PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

        // Alloc buffers
        input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
        input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
        output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);

        d = init_genrand( gRandomSeed );
        rgba8_inptr = (cl_uchar *)generate_rgba8_image(img_width, img_height, d);
        rgba8_outptr = (cl_uchar *)malloc(sizeof(cl_uchar) * 4 * img_width * img_height);

        if (!input_ptr[0] || !input_ptr[1] || !output_ptr || !rgba8_inptr || !rgba8_outptr)
        {
            log_error("Unable to allocate host memory\n");
            free(input_ptr[0]);
            free(input_ptr[1]);
            free(output_ptr);
            free(rgba8_inptr);
            free(rgba8_outptr);
            free_mtdata(d);
            return -1;
        }

        // Random data
        make_random_data(num_elements, input_ptr[0], d);
        make_random_data(num_elements, input_ptr[1], d);

        // Create host-side input
        streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[0], &err);
        test_error(err, "clCreateBuffer 0 failed");

        // Create a copied input
        streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_float) * num_elements, input_ptr[1], &err);
        test_error(err, "clCreateBuffer 1 failed");

        // Create a host-side output
        streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_float) * num_elements, output_ptr, &err);
        test_error(err, "clCreateBuffer 2 failed");

        // Create a host-side input
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[3] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
        test_error(err, "create_image_2d 3 failed");

        // Create a copied input
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[4] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_inptr, &err);
        test_error(err, "create_image_2d 4 failed");

        // Create a host-side output
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[5] = create_image_2d(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
        test_error(err, "create_image_2d 5 failed");

        // Create a copied output
        img_format.image_channel_order = CL_RGBA;
        img_format.image_channel_data_type = CL_UNORM_INT8;
        streams[6] = create_image_2d(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), &img_format, img_width, img_height, 0, rgba8_outptr, &err);
        test_error(err, "create_image_2d 6 failed");

        err = create_single_kernel_helper(context, &program, &kernel,1, &hostptr_kernel_code, "test_hostptr" );
        test_error(err, "create_single_kernel_helper failed");

        // Execute kernel
        err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
        err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
        err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
        test_error(err, "clSetKernelArg failed");

        threads[0] = (size_t)num_elements;
        err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
        test_error(err, "clEnqueueNDRangeKernel failed");

        cl_float *data = (cl_float*) clEnqueueMapBuffer( queue, streams[2], CL_TRUE, CL_MAP_READ, 0, sizeof(cl_float) * num_elements, 0, NULL, NULL, &err );
        test_error( err, "clEnqueueMapBuffer failed" );

        // If we only support rtz mode, compute the host reference in rtz too.
        // set_round() must be called only once here: a second call would
        // overwrite the saved mode with round-toward-zero and the original
        // mode could never be restored.
        if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
        {
            oldRoundMode = set_round(kRoundTowardZero, kfloat);
            isRTZ = 1;
        }

        // Verify that we got the expected results back on the host side
        err = verify_hostptr(input_ptr[0], input_ptr[1], data, num_elements);
        if (err)
        {
            log_error("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
                      "and a CL_MEM_USE_HOST_PTR output did not return the expected results.\n");
            bufferMismatch = 1;
        } else {
            log_info("Checking mapped data for kernel executed with CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR inputs "
                     "and a CL_MEM_USE_HOST_PTR output returned the expected results.\n");
        }

        if (isRTZ)
            set_round(oldRoundMode, kfloat);

        err = clEnqueueUnmapMemObject( queue, streams[2], data, 0, NULL, NULL );
        test_error( err, "clEnqueueUnmapMemObject failed" );

        size_t origin[3]={0,0,0}, region[3]={(size_t)img_width, (size_t)img_height, 1};
        randomize_rgba8_image(rgba8_outptr, img_width, img_height, d);
        free_mtdata(d); d = NULL;

        // Copy from host-side to host-side
        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR...\n");
        err = clEnqueueCopyImage(queue, streams[3], streams[5],
                                 origin, origin, region, 0, NULL, NULL);
        test_error(err, "clEnqueueCopyImage failed");
        log_info("clEnqueueCopyImage from CL_MEM_USE_HOST_PTR to CL_MEM_USE_HOST_PTR image passed.\n");

        // test the lock buffer interface
        log_info("Mapping the CL_MEM_USE_HOST_PTR image with clEnqueueMapImage...\n");
        size_t row_pitch;
        lock_buffer = clEnqueueMapImage(queue, streams[5], CL_TRUE,
                                        CL_MAP_READ, origin, region,
                                        &row_pitch, NULL,
                                        0, NULL, NULL, &err);
        test_error(err, "clEnqueueMapImage failed");

        err = verify_rgba8_image(rgba8_inptr, (unsigned char*)lock_buffer, img_width, img_height);
        if (err != CL_SUCCESS)
        {
            log_error("verify_rgba8_image FAILED after clEnqueueMapImage\n");
            return -1;
        }
        log_info("verify_rgba8_image passed after clEnqueueMapImage\n");

        err = clEnqueueUnmapMemObject(queue, streams[5], lock_buffer, 0, NULL, NULL);
        test_error(err, "clEnqueueUnmapMemObject failed");

        // Copy host-side to device-side and read back
        // NOTE(review): the log text names a CL_MEM_COPY_HOST_PTR destination,
        // but the destination used is streams[5] (CL_MEM_USE_HOST_PTR);
        // streams[4] and streams[6] are never used. Confirm which was intended.
        log_info("clEnqueueCopyImage CL_MEM_USE_HOST_PTR to CL_MEM_COPY_HOST_PTR...\n");
        err = clEnqueueCopyImage(queue, streams[3], streams[5],
                                 origin, origin, region,
                                 0, NULL, NULL);
        test_error(err, "clEnqueueCopyImage failed");

        err = clEnqueueReadImage(queue, streams[5], CL_TRUE, origin, region, 4*img_width, 0, rgba8_outptr, 0, NULL, NULL);
        test_error(err, "clEnqueueReadImage failed");

        err = verify_rgba8_image(rgba8_inptr, rgba8_outptr, img_width, img_height);
        if (err != CL_SUCCESS)
        {
            log_error("verify_rgba8_image FAILED after clEnqueueCopyImage, clEnqueueReadImage\n");
            return -1;
        }
        log_info("verify_rgba8_image passed after clEnqueueCopyImage, clEnqueueReadImage\n");
    }

    // cleanup
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    free(input_ptr[0]);
    free(input_ptr[1]);
    free(output_ptr);

    free(rgba8_inptr);
    free(rgba8_outptr);

    // A wrong kernel result fails the test even though later steps succeeded
    return bufferMismatch ? -1 : err;
}

View File

@@ -0,0 +1,165 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the if/else-if chain test kernel: src values 0..7 each
// select a distinct constant for dst, and any other value selects 0x7FFFFFFF.
// The host-side `results` table below holds the same eight constants.
const char *conditional_kernel_code =
"__kernel void test_if(__global int *src, __global int *dst)\n"
"{\n"
" int tid = get_global_id(0);\n"
"\n"
" if (src[tid] == 0)\n"
" dst[tid] = 0x12345678;\n"
" else if (src[tid] == 1)\n"
" dst[tid] = 0x23456781;\n"
" else if (src[tid] == 2)\n"
" dst[tid] = 0x34567812;\n"
" else if (src[tid] == 3)\n"
" dst[tid] = 0x45678123;\n"
" else if (src[tid] == 4)\n"
" dst[tid] = 0x56781234;\n"
" else if (src[tid] == 5)\n"
" dst[tid] = 0x67812345;\n"
" else if (src[tid] == 6)\n"
" dst[tid] = 0x78123456;\n"
" else if (src[tid] == 7)\n"
" dst[tid] = 0x81234567;\n"
" else\n"
" dst[tid] = 0x7FFFFFFF;\n"
"\n"
"}\n";
// Expected kernel output for input values 0..7, in order.
const int results[] = {
    0x12345678,
    0x23456781,
    0x34567812,
    0x45678123,
    0x56781234,
    0x67812345,
    0x78123456,
    // Does not fit in a positive int, so the literal is unsigned; the explicit
    // cast avoids a narrowing conversion inside the braced initializer.
    (int)0x81234567,
};
// Checks the output of the test_if kernel against the host-side expectation:
// inputs 0..7 map to results[input], every other input (including negative
// values) maps to 0x7FFFFFFF, matching the kernel's final else branch.
// Returns 0 if all n outputs match, -1 at the first mismatch.
int
verify_if(int *inptr, int *outptr, int n)
{
    int r, i;

    for (i=0; i<n; i++)
    {
        // The lower bound keeps a negative input from indexing before results[]
        if (inptr[i] >= 0 && inptr[i] <= 7)
            r = results[inptr[i]];
        else
            r = 0x7FFFFFFF;

        if (r != outptr[i])
        {
            log_error("IF test failed\n");
            return -1;
        }
    }

    log_info("IF test passed\n");
    return 0;
}
// Runs the test_if kernel over num_elements random inputs produced by
// (int)get_random_float(0, 32, d) and verifies the output with verify_if().
// Returns 0 on success, -1 on any failure. All host and OpenCL resources are
// released on every exit path.
int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // Everything is declared and initialized up front so the error paths can
    // jump to the shared cleanup code without skipping an initialization.
    cl_mem streams[2] = { NULL, NULL };
    cl_int *input_ptr = NULL, *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    size_t threads[1];
    int err, i;
    int result = -1;
    MTdata d = init_genrand( gRandomSeed );
    size_t length = sizeof(cl_int) * num_elements;

    input_ptr = (cl_int*)malloc(length);
    output_ptr = (cl_int*)malloc(length);
    if (!input_ptr || !output_ptr)
    {
        log_error("malloc failed\n");
        goto cleanup;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[0])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
    if (!streams[1])
    {
        log_error("clCreateBuffer failed\n");
        goto cleanup;
    }

    for (i=0; i<num_elements; i++)
        input_ptr[i] = (int)get_random_float(0, 32, d);

    free_mtdata(d); d = NULL;

    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueWriteBuffer failed\n");
        goto cleanup;
    }

    err = create_single_kernel_helper(context, &program, &kernel, 1, &conditional_kernel_code, "test_if" );
    if (err)
        goto cleanup;

    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }

    threads[0] = (unsigned int)num_elements;
    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadArray failed\n");
        goto cleanup;
    }

    result = verify_if(input_ptr, output_ptr, num_elements);

cleanup:
    // Release only what was actually created
    if (d)
        free_mtdata(d);
    if (streams[0])
        clReleaseMemObject(streams[0]);
    if (streams[1])
        clReleaseMemObject(streams[1]);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    free(input_ptr);
    free(output_ptr);

    return result;
}

View File

@@ -0,0 +1,643 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
// OpenCL C source for the "image_to_image_copy" kernel, integer-coordinate
// variant. Each work-item takes its 2D global id as a pixel position, reads
// that pixel from srcimg through the supplied sampler using int2 coordinates,
// and writes the resulting float4 to the same position in dstimg.
static const char *image_to_image_kernel_integer_coord_code =
"\n"
"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// OpenCL C source for the "image_to_image_copy" kernel, float-coordinate
// variant. Same copy as the integer-coordinate kernel, except that the read
// passes the work-item's global id converted to float2 coordinates; the write
// still uses int2 coordinates.
static const char *image_to_image_kernel_float_coord_code =
"\n"
"__kernel void image_to_image_copy(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color;\n"
"\n"
" color = read_imagef(srcimg, sampler, (float2)((float)tid_x, (float)tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
"\n"
"}\n";
// OpenCL C source for the "image_sum" kernel, integer-coordinate variant.
// Each work-item reads the pixel at its 2D global id from srcimg0 and srcimg1
// (int2 coordinates, shared sampler) and writes the component-wise float4 sum
// to the same position in dstimg.
static const char *image_sum_kernel_integer_coord_code =
"\n"
"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color0;\n"
" float4 color1;\n"
"\n"
" color0 = read_imagef(srcimg0, sampler, (int2)(tid_x, tid_y));\n"
" color1 = read_imagef(srcimg1, sampler, (int2)(tid_x, tid_y));\n"
" write_imagef(dstimg, (int2)(tid_x, tid_y), color0 + color1);\n"
"\n"
"}\n";
// OpenCL C source for the "image_sum" kernel, float-coordinate variant.
// Same sum as the integer-coordinate kernel, except that both reads pass the
// work-item's global id converted to float2 coordinates; the write still uses
// int2 coordinates.
static const char *image_sum_kernel_float_coord_code =
"\n"
"__kernel void image_sum(read_only image2d_t srcimg0, read_only image2d_t srcimg1, write_only image2d_t dstimg, sampler_t sampler)\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" float4 color0;\n"
" float4 color1;\n"
"\n"
" color0 = read_imagef(srcimg0, sampler, (float2)((float)tid_x, (float)tid_y));\n"
" color1 = read_imagef(srcimg1, sampler, (float2)((float)tid_x, (float)tid_y));\n"
" write_imagef(dstimg,(int2)(tid_x, tid_y), color0 + color1);\n"
"\n"
"}\n";
// Allocates a w * h * num_elements byte image and sets every byte to `value`.
//
// Returns a malloc'd buffer that the caller must free(), or NULL when a
// dimension is negative or the allocation fails (the previous version wrote
// through the unchecked malloc result).
static unsigned char *
generate_initial_byte_image(int w, int h, int num_elements, unsigned char value)
{
    size_t count;
    unsigned char *ptr;

    if (w < 0 || h < 0 || num_elements < 0)
        return NULL;

    // Size arithmetic is done in size_t so large images cannot overflow int.
    count = (size_t)w * (size_t)h * (size_t)num_elements;
    ptr = (unsigned char*)malloc(count);
    if (ptr != NULL)
        memset(ptr, value, count);
    return ptr;
}
// Builds the reference image for the multipass test: byte i of the result is
// the sum of byte i of every image in input_data, truncated to 8 bits.
//
// input_data   array of num_inputs pointers, each to w * h * num_elements bytes
// Returns a malloc'd buffer that the caller must free(), or NULL when a
// dimension is negative or the allocation fails (the previous version wrote
// through the unchecked malloc result).
static unsigned char *
generate_expected_byte_image(unsigned char **input_data, int num_inputs, int w, int h, int num_elements)
{
    size_t count;
    size_t i;
    int j;
    unsigned char *ptr;

    if (w < 0 || h < 0 || num_elements < 0)
        return NULL;

    // Size arithmetic is done in size_t so large images cannot overflow int.
    count = (size_t)w * (size_t)h * (size_t)num_elements;
    ptr = (unsigned char*)malloc(count);
    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
    {
        unsigned char sum = 0;
        for (j = 0; j < num_inputs; j++)
            sum = (unsigned char)(sum + input_data[j][i]);
        ptr[i] = sum;
    }
    return ptr;
}
// Allocates a w * h * num_elements byte image and fills it with values drawn
// from the generator `d`, each masked to its low five bits (0..31).
//
// Returns a malloc'd buffer that the caller must free(), or NULL when a
// dimension is negative or the allocation fails (the previous version wrote
// through the unchecked malloc result). No random numbers are consumed when
// NULL is returned.
static unsigned char *
generate_byte_image(int w, int h, int num_elements, MTdata d)
{
    size_t count;
    size_t i;
    unsigned char *ptr;

    if (w < 0 || h < 0 || num_elements < 0)
        return NULL;

    // Size arithmetic is done in size_t so large images cannot overflow int.
    count = (size_t)w * (size_t)h * (size_t)num_elements;
    ptr = (unsigned char*)malloc(count);
    if (ptr == NULL)
        return NULL;

    for (i = 0; i < count; i++)
        ptr[i] = (unsigned char)(genrand_int32(d) & 31);
    return ptr;
}
// Byte-wise comparison of two w * h * num_elements images.
// Returns 0 when every byte matches and -1 at the first mismatch.
static int
verify_byte_image(unsigned char *image, unsigned char *outptr, int w, int h, int num_elements)
{
    const int total = w * h * num_elements;
    int idx = 0;

    while (idx < total)
    {
        if (image[idx] != outptr[idx])
            return -1;
        ++idx;
    }
    return 0;
}
// Frees an array of `count` malloc'd byte images and then the array itself.
// Accepts NULL for the array and for individual entries.
static void
free_byte_images(unsigned char **images, int count)
{
    int i;

    if (images == NULL)
        return;
    for (i = 0; i < count; i++)
        free(images[i]);
    free(images);
}

// Shared body of the two IMAGE_MULTIPASS tests.
//
// Eight random 512x512 CL_RGBA / CL_UNORM_INT8 input images are accumulated
// into a pair of ping-pong images: the copy kernel moves input 0 into
// accum[0], the sum kernel then adds each remaining input to the previous
// accumulator while writing into the other one, and a final copy moves the
// last accumulator into its partner, which is read back and compared with the
// host-side byte sum of all inputs.
//
// copy_kernel_source  source of the "image_to_image_copy" kernel
// sum_kernel_source   source of the "image_sum" kernel
// first_kernel_id     index printed in "Failed to create kernel N" for the
//                     copy kernel; the sum kernel reports first_kernel_id + 1
//
// Returns 0 when the read-back image matches, -1 on mismatch or on any setup
// or runtime failure. Every resource acquired here is released on every path
// that leaves through the cleanup label.
static int
run_image_multipass(cl_context context, cl_command_queue queue,
                    const char **copy_kernel_source,
                    const char **sum_kernel_source,
                    int first_kernel_id)
{
    // All locals are declared (and initialised) up front so that no
    // "goto cleanup" jumps across an initialisation.
    const int img_width = 512;
    const int img_height = 512;
    const int num_channels = 4;
    const int num_input_streams = 8;
    const size_t image_bytes = (size_t)img_width * (size_t)img_height * (size_t)num_channels;
    size_t origin[3] = {0, 0, 0};
    size_t region[3] = {0, 0, 1};
    size_t threads[3] = {0, 0, 0};
    cl_image_format img_format;
    cl_mem_flags flags = (cl_mem_flags)(CL_MEM_READ_WRITE);
    cl_mem accum_streams[2] = {NULL, NULL};
    cl_mem accum_input = NULL;
    cl_mem accum_output = NULL;
    cl_mem *input_streams = NULL;
    unsigned char **input_data = NULL;
    unsigned char *initial_data = NULL;
    unsigned char *expected_output = NULL;
    unsigned char *output_ptr = NULL;
    cl_program program = NULL;
    cl_kernel kernel[2] = {NULL, NULL};
    cl_sampler sampler = NULL;
    MTdata d = NULL;
    int result = -1;
    int err;
    int i;

    // The sampler is created first, while nothing else is held, because
    // test_error() is expected to log and return from this function directly
    // on failure (NOTE(review): harness macro, definition not visible here).
    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
    test_error(err, "clCreateSampler failed");

    img_format.image_channel_order = CL_RGBA;
    img_format.image_channel_data_type = CL_UNORM_INT8;
    region[0] = threads[0] = (size_t)img_width;
    region[1] = threads[1] = (size_t)img_height;

    output_ptr = (unsigned char*)malloc(image_bytes);
    if (!output_ptr)
    {
        log_error("Unable to allocate output buffer\n");
        goto cleanup;
    }

    // Create the accum images with initial data.
    initial_data = generate_initial_byte_image(img_width, img_height, num_channels, 0xF0);
    if (!initial_data)
    {
        log_error("Unable to allocate initial image data\n");
        goto cleanup;
    }
    for (i = 0; i < 2; i++)
    {
        accum_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!accum_streams[i])
        {
            log_error("create_image_2d failed\n");
            goto cleanup;
        }
        err = clEnqueueWriteImage(queue, accum_streams[i], CL_TRUE,
                                  origin, region, 0, 0,
                                  initial_data, 0, NULL, NULL);
        if (err)
        {
            log_error("clWriteImage failed: %d\n", err);
            goto cleanup;
        }
    }
    free(initial_data);
    initial_data = NULL;

    // Set up the input data. calloc leaves every slot NULL so the cleanup
    // code can release a partially filled array.
    input_data = (unsigned char **)calloc((size_t)num_input_streams, sizeof(*input_data));
    input_streams = (cl_mem*)calloc((size_t)num_input_streams, sizeof(*input_streams));
    if (!input_data || !input_streams)
    {
        log_error("Unable to allocate input stream tables\n");
        goto cleanup;
    }
    d = init_genrand( gRandomSeed );
    for (i = 0; i < num_input_streams; i++)
    {
        input_data[i] = generate_byte_image(img_width, img_height, num_channels, d);
        if (!input_data[i])
        {
            log_error("Unable to allocate input image data\n");
            goto cleanup;
        }
        input_streams[i] = create_image_2d(context, flags, &img_format, img_width, img_height, 0, NULL, NULL);
        if (!input_streams[i])
        {
            log_error("create_image_2d failed\n");
            goto cleanup;
        }
        err = clEnqueueWriteImage(queue, input_streams[i], CL_TRUE,
                                  origin, region, 0, 0,
                                  input_data[i], 0, NULL, NULL);
        if (err)
        {
            log_error("clWriteImage failed: %d\n", err);
            goto cleanup;
        }
    }
    free_mtdata(d);
    d = NULL;

    // The reference buffer is allocated by generate_expected_byte_image();
    // it must not be pre-allocated here or that allocation would leak.
    expected_output = generate_expected_byte_image(input_data, num_input_streams, img_width, img_height, num_channels);
    if (!expected_output)
    {
        log_error("Unable to allocate expected image data\n");
        goto cleanup;
    }
    // The host copies of the inputs are no longer needed once the reference
    // image exists (the image writes above were blocking).
    free_byte_images(input_data, num_input_streams);
    input_data = NULL;

    // Set up the kernels. Each program is released as soon as its kernel
    // exists, as the original code did. After a failed helper call the state
    // of the returned handles is not visible from here, so they are not
    // released.
    err = create_single_kernel_helper(context, &program, &kernel[0], 1, copy_kernel_source, "image_to_image_copy");
    if (err)
    {
        log_error("Failed to create kernel %d: %d\n", first_kernel_id, err);
        kernel[0] = NULL;
        goto cleanup;
    }
    clReleaseProgram(program);
    err = create_single_kernel_helper(context, &program, &kernel[1], 1, sum_kernel_source, "image_sum");
    if (err)
    {
        log_error("Failed to create kernel %d: %d\n", first_kernel_id + 1, err);
        kernel[1] = NULL;
        goto cleanup;
    }
    clReleaseProgram(program);

    // Pass 0: copy the first input into accum[0].
    err = clSetKernelArg(kernel[0], 0, sizeof input_streams[0], &input_streams[0]);
    err |= clSetKernelArg(kernel[0], 1, sizeof accum_streams[0], &accum_streams[0]);
    err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    // Passes 1..N-1: add input i to the previous accumulator, alternating
    // between the two accum images.
    for (i = 1; i < num_input_streams; i++)
    {
        accum_input = accum_streams[(i-1)%2];
        accum_output = accum_streams[i%2];
        err = clSetKernelArg(kernel[1], 0, sizeof accum_input, &accum_input);
        err |= clSetKernelArg(kernel[1], 1, sizeof input_streams[i], &input_streams[i]);
        err |= clSetKernelArg(kernel[1], 2, sizeof accum_output, &accum_output);
        err |= clSetKernelArg(kernel[1], 3, sizeof sampler, &sampler);
        if (err != CL_SUCCESS)
        {
            log_error("clSetKernelArgs failed\n");
            goto cleanup;
        }
        err = clEnqueueNDRangeKernel( queue, kernel[1], 2, NULL, threads, NULL, 0, NULL, NULL );
        if (err != CL_SUCCESS)
        {
            log_error("clEnqueueNDRangeKernel failed\n");
            goto cleanup;
        }
    }

    // Copy the last accum into the other one. The sampler argument (index 2)
    // of the copy kernel is still set from pass 0.
    accum_input = accum_streams[(i-1)%2];
    accum_output = accum_streams[i%2];
    err = clSetKernelArg(kernel[0], 0, sizeof accum_input, &accum_input);
    err |= clSetKernelArg(kernel[0], 1, sizeof accum_output, &accum_output);
    if (err != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed\n");
        goto cleanup;
    }
    err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, NULL );
    if (err != CL_SUCCESS)
    {
        log_error("clEnqueueNDRangeKernel failed\n");
        goto cleanup;
    }

    err = clEnqueueReadImage(queue, accum_output, CL_TRUE,
                             origin, region, 0, 0,
                             (void *)output_ptr, 0, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        log_error("clReadImage failed\n");
        goto cleanup;
    }

    result = verify_byte_image(expected_output, output_ptr, img_width, img_height, num_channels);
    if (result)
    {
        log_error("IMAGE_MULTIPASS test failed.\n");
    }
    else
    {
        log_info("IMAGE_MULTIPASS test passed\n");
    }

cleanup:
    if (d)
        free_mtdata(d);
    free(initial_data);
    free_byte_images(input_data, num_input_streams);
    if (input_streams)
    {
        for (i = 0; i < num_input_streams; i++)
        {
            if (input_streams[i])
                clReleaseMemObject(input_streams[i]);
        }
        free(input_streams);
    }
    for (i = 0; i < 2; i++)
    {
        if (accum_streams[i])
            clReleaseMemObject(accum_streams[i]);
        if (kernel[i])
            clReleaseKernel(kernel[i]);
    }
    if (sampler)
        clReleaseSampler(sampler);
    free(expected_output);
    free(output_ptr);
    return result;
}

// IMAGE_MULTIPASS test using kernels that read images with integer (int2)
// coordinates. Image support is checked first via
// PASSIVE_REQUIRE_IMAGE_SUPPORT. num_elements is unused.
// Returns 0 on success, non-zero on failure.
int
test_image_multipass_integer_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
    return run_image_multipass(context, queue,
                               &image_to_image_kernel_integer_coord_code,
                               &image_sum_kernel_integer_coord_code,
                               0);
}

// IMAGE_MULTIPASS test using kernels that read images with float (float2)
// coordinates. Image support is checked first via
// PASSIVE_REQUIRE_IMAGE_SUPPORT. num_elements is unused.
// Returns 0 on success, non-zero on failure.
int
test_image_multipass_float_coord(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
    return run_image_multipass(context, queue,
                               &image_to_image_kernel_float_coord_code,
                               &image_sum_kernel_float_coord_code,
                               2);
}

View File

@@ -0,0 +1,251 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "../../test_common/harness/compat.h"
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "procs.h"
#include "../../test_common/harness/typeWrappers.h"
#include "../../test_common/harness/imageHelpers.h"
#include "../../test_common/harness/conversions.h"
// OpenCL C source for the "test_fn" kernel. Each work-item reads the pixel at
// its 2D global id from srcimg (int2 coordinates, supplied sampler) and stores
// the float4 result in the results buffer at the row-major index
// tid_y * image width + tid_x.
static const char *param_kernel[] = {
"__kernel void test_fn(read_only image2d_t srcimg, sampler_t sampler, __global float4 *results )\n"
"{\n"
" int tid_x = get_global_id(0);\n"
" int tid_y = get_global_id(1);\n"
" results[ tid_y * get_image_width( srcimg ) + tid_x ] = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
"\n"
"}\n" };
int validate_results( size_t width, size_t height, cl_image_format &format, char *inputData, cl_float *actualResults )
{
for( size_t i = 0; i < width * height; i++ )
{
cl_float expected[ 4 ], tolerance;
switch( format.image_channel_data_type )
{
case CL_UNORM_INT8:
{
cl_uchar *p = (cl_uchar *)inputData;
expected[ 0 ] = p[ 0 ] / 255.f;
expected[ 1 ] = p[ 1 ] / 255.f;
expected[ 2 ] = p[ 2 ] / 255.f;
expected[ 3 ] = p[ 3 ] / 255.f;
tolerance = 1.f / 255.f;
break;
}
case CL_SNORM_INT8:
{
cl_char *p = (cl_char *)inputData;
expected[ 0 ] = fmaxf( p[ 0 ] / 127.f, -1.f );
expected[ 1 ] = fmaxf( p[ 1 ] / 127.f, -1.f );
expected[ 2 ] = fmaxf( p[ 2 ] / 127.f, -1.f );
expected[ 3 ] = fmaxf( p[ 3 ] / 127.f, -1.f );
tolerance = 1.f / 127.f;
break;
}
case CL_UNSIGNED_INT8:
{
cl_uchar *p = (cl_uchar *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 1.f / 127.f;
break;
}
case CL_SIGNED_INT8:
{
cl_short *p = (cl_short *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 1.f / 127.f;
break;
}
case CL_UNORM_INT16:
{
cl_ushort *p = (cl_ushort *)inputData;
expected[ 0 ] = p[ 0 ] / 65535.f;
expected[ 1 ] = p[ 1 ] / 65535.f;
expected[ 2 ] = p[ 2 ] / 65535.f;
expected[ 3 ] = p[ 3 ] / 65535.f;
tolerance = 1.f / 65535.f;
break;
}
case CL_UNSIGNED_INT32:
{
cl_uint *p = (cl_uint *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 0.0001f;
break;
}
case CL_FLOAT:
{
cl_float *p = (cl_float *)inputData;
expected[ 0 ] = p[ 0 ];
expected[ 1 ] = p[ 1 ];
expected[ 2 ] = p[ 2 ];
expected[ 3 ] = p[ 3 ];
tolerance = 0.0001f;
break;
}
default:
// Should never get here
break;
}
if( format.image_channel_order == CL_BGRA )
{
cl_float tmp = expected[ 0 ];
expected[ 0 ] = expected[ 2 ];
expected[ 2 ] = tmp;
}
// Within an error tolerance, make sure the results match
cl_float error1 = fabsf( expected[ 0 ] - actualResults[ 0 ] );
cl_float error2 = fabsf( expected[ 1 ] - actualResults[ 1 ] );
cl_float error3 = fabsf( expected[ 2 ] - actualResults[ 2 ] );
cl_float error4 = fabsf( expected[ 3 ] - actualResults[ 3 ] );
if( error1 > tolerance || error2 > tolerance || error3 > tolerance || error4 > tolerance )
{
log_error( "ERROR: Sample %d did not validate against expected results for %d x %d %s:%s image\n", (int)i, (int)width, (int)height,
GetChannelOrderName( format.image_channel_order ), GetChannelTypeName( format.image_channel_data_type ) );
log_error( " Expected: %f %f %f %f\n", (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ] );
log_error( " Actual: %f %f %f %f\n", (float)actualResults[ 0 ], (float)actualResults[ 1 ], (float)actualResults[ 2 ], (float)actualResults[ 3 ] );
// Check real quick a special case error here
cl_float error1 = fabsf( expected[ 3 ] - actualResults[ 0 ] );
cl_float error2 = fabsf( expected[ 2 ] - actualResults[ 1 ] );
cl_float error3 = fabsf( expected[ 1 ] - actualResults[ 2 ] );
cl_float error4 = fabsf( expected[ 0 ] - actualResults[ 3 ] );
if( error1 <= tolerance && error2 <= tolerance && error3 <= tolerance && error4 <= tolerance )
{
log_error( "\t(Kernel did not respect change in channel order)\n" );
}
return -1;
}
// Increment and go
actualResults += 4;
inputData += get_format_type_size( &format ) * 4;
}
return 0;
}
// Verifies that one kernel object produces correct results when its image
// argument is switched between images of different sizes and formats.
//
// For every size/format combination an input image (filled with random data)
// and a float4 output buffer are created. The single "test_fn" kernel is then
// enqueued once per combination with its arguments re-bound each time, and
// finally every output buffer is read back and checked with
// validate_results().
//
// Returns 0 on success and non-zero on failure. test_error() is expected to
// log and return from this function when given a failing status
// (NOTE(review): harness macro, definition not visible in this file).
// num_elements is unused.
int test_image_param(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    size_t sizes[] = { 64, 100, 128, 250, 512 };
    cl_image_format formats[] = { { CL_RGBA, CL_UNORM_INT8 }, { CL_RGBA, CL_UNORM_INT16 }, { CL_RGBA, CL_FLOAT }, { CL_BGRA, CL_UNORM_INT8 } };
    // Host element type used to generate random data for the matching entry of formats[].
    ExplicitType types[] = { kUChar, kUShort, kFloat, kUChar };
    int error;
    size_t i, j, idx;
    size_t threads[ 2 ];
    MTdata d;
    const size_t numSizes = sizeof( sizes ) / sizeof( sizes[ 0 ] );
    const size_t numFormats = sizeof( formats ) / sizeof( formats[ 0 ] );
    const size_t numAttempts = numSizes * numFormats;

    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[ numAttempts ][ 2 ];
    BufferOwningPtr<char> inputs[ numAttempts ];

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    d = init_genrand( gRandomSeed );
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            // For each attempt, we create a pair: an input image, whose parameters keep changing, and an output buffer
            // that we can read values from. The output buffer will remain consistent to ensure that any changes we
            // witness are due to the image changes
            inputs[ idx ].reset(create_random_data( types[ j ], d, sizes[ i ] * sizes[ i ] * 4 ));

            streams[ idx ][ 0 ] = create_image_2d( context, CL_MEM_COPY_HOST_PTR, &formats[ j ], sizes[ i ], sizes[ i ], 0, inputs[ idx ], &error );
            if( error != CL_SUCCESS )
            {
                // Only build the message on failure. The channel type goes under
                // "format" and the channel order under "order" (the two
                // arguments were previously swapped).
                char err_str[256];
                sprintf(err_str, "Unable to create input image for format %s order %s" ,
                        GetChannelTypeName( formats[j].image_channel_data_type ),
                        GetChannelOrderName( formats[j].image_channel_order ));
                // Release the generator before test_error() leaves the function.
                free_mtdata( d );
                test_error( error, err_str );
                return -1; // safety net; not reached if test_error() returns as expected
            }

            streams[ idx ][ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizes[ i ] * sizes[ i ] * 4 * sizeof( cl_float ), NULL, &error );
            if( error != CL_SUCCESS )
            {
                free_mtdata( d );
                test_error( error, "Unable to create output buffer" );
                return -1; // safety net; not reached if test_error() returns as expected
            }
        }
    }
    free_mtdata(d); d = NULL;

    // Create a single kernel to use for all the tests
    error = create_single_kernel_helper( context, &program, &kernel, 1, param_kernel, "test_fn" );
    test_error( error, "Unable to create testing kernel" );

    // Also create a sampler to use for all the runs
    clSamplerWrapper sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &error );
    test_error( error, "clCreateSampler failed" );

    // Set up the arguments for each and queue
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            error = clSetKernelArg( kernel, 0, sizeof( streams[ idx ][ 0 ] ), &streams[ idx ][ 0 ] );
            error |= clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler );
            error |= clSetKernelArg( kernel, 2, sizeof( streams[ idx ][ 1 ] ), &streams[ idx ][ 1 ]);
            test_error( error, "Unable to set kernel arguments" );

            threads[ 0 ] = threads[ 1 ] = (size_t)sizes[ i ];
            error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
            test_error( error, "clEnqueueNDRangeKernel failed" );
        }
    }

    // Now go through each combo and validate the results
    for( i = 0, idx = 0; i < numSizes; i++ )
    {
        for( j = 0; j < numFormats; j++, idx++ )
        {
            const size_t outputBytes = sizeof( cl_float ) * sizes[ i ] * sizes[ i ] * 4;
            cl_float *rawOutput = (cl_float *)malloc( outputBytes );
            if( rawOutput == NULL )
            {
                log_error( "ERROR: Unable to allocate host buffer for results\n" );
                return -1;
            }
            // Ownership of the allocation passes to the wrapper.
            BufferOwningPtr<cl_float> output( rawOutput );

            error = clEnqueueReadBuffer( queue, streams[ idx ][ 1 ], CL_TRUE, 0, outputBytes, output, 0, NULL, NULL );
            test_error( error, "Unable to read results" );

            error = validate_results( sizes[ i ], sizes[ i ], formats[ j ], inputs[ idx ], output );
            if( error )
                return -1;
        }
    }
    return 0;
}

Some files were not shown because too many files have changed in this diff Show More