mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Initial open source release of OpenCL 1.2 CTS.
This commit is contained in:
95
HOW_TO_ADD_TESTS.txt
Normal file
95
HOW_TO_ADD_TESTS.txt
Normal file
@@ -0,0 +1,95 @@
|
||||
2008-09-04 - created by David Black-Schaffer
|
||||
2008-09-31 - updated for reorganization
|
||||
|
||||
==============================================================
|
||||
*** Where to put tests:
|
||||
==============================================================
|
||||
|
||||
test_apps - complete applications used for testing
|
||||
test_common - frameworks used across multiple tests
|
||||
test_conformance - conformance tests
|
||||
test_development - tests used for development or being developed
|
||||
test_internal - tests for private functionality
|
||||
test_performance - performance tests
|
||||
|
||||
Tests placed in other locations will be moved without warning.
|
||||
|
||||
==============================================================
|
||||
*** How to setup tests:
|
||||
==============================================================
|
||||
To create a new test to run through OATS, you need to:
|
||||
1) write the test
|
||||
2) use ATF to report errors, info, and performance numbers
|
||||
3) make a Makefile that correctly builds with ATF for OATS and builds fat
|
||||
4) add the test to the local Makefile (e.g., test_conformance/Makefile)
|
||||
5) add the test to OATS
|
||||
6) add the test to the appropriate test suite on OATS
|
||||
7) and then add the test to the run_tests_local.py script so it can be run locally.
|
||||
8) If you want the tests distributed, add them to the zip_tests_for_drops.py script appropriately.
|
||||
|
||||
---------------------------------------------------------
|
||||
Use ATF (OATS's Automated Test Framework)
|
||||
---------------------------------------------------------
|
||||
ATF is the only way to report errors to OATS. If you don't use it, OATS will have no way of knowing whether a test passed or failed. You must use ATF for all output, and you should not use any printf calls.
|
||||
|
||||
1) Make sure your Makefile for the test builds correctly with ATF. You need to include the ATF framework whenever the BUILD_WITH_ATF environment variable is set. This can be done as:
|
||||
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
...
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%)
|
||||
|
||||
2) Make sure you use ATF for logging. This means no printf() output. All errors should be output with log_error and info with log_info. This can be done with:
|
||||
|
||||
#if USE_ATF
|
||||
#include <ATF/ATF.h>
|
||||
#define test_start() ATFTestStart()
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
|
||||
#define log_info ATFLogInfo
|
||||
#define log_error ATFLogError
|
||||
#define test_finish() ATFTestFinish()
|
||||
#else
|
||||
#define test_start()
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, _higherBetter?"higher is better":"lower is better" , _number)
|
||||
#define log_info printf
|
||||
#define log_error printf
|
||||
#define test_finish()
|
||||
#endif
|
||||
|
||||
3) All performance information should be output with log_perf(). You need to specify the value, whether bigger is better, the units, and a name.
|
||||
|
||||
4) You need to call test_start() and test_finish() exactly once each in each test. That is, if you have a test that may bail on a failure condition you need to be sure to call test_finish() at each of those points.
|
||||
|
||||
---------------------------------------------------------
|
||||
Building 32- and 64-bit
|
||||
---------------------------------------------------------
|
||||
1) Make sure your Makefile passes RC_CFLAGS into the compiler, e.g.:
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%)
|
||||
|
||||
2) If you are using C++ code with g++ you also need to set:
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%)
|
||||
|
||||
and you may need to pass in RC_CFLAGS to the linker:
|
||||
$(TARGET): $(OBJECTS)
|
||||
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
|
||||
|
||||
3) Verify that this works by building fat (make RC_CFLAGS="-arch i386 -arch x86_64") and then running file on the output binary. You should see:
|
||||
blackschaffer:test_basic dbs$ file test_basic
|
||||
test_basic: Mach-O universal binary with 2 architectures
|
||||
test_basic (for architecture i386): Mach-O executable i386
|
||||
test_basic (for architecture x86_64): Mach-O 64-bit executable x86_64
|
||||
|
||||
---------------------------------------------------------
|
||||
Setting up the test for OATS and adding it
|
||||
---------------------------------------------------------
|
||||
1) Make one or more run_subtest scripts in the directory that run the particular tests. Try to group the sub-tests together in logical units to make it easier to see them in OATS. (E.g., "run_step" runs step, stepf, smoothstep, and smoothstepf.) Note that each script can only call one executable, because OATS accepts only one test_start()/test_finish() pair per test.
|
||||
2) Add the test to OATS in the Test Admin page. Name the test "CL Test - subtest" and put in the path to the tests. (E.g., "CL Common Functions - step" points to "OpenCL_Tests/test_conformance/commonfns/run_step".)
|
||||
3) Add the test to the test suite (e.g., either OpenCL Tests or OpenCL Long Tests). Set the test run order such that basic functionality tests have a lower value (run first) and performance/application tests have a higher value (run last).
|
||||
4) Add the test directory to the appropriate Makefile, and verify that it builds. This file is used when the tests are built for OATS. You should just need to add the test directory to the list of directories at the top.
|
||||
|
||||
---------------------------------------------------------
|
||||
Setting up the test for running it locally
|
||||
---------------------------------------------------------
|
||||
1) Add the test run script to the list of tests in run_tests_local.py if the test is a short test.
|
||||
4
license.txt
Normal file
4
license.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
The code inside this directory and its subdirectories is
|
||||
"Open GL (including Open CL) Automated Test System - Common Code"
|
||||
and is subject to the license agreement between Apple and the licensee.
|
||||
|
||||
26
test_common/Makefile
Normal file
26
test_common/Makefile
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
# Sub-directories built by this Makefile. Each entry must keep its trailing
# slash: the rules below pass every entry through $(dir ...).
PRODUCTS = harness/
#          utils/

TOP=$(shell pwd)

all: $(PRODUCTS)

# Run "clean" in every product directory.  $(MAKE) is used instead of a
# literal "make" so that command-line flags (-j, -n, -k, variable overrides)
# and the jobserver are propagated to the sub-make.
clean:
	@for testdir in $(dir $(PRODUCTS)) ; \
	do ( \
		echo "==================================================================================" ; \
		echo "Cleaning $$testdir" ; \
		echo "==================================================================================" ; \
		cd $$testdir && $(MAKE) clean \
	); \
	done

# Build one product directory, printing a time-stamped banner first.
$(PRODUCTS):
	@echo "==================================================================================" ;
	@echo "(`date "+%H:%M:%S"`) Make $@" ;
	@echo "==================================================================================" ;
	cd $(dir $@) && $(MAKE)

.PHONY: clean $(PRODUCTS) all
|
||||
52
test_common/gl/gl_headers.h
Normal file
52
test_common/gl/gl_headers.h
Normal file
@@ -0,0 +1,52 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _gl_headers_h
|
||||
#define _gl_headers_h
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenGL/OpenGL.h>
|
||||
#if defined(CGL_VERSION_1_3)
|
||||
#include <OpenGL/gl3.h>
|
||||
#include <OpenGL/gl3ext.h>
|
||||
#else
|
||||
#include <OpenGL/gl.h>
|
||||
#include <OpenGL/glext.h>
|
||||
#endif
|
||||
#include <GLUT/glut.h>
|
||||
#else
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
#include <GL/glew.h>
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glext.h>
|
||||
#ifdef _WIN32
|
||||
#include <GL/glut.h>
|
||||
#else
|
||||
#include <GL/freeglut.h>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
GLboolean gluCheckExtension(const GLubyte *extName, const GLubyte *extString);
|
||||
// No glutGetProcAddress in the standard glut v3.7.
|
||||
#define glutGetProcAddress(procName) wglGetProcAddress(procName)
|
||||
#endif
|
||||
|
||||
|
||||
#endif // _gl_headers_h
|
||||
|
||||
1622
test_common/gl/helpers.cpp
Normal file
1622
test_common/gl/helpers.cpp
Normal file
File diff suppressed because it is too large
Load Diff
288
test_common/gl/helpers.h
Normal file
288
test_common/gl/helpers.h
Normal file
@@ -0,0 +1,288 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _helpers_h
|
||||
#define _helpers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl.h>
|
||||
#include "gl_headers.h"
|
||||
#include <CL/cl_gl.h>
|
||||
#else
|
||||
#include "gl_headers.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/threadTesting.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLBuffer_fn)(cl_context context,
|
||||
cl_mem_flags flags,
|
||||
GLuint bufobj,
|
||||
int * errcode_ret);
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture2D_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLTexture3D_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLenum target ,
|
||||
GLint miplevel ,
|
||||
GLuint texture ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_mem
|
||||
(CL_API_CALL *clCreateFromGLRenderbuffer_fn)(cl_context context ,
|
||||
cl_mem_flags flags ,
|
||||
GLuint renderbuffer ,
|
||||
cl_int * errcode_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLObjectInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_object_type * gl_object_type ,
|
||||
GLuint * gl_object_name) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clGetGLTextureInfo_fn)(cl_mem memobj ,
|
||||
cl_gl_texture_info param_name ,
|
||||
size_t param_value_size ,
|
||||
void * param_value ,
|
||||
size_t * param_value_size_ret) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueAcquireGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
typedef cl_int
|
||||
(CL_API_CALL *clEnqueueReleaseGLObjects_fn)(cl_command_queue command_queue ,
|
||||
cl_uint num_objects ,
|
||||
const cl_mem * mem_objects ,
|
||||
cl_uint num_events_in_wait_list ,
|
||||
const cl_event * event_wait_list ,
|
||||
cl_event * event) ;
|
||||
|
||||
|
||||
extern clCreateFromGLBuffer_fn clCreateFromGLBuffer_ptr;
|
||||
extern clCreateFromGLTexture_fn clCreateFromGLTexture_ptr;
|
||||
extern clCreateFromGLTexture2D_fn clCreateFromGLTexture2D_ptr;
|
||||
extern clCreateFromGLTexture3D_fn clCreateFromGLTexture3D_ptr;
|
||||
extern clCreateFromGLRenderbuffer_fn clCreateFromGLRenderbuffer_ptr;
|
||||
extern clGetGLObjectInfo_fn clGetGLObjectInfo_ptr;
|
||||
extern clGetGLTextureInfo_fn clGetGLTextureInfo_ptr;
|
||||
extern clEnqueueAcquireGLObjects_fn clEnqueueAcquireGLObjects_ptr;
|
||||
extern clEnqueueReleaseGLObjects_fn clEnqueueReleaseGLObjects_ptr;
|
||||
|
||||
|
||||
class glBufferWrapper
|
||||
{
|
||||
public:
|
||||
glBufferWrapper() { mBuffer = 0; }
|
||||
glBufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glBufferWrapper() { if( mBuffer != 0 ) glDeleteBuffers( 1, &mBuffer ); }
|
||||
|
||||
glBufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glTextureWrapper
|
||||
{
|
||||
public:
|
||||
glTextureWrapper() { mHandle = 0; }
|
||||
glTextureWrapper( GLuint b ) { mHandle = b; }
|
||||
~glTextureWrapper() {
|
||||
if( mHandle != 0 ) glDeleteTextures( 1, &mHandle );
|
||||
}
|
||||
|
||||
glTextureWrapper & operator=( const GLuint &rhs ) { mHandle = rhs; return *this; }
|
||||
operator GLuint() { return mHandle; }
|
||||
operator GLuint *() { return &mHandle; }
|
||||
|
||||
GLuint * operator&() { return &mHandle; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mHandle == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
// The texture handle.
|
||||
GLuint mHandle;
|
||||
};
|
||||
|
||||
class glRenderbufferWrapper
|
||||
{
|
||||
public:
|
||||
glRenderbufferWrapper() { mBuffer = 0; }
|
||||
glRenderbufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glRenderbufferWrapper() { if( mBuffer != 0 ) glDeleteRenderbuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glRenderbufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
class glFramebufferWrapper
|
||||
{
|
||||
public:
|
||||
glFramebufferWrapper() { mBuffer = 0; }
|
||||
glFramebufferWrapper( GLuint b ) { mBuffer = b; }
|
||||
~glFramebufferWrapper() { if( mBuffer != 0 ) glDeleteFramebuffersEXT( 1, &mBuffer ); }
|
||||
|
||||
glFramebufferWrapper & operator=( const GLuint &rhs ) { mBuffer = rhs; return *this; }
|
||||
operator GLuint() { return mBuffer; }
|
||||
operator GLuint *() { return &mBuffer; }
|
||||
|
||||
GLuint * operator&() { return &mBuffer; }
|
||||
|
||||
bool operator==( GLuint rhs ) { return mBuffer == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
GLuint mBuffer;
|
||||
};
|
||||
|
||||
|
||||
// Helper functions (defined in helpers.cpp)
|
||||
|
||||
extern void * CreateGLTexture1DArray( size_t width, size_t length,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTexture2DArray( size_t width, size_t height, size_t length,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTextureBuffer( size_t width,
|
||||
GLenum target, GLenum glFormat, GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTex, GLuint *outBuf, int *outError,
|
||||
bool allocateMem, MTdata d);
|
||||
|
||||
extern void * CreateGLTexture1D(size_t width,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
extern void * CreateGLTexture2D( size_t width, size_t height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, bool allocateMem, MTdata d );
|
||||
|
||||
|
||||
extern void * CreateGLTexture3D( size_t width, size_t height, size_t depth,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type, GLuint *outTextureID,
|
||||
int *outError, MTdata d, bool allocateMem = true );
|
||||
|
||||
extern void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GLuint glBuf, GLint width,
|
||||
GLenum glFormat, GLenum glInternalFormat,
|
||||
GLenum glType, ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern int CreateGLRenderbufferRaw( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer );
|
||||
|
||||
extern void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
|
||||
GLenum target, GLenum glFormat,
|
||||
GLenum internalFormat, GLenum glType,
|
||||
ExplicitType type,
|
||||
GLuint *outFramebuffer,
|
||||
GLuint *outRenderbuffer,
|
||||
int *outError, MTdata d, bool allocateMem );
|
||||
|
||||
extern void * ReadGLRenderbuffer( GLuint glFramebuffer, GLuint glRenderbuffer,
|
||||
GLenum attachment, GLenum glFormat,
|
||||
GLenum glInternalFormat, GLenum glType,
|
||||
ExplicitType typeToReadAs,
|
||||
size_t outWidth, size_t outHeight );
|
||||
|
||||
extern void DumpGLBuffer(GLenum type, size_t width, size_t height, void* buffer);
|
||||
extern const char *GetGLTypeName( GLenum type );
|
||||
extern const char *GetGLAttachmentName( GLenum att );
|
||||
extern const char *GetGLTargetName( GLenum tgt );
|
||||
extern const char *GetGLBaseFormatName( GLenum baseformat );
|
||||
extern const char *GetGLFormatName( GLenum format );
|
||||
|
||||
extern void* CreateRandomData( ExplicitType type, size_t count, MTdata d );
|
||||
|
||||
extern GLenum GetGLFormat(GLenum internalFormat);
|
||||
extern GLenum GetGLTypeForExplicitType(ExplicitType type);
|
||||
extern size_t GetGLTypeSize(GLenum type);
|
||||
extern ExplicitType GetExplicitTypeForGLType(GLenum type);
|
||||
|
||||
extern GLenum get_base_gl_target( GLenum target );
|
||||
|
||||
extern int init_clgl_ext( void );
|
||||
|
||||
#endif // _helpers_h
|
||||
|
||||
|
||||
|
||||
48
test_common/gl/setup.h
Normal file
48
test_common/gl/setup.h
Normal file
@@ -0,0 +1,48 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _setup_h
|
||||
#define _setup_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "gl_headers.h"
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
|
||||
// Note: the idea here is to have every platform define their own setup.cpp file that implements a GLEnvironment
|
||||
// subclass internally, then return an instance of it from its definition of GLEnvironment::Instance
|
||||
|
||||
class GLEnvironment
|
||||
{
|
||||
public:
|
||||
GLEnvironment() {}
|
||||
virtual ~GLEnvironment() {}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 ) = 0;
|
||||
virtual cl_context CreateCLContext( void ) = 0;
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type) = 0;
|
||||
|
||||
static GLEnvironment * Instance( void );
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif // _setup_h
|
||||
156
test_common/gl/setup_osx.cpp
Normal file
156
test_common/gl/setup_osx.cpp
Normal file
@@ -0,0 +1,156 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "setup.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include <OpenGL/CGLDevice.h>
|
||||
|
||||
class OSXGLEnvironment : public GLEnvironment
|
||||
{
|
||||
public:
|
||||
OSXGLEnvironment()
|
||||
{
|
||||
mCGLContext = NULL;
|
||||
}
|
||||
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 )
|
||||
{
|
||||
if (!use_opengl_32) {
|
||||
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
}
|
||||
|
||||
else {
|
||||
|
||||
CGLPixelFormatAttribute attribs[] = {
|
||||
kCGLPFAOpenGLProfile, (CGLPixelFormatAttribute)kCGLOGLPVersion_3_2_Core,
|
||||
kCGLPFAAllowOfflineRenderers,
|
||||
kCGLPFANoRecovery,
|
||||
kCGLPFAAccelerated,
|
||||
kCGLPFADoubleBuffer,
|
||||
(CGLPixelFormatAttribute)0
|
||||
};
|
||||
|
||||
CGLError err;
|
||||
CGLPixelFormatObj pix;
|
||||
GLint npix;
|
||||
err = CGLChoosePixelFormat (attribs, &pix, &npix);
|
||||
if(err != kCGLNoError)
|
||||
{
|
||||
log_error("Failed to choose pixel format\n");
|
||||
return -1;
|
||||
}
|
||||
err = CGLCreateContext(pix, NULL, &mCGLContext);
|
||||
if(err != kCGLNoError)
|
||||
{
|
||||
log_error("Failed to create GL context\n");
|
||||
return -1;
|
||||
}
|
||||
CGLSetCurrentContext(mCGLContext);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
int error;
|
||||
|
||||
if( mCGLContext == NULL )
|
||||
mCGLContext = CGLGetCurrentContext();
|
||||
|
||||
CGLShareGroupObj share_group = CGLGetShareGroup(mCGLContext);
|
||||
cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 };
|
||||
cl_context context = clCreateContext(properties, 0, 0, 0, 0, &error);
|
||||
if (error) {
|
||||
print_error(error, "clCreateContext failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Verify that all devices in the context support the required extension
|
||||
cl_device_id devices[64];
|
||||
size_t size_out;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &size_out);
|
||||
if (error) {
|
||||
print_error(error, "clGetContextInfo failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)(size_out/sizeof(cl_device_id)); i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
|
||||
log_error("Device %d does not supporte required extension cl_APPLE_gl_sharing.\n", i);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return context;
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
int found_valid_device = 0;
|
||||
cl_device_id devices[64];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetDeviceIDs(NULL, device_type, 64, devices, &num_of_devices);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_APPLE_gl_sharing") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d does support required extension cl_APPLE_gl_sharing.\n", i, num_of_devices);
|
||||
found_valid_device = 1;
|
||||
}
|
||||
}
|
||||
return found_valid_device;
|
||||
}
|
||||
|
||||
virtual ~OSXGLEnvironment()
|
||||
{
|
||||
CGLDestroyContext( mCGLContext );
|
||||
}
|
||||
|
||||
CGLContextObj mCGLContext;
|
||||
|
||||
};
|
||||
|
||||
// Returns the single OSXGLEnvironment, allocating it on the first call.
// Nothing in this file deletes the object afterwards.
GLEnvironment * GLEnvironment::Instance( void )
{
    static OSXGLEnvironment * sEnvironment = NULL;

    if( sEnvironment != NULL )
        return sEnvironment;

    sEnvironment = new OSXGLEnvironment();
    return sEnvironment;
}
|
||||
204
test_common/gl/setup_win32.cpp
Normal file
204
test_common/gl/setup_win32.cpp
Normal file
@@ -0,0 +1,204 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
|
||||
#include "setup.h"
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glut.h>
|
||||
#include <GL/glext.h>
|
||||
#include <GL/glut.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
|
||||
const cl_context_properties *properties,
|
||||
cl_gl_context_info param_name,
|
||||
size_t param_value_size,
|
||||
void *param_value,
|
||||
size_t *param_value_size_ret);
|
||||
|
||||
// Rename references to this dynamically linked function to avoid
|
||||
// collision with static link version
|
||||
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_proc
|
||||
static clGetGLContextInfoKHR_fn clGetGLContextInfoKHR;
|
||||
|
||||
#define MAX_DEVICES 32
|
||||
|
||||
class WGLEnvironment : public GLEnvironment
|
||||
{
|
||||
private:
|
||||
cl_device_id m_devices[MAX_DEVICES];
|
||||
int m_device_count;
|
||||
cl_platform_id m_platform;
|
||||
|
||||
public:
|
||||
WGLEnvironment()
|
||||
{
|
||||
m_device_count = 0;
|
||||
m_platform = 0;
|
||||
|
||||
}
|
||||
virtual int Init( int *argc, char **argv, int use_opengl_32 )
|
||||
{
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
glewInit();
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
HGLRC hGLRC = wglGetCurrentContext();
|
||||
HDC hDC = wglGetCurrentDC();
|
||||
cl_context_properties properties[] = {
|
||||
CL_CONTEXT_PLATFORM, (cl_context_properties) m_platform,
|
||||
CL_GL_CONTEXT_KHR, (cl_context_properties) hGLRC,
|
||||
CL_WGL_HDC_KHR, (cl_context_properties) hDC,
|
||||
0
|
||||
};
|
||||
cl_device_id devices[MAX_DEVICES];
|
||||
size_t dev_size;
|
||||
cl_int status;
|
||||
|
||||
if (!hGLRC || !hDC) {
|
||||
print_error(CL_INVALID_CONTEXT, "No GL context bound");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!clGetGLContextInfoKHR) {
|
||||
// As OpenCL for the platforms. Warn if more than one platform found,
|
||||
// since this might not be the platform we want. By default, we simply
|
||||
// use the first returned platform.
|
||||
|
||||
cl_uint nplatforms;
|
||||
cl_platform_id platform;
|
||||
clGetPlatformIDs(0, NULL, &nplatforms);
|
||||
clGetPlatformIDs(1, &platform, NULL);
|
||||
|
||||
if (nplatforms > 1) {
|
||||
log_info("clGetPlatformIDs returned multiple values. This is not "
|
||||
"an error, but might result in obtaining incorrect function "
|
||||
"pointers if you do not want the first returned platform.\n");
|
||||
|
||||
// Show them the platform name, in case it is a problem.
|
||||
|
||||
size_t size;
|
||||
char *name;
|
||||
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size);
|
||||
name = (char*)malloc(size);
|
||||
clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL);
|
||||
|
||||
log_info("Using platform with name: %s \n", name);
|
||||
free(name);
|
||||
}
|
||||
|
||||
clGetGLContextInfoKHR = (clGetGLContextInfoKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clGetGLContextInfoKHR");
|
||||
if (!clGetGLContextInfoKHR) {
|
||||
print_error(CL_INVALID_PLATFORM, "Failed to query proc address for clGetGLContextInfoKHR");
|
||||
}
|
||||
}
|
||||
|
||||
status = clGetGLContextInfoKHR(properties,
|
||||
CL_DEVICES_FOR_GL_CONTEXT_KHR,
|
||||
sizeof(devices),
|
||||
devices,
|
||||
&dev_size);
|
||||
if (status != CL_SUCCESS) {
|
||||
print_error(status, "clGetGLContextInfoKHR failed");
|
||||
return 0;
|
||||
}
|
||||
dev_size /= sizeof(cl_device_id);
|
||||
log_info("GL context supports %d compute devices\n", dev_size);
|
||||
|
||||
status = clGetGLContextInfoKHR(properties,
|
||||
CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR,
|
||||
sizeof(devices),
|
||||
devices,
|
||||
&dev_size);
|
||||
if (status != CL_SUCCESS) {
|
||||
print_error(status, "clGetGLContextInfoKHR failed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
cl_device_id ctxDevice = m_devices[0];
|
||||
if (dev_size > 0) {
|
||||
log_info("GL context current device: 0x%x\n", devices[0]);
|
||||
for (int i = 0; i < m_device_count; i++) {
|
||||
if (m_devices[i] == devices[0]) {
|
||||
ctxDevice = devices[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log_info("GL context current device is not a CL device, using device %d.\n", ctxDevice);
|
||||
}
|
||||
|
||||
return clCreateContext(properties, 1, &ctxDevice, NULL, NULL, &status);
|
||||
}
|
||||
|
||||
// Queries the first OpenCL platform for devices of device_type and appends to
// m_devices every device whose extension string advertises cl_khr_gl_sharing.
//
// Returns:
//    1  at least one sharing-capable device has been recorded in m_devices
//    0  no sharing-capable device (including: the platform has no device of
//       the requested type at all)
//   -1  an OpenCL query failed (the error has been printed)
virtual int SupportsCLGLInterop( cl_device_type device_type )
{
    cl_device_id devices[MAX_DEVICES];
    cl_uint num_of_devices = 0;
    int error;

    error = clGetPlatformIDs(1, &m_platform, NULL);
    if (error) {
        print_error(error, "clGetPlatformIDs failed");
        return -1;
    }

    error = clGetDeviceIDs(m_platform, device_type, MAX_DEVICES, devices, &num_of_devices);
    // A platform without any device of the requested type is "not supported",
    // not a test failure (same policy as the X11 environment).
    if (error == CL_DEVICE_NOT_FOUND)
        return 0;
    if (error) {
        print_error(error, "clGetDeviceIDs failed");
        return -1;
    }

    // clGetDeviceIDs reports the number of matching devices available, which
    // may exceed the number of entries actually written to devices[].
    if (num_of_devices > MAX_DEVICES)
        num_of_devices = MAX_DEVICES;

    // Check all devices, search for one that supports cl_khr_gl_sharing
    // NOTE(review): m_devices is declared outside this block; this assumes it
    // holds at least MAX_DEVICES entries -- confirm against the class header.
    char extensions[8192];
    for (int i = 0; i < (int)num_of_devices; i++) {
        error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
        if (error) {
            print_error(error, "clGetDeviceInfo failed");
            return -1;
        }

        if (strstr(extensions, "cl_khr_gl_sharing") == NULL) {
            log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
        } else {
            log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
            m_devices[m_device_count++] = devices[i];
        }
    }
    return m_device_count > 0;
}
|
||||
|
||||
// Nothing to release: the environment owns no resources of its own.
virtual ~WGLEnvironment() {}
|
||||
};
|
||||
|
||||
// Returns the process-wide WGL environment, creating it on first use.
// The instance is never deleted.
GLEnvironment * GLEnvironment::Instance( void )
{
    static WGLEnvironment *instance = NULL;

    if( NULL != instance )
        return instance;

    instance = new WGLEnvironment();
    return instance;
}
|
||||
122
test_common/gl/setup_x11.cpp
Normal file
122
test_common/gl/setup_x11.cpp
Normal file
@@ -0,0 +1,122 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#define GL_GLEXT_PROTOTYPES
|
||||
|
||||
#include "setup.h"
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
|
||||
#include <GL/gl.h>
|
||||
#include <GL/glut.h>
|
||||
#include <GL/glext.h>
|
||||
#include <GL/freeglut.h>
|
||||
#include <GL/glx.h>
|
||||
#include <CL/cl_ext.h>
|
||||
|
||||
class X11GLEnvironment : public GLEnvironment
|
||||
{
|
||||
private:
|
||||
cl_device_id m_devices[64];
|
||||
cl_uint m_device_count;
|
||||
|
||||
public:
|
||||
X11GLEnvironment()
|
||||
{
|
||||
m_device_count = 0;
|
||||
}
|
||||
virtual int Init( int *argc, char **argv, int use_opencl_32 )
|
||||
{
|
||||
// Create a GLUT window to render into
|
||||
glutInit( argc, argv );
|
||||
glutInitWindowSize( 512, 512 );
|
||||
glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
|
||||
glutCreateWindow( "OpenCL <-> OpenGL Test" );
|
||||
glewInit();
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual cl_context CreateCLContext( void )
|
||||
{
|
||||
GLXContext context = glXGetCurrentContext();
|
||||
Display *dpy = glXGetCurrentDisplay();
|
||||
|
||||
cl_context_properties properties[] = {
|
||||
CL_GL_CONTEXT_KHR, (cl_context_properties) context,
|
||||
CL_GLX_DISPLAY_KHR, (cl_context_properties) dpy,
|
||||
0
|
||||
};
|
||||
cl_int status;
|
||||
|
||||
if (!context || !dpy) {
|
||||
print_error(CL_INVALID_CONTEXT, "No GL context bound");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return clCreateContext(properties, 1, m_devices, NULL, NULL, &status);
|
||||
}
|
||||
|
||||
virtual int SupportsCLGLInterop( cl_device_type device_type )
|
||||
{
|
||||
int found_valid_device = 0;
|
||||
cl_platform_id platform;
|
||||
cl_device_id devices[64];
|
||||
cl_uint num_of_devices;
|
||||
int error;
|
||||
error = clGetPlatformIDs(1, &platform, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetPlatformIDs failed");
|
||||
return -1;
|
||||
}
|
||||
error = clGetDeviceIDs(platform, device_type, 64, devices, &num_of_devices);
|
||||
// If this platform doesn't have any of the requested device_type (namely GPUs) then return 0
|
||||
if (error == CL_DEVICE_NOT_FOUND)
|
||||
return 0;
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceIDs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char extensions[8192];
|
||||
for (int i=0; i<(int)num_of_devices; i++) {
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(extensions), extensions, NULL);
|
||||
if (error) {
|
||||
print_error(error, "clGetDeviceInfo failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (strstr(extensions, "cl_khr_gl_sharing ") == NULL) {
|
||||
log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
} else {
|
||||
log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices);
|
||||
found_valid_device = 1;
|
||||
m_devices[m_device_count++] = devices[i];
|
||||
}
|
||||
}
|
||||
return found_valid_device;
|
||||
}
|
||||
|
||||
virtual ~X11GLEnvironment()
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the process-wide X11 GL environment, creating it on first use.
// The instance is never deleted.
GLEnvironment * GLEnvironment::Instance( void )
{
    static X11GLEnvironment *instance = NULL;

    if( NULL != instance )
        return instance;

    instance = new X11GLEnvironment();
    return instance;
}
|
||||
18
test_common/harness/Jamfile
Normal file
18
test_common/harness/Jamfile
Normal file
@@ -0,0 +1,18 @@
|
||||
# Boost.Build project for the shared test harness.
# Every source is compiled as C++ ("-xc++" for gcc, "/TP" for msvc), the
# current directory is added to the include path for this project and for
# its users, and warnings are not promoted to errors.
project
|
||||
: requirements <include>.
|
||||
<toolset>gcc:<cflags>"-xc++"
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
<warnings-as-errors>off
|
||||
: usage-requirements <include>.
|
||||
;
|
||||
|
||||
# Build one object per .c/.cpp file found in this directory.
local harness.objs ;
|
||||
for source in [ glob *.c *.cpp ]
|
||||
{
|
||||
harness.objs += [ obj $(source:B).obj : $(source) ] ;
|
||||
}
|
||||
|
||||
# "harness" is an alias for all of those objects; both the harness itself and
# whatever uses it reference /Runtime//OpenCL.lib.
alias harness : $(harness.objs)
|
||||
: <use>/Runtime//OpenCL.lib :
|
||||
: <library>/Runtime//OpenCL.lib
|
||||
;
|
||||
41
test_common/harness/Makefile
Normal file
41
test_common/harness/Makefile
Normal file
@@ -0,0 +1,41 @@
|
||||
# Makefile for the shared test harness objects (macOS frameworks are used for
# linking flags). Defining BUILD_WITH_ATF adds the ATF framework and -DUSE_ATF.
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
# Harness sources.
# NOTE(review): testHarness.c and testHarness.cpp both map to testHarness.o
# through the OBJECTS substitutions below -- confirm both files are intended.
SRCS = conversions.c \
|
||||
errorHelpers.c \
|
||||
genericThread.cpp \
|
||||
imageHelpers.cpp \
|
||||
kernelHelpers.c \
|
||||
mt19937.c \
|
||||
rounding_mode.c \
|
||||
testHarness.c \
|
||||
testHarness.cpp \
|
||||
ThreadPool.c \
|
||||
threadTesting.c \
|
||||
typeWrappers.cpp
|
||||
|
||||
# Each word of DEFINES is turned into a -D flag by CFLAGS/CXXFLAGS below.
DEFINES = DONT_TEST_GARBAGE_POINTERS
|
||||
|
||||
SOURCES = $(abspath $(SRCS))
|
||||
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||
LIBPATH += -L.
|
||||
HEADERS =
|
||||
INCLUDE =
|
||||
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
|
||||
# CC is the C++ driver, so the .c sources are built with c++ as well.
CC = c++
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||
|
||||
# Object list: replace the .c and then the .cpp suffixes with .o.
OBJECTS := ${SOURCES:.c=.o}
|
||||
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||
|
||||
# Only objects are produced here; nothing is linked by this Makefile.
all: $(OBJECTS)
|
||||
|
||||
clean:
|
||||
rm -f $(OBJECTS)
|
||||
|
||||
# Fallback for any target without a rule.
.DEFAULT:
|
||||
@echo The target \"$@\" does not exist in Makefile.
|
||||
899
test_common/harness/ThreadPool.c
Normal file
899
test_common/harness/ThreadPool.c
Normal file
@@ -0,0 +1,899 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "ThreadPool.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "fpcontrol.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined( __APPLE__ ) || defined( __linux__ ) || defined( _WIN32 ) // or any other POSIX system
|
||||
|
||||
#if defined( _WIN32 )
|
||||
#include <windows.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#include "mingw_compat.h"
|
||||
#include <process.h>
|
||||
#else // !_WIN32
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/errno.h>
|
||||
#endif // !_WIN32
|
||||
|
||||
// declarations
|
||||
#ifdef _WIN32
|
||||
void ThreadPool_WorkerFunc( void *p );
|
||||
#else
|
||||
void *ThreadPool_WorkerFunc( void *p );
|
||||
#endif
|
||||
void ThreadPool_Init(void);
|
||||
void ThreadPool_Exit(void);
|
||||
|
||||
#if defined (__MINGW32__)
|
||||
// Mutex for implementing super heavy atomic operations if you don't have GCC or MSVC
|
||||
CRITICAL_SECTION gAtomicLock;
|
||||
#elif defined( __GNUC__ ) || defined( _MSC_VER)
|
||||
#else
|
||||
pthread_mutex_t gAtomicLock;
|
||||
#endif
|
||||
|
||||
// Atomic add operator with mem barrier. Mem barrier needed to protect state modified by the worker functions.
|
||||
// Atomically performs *a += b and returns the value *a held before the add.
// Uses a compiler intrinsic where one is available and otherwise serializes
// through gAtomicLock.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
#if defined (__MINGW32__)
    // No atomics on Mingw32
    cl_int previous;

    EnterCriticalSection(&gAtomicLock);
    previous = *a;
    *a = previous + b;
    LeaveCriticalSection(&gAtomicLock);
    return previous;
#elif defined( __GNUC__ )
    // GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
    // do we need __sync_synchronize() here, too? GCC docs are unclear whether __sync_fetch_and_add does a synchronize
    cl_int previous = __sync_fetch_and_add( a, b );
    return previous;
#elif defined( _MSC_VER )
    LONG previous = _InterlockedExchangeAdd( (volatile LONG*) a, (LONG) b );
    return (cl_int) previous;
#else
#warning Please add a atomic add implementation here, with memory barrier. Fallback code is slow.
    cl_int previous;

    if( pthread_mutex_lock(&gAtomicLock) )
        log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");

    previous = *a;
    *a = previous + b;

    if( pthread_mutex_unlock(&gAtomicLock) )
        log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock!\n");

    return previous;
#endif
}
|
||||
|
||||
#if defined( _WIN32 )
|
||||
// Uncomment the following line if Windows XP support is not required.
|
||||
// #define HAS_INIT_ONCE_EXECUTE_ONCE 1
|
||||
|
||||
#if defined(HAS_INIT_ONCE_EXECUTE_ONCE)
|
||||
#define _INIT_ONCE INIT_ONCE
|
||||
#define _PINIT_ONCE PINIT_ONCE
|
||||
#define _InitOnceExecuteOnce InitOnceExecuteOnce
|
||||
#else // !HAS_INIT_ONCE_EXECUTE_ONCE
|
||||
|
||||
typedef volatile LONG _INIT_ONCE;
|
||||
typedef _INIT_ONCE *_PINIT_ONCE;
|
||||
typedef BOOL (CALLBACK *_PINIT_ONCE_FN)(_PINIT_ONCE, PVOID, PVOID *);
|
||||
|
||||
#define _INIT_ONCE_UNINITIALIZED 0
|
||||
#define _INIT_ONCE_IN_PROGRESS 1
|
||||
#define _INIT_ONCE_DONE 2
|
||||
|
||||
static BOOL _InitOnceExecuteOnce(
|
||||
_PINIT_ONCE InitOnce,
|
||||
_PINIT_ONCE_FN InitFn,
|
||||
PVOID Parameter,
|
||||
LPVOID *Context
|
||||
)
|
||||
{
|
||||
while ( *InitOnce != _INIT_ONCE_DONE )
|
||||
{
|
||||
if (*InitOnce != _INIT_ONCE_IN_PROGRESS && _InterlockedCompareExchange( InitOnce, _INIT_ONCE_IN_PROGRESS, _INIT_ONCE_UNINITIALIZED ) == _INIT_ONCE_UNINITIALIZED )
|
||||
{
|
||||
InitFn( InitOnce, Parameter, Context );
|
||||
*InitOnce = _INIT_ONCE_DONE;
|
||||
return TRUE;
|
||||
}
|
||||
Sleep( 1 );
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
#endif // !HAS_INIT_ONCE_EXECUTE_ONCE
|
||||
|
||||
// Uncomment the following line if Windows XP support is not required.
|
||||
// #define HAS_CONDITION_VARIABLE 1
|
||||
|
||||
#if defined(HAS_CONDITION_VARIABLE)
|
||||
#define _CONDITION_VARIABLE CONDITION_VARIABLE
|
||||
#define _InitializeConditionVariable InitializeConditionVariable
|
||||
#define _SleepConditionVariableCS SleepConditionVariableCS
|
||||
#define _WakeAllConditionVariable WakeAllConditionVariable
|
||||
#else // !HAS_CONDITION_VARIABLE
|
||||
// Emulated condition variable used when HAS_CONDITION_VARIABLE is not defined.
// It is built from an event created by _InitializeConditionVariable plus
// bookkeeping counters that are guarded by mLock.
typedef struct
|
||||
{
|
||||
HANDLE mEvent; // Used to park the thread.
|
||||
CRITICAL_SECTION mLock[1]; // Used to protect mWaiters, mGeneration and mReleaseCount.
|
||||
volatile cl_int mWaiters; // Number of threads waiting on this cond var.
|
||||
volatile cl_int mGeneration; // Wait generation count.
|
||||
volatile cl_int mReleaseCount; // Number of releases to execute before resetting the event.
|
||||
} _CONDITION_VARIABLE;
|
||||
|
||||
typedef _CONDITION_VARIABLE *_PCONDITION_VARIABLE;
|
||||
|
||||
// Prepares an emulated condition variable for use: creates its manual-reset,
// initially non-signaled event, initializes its lock and zeroes all counters.
static void _InitializeConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    cond_var->mEvent = CreateEvent( NULL, TRUE, FALSE, NULL );
    InitializeCriticalSection( cond_var->mLock );
    cond_var->mWaiters = 0;
    cond_var->mGeneration = 0;
    // mReleaseCount is read and decremented by the wait path in every build
    // configuration, so it must be initialized unconditionally (not only when
    // NDEBUG is undefined).
    cond_var->mReleaseCount = 0;
}
|
||||
|
||||
// Emulation of SleepConditionVariableCS: registers the caller as a waiter,
// releases cond_lock, blocks on the event until a wake-all issued after
// registration is observed, then re-acquires cond_lock and consumes one
// release. The third parameter (timeout) is ignored; the wait is unbounded.
static void _SleepConditionVariableCS( _PCONDITION_VARIABLE cond_var, PCRITICAL_SECTION cond_lock, DWORD ignored)
{
    cl_int entryGeneration;
    BOOL released;

    // Register as a waiter and remember which generation we joined.
    EnterCriticalSection( cond_var->mLock );
    entryGeneration = cond_var->mGeneration;
    cond_var->mWaiters += 1;
    LeaveCriticalSection( cond_var->mLock );
    LeaveCriticalSection( cond_lock );

    // Sleep until there is a pending release that belongs to a newer generation.
    do
    {
        WaitForSingleObject( cond_var->mEvent, INFINITE );
        EnterCriticalSection( cond_var->mLock );
        released = cond_var->mReleaseCount > 0 && cond_var->mGeneration != entryGeneration;
        LeaveCriticalSection( cond_var->mLock );
    }
    while( !released );

    // Take the caller's lock back, then consume our release; the last waiter
    // released resets the event.
    EnterCriticalSection( cond_lock );
    EnterCriticalSection( cond_var->mLock );
    cond_var->mReleaseCount -= 1;
    if( cond_var->mReleaseCount == 0 )
        ResetEvent( cond_var->mEvent );
    cond_var->mWaiters -= 1;
    LeaveCriticalSection( cond_var->mLock );
}
|
||||
|
||||
// Emulation of WakeAllConditionVariable: if any thread is waiting, starts a
// new generation, schedules one release per current waiter and signals the
// event. Does nothing when there are no waiters.
static void _WakeAllConditionVariable( _PCONDITION_VARIABLE cond_var )
{
    EnterCriticalSection( cond_var->mLock );
    if( cond_var->mWaiters <= 0 )
    {
        LeaveCriticalSection( cond_var->mLock );
        return;
    }

    cond_var->mGeneration += 1;
    cond_var->mReleaseCount = cond_var->mWaiters;
    SetEvent( cond_var->mEvent );
    LeaveCriticalSection( cond_var->mLock );
}
|
||||
#endif // !HAS_CONDITION_VARIABLE
|
||||
#endif // _WIN32
|
||||
|
||||
// Work item values at or above MAX_COUNT tell a worker thread to exit:
// ThreadPool_Exit stores CL_INT_MAX in gRunCount and the worker loop only
// continues while the item it fetched is below MAX_COUNT.
#define MAX_COUNT (1<<29)
|
||||
|
||||
// Global state to coordinate whether the threads have been launched successfully or not
|
||||
#if defined( _MSC_VER ) && (_WIN32_WINNT >= 0x600)
|
||||
static _INIT_ONCE threadpool_init_control;
|
||||
#elif defined (_WIN32) // MinGW or XP
|
||||
static int threadpool_init_control;
|
||||
#else // Posix platforms
|
||||
pthread_once_t threadpool_init_control = PTHREAD_ONCE_INIT;
|
||||
#endif
|
||||
// While this is non-zero ThreadPool_Do runs its single threaded fallback.
cl_int threadPoolInitErr = -1; // set to CL_SUCCESS on successful thread launch
|
||||
|
||||
// critical region lock around ThreadPool_Do. We can only run one ThreadPool_Do at a time,
|
||||
// because we are too lazy to set up a queue here, and don't expect to need one.
|
||||
#if defined( _WIN32 )
|
||||
CRITICAL_SECTION gThreadPoolLock[1];
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t gThreadPoolLock;
|
||||
#endif // !_WIN32
|
||||
|
||||
// Condition variable to park ThreadPool threads when not working
|
||||
#if defined( _WIN32 )
|
||||
CRITICAL_SECTION cond_lock[1];
|
||||
_CONDITION_VARIABLE cond_var[1];
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t cond_lock;
|
||||
pthread_cond_t cond_var;
|
||||
#endif // !_WIN32
|
||||
volatile cl_int gRunCount = 0; // Condition variable state. How many iterations on the function left to run.
|
||||
// set to CL_INT_MAX to cause worker threads to exit. Note: this value might go negative.
|
||||
|
||||
// State that only changes when the threadpool is not working.
|
||||
volatile TPFuncPtr gFunc_ptr = NULL;
|
||||
volatile void *gUserInfo = NULL;
|
||||
volatile cl_int gJobCount = 0;
|
||||
|
||||
// State that may change while the thread pool is working
|
||||
volatile cl_int jobError = CL_SUCCESS; // err code return for the job as a whole
|
||||
|
||||
// Condition variable to park caller while waiting
|
||||
#if defined( _WIN32 )
|
||||
HANDLE caller_event;
|
||||
#else // !_WIN32
|
||||
pthread_mutex_t caller_cond_lock;
|
||||
pthread_cond_t caller_cond_var;
|
||||
#endif // !_WIN32
|
||||
volatile cl_int gRunning = 0; // # of threads intended to be running. Running threads will decrement this as they discover they've run out of work to do.
|
||||
|
||||
// The total number of threads launched.
|
||||
volatile cl_int gThreadCount = 0;
|
||||
#ifdef _WIN32
|
||||
void ThreadPool_WorkerFunc( void *p )
|
||||
#else
|
||||
void *ThreadPool_WorkerFunc( void *p )
|
||||
#endif
|
||||
{
|
||||
cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
|
||||
cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
ThreadPool_AtomicAdd( &gRunning, 1 );
|
||||
// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
|
||||
|
||||
while( MAX_COUNT > item )
|
||||
{
|
||||
cl_int err;
|
||||
|
||||
// check for more work to do
|
||||
if( 0 >= item )
|
||||
{
|
||||
// log_info( "Thread %d has run out of work.\n", threadID );
|
||||
|
||||
// No work to do. Attempt to block waiting for work
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Worker %d unable to block waiting for work. ThreadPool_WorkerFunc failed.\n", err, threadID );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
cl_int remaining = ThreadPool_AtomicAdd( &gRunning, -1 );
|
||||
// log_info( "ThreadPool_WorkerFunc: gRunning = %d\n", remaining - 1 );
|
||||
if( 1 == remaining )
|
||||
{ // last thread out signal the main thread to wake up
|
||||
#if defined( _WIN32 )
|
||||
SetEvent( caller_event );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if( (err = pthread_cond_broadcast( &caller_cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up main thread. ThreadPool_WorkerFunc failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake caller.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
// loop in case we are woken only to discover that some other thread already did all the work
|
||||
while( 0 >= item )
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
_SleepConditionVariableCS( cond_var, cond_lock, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &cond_var, &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
|
||||
pthread_mutex_unlock( &cond_lock);
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// try again to get a valid item id
|
||||
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
if( MAX_COUNT <= item ) // exit if we are done
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
pthread_mutex_unlock( &cond_lock);
|
||||
#endif // !_WIN32
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
ThreadPool_AtomicAdd( &gRunning, 1 );
|
||||
// log_info( "Thread %d has found work.\n", threadID);
|
||||
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_unlock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for waiting for work. ThreadPool_WorkerFunc failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
}
|
||||
|
||||
// we have a valid item, so do the work
|
||||
if( CL_SUCCESS == jobError ) // but only if we haven't already encountered an error
|
||||
{
|
||||
// log_info( "Thread %d doing job %d\n", threadID, item - 1);
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
|
||||
// Call the user's function with this item ID
|
||||
err = gFunc_ptr( item - 1, threadID, (void*) gUserInfo );
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
if( err )
|
||||
{
|
||||
#if (__MINGW32__)
|
||||
EnterCriticalSection(&gAtomicLock);
|
||||
if( jobError == CL_SUCCESS );
|
||||
jobError = err;
|
||||
gRunCount = 0;
|
||||
LeaveCriticalSection(&gAtomicLock);
|
||||
#elif defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
// set the new error if we are the first one there.
|
||||
__sync_val_compare_and_swap( &jobError, CL_SUCCESS, err );
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
__sync_synchronize();
|
||||
#elif defined( _MSC_VER )
|
||||
// set the new error if we are the first one there.
|
||||
_InterlockedCompareExchange( (volatile LONG*) &jobError, err, CL_SUCCESS );
|
||||
|
||||
// drop run count to 0
|
||||
gRunCount = 0;
|
||||
_mm_mfence();
|
||||
#else
|
||||
if( pthread_mutex_lock(&gAtomicLock) )
|
||||
log_error( "Atomic operation failed. pthread_mutex_lock(&gAtomicLock) returned an error\n");
|
||||
if( jobError == CL_SUCCESS );
|
||||
jobError = err;
|
||||
gRunCount = 0;
|
||||
if( pthread_mutex_unlock(&gAtomicLock) )
|
||||
log_error( "Failed to release gAtomicLock. Further atomic operations may deadlock\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// get the next item
|
||||
item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||
}
|
||||
|
||||
exit:
|
||||
log_info( "ThreadPool: thread %d exiting.\n", threadID );
|
||||
ThreadPool_AtomicAdd( &gThreadCount, -1 );
|
||||
#if !defined(_WIN32)
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// SetThreadCount() may be used to artificially set the number of worker threads
|
||||
// If the value is 0 (the default) the number of threads will be determined based on
|
||||
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
|
||||
// that we still get some testing for thread safety.
|
||||
//
|
||||
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
|
||||
// code will run single threaded, but will report an error to indicate that the test
|
||||
// is invalid. This option is intended for debugging purposes only. It is suggested
|
||||
// as a convention that test apps set the thread count to 1 in response to the -m flag.
|
||||
//
|
||||
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
|
||||
// otherwise the behavior is undefined.
|
||||
void SetThreadCount( int count )
|
||||
{
|
||||
if( threadPoolInitErr == CL_SUCCESS )
|
||||
{
|
||||
log_error( "Error: It is illegal to set the thread count after the first call to ThreadPool_Do or GetThreadCount\n" );
|
||||
abort();
|
||||
}
|
||||
|
||||
gThreadCount = count;
|
||||
}
|
||||
|
||||
void ThreadPool_Init(void)
|
||||
{
|
||||
cl_int i;
|
||||
int err;
|
||||
volatile cl_uint threadID = 0;
|
||||
|
||||
// Check for manual override of multithreading code. We add this for better debuggability.
|
||||
if( getenv( "CL_TEST_SINGLE_THREADED" ) )
|
||||
{
|
||||
log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
// Figure out how many threads to run -- check first for non-zero to give the implementation the chance
|
||||
if( 0 == gThreadCount )
|
||||
{
|
||||
#if defined(_MSC_VER) || defined (__MINGW64__)
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = NULL;
|
||||
DWORD length = 0;
|
||||
|
||||
GetLogicalProcessorInformation( NULL, &length );
|
||||
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
|
||||
if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
|
||||
{
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
|
||||
while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
|
||||
{
|
||||
if( ptr->Relationship == RelationProcessorCore )
|
||||
{
|
||||
// Count the number of bits in ProcessorMask (number of logical cores)
|
||||
ULONG mask = ptr->ProcessorMask;
|
||||
while( mask )
|
||||
{
|
||||
++gThreadCount;
|
||||
mask &= mask - 1; // Remove 1 bit at a time
|
||||
}
|
||||
}
|
||||
++ptr;
|
||||
}
|
||||
free(buffer);
|
||||
}
|
||||
#elif defined (__MINGW32__)
|
||||
{
|
||||
#warning How about this, instead of hard coding it to 2?
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo( &sysinfo );
|
||||
gThreadCount = sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
#else // !_WIN32
|
||||
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||
#endif // !_WIN32
|
||||
|
||||
// Multithreaded tests are required to run multithreaded even on unicore systems so as to test thread safety
|
||||
if( 1 == gThreadCount )
|
||||
gThreadCount = 2;
|
||||
}
|
||||
|
||||
//Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
|
||||
if( gThreadCount < 2 )
|
||||
{
|
||||
log_error( "ERROR: Running single threaded because thread count < 2. \n*** TEST IS INVALID! ***\n");
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined( _WIN32 )
|
||||
InitializeCriticalSection( gThreadPoolLock );
|
||||
InitializeCriticalSection( cond_lock );
|
||||
_InitializeConditionVariable( cond_var );
|
||||
caller_event = CreateEvent( NULL, FALSE, FALSE, NULL );
|
||||
#elif defined (__GNUC__)
|
||||
// Dont rely on PTHREAD_MUTEX_INITIALIZER for intialization of a mutex since it might cause problem
|
||||
// with some flavors of gcc compilers.
|
||||
pthread_cond_init(&cond_var, NULL);
|
||||
pthread_mutex_init(&cond_lock ,NULL);
|
||||
pthread_cond_init(&caller_cond_var, NULL);
|
||||
pthread_mutex_init(&caller_cond_lock, NULL);
|
||||
pthread_mutex_init(&gThreadPoolLock, NULL);
|
||||
#endif
|
||||
|
||||
#if !(defined(__GNUC__) || defined(_MSC_VER) || defined(__MINGW32__))
|
||||
pthread_mutex_initialize(gAtomicLock);
|
||||
#elif defined (__MINGW32__)
|
||||
InitializeCriticalSection(&gAtomicLock);
|
||||
#endif
|
||||
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
|
||||
// That would cause a deadlock.
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
gThreadCount = 1;
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// init threads
|
||||
for( i = 0; i < gThreadCount; i++ )
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
uintptr_t handle = _beginthread(ThreadPool_WorkerFunc, 0, (void*) &threadID);
|
||||
err = ( handle == 0 );
|
||||
#else // !_WIN32
|
||||
pthread_t tid = 0;
|
||||
err = pthread_create( &tid, NULL, ThreadPool_WorkerFunc, (void*) &threadID );
|
||||
#endif // !_WIN32
|
||||
if( err )
|
||||
{
|
||||
log_error( "Error %d launching thread %d\n", err, i );
|
||||
threadPoolInitErr = err;
|
||||
gThreadCount = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
atexit( ThreadPool_Exit );
|
||||
|
||||
// block until they are done launching.
|
||||
do
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
WaitForSingleObject( caller_event, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
pthread_mutex_unlock( &caller_cond_lock);
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
while( gRunCount != -gThreadCount );
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Init failed.\n", err );
|
||||
return;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
threadPoolInitErr = CL_SUCCESS;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
// Adapter giving ThreadPool_Init the callback signature expected by
// _InitOnceExecuteOnce. The three arguments are unused; always reports TRUE.
static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContex)
{
    (void) InitOnce;
    (void) Parameter;
    (void) lpContex;

    ThreadPool_Init();
    return TRUE;
}
#endif
|
||||
|
||||
void ThreadPool_Exit(void)
|
||||
{
|
||||
int err, count;
|
||||
gRunCount = CL_INT_MAX;
|
||||
|
||||
#if defined( __GNUC__ )
|
||||
// GCC extension: http://gcc.gnu.org/onlinedocs/gcc/Atomic-Builtins.html#Atomic-Builtins
|
||||
__sync_synchronize();
|
||||
#elif defined( _MSC_VER )
|
||||
_mm_mfence();
|
||||
#else
|
||||
#warning If this is a weakly ordered memory system, please add a memory barrier here to force this and everything else to memory before we proceed
|
||||
#endif
|
||||
|
||||
// spin waiting for threads to die
|
||||
for (count = 0; 0 != gThreadCount && count < 1000; count++)
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
_WakeAllConditionVariable( cond_var );
|
||||
Sleep(1);
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_cond_broadcast( &cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Exit failed.\n", err );
|
||||
break;
|
||||
}
|
||||
usleep(1000);
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
if( gThreadCount )
|
||||
log_error( "Error: Thread pool timed out after 1 second with %d threads still active.\n", gThreadCount );
|
||||
else
|
||||
log_info( "Thread pool exited in a orderly fashion.\n" );
|
||||
}
|
||||
|
||||
|
||||
// Blocking API that farms out count jobs to a thread pool.
|
||||
// It may return with some work undone if func_ptr() returns a non-zero
|
||||
// result.
|
||||
//
|
||||
// This function obviously has its shortcomings. Only one call to ThreadPool_Do
|
||||
// can be running at a time. It is not intended for general purpose use.
|
||||
// If clEnqueueNativeKernelFn, out of order queues and a CL_DEVICE_TYPE_CPU were
|
||||
// all available then it would make more sense to use those features.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo )
|
||||
{
|
||||
cl_int newErr;
|
||||
cl_int err = 0;
|
||||
// Lazily set up our threads
|
||||
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
|
||||
err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
|
||||
#elif defined (_WIN32)
|
||||
if (threadpool_init_control == 0) {
|
||||
#warning This is buggy and race prone. Find a better way.
|
||||
ThreadPool_Init();
|
||||
threadpool_init_control = 1;
|
||||
}
|
||||
#else //posix platform
|
||||
err = pthread_once( &threadpool_init_control, ThreadPool_Init );
|
||||
if( err )
|
||||
{
|
||||
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
// Single threaded code to handle case where threadpool wasn't allocated or was disabled by environment variable
|
||||
if( threadPoolInitErr )
|
||||
{
|
||||
cl_uint currentJob = 0;
|
||||
cl_int result = CL_SUCCESS;
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
for( currentJob = 0; currentJob < count; currentJob++ )
|
||||
if((result = func_ptr( currentJob, 0, userInfo )))
|
||||
{
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state before leaving
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore FP state before leaving
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
if( count >= MAX_COUNT )
|
||||
{
|
||||
log_error("Error: ThreadPool_Do count %d >= max threadpool count of %d\n", count, MAX_COUNT );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Enter critical region
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( gThreadPoolLock );
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_mutex_lock( &gThreadPoolLock )))
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case EDEADLK:
|
||||
log_error("Error EDEADLK returned in ThreadPool_Do(). ThreadPool_Do is not designed to work recursively!\n" );
|
||||
break;
|
||||
case EINVAL:
|
||||
log_error("Error EINVAL returned in ThreadPool_Do(). How did we end up with an invalid gThreadPoolLock?\n" );
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Start modifying the job state observable by worker threads
|
||||
#if defined( _WIN32 )
|
||||
EnterCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_mutex_lock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Make sure the last thread done in the work pool doesn't signal us to wake before we get to the point where we are supposed to wait
|
||||
// That would cause a deadlock.
|
||||
#if !defined( _WIN32 )
|
||||
if((err = pthread_mutex_lock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_lock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// Prime the worker threads to get going
|
||||
jobError = CL_SUCCESS;
|
||||
gRunCount = gJobCount = count;
|
||||
gFunc_ptr = func_ptr;
|
||||
gUserInfo = userInfo;
|
||||
|
||||
#if defined( _WIN32 )
|
||||
_WakeAllConditionVariable( cond_var );
|
||||
LeaveCriticalSection( cond_lock );
|
||||
#else // !_WIN32
|
||||
if( (err = pthread_cond_broadcast( &cond_var )))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_broadcast. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
if((err = pthread_mutex_unlock( &cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to wake up work threads. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
// block until they are done. It would be slightly more efficient to do some of the work here though.
|
||||
do
|
||||
{
|
||||
#if defined( _WIN32 )
|
||||
WaitForSingleObject( caller_event, INFINITE );
|
||||
#else // !_WIN32
|
||||
if((err = pthread_cond_wait( &caller_cond_var, &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_cond_wait. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
pthread_mutex_unlock( &caller_cond_lock);
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
}
|
||||
while( gRunning );
|
||||
#if !defined(_WIN32)
|
||||
if((err = pthread_mutex_unlock( &caller_cond_lock) ))
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to block for work to finish. ThreadPool_Do failed.\n", err );
|
||||
goto exit;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
err = jobError;
|
||||
|
||||
exit:
|
||||
// exit critical region
|
||||
#if defined( _WIN32 )
|
||||
LeaveCriticalSection( gThreadPoolLock );
|
||||
#else // !_WIN32
|
||||
newErr = pthread_mutex_unlock( &gThreadPoolLock );
|
||||
if( newErr)
|
||||
{
|
||||
log_error("Error %d from pthread_mutex_unlock. Unable to exit critical region. ThreadPool_Do failed.\n", newErr );
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
cl_uint GetThreadCount( void )
|
||||
{
|
||||
// Lazily set up our threads
|
||||
#if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600)
|
||||
cl_int err = !_InitOnceExecuteOnce( &threadpool_init_control, _ThreadPool_Init, NULL, NULL );
|
||||
#elif defined (_WIN32)
|
||||
if (threadpool_init_control == 0) {
|
||||
#warning This is buggy and race prone. Find a better way.
|
||||
ThreadPool_Init();
|
||||
threadpool_init_control = 1;
|
||||
}
|
||||
#else
|
||||
cl_int err = pthread_once( &threadpool_init_control, ThreadPool_Init );
|
||||
if( err )
|
||||
{
|
||||
log_error("Error %d from pthread_once. Unable to init threads. ThreadPool_Do failed.\n", err );
|
||||
return err;
|
||||
}
|
||||
#endif // !_WIN32
|
||||
|
||||
if( gThreadCount < 1 )
|
||||
return 1;
|
||||
|
||||
return gThreadCount;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
|
||||
#error ThreadPool implementation has not been multithreaded for this operating system. You must multithread this section.
|
||||
#endif
|
||||
//
|
||||
// We require multithreading in parts of the test as a means of simultaneously testing reentrancy requirements
|
||||
// of OpenCL API, while also checking
|
||||
//
|
||||
// A sample single threaded implementation follows, for documentation / bootstrapping purposes.
|
||||
// It is not okay to use this for conformance testing!!!
|
||||
//
|
||||
// Exception: If your operating system does not support multithreaded execution of any kind, then you may use this code.
|
||||
//
|
||||
|
||||
// Single threaded stand-in for an atomic add: stores *a + b into *a and
// returns the value *a held before the addition.
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b )
{
    // Snapshot the current value first; it is what the caller gets back.
    cl_uint previous = *a;

    // This fallback path is not multithreaded, so a plain read-modify-write is enough.
    // If your operating system supports memory-barrier-atomics, use those here
    *a = previous + b;

    return previous;
}
|
||||
|
||||
// Blocking API that farms out count jobs to a thread pool.
|
||||
// It may return with some work undone if func_ptr() returns a non-zero
|
||||
// result.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo )
|
||||
{
|
||||
cl_uint currentJob = 0;
|
||||
cl_int result = CL_SUCCESS;
|
||||
|
||||
#ifndef MY_OS_REALLY_REALLY_DOESNT_SUPPORT_THREADS
|
||||
// THIS FUNCTION IS NOT INTENDED FOR USE!!
|
||||
log_error( "ERROR: Test must be multithreaded!\n" );
|
||||
exit(-1);
|
||||
#else
|
||||
static int spewCount = 0;
|
||||
|
||||
if( 0 == spewCount )
|
||||
{
|
||||
log_info( "\nWARNING: The operating system is claimed not to support threads of any sort. Running single threaded.\n" );
|
||||
spewCount = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
// The multithreaded code should mimic this behavior:
|
||||
for( currentJob = 0; currentJob < count; currentJob++ )
|
||||
if((result = func_ptr( currentJob, 0, userInfo )))
|
||||
return result;
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
cl_uint GetThreadCount( void )
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Single threaded fallback: the requested count cannot be honored, so a request
// for more than one thread is only reported and otherwise ignored.
void SetThreadCount( int count )
{
    if( count <= 1 )
        return;

    log_info( "WARNING: SetThreadCount(%d) ignored\n", count );
}
|
||||
|
||||
#endif
|
||||
76
test_common/harness/ThreadPool.h
Normal file
76
test_common/harness/ThreadPool.h
Normal file
@@ -0,0 +1,76 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef THREAD_POOL_H
|
||||
#define THREAD_POOL_H
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//
|
||||
// An atomic add operator
|
||||
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value
|
||||
|
||||
// Your function prototype
|
||||
//
|
||||
// A function pointer to the function you want to execute in a multithreaded context. No
|
||||
// synchronization primitives are provided, other than the atomic add above. You may not
|
||||
// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
|
||||
// work, however.
|
||||
//
|
||||
// job ids and thread ids are 0 based. If number of jobs or threads was 8, they will be numbered 0 through 7.
|
||||
// Note that while every job will be run, it is not guaranteed that every thread will wake up before
|
||||
// the work is done.
|
||||
typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
|
||||
|
||||
// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
|
||||
// Some workitems may not run if a non-zero result is returned from func_ptr().
|
||||
// This function may not be called from a TPFuncPtr.
|
||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
||||
cl_uint count,
|
||||
void *userInfo );
|
||||
|
||||
// Returns the number of worker threads that underlie the threadpool. The value passed
|
||||
// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
|
||||
// This is safe to call from a TPFuncPtr.
|
||||
cl_uint GetThreadCount( void );
|
||||
|
||||
// SetThreadCount() may be used to artificially set the number of worker threads
|
||||
// If the value is 0 (the default) the number of threads will be determined based on
|
||||
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
|
||||
// that we still get some testing for thread safety.
|
||||
//
|
||||
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
|
||||
// code will run single threaded, but will report an error to indicate that the test
|
||||
// is invalid. This option is intended for debugging purposes only. It is suggested
|
||||
// as a convention that test apps set the thread count to 1 in response to the -m flag.
|
||||
//
|
||||
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
|
||||
// otherwise the behavior is undefined. It may not be called from a TPFuncPtr.
|
||||
void SetThreadCount( int count );
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* THREAD_POOL_H */
|
||||
253
test_common/harness/clImageHelper.h
Normal file
253
test_common/harness/clImageHelper.h
Normal file
@@ -0,0 +1,253 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef test_conformance_clImageHelper_h
|
||||
#define test_conformance_clImageHelper_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include "errorHelpers.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
// helper function to replace clCreateImage2D , to make the existing code use
|
||||
// the functions of version 1.2 and version 1.1 respectively
|
||||
|
||||
inline cl_mem create_image_2d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_row_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage = NULL;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
cl_image_desc image_desc_dest;
|
||||
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;;
|
||||
image_desc_dest.image_width = image_width;
|
||||
image_desc_dest.image_height = image_height;
|
||||
image_desc_dest.image_depth= 0;// not usedfor 2d
|
||||
image_desc_dest.image_array_size = 0;// not used for 2d
|
||||
image_desc_dest.image_row_pitch = image_row_pitch;
|
||||
image_desc_dest.image_slice_pitch = 0;
|
||||
image_desc_dest.num_mip_levels = 0;
|
||||
image_desc_dest.num_samples = 0;
|
||||
image_desc_dest.buffer = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
||||
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
#else
|
||||
mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_3d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_depth,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = image_height;
|
||||
image_desc.image_depth = image_depth;
|
||||
image_desc.image_array_size = 0;// not used for one image
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
#else
|
||||
mImage = clCreateImage3D( context,
|
||||
flags, image_format,
|
||||
image_width,
|
||||
image_height,
|
||||
image_depth,
|
||||
image_row_pitch,
|
||||
image_slice_pitch,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_2d_array (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_height,
|
||||
size_t image_array_size,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = image_height;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_array_size = image_array_size;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = NULL;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_1d_array (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_array_size,
|
||||
size_t image_row_pitch,
|
||||
size_t image_slice_pitch,
|
||||
void *host_ptr,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = 1;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_array_size = image_array_size;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = image_slice_pitch;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = NULL;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
inline cl_mem create_image_1d (cl_context context,
|
||||
cl_mem_flags flags,
|
||||
const cl_image_format *image_format,
|
||||
size_t image_width,
|
||||
size_t image_row_pitch,
|
||||
void *host_ptr,
|
||||
cl_mem buffer,
|
||||
cl_int *errcode_ret)
|
||||
{
|
||||
cl_mem mImage;
|
||||
|
||||
cl_image_desc image_desc;
|
||||
image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
|
||||
image_desc.image_width = image_width;
|
||||
image_desc.image_height = 1;
|
||||
image_desc.image_depth = 1;
|
||||
image_desc.image_row_pitch = image_row_pitch;
|
||||
image_desc.image_slice_pitch = 0;
|
||||
image_desc.num_mip_levels = 0;
|
||||
image_desc.num_samples = 0;
|
||||
image_desc.buffer = buffer;
|
||||
mImage = clCreateImage( context,
|
||||
flags,
|
||||
image_format,
|
||||
&image_desc,
|
||||
host_ptr,
|
||||
errcode_ret );
|
||||
if (errcode_ret && (*errcode_ret)) {
|
||||
// Log an info message and rely on the calling function to produce an error
|
||||
// if necessary.
|
||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||
}
|
||||
|
||||
return mImage;
|
||||
}
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
200
test_common/harness/compat.h
Normal file
200
test_common/harness/compat.h
Normal file
@@ -0,0 +1,200 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _COMPAT_H_
|
||||
#define _COMPAT_H_
|
||||
|
||||
#if defined(_WIN32) && defined (_MSC_VER)
|
||||
|
||||
#include <Windows.h>
|
||||
#include <Winbase.h>
|
||||
#include <CL/cl.h>
|
||||
#include <float.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
||||
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
||||
|
||||
#define isfinite(x) _finite(x)
|
||||
|
||||
#if !defined(__cplusplus)
|
||||
typedef char bool;
|
||||
#define inline
|
||||
|
||||
#else
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
// Plain char may be unsigned depending on compiler settings; int8_t must be signed.
typedef signed char int8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef short int16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef int int32_t;
|
||||
typedef unsigned long long uint64_t;
|
||||
typedef long long int64_t;
|
||||
|
||||
#define MAXPATHLEN MAX_PATH
|
||||
|
||||
typedef unsigned short ushort;
|
||||
typedef unsigned int uint;
|
||||
typedef unsigned long ulong;
|
||||
|
||||
|
||||
#define INFINITY (FLT_MAX + FLT_MAX)
|
||||
//#define NAN (INFINITY | 1)
|
||||
//const static int PINFBITPATT_SP32 = INFINITY;
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.14159265358979323846264338327950288
|
||||
#endif
|
||||
|
||||
|
||||
#define isnan( x ) ((x) != (x))
|
||||
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
|
||||
|
||||
double rint( double x);
|
||||
float rintf( float x);
|
||||
long double rintl( long double x);
|
||||
|
||||
float cbrtf( float );
|
||||
double cbrt( double );
|
||||
|
||||
int ilogb( double x);
|
||||
int ilogbf (float x);
|
||||
int ilogbl(long double x);
|
||||
|
||||
double fmax(double x, double y);
|
||||
double fmin(double x, double y);
|
||||
float fmaxf( float x, float y );
|
||||
float fminf(float x, float y);
|
||||
|
||||
double log2(double x);
|
||||
long double log2l(long double x);
|
||||
|
||||
double exp2(double x);
|
||||
long double exp2l(long double x);
|
||||
|
||||
double fdim(double x, double y);
|
||||
float fdimf(float x, float y);
|
||||
long double fdiml(long double x, long double y);
|
||||
|
||||
double remquo( double x, double y, int *quo);
|
||||
float remquof( float x, float y, int *quo);
|
||||
long double remquol( long double x, long double y, int *quo);
|
||||
|
||||
long double scalblnl(long double x, long n);
|
||||
|
||||
// Absolute value of a long long. As with the standard llabs, the result is not
// representable when value is the most negative long long.
// static so that a build where inline is defined away does not emit an
// externally visible definition from every file that includes this header.
static inline long long
llabs(long long value) { return value >= 0 ? value : -value; }
|
||||
|
||||
|
||||
// end of math functions
|
||||
|
||||
uint64_t ReadTime( void );
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
|
||||
// Seconds-based sleep mapped onto the millisecond Sleep(); the argument is
// parenthesized so that expressions such as sleep(a + b) scale as a whole.
#define sleep(X) Sleep(1000*(X))
|
||||
#define snprintf sprintf_s
|
||||
//#define hypotl _hypot
|
||||
|
||||
float make_nan();
|
||||
float nanf( const char* str);
|
||||
double nan( const char* str);
|
||||
long double nanl( const char* str);
|
||||
|
||||
//#if defined USE_BOOST
|
||||
//#include <boost/math/tr1.hpp>
|
||||
//double hypot(double x, double y);
|
||||
float hypotf(float x, float y);
|
||||
long double hypotl(long double x, long double y) ;
|
||||
double lgamma(double x);
|
||||
float lgammaf(float x);
|
||||
|
||||
double trunc(double x);
|
||||
float truncf(float x);
|
||||
|
||||
double log1p(double x);
|
||||
float log1pf(float x);
|
||||
long double log1pl(long double x);
|
||||
|
||||
double copysign(double x, double y);
|
||||
float copysignf(float x, float y);
|
||||
long double copysignl(long double x, long double y);
|
||||
|
||||
long lround(double x);
|
||||
long lroundf(float x);
|
||||
//long lroundl(long double x)
|
||||
|
||||
double round(double x);
|
||||
float roundf(float x);
|
||||
long double roundl(long double x);
|
||||
|
||||
int signbit(double x);
|
||||
int signbitf(float x);
|
||||
|
||||
//bool signbitl(long double x) { return boost::math::tr1::signbit<long double>(x); }
|
||||
//#endif // USE_BOOST
|
||||
|
||||
long int lrint (double flt);
|
||||
long int lrintf (float flt);
|
||||
|
||||
|
||||
float int2float (int32_t ix);
|
||||
int32_t float2int (float fx);
|
||||
|
||||
/** Returns the number of leading 0-bits in x,
|
||||
starting at the most significant bit position.
|
||||
If x is 0, the result is undefined.
|
||||
*/
|
||||
int __builtin_clz(unsigned int pattern);
|
||||
|
||||
|
||||
static const double zero= 0.00000000000000000000e+00;
|
||||
#define NAN (INFINITY - INFINITY)
|
||||
#define HUGE_VALF (float)HUGE_VAL
|
||||
|
||||
int usleep(int usec);
|
||||
|
||||
// reimplement fenv.h because windows doesn't have it
|
||||
#define FE_INEXACT 0x0020
|
||||
#define FE_UNDERFLOW 0x0010
|
||||
#define FE_OVERFLOW 0x0008
|
||||
#define FE_DIVBYZERO 0x0004
|
||||
#define FE_INVALID 0x0001
|
||||
#define FE_ALL_EXCEPT 0x003D
|
||||
|
||||
int fetestexcept(int excepts);
|
||||
int feclearexcept(int excepts);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#else // !((defined(_WIN32) && defined(_MSC_VER)
|
||||
#if defined(__MINGW32__)
|
||||
#include <windows.h>
|
||||
// Seconds-based sleep mapped onto the millisecond Sleep(); the argument is
// parenthesized so that expressions such as sleep(a + b) scale as a whole.
#define sleep(X) Sleep(1000*(X))
|
||||
|
||||
#endif
|
||||
#define MAKE_HEX_FLOAT(x,y,z) x
|
||||
#define MAKE_HEX_DOUBLE(x,y,z) x
|
||||
#define MAKE_HEX_LONG(x,y,z) x
|
||||
|
||||
#endif // !((defined(_WIN32) && defined(_MSC_VER)
|
||||
|
||||
|
||||
#endif // _COMPAT_H_
|
||||
1198
test_common/harness/conversions.c
Normal file
1198
test_common/harness/conversions.c
Normal file
File diff suppressed because it is too large
Load Diff
127
test_common/harness/conversions.h
Normal file
127
test_common/harness/conversions.h
Normal file
@@ -0,0 +1,127 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _conversions_h
|
||||
#define _conversions_h
|
||||
|
||||
#include "errorHelpers.h"
|
||||
#include "mt19937.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include "compat.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Note: the next three all have to match in size and order!! */
|
||||
|
||||
enum ExplicitTypes
|
||||
{
|
||||
kBool = 0,
|
||||
kChar,
|
||||
kUChar,
|
||||
kUnsignedChar,
|
||||
kShort,
|
||||
kUShort,
|
||||
kUnsignedShort,
|
||||
kInt,
|
||||
kUInt,
|
||||
kUnsignedInt,
|
||||
kLong,
|
||||
kULong,
|
||||
kUnsignedLong,
|
||||
kFloat,
|
||||
kHalf,
|
||||
kDouble,
|
||||
kNumExplicitTypes
|
||||
};
|
||||
|
||||
typedef enum ExplicitTypes ExplicitType;
|
||||
|
||||
enum RoundingTypes
|
||||
{
|
||||
kRoundToEven = 0,
|
||||
kRoundToZero,
|
||||
kRoundToPosInf,
|
||||
kRoundToNegInf,
|
||||
kRoundToNearest,
|
||||
|
||||
kNumRoundingTypes,
|
||||
|
||||
kDefaultRoundingType = kRoundToNearest
|
||||
};
|
||||
|
||||
typedef enum RoundingTypes RoundingType;
|
||||
|
||||
extern void print_type_to_string(ExplicitType type, void *data, char* string);
|
||||
extern size_t get_explicit_type_size( ExplicitType type );
|
||||
extern const char * get_explicit_type_name( ExplicitType type );
|
||||
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
|
||||
|
||||
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
|
||||
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
|
||||
|
||||
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
|
||||
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
|
||||
extern float read_as_float( void *inRaw, ExplicitType inType );
|
||||
|
||||
extern float get_random_float(float low, float high, MTdata d);
|
||||
extern double get_random_double(double low, double high, MTdata d);
|
||||
extern float any_float( MTdata d );
|
||||
extern double any_double( MTdata d );
|
||||
|
||||
extern int random_in_range( int minV, int maxV, MTdata d );
|
||||
|
||||
size_t get_random_size_t(size_t low, size_t high, MTdata d);
|
||||
|
||||
// Note: this takes a float and is for use with single precision tests
|
||||
// Returns 1 when x is a non-zero single precision value smaller in magnitude
// than FLT_MIN (a subnormal), and 0 otherwise.
static inline int IsFloatSubnormal( float x )
{
#if 2 == FLT_RADIX
    // Examine the bit pattern of |x| as an integer to avoid problems with FTZ behavior
    union{ float asFloat; uint32_t asBits; } pun;
    pun.asFloat = fabsf(x);
    // Subnormals are exactly the magnitudes whose bits lie in 1 .. 0x007fffff
    return pun.asBits != 0 && pun.asBits <= 0x007fffffU;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return x != 0.0 && fabs(x) < (double) FLT_MIN;
#endif
}
|
||||
|
||||
// Returns 1 when x is a non-zero double precision value smaller in magnitude
// than DBL_MIN (a subnormal), and 0 otherwise.
static inline int IsDoubleSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Examine the bit pattern of |x| as an integer to avoid problems with FTZ behavior
    union{ double asDouble; uint64_t asBits; } pun;
    pun.asDouble = fabs( x);
    // Subnormals are exactly the magnitudes whose bits lie in 1 .. 0x000fffffffffffff
    return pun.asBits != 0 && pun.asBits <= 0x000fffffffffffffULL;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return x != 0.0 && fabs(x) < (double) DBL_MIN;
#endif
}
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _conversions_h
|
||||
|
||||
|
||||
585
test_common/harness/errorHelpers.c
Normal file
585
test_common/harness/errorHelpers.c
Normal file
@@ -0,0 +1,585 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "errorHelpers.h"
|
||||
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
const char *IGetErrorString( int clErrorCode )
|
||||
{
|
||||
switch( clErrorCode )
|
||||
{
|
||||
case CL_SUCCESS: return "CL_SUCCESS";
|
||||
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
||||
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
||||
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
||||
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
||||
case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
||||
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
||||
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
|
||||
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
|
||||
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
||||
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
||||
case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
|
||||
case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
|
||||
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
|
||||
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
|
||||
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
||||
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
||||
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
|
||||
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
||||
case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
|
||||
case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
|
||||
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
||||
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
||||
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
||||
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
||||
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
||||
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
||||
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
||||
case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
|
||||
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
||||
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
||||
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
||||
case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
|
||||
case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
|
||||
case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
|
||||
case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
|
||||
case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
|
||||
case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
|
||||
case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
|
||||
case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
|
||||
case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
|
||||
case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
|
||||
case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
|
||||
case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE: return "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
case CL_INVALID_PROPERTY: return "CL_INVALID_PROPERTY";
|
||||
case CL_INVALID_IMAGE_DESCRIPTOR: return "CL_INVALID_IMAGE_DESCRIPTOR";
|
||||
case CL_INVALID_COMPILER_OPTIONS: return "CL_INVALID_COMPILER_OPTIONS";
|
||||
case CL_INVALID_LINKER_OPTIONS: return "CL_INVALID_LINKER_OPTIONS";
|
||||
case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT";
|
||||
default: return "(unknown)";
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the spelling of an image channel order constant (e.g. "CL_RGBA"), or NULL when the
// value is not one of the orders listed here. The Apple-specific orders are only recognized
// when the OpenCL headers define them.
const char *GetChannelOrderName( cl_channel_order order )
{
    // One switch arm per constant; '#' stringifies the constant's name without expanding it.
#define ORDER_NAME_CASE( value ) case value: return #value

    switch( order )
    {
        ORDER_NAME_CASE( CL_R );
        ORDER_NAME_CASE( CL_A );
        ORDER_NAME_CASE( CL_Rx );
        ORDER_NAME_CASE( CL_RG );
        ORDER_NAME_CASE( CL_RA );
        ORDER_NAME_CASE( CL_RGx );
        ORDER_NAME_CASE( CL_RGB );
        ORDER_NAME_CASE( CL_RGBx );
        ORDER_NAME_CASE( CL_RGBA );
        ORDER_NAME_CASE( CL_ARGB );
        ORDER_NAME_CASE( CL_BGRA );
        ORDER_NAME_CASE( CL_INTENSITY );
        ORDER_NAME_CASE( CL_LUMINANCE );
#if defined CL_1RGB_APPLE
        ORDER_NAME_CASE( CL_1RGB_APPLE );
#endif
#if defined CL_BGR1_APPLE
        ORDER_NAME_CASE( CL_BGR1_APPLE );
#endif
        default: break;
    }

#undef ORDER_NAME_CASE

    return NULL;
}
|
||||
|
||||
// Returns 1 when order is one of the channel orders this harness knows about, 0 otherwise.
// The Apple-specific orders count only when the OpenCL headers define them.
int IsChannelOrderSupported( cl_channel_order order )
{
    static const cl_channel_order knownOrders[] =
    {
        CL_R, CL_A, CL_Rx, CL_RG, CL_RA, CL_RGx, CL_RGB, CL_RGBx,
        CL_RGBA, CL_ARGB, CL_BGRA, CL_INTENSITY, CL_LUMINANCE,
#if defined CL_1RGB_APPLE
        CL_1RGB_APPLE,
#endif
#if defined CL_BGR1_APPLE
        CL_BGR1_APPLE,
#endif
    };
    size_t idx;

    for( idx = 0; idx < sizeof( knownOrders ) / sizeof( knownOrders[ 0 ] ); idx++ )
    {
        if( knownOrders[ idx ] == order )
            return 1;
    }

    return 0;
}
|
||||
|
||||
// Returns the spelling of an image channel data type constant (e.g. "CL_UNORM_INT8"), or NULL
// when the value is not one of the types listed here. CL_SFIXED14_APPLE is only recognized when
// the OpenCL headers define it.
const char *GetChannelTypeName( cl_channel_type type )
{
    // One switch arm per constant; '#' stringifies the constant's name without expanding it.
#define TYPE_NAME_CASE( value ) case value: return #value

    switch( type )
    {
        TYPE_NAME_CASE( CL_SNORM_INT8 );
        TYPE_NAME_CASE( CL_SNORM_INT16 );
        TYPE_NAME_CASE( CL_UNORM_INT8 );
        TYPE_NAME_CASE( CL_UNORM_INT16 );
        TYPE_NAME_CASE( CL_UNORM_SHORT_565 );
        TYPE_NAME_CASE( CL_UNORM_SHORT_555 );
        TYPE_NAME_CASE( CL_UNORM_INT_101010 );
        TYPE_NAME_CASE( CL_SIGNED_INT8 );
        TYPE_NAME_CASE( CL_SIGNED_INT16 );
        TYPE_NAME_CASE( CL_SIGNED_INT32 );
        TYPE_NAME_CASE( CL_UNSIGNED_INT8 );
        TYPE_NAME_CASE( CL_UNSIGNED_INT16 );
        TYPE_NAME_CASE( CL_UNSIGNED_INT32 );
        TYPE_NAME_CASE( CL_HALF_FLOAT );
        TYPE_NAME_CASE( CL_FLOAT );
#ifdef CL_SFIXED14_APPLE
        TYPE_NAME_CASE( CL_SFIXED14_APPLE );
#endif
        default: break;
    }

#undef TYPE_NAME_CASE

    return NULL;
}
|
||||
|
||||
// Returns 1 when type is one of the channel data types this harness knows about, 0 otherwise.
// CL_SFIXED14_APPLE counts only when the OpenCL headers define it.
int IsChannelTypeSupported( cl_channel_type type )
{
    static const cl_channel_type knownTypes[] =
    {
        CL_SNORM_INT8, CL_SNORM_INT16,
        CL_UNORM_INT8, CL_UNORM_INT16,
        CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,
        CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32,
        CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32,
        CL_HALF_FLOAT, CL_FLOAT,
#ifdef CL_SFIXED14_APPLE
        CL_SFIXED14_APPLE,
#endif
    };
    size_t idx;

    for( idx = 0; idx < sizeof( knownTypes ) / sizeof( knownTypes[ 0 ] ); idx++ )
    {
        if( knownTypes[ idx ] == type )
            return 1;
    }

    return 0;
}
|
||||
|
||||
// Returns the spelling of a sampler addressing mode constant (e.g. "CL_ADDRESS_CLAMP"), or NULL
// when mode is not one of the five modes listed here.
const char *GetAddressModeName( cl_addressing_mode mode )
{
    if( mode == CL_ADDRESS_NONE )
        return "CL_ADDRESS_NONE";
    if( mode == CL_ADDRESS_CLAMP_TO_EDGE )
        return "CL_ADDRESS_CLAMP_TO_EDGE";
    if( mode == CL_ADDRESS_CLAMP )
        return "CL_ADDRESS_CLAMP";
    if( mode == CL_ADDRESS_REPEAT )
        return "CL_ADDRESS_REPEAT";
    if( mode == CL_ADDRESS_MIRRORED_REPEAT )
        return "CL_ADDRESS_MIRRORED_REPEAT";

    return NULL;
}
|
||||
|
||||
// Returns the spelling of a device type constant, or NULL for any other value. Only the exact
// values CL_DEVICE_TYPE_GPU, _CPU, _ACCELERATOR and _ALL are recognized; other combinations of
// type bits are not decoded.
const char *GetDeviceTypeName( cl_device_type type )
{
    if( type == CL_DEVICE_TYPE_GPU )
        return "CL_DEVICE_TYPE_GPU";
    if( type == CL_DEVICE_TYPE_CPU )
        return "CL_DEVICE_TYPE_CPU";
    if( type == CL_DEVICE_TYPE_ACCELERATOR )
        return "CL_DEVICE_TYPE_ACCELERATOR";
    if( type == CL_DEVICE_TYPE_ALL )
        return "CL_DEVICE_TYPE_ALL";

    return NULL;
}
|
||||
|
||||
// Formats vecSize elements of typeSize bytes each, read from dataBuffer, as hexadecimal text.
// Every element becomes 2 * typeSize lower-case hex digits, printed from its last byte down to
// its first (so a little-endian value reads as an ordinary hex number). Elements are separated
// by a single space.
//
//   dataBuffer - vecSize * typeSize bytes of input; only read.
//   buffer     - destination for the NUL-terminated result. Its size is not known here, so the
//                caller must supply at least vecSize * ( 2 * typeSize + 1 ) bytes.
//                Pass NULL to use an internal static 1024-byte buffer instead. That makes the
//                call non-reentrant, and output that does not fit is truncated after the last
//                whole element (previously the static buffer was simply overrun).
//
// Returns the buffer that was written.
const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
{
    static const char hexDigits[] = "0123456789abcdef";
    static char scratch[ 1024 ];
    const unsigned char *element = (const unsigned char *)dataBuffer;
    size_t capacity = (size_t)-1;   // caller-supplied buffers are trusted to be large enough
    size_t used = 0;
    size_t i, j;

    if( buffer == NULL )
    {
        buffer = scratch;
        capacity = sizeof( scratch );
    }

    for( i = 0; i < vecSize; i++ )
    {
        const size_t separator = ( i > 0 ) ? 1 : 0;
        const size_t room = capacity - used - 1;    // always keep one byte for the terminator

        // Stop at an element boundary rather than write past the end of the destination.
        if( room < separator || typeSize > ( room - separator ) / 2 )
            break;

        if( separator )
            buffer[ used++ ] = ' ';

        for( j = 0; j < typeSize; j++ )
        {
            const unsigned char byte = element[ typeSize - j - 1 ];

            buffer[ used++ ] = hexDigits[ byte >> 4 ];
            buffer[ used++ ] = hexDigits[ byte & 0x0f ];
        }
        element += typeSize;
    }
    buffer[ used ] = 0;

    return buffer;
}
|
||||
|
||||
// Larger of two values. Defined only if no MAX macro exists yet.
// Caution: the selected argument is evaluated twice, so avoid arguments with side effects.
#ifndef MAX
|
||||
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
|
||||
#endif
|
||||
|
||||
// When building with MSVC, route the scalbn family to the ldexp family.
// NOTE(review): presumably because the targeted MSVC runtime lacks scalbn*; the two families
// agree only when FLT_RADIX is 2 -- confirm for the supported toolchains.
#if defined( _MSC_VER )
|
||||
#define scalbnf(_a, _i ) ldexpf( _a, _i )
|
||||
#define scalbn(_a, _i ) ldexp( _a, _i )
|
||||
#define scalbnl(_a, _i ) ldexpl( _a, _i )
|
||||
#endif
|
||||
|
||||
static float Ulp_Error_Half_Float( float test, double reference );
|
||||
static inline float half2float( cl_ushort half );
|
||||
|
||||
// taken from math tests
|
||||
#define HALF_MIN_EXP -13
|
||||
#define HALF_MANT_DIG 11
|
||||
static float Ulp_Error_Half_Float( float test, double reference )
|
||||
{
|
||||
union{ double d; uint64_t u; }u; u.d = reference;
|
||||
|
||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
||||
// rounded before calling this function. That test:
|
||||
//
|
||||
// if( (float) reference == test )
|
||||
// return 0.0f;
|
||||
//
|
||||
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
|
||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
||||
// results.
|
||||
|
||||
double testVal = test;
|
||||
if( u.u & 0x000fffffffffffffULL )
|
||||
{ // Non-power of two and NaN
|
||||
if( isnan( reference ) && isnan( test ) )
|
||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||
|
||||
// The unbiased exponent of the ulp unit place
|
||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
if( isinf( reference ) )
|
||||
{
|
||||
if( (double) test == reference )
|
||||
return 0.0f;
|
||||
|
||||
return (float) (testVal - reference );
|
||||
}
|
||||
|
||||
// reference is a normal power of two or a zero
|
||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
|
||||
|
||||
// Scale the exponent of the error
|
||||
return (float) scalbn( testVal - reference, ulp_exp );
|
||||
}
|
||||
|
||||
// Taken from vLoadHalf test
|
||||
static inline float half2float( cl_ushort us )
|
||||
{
|
||||
uint32_t u = us;
|
||||
uint32_t sign = (u << 16) & 0x80000000;
|
||||
int32_t exponent = (u & 0x7c00) >> 10;
|
||||
uint32_t mantissa = (u & 0x03ff) << 13;
|
||||
union{ unsigned int u; float f;}uu;
|
||||
|
||||
if( exponent == 0 )
|
||||
{
|
||||
if( mantissa == 0 )
|
||||
return sign ? -0.0f : 0.0f;
|
||||
|
||||
int shift = __builtin_clz( mantissa ) - 8;
|
||||
exponent -= shift-1;
|
||||
mantissa <<= shift;
|
||||
mantissa &= 0x007fffff;
|
||||
}
|
||||
else
|
||||
if( exponent == 31)
|
||||
{
|
||||
uu.u = mantissa | sign;
|
||||
if( mantissa )
|
||||
uu.u |= 0x7fc00000;
|
||||
else
|
||||
uu.u |= 0x7f800000;
|
||||
|
||||
return uu.f;
|
||||
}
|
||||
|
||||
exponent += 127 - 15;
|
||||
exponent <<= 23;
|
||||
|
||||
exponent |= mantissa;
|
||||
uu.u = exponent | sign;
|
||||
|
||||
return uu.f;
|
||||
}
|
||||
|
||||
// Ulp error of a half precision result, given as its raw 16-bit pattern, against `reference`.
// The pattern is widened with half2float() and scored by Ulp_Error_Half_Float().
float Ulp_Error_Half( cl_ushort test, float reference )
{
    const float widened = half2float( test );

    return Ulp_Error_Half_Float( widened, reference );
}
|
||||
|
||||
|
||||
// Returns the signed error of `test` relative to the higher precision `reference`, measured in
// single precision ulps at the magnitude of the reference.
//
//   - Infinite reference: 0 if test equals it, otherwise test - reference.
//   - Infinite test with a finite reference: the test value is treated as +/-2**128 (see below).
//   - NaN reference and NaN test: 0 (any NaN is accepted when a NaN is expected).
//   - Reference that is zero or an exact power of two: the smaller ulp just below the power of
//     two is used.
//   - The ulp never gets smaller than the spacing of single precision subnormals.
float Ulp_Error( float test, double reference )
{
    union{ double d; uint64_t u; } refBits;
    double testVal = test;
    int refExp;

    refBits.d = reference;

    // Note: This function presumes that someone has already tested whether the result is
    // correctly rounded before calling this function. That test:
    //
    //      if( (float) reference == test )
    //          return 0.0f;
    //
    // would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
    // Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
    // results.

    if( isinf( reference ) )
    {
        if( testVal == reference )
            return 0.0f;

        return (float) (testVal - reference );
    }

    if( isinf( testVal) )
    {   // infinite test value, but finite (but possibly overflowing in float) reference.
        //
        // The function probably overflowed prematurely here. Formally, the spec says this is
        // an infinite ulp error and should not be tolerated. Unfortunately, this would mean
        // that the internal precision of some half_pow implementations would have to be 29+ bits
        // at half_powr( 0x1.fffffep+31, 4) to correctly determine that 4*log2( 0x1.fffffep+31 )
        // is not exactly 128.0. You might represent this for example as 4*(32 - ~2**-24), which
        // after rounding to single is 4*32 = 128, which will ultimately result in premature
        // overflow, even though a good faith representation would be correct to within 2**-29
        // internally.

        // In the interest of not requiring the implementation go to extraordinary lengths to
        // deliver a half precision function, we allow premature overflow within the limit
        // of the allowed ulp error. Towards that end, we "pretend" the test value is actually
        // 2**128, the next value that would appear in the number line if float had sufficient range.
        testVal = copysign( MAKE_HEX_DOUBLE(0x1.0p128, 0x1LL, 128), testVal );

        // Note that the same hack may not work in long double, which is not guaranteed to have
        // more range than double. It is not clear that premature overflow should be tolerated for
        // double.
    }

    if( refBits.u & 0x000fffffffffffffULL )
    {   // Non-zero mantissa field: not a power of two, or NaN
        if( isnan( reference ) && isnan( test ) )
            return 0.0f;    // if we are expecting a NaN, any NaN is fine

        refExp = ilogb( reference );
    }
    else
    {
        // reference is a normal power of two or a zero: step down one binade.
        // ilogb( 0 ) returns FP_ILOGB0, which may be INT_MIN, so "ilogb( reference ) - 1" would
        // overflow (undefined behavior). Only subtract when the result stays in range; smaller
        // exponents are clamped below in any case.
        refExp = ilogb( reference );
        if( refExp >= FLT_MIN_EXP )
            refExp -= 1;
    }

    // Below the normal range the ulp stops shrinking (subnormal spacing).
    if( refExp < FLT_MIN_EXP - 1 )
        refExp = FLT_MIN_EXP - 1;

    // Scale the error by the exponent of the ulp unit place
    return (float) scalbn( testVal - reference, FLT_MANT_DIG - 1 - refExp );
}
|
||||
|
||||
// Returns the signed error of `test` relative to `reference`, measured in double precision ulps
// at the magnitude of the reference.
//
// Deal with long double = double:
// On most systems long double is a higher precision type than double (an 80-bit or wider
// format, or a head-tail double double). That is enough to represent a result to many more bits
// than double, so sub-ulp errors can be calculated. This is the standard system for which this
// test suite is designed.
//
// On some systems double and long double are the same thing. Then the "infinitely precise"
// reference passed in can itself be off by as much as half a double precision ulp. In that case
// the reported error is inflated by half an ulp to take this into account. A more correct and
// permanent fix would be to refactor the reference code to return results in this format:
//
//      typedef struct DoubleReference
//      {                                   // true value = correctlyRoundedResult + ulps * ulp(correctlyRoundedResult)        (infinitely precise)
//          double  correctlyRoundedResult; // as best we can
//          double  ulps;                   // plus a fractional amount to account for the difference
//      }DoubleReference;                   // between infinitely precise result and correctlyRoundedResult, in units of ulps.
//
// This would provide a useful higher-than-double precision format for everyone, and would solve
// a few problems with representing absolute errors below DBL_MIN and over DBL_MAX for systems
// that use a head to tail double double for long double.
//
// Note: This function presumes that someone has already tested whether the result is correctly
// rounded before calling this function. That test:
//
//      if( (double) reference == test )
//          return 0.0f;
//
// would ensure that cases like fabs(reference) > DBL_MAX are weeded out before we get here.
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded results.
float Ulp_Error_Double( double test, long double reference )
{
    int frexpExponent;      // required by frexpl(); the value itself is not used
    const long double testVal = test;
    int unitExponent;
    float ulps;

    if( 0.5L == frexpl( reference, &frexpExponent ) )
    {   // reference is a normal power of two: measure with the smaller ulp just below it
        unitExponent = MAX( ilogbl( reference) - 1, DBL_MIN_EXP-1 );
    }
    else
    {   // Non-power of two and NaN (zero and infinity also take this path)
        if( isinf( reference ) )
            return ( testVal == reference ) ? 0.0f : (float) ( testVal - reference );

        if( isnan( reference ) && isnan( test ) )
            return 0.0f;    // if we are expecting a NaN, any NaN is fine

        unitExponent = MAX( ilogbl( reference), DBL_MIN_EXP-1 );
    }

    // Scale the error by the exponent of the ulp unit place
    ulps = (float) scalbnl( testVal - reference, DBL_MANT_DIG - 1 - unitExponent );

    // account for rounding error in reference result on systems that do not have a higher precision floating point type (see above)
    if( sizeof(long double) == sizeof( double ) )
        ulps += copysignf( 0.5f, ulps );

    return ulps;
}
|
||||
|
||||
// Logs the build status and build log of every device for which `program` did not build
// successfully.
//
//   program     - program to inspect; NULL is accepted and means "nothing to report".
//   num_devices - number of entries in device_list, or 0 to query the devices from the
//                 program's context.
//   device_list - devices to inspect; ignored when num_devices is 0.
//
// Returns CL_SUCCESS when all queries succeed (whether or not any build failed), the failing
// OpenCL error code when a query fails, or CL_OUT_OF_HOST_MEMORY when an allocation fails.
// Memory allocated here is released on every path.
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list)
{
    cl_int result = CL_SUCCESS;
    int error;
    size_t size_ret;
    size_t log_size;
    cl_device_id *queried_devices = NULL;   // non-NULL only when this function allocated the list
    char *build_log = NULL;
    unsigned int i;

    // Does the program object exist?
    if (program == NULL)
        return CL_SUCCESS;

    // Was the number of devices given?
    if (num_devices == 0) {

        // If zero devices were specified then allocate and query the device list from the context.
        // Nothing is allocated yet, so test_error may return directly here.
        cl_context context;
        error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT, sizeof(context), &context, NULL);
        test_error( error, "Unable to query program's context" );
        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret);
        test_error( error, "Unable to query context's device size" );

        num_devices = (cl_uint)(size_ret / sizeof(cl_device_id));
        if (num_devices == 0)
            return CL_SUCCESS;      // no devices, nothing to log

        queried_devices = (cl_device_id *) malloc(size_ret);
        if (queried_devices == NULL) {
            print_error( CL_OUT_OF_HOST_MEMORY, "malloc failed" );
            return CL_OUT_OF_HOST_MEMORY;
        }
        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret, queried_devices, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query context's devices" );
            result = error;
            goto cleanup;
        }
        device_list = queried_devices;
    }

    // For each device in the device_list
    for (i = 0; i < num_devices; i++) {

        // Get the build status
        cl_build_status build_status;
        error = clGetProgramBuildInfo(program,
                                      device_list[i],
                                      CL_PROGRAM_BUILD_STATUS,
                                      sizeof(build_status),
                                      &build_status,
                                      &size_ret);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build status" );
            result = error;
            goto cleanup;
        }

        if (build_status == CL_BUILD_SUCCESS)
            continue;

        // The build failed: log the status, then fetch, log and free the build log
        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int) build_status);
        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log size" );
            result = error;
            goto cleanup;
        }

        // One extra byte so the text is terminated even if the implementation does not do it.
        build_log = (char *) malloc(log_size + 1);
        if (build_log == NULL) {
            print_error( CL_OUT_OF_HOST_MEMORY, "malloc failed" );
            result = CL_OUT_OF_HOST_MEMORY;
            goto cleanup;
        }
        error = clGetProgramBuildInfo(program, device_list[i], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
        if (error != CL_SUCCESS) {
            print_error( error, "Unable to query build log" );
            result = error;
            goto cleanup;
        }
        build_log[log_size] = '\0';

        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
        free(build_log);
        build_log = NULL;
    }

cleanup:
    // free(NULL) is a no-op, so both pointers can be released unconditionally.
    free(build_log);
    free(queried_devices);

    return result;
}
|
||||
149
test_common/harness/errorHelpers.h
Normal file
149
test_common/harness/errorHelpers.h
Normal file
@@ -0,0 +1,149 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _errorHelpers_h
|
||||
#define _errorHelpers_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Values for the _higherBetter argument of log_perf / vlog_perf.
#define LOWER_IS_BETTER 0
|
||||
#define HIGHER_IS_BETTER 1
|
||||
|
||||
// If USE_ATF is defined, all log_error and log_info calls can be routed to test library
|
||||
// functions as described below. This is helpful for integration into an automated testing
|
||||
// system.
|
||||
#if USE_ATF
|
||||
// export BUILD_WITH_ATF=1
|
||||
#include <ATF/ATF.h>
|
||||
#define test_start() ATFTestStart()
|
||||
#define log_info ATFLogInfo
|
||||
#define log_error ATFLogError
|
||||
#define log_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format, ##__VA_ARGS__)
|
||||
#define test_finish() ATFTestFinish()
|
||||
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) ATFLogPerformanceNumber(_number, _higherBetter, _numType, _format,##__VA_ARGS__)
|
||||
#define vlog ATFLogInfo
|
||||
#define vlog_error ATFLogError
|
||||
#else
|
||||
// Without ATF everything goes through printf, and test_start() / test_finish() expand to nothing.
#define test_start()
|
||||
#define log_info printf
|
||||
#define log_error printf
|
||||
// Prints "Performance Number <formatted text> (in <_numType>, <higher|lower> is better): <_number>".
// The variadic arguments feed _format and come first in the printf argument list; _number is
// printed with %g.
#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
_higherBetter?"higher is better":"lower is better", _number )
|
||||
#define test_finish()
|
||||
// Same expansion as log_perf.
#define vlog_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, \
|
||||
_higherBetter?"higher is better":"lower is better" , _number)
|
||||
// vlog / vlog_error need a printf that copes with the "%a" conversion; pick one per toolchain.
#ifdef _WIN32
|
||||
#ifdef __MINGW32__
|
||||
// Use __mingw_printf since it supports "%a" format specifier
|
||||
#define vlog __mingw_printf
|
||||
#define vlog_error __mingw_printf
|
||||
#else
|
||||
// Use home-baked function that treats "%a" as "%f"
|
||||
// (forward declaration; the definition appears later in this header)
static int vlog_win32(const char *format, ...);
|
||||
#define vlog vlog_win32
|
||||
#define vlog_error vlog_win32
|
||||
#endif
|
||||
#else
|
||||
#define vlog_error printf
|
||||
#define vlog printf
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Compile-time assertion: declares an int array named after the current line whose size is 1
// when b is true and -1 (a compile error) when b is false. The two helper levels make
// __LINE__ expand before it is pasted into the name. The expansion already ends with ';'.
#define ct_assert(b) ct_assert_i(b, __LINE__)
|
||||
#define ct_assert_i(b, line) ct_assert_ii(b, line)
|
||||
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
|
||||
|
||||
// test_error: if errCode is not CL_SUCCESS, log msg with the error name and source location,
// then return errCode from the enclosing function. test_error_ret returns retValue instead.
// Both expand to a bare { ... } block and evaluate errCode more than once.
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
|
||||
#define test_error_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return retValue ; } }
|
||||
// Logs "ERROR: <msg>! (<error name> from <file>:<line>)". The expansion already ends with ';'.
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
|
||||
|
||||
// expected error code vs. what we got
|
||||
// test_failure_error: if errCode differs from expectedErrCode, log both names and return the
// value of (errCode != expectedErrCode) from the enclosing function. The _ret form returns
// retValue instead.
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
||||
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
|
||||
#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
||||
// Warning variants: on a mismatch they log a WARNING line and increment a variable named
// `warnings`, which must be in scope where the macro is used. They do not return; the retValue
// argument of test_failure_warning_ret is unused.
#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
||||
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
|
||||
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
||||
|
||||
// Name of an OpenCL error code constant, or "(unknown)" for codes that are not listed.
extern const char *IGetErrorString( int clErrorCode );
|
||||
|
||||
// Signed error of `test` relative to `reference`, in ulps of half / single / double precision.
// Ulp_Error_Half takes the half precision result as its raw 16-bit pattern.
extern float Ulp_Error_Half( cl_ushort test, float reference );
|
||||
extern float Ulp_Error( float test, double reference );
|
||||
extern float Ulp_Error_Double( double test, long double reference );
|
||||
|
||||
// The Get*Name functions return the spelling of the constant, or NULL for values they do not
// list. The Is*Supported functions return 1 for listed values and 0 otherwise.
extern const char *GetChannelTypeName( cl_channel_type type );
|
||||
extern int IsChannelTypeSupported( cl_channel_type type );
|
||||
extern const char *GetChannelOrderName( cl_channel_order order );
|
||||
extern int IsChannelOrderSupported( cl_channel_order order );
|
||||
extern const char *GetAddressModeName( cl_addressing_mode mode );
|
||||
|
||||
extern const char *GetDeviceTypeName( cl_device_type type );
|
||||
|
||||
// Formats vecSize elements of typeSize bytes each as space-separated hex text.
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
|
||||
extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
|
||||
|
||||
#if defined (_WIN32) && !defined(__MINGW32__)
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
// printf-style logger for Windows C runtimes, used where "%a" must be treated as "%f".
// Every "a" conversion in `format` is replaced by "f" before printing, including ones that
// carry flags, a field width, a precision or a length modifier (e.g. "%.13a", "%la").
// A literal "%%" is skipped, so text such as "100%%a" is left alone.
//
// Returns 0 after printing, or -1 if the temporary copy of the format cannot be allocated.
static int vlog_win32(const char *format, ...)
{
    char *patched = NULL;       // writable copy of format, created only when a fix-up is needed
    const char *scan = format;
    va_list args;

    while (*scan) {
        if (*scan++ != '%')
            continue;

        if (*scan == '%') {     // "%%" is a literal percent sign, not a conversion
            scan++;
            continue;
        }

        // Skip the optional flags, field width, precision and length modifier.
        while (*scan && strchr("-+ #0", *scan))
            scan++;
        while (*scan == '*' || (*scan >= '0' && *scan <= '9'))
            scan++;
        if (*scan == '.') {
            scan++;
            while (*scan == '*' || (*scan >= '0' && *scan <= '9'))
                scan++;
        }
        while (*scan && strchr("hlLjzt", *scan))
            scan++;

        // replace %a with %f
        if (*scan == 'a') {
            if (patched == NULL) {
                size_t length = strlen(format) + 1;

                patched = (char *) malloc(length);
                if (patched == NULL) {
                    printf("vlog_win32: Failed to allocate memory for strdup\n");
                    return -1;
                }
                memcpy(patched, format, length);
            }
            patched[scan - format] = 'f';
        }

        if (*scan)
            scan++;             // consume the conversion character
    }

    va_start(args, format);
    vprintf(patched ? patched : format, args);
    va_end(args);

    free(patched);              // free(NULL) is a no-op

    return 0;
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _errorHelpers_h
|
||||
|
||||
|
||||
89
test_common/harness/fpcontrol.h
Normal file
89
test_common/harness/fpcontrol.h
Normal file
@@ -0,0 +1,89 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _fpcontrol_h
|
||||
#define _fpcontrol_h
|
||||
|
||||
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
|
||||
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
|
||||
// in integer code, and have found this is the only way to correctly verify operation.
|
||||
//
|
||||
// Non-Apple implementations will need to provide their own implementation for these features. If the reference hardware and device are both
|
||||
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
|
||||
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
|
||||
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
|
||||
typedef int FPU_mode_type;
|
||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
||||
#include <xmmintrin.h>
|
||||
#elif defined( __PPC__ )
|
||||
#include <fpu_control.h>
|
||||
extern __thread fpu_control_t fpu_control;
|
||||
#endif
|
||||
// Set the reference hardware floating point unit to FTZ mode
|
||||
static inline void ForceFTZ( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr( *mode | 0x8040);
|
||||
#elif defined( __PPC__ )
|
||||
*mode = fpu_control;
|
||||
fpu_control |= _FPU_MASK_NI;
|
||||
#elif defined ( __arm__ )
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
|
||||
#else
|
||||
#error ForceFTZ needs an implentation
|
||||
#endif
|
||||
}
|
||||
|
||||
// Disable the denorm flush to zero
|
||||
static inline void DisableFTZ( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
*mode = _mm_getcsr();
|
||||
_mm_setcsr( *mode & ~0x8040);
|
||||
#elif defined( __PPC__ )
|
||||
*mode = fpu_control;
|
||||
fpu_control &= ~_FPU_MASK_NI;
|
||||
#elif defined ( __arm__ )
|
||||
unsigned fpscr;
|
||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||
*mode = fpscr;
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
|
||||
#else
|
||||
#error DisableFTZ needs an implentation
|
||||
#endif
|
||||
}
|
||||
|
||||
// Restore the reference hardware to floating point state indicated by *mode
|
||||
static inline void RestoreFPState( FPU_mode_type *mode )
|
||||
{
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
||||
_mm_setcsr( *mode );
|
||||
#elif defined( __PPC__)
|
||||
fpu_control = *mode;
|
||||
#elif defined (__arm__)
|
||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
|
||||
#else
|
||||
#error RestoreFPState needs an implementation
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
#error ForceFTZ and RestoreFPState need implentations
|
||||
#endif
|
||||
|
||||
#endif
|
||||
53
test_common/harness/genericThread.cpp
Normal file
53
test_common/harness/genericThread.cpp
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "genericThread.h"
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
#else // !_WIN32
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
void * genericThread::IStaticReflector( void * data )
|
||||
{
|
||||
genericThread *t = (genericThread *)data;
|
||||
return t->IRun();
|
||||
}
|
||||
|
||||
bool genericThread::Start( void )
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
|
||||
return ( mHandle != NULL );
|
||||
#else // !_WIN32
|
||||
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
|
||||
return ( error == 0 );
|
||||
#endif // !_WIN32
|
||||
}
|
||||
|
||||
void * genericThread::Join( void )
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
WaitForSingleObject( (HANDLE)mHandle, INFINITE );
|
||||
return NULL;
|
||||
#else // !_WIN32
|
||||
void * retVal;
|
||||
int error = pthread_join( (pthread_t)mHandle, &retVal );
|
||||
if( error != 0 )
|
||||
retVal = NULL;
|
||||
return retVal;
|
||||
#endif // !_WIN32
|
||||
}
|
||||
42
test_common/harness/genericThread.h
Normal file
42
test_common/harness/genericThread.h
Normal file
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _genericThread_h
|
||||
#define _genericThread_h
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// Minimal cross-platform thread wrapper.  Derive from it, implement IRun(),
// then call Start() to launch the thread and Join() to wait for it.
class genericThread
{
    public:

        // Start with a null handle so the member never holds an
        // indeterminate value before Start() has been called.
        genericThread() : mHandle( NULL ) {}
        virtual ~genericThread() {}

        // Launches the thread; returns true on success.
        bool    Start( void );
        // Waits for the thread to finish and returns its result (see the
        // implementation for platform differences).
        void *  Join( void );

    protected:

        // Body of the thread, supplied by the derived class.
        virtual void *  IRun( void ) = 0;

    private:

        // Native thread handle/identifier stored as an opaque pointer.
        void* mHandle;

        // Entry point handed to the OS; forwards to IRun() on 'data'.
        static void * IStaticReflector( void * data );
};
|
||||
|
||||
#endif // _genericThread_h
|
||||
|
||||
249
test_common/harness/imageHelpers.cpp
Normal file
249
test_common/harness/imageHelpers.cpp
Normal file
@@ -0,0 +1,249 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "imageHelpers.h"
|
||||
|
||||
size_t get_format_type_size( const cl_image_format *format )
|
||||
{
|
||||
return get_channel_data_type_size( format->image_channel_data_type );
|
||||
}
|
||||
|
||||
// Map a channel data type to a size in bytes.  The packed types
// (565/555/101010 and the optional 8888 variants) report 2 or 4; any type
// not listed here reports 0.
size_t get_channel_data_type_size( cl_channel_type channelType )
{
    size_t bytes = 0;

    switch( channelType )
    {
        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SIGNED_INT8:
        case CL_UNSIGNED_INT8:
            bytes = 1;
            break;

        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_SIGNED_INT16:
        case CL_UNSIGNED_INT16:
        case CL_HALF_FLOAT:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            bytes = sizeof( cl_short );
            break;

        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
            bytes = sizeof( cl_int );
            break;

        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_SHORT_565_REV:
        case CL_UNORM_SHORT_555_REV:
#endif
            bytes = 2;
            break;

#ifdef OBSOLETE_FORAMT
        case CL_UNORM_INT_8888:
        case CL_UNORM_INT_8888_REV:
#endif
        case CL_UNORM_INT_101010:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_INT_101010_REV:
#endif
            bytes = 4;
            break;

        case CL_FLOAT:
            bytes = sizeof( cl_float );
            break;

        default:
            break;
    }

    return bytes;
}
|
||||
|
||||
size_t get_format_channel_count( const cl_image_format *format )
|
||||
{
|
||||
return get_channel_order_channel_count( format->image_channel_order );
|
||||
}
|
||||
|
||||
// Number of channels for a channel order; 0 for any order not listed here.
size_t get_channel_order_channel_count( cl_channel_order order )
{
    size_t channels = 0;

    switch( order )
    {
        case CL_R:
        case CL_A:
        case CL_Rx:
        case CL_INTENSITY:
        case CL_LUMINANCE:
            channels = 1;
            break;

        case CL_RG:
        case CL_RA:
        case CL_RGx:
            channels = 2;
            break;

        case CL_RGB:
        case CL_RGBx:
            channels = 3;
            break;

        case CL_RGBA:
        case CL_ARGB:
        case CL_BGRA:
#ifdef CL_1RGB_APPLE
        case CL_1RGB_APPLE:
#endif
#ifdef CL_BGR1_APPLE
        case CL_BGR1_APPLE:
#endif
            channels = 4;
            break;

        default:
            break;
    }

    return channels;
}
|
||||
|
||||
int is_format_signed( const cl_image_format *format )
|
||||
{
|
||||
switch( format->image_channel_data_type )
|
||||
{
|
||||
case CL_SNORM_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_SNORM_INT16:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_FLOAT:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
return 1;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Size in bytes of one pixel of the given format.  Packed data types have a
// fixed pixel size (2 or 4); every other known type is the channel count
// times the per-channel size.  Unknown data types yield 0.
size_t get_pixel_size( cl_image_format *format )
{
    size_t bytesPerChannel;

    switch( format->image_channel_data_type )
    {
        // Packed types: the whole pixel has a fixed size.
        case CL_UNORM_SHORT_565:
        case CL_UNORM_SHORT_555:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_SHORT_565_REV:
        case CL_UNORM_SHORT_555_REV:
#endif
            return 2;

#ifdef OBSOLETE_FORAMT
        case CL_UNORM_INT_8888:
        case CL_UNORM_INT_8888_REV:
#endif
        case CL_UNORM_INT_101010:
#ifdef OBSOLETE_FORAMT
        case CL_UNORM_INT_101010_REV:
#endif
            return 4;

        // Per-channel types: pick the channel width, multiply below.
        case CL_SNORM_INT8:
        case CL_UNORM_INT8:
        case CL_SIGNED_INT8:
        case CL_UNSIGNED_INT8:
            bytesPerChannel = 1;
            break;

        case CL_SNORM_INT16:
        case CL_UNORM_INT16:
        case CL_SIGNED_INT16:
        case CL_UNSIGNED_INT16:
        case CL_HALF_FLOAT:
#ifdef CL_SFIXED14_APPLE
        case CL_SFIXED14_APPLE:
#endif
            bytesPerChannel = sizeof( cl_ushort );
            break;

        case CL_SIGNED_INT32:
        case CL_UNSIGNED_INT32:
            bytesPerChannel = sizeof( cl_int );
            break;

        case CL_FLOAT:
            bytesPerChannel = sizeof( cl_float );
            break;

        default:
            return 0;
    }

    return get_format_channel_count( format ) * bytesPerChannel;
}
|
||||
|
||||
// Finds a supported image format whose channel data type is 8 bits wide.
//   channelCount - required number of channels, or 0 to accept any count.
//   outFormat    - receives the first matching format.
// Returns 0 on success, the OpenCL error code if the format query fails, or
// -1 if no matching format is among the (up to 128) formats examined.
int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
{
    enum { kMaxFormats = 128 };
    cl_image_format formatList[ kMaxFormats ];
    unsigned int outFormatCount = 0, i;
    int error;


    /* Query the formats supported for this object type and these flags */
    if ((error = clGetSupportedImageFormats( context, flags, objType, kMaxFormats, formatList, &outFormatCount )))
        return error;

    /* The reported count is the total number of supported formats and may be
       larger than the number of entries actually written to formatList */
    if( outFormatCount > kMaxFormats )
        outFormatCount = kMaxFormats;


    /* Look for one that is an 8-bit format */
    for( i = 0; i < outFormatCount; i++ )
    {
        if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 ||
            formatList[ i ].image_channel_data_type == CL_UNORM_INT8 ||
            formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 ||
            formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 )
        {
            if ( !channelCount || ( get_format_channel_count( &formatList[ i ] ) == channelCount ) )
            {
                *outFormat = formatList[ i ];
                return 0;
            }
        }
    }

    return -1;
}
|
||||
|
||||
// Finds a supported image format whose channel data type is CL_FLOAT,
// CL_SIGNED_INT32, CL_UNSIGNED_INT32 or CL_UNORM_INT_101010.
//   channelCount - required number of channels, or 0 to accept any count.
//   outFormat    - receives the first matching format.
// Returns 0 on success, the OpenCL error code if the format query fails, or
// -1 if no matching format is among the (up to 128) formats examined.
int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
{
    enum { kMaxFormats = 128 };
    cl_image_format formatList[ kMaxFormats ];
    unsigned int outFormatCount = 0, i;
    int error;


    /* Query the formats supported for this object type and these flags */
    if ((error = clGetSupportedImageFormats( context, flags, objType, kMaxFormats, formatList, &outFormatCount )))
        return error;

    /* The reported count is the total number of supported formats and may be
       larger than the number of entries actually written to formatList */
    if( outFormatCount > kMaxFormats )
        outFormatCount = kMaxFormats;

    /* Look for one that is a 32-bit format */
    for( i = 0; i < outFormatCount; i++ )
    {
        if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ||
            formatList[ i ].image_channel_data_type == CL_FLOAT ||
            formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 ||
            formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 )
        {
            if ( !channelCount || ( get_format_channel_count( &formatList[ i ] ) == channelCount ) )
            {
                *outFormat = formatList[ i ];
                return 0;
            }
        }
    }

    return -1;
}
|
||||
|
||||
37
test_common/harness/imageHelpers.h
Normal file
37
test_common/harness/imageHelpers.h
Normal file
@@ -0,0 +1,37 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _imageHelpers_h
|
||||
#define _imageHelpers_h
|
||||
|
||||
#include "errorHelpers.h"
|
||||
|
||||
|
||||
extern size_t get_format_type_size( const cl_image_format *format );
|
||||
extern size_t get_channel_data_type_size( cl_channel_type channelType );
|
||||
extern size_t get_format_channel_count( const cl_image_format *format );
|
||||
extern size_t get_channel_order_channel_count( cl_channel_order order );
|
||||
extern int is_format_signed( const cl_image_format *format );
|
||||
extern size_t get_pixel_size( cl_image_format *format );
|
||||
|
||||
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
|
||||
extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
||||
|
||||
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
|
||||
extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
||||
|
||||
|
||||
#endif // _imageHelpers_h
|
||||
|
||||
684
test_common/harness/kernelHelpers.c
Normal file
684
test_common/harness/kernelHelpers.c
Normal file
@@ -0,0 +1,684 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "kernelHelpers.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "imageHelpers.h"
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
#include "mingw_compat.h"
|
||||
#endif
|
||||
|
||||
/*
 * Creates a program from source, builds it for every device of the program
 * and creates one kernel from it.
 *
 *   outProgram     - receives the program object (set even when the build
 *                    fails; it is not released here).
 *   outKernel      - receives the kernel named kernelName.
 *   numKernelLines - number of strings in kernelProgram.
 *
 * Returns 0 on success.  On failure returns the OpenCL error code, or -1 for
 * failures that have no OpenCL error code (build failure, no devices,
 * allocation failure, NULL object returned together with CL_SUCCESS).
 * The source and the build log are written to the error log on build failure.
 */
int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName )
{
    int error = CL_SUCCESS;

    /* Create the program object from source */
    *outProgram = clCreateProgramWithSource( context, numKernelLines, kernelProgram, NULL, &error );
    if( *outProgram == NULL || error != CL_SUCCESS)
    {
        print_error( error, "clCreateProgramWithSource failed" );
        /* Never report success when no program object came back */
        return ( error != CL_SUCCESS ) ? error : -1;
    }

    /* Compile the program */
    int buildProgramFailed = 0;
    int printedSource = 0;
    error = clBuildProgram( *outProgram, 0, NULL, NULL, NULL, NULL );
    if (error != CL_SUCCESS)
    {
        unsigned int i;
        print_error(error, "clBuildProgram failed");
        buildProgramFailed = 1;
        printedSource = 1;
        log_error( "Original source is: ------------\n" );
        for( i = 0; i < numKernelLines; i++ )
            log_error( "%s", kernelProgram[ i ] );
    }

    // Verify the build status on all devices
    cl_uint deviceCount = 0;
    error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
    if (error != CL_SUCCESS) {
        print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
        return error;
    }

    if (deviceCount == 0) {
        log_error("No devices found for program.\n");
        return -1;
    }

    cl_device_id *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
    if( NULL == devices )
        return -1;
    memset( devices, 0, deviceCount * sizeof( cl_device_id ));
    error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
    if (error != CL_SUCCESS) {
        print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
        free( devices );
        return error;
    }

    cl_uint z;
    for( z = 0; z < deviceCount; z++ )
    {
        /* A missing device name is reported but is not fatal */
        char deviceName[4096] = "";
        error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
        if (error != CL_SUCCESS || deviceName[0] == '\0') {
            log_error("Device \"%d\" failed to return a name\n", (int) z);
            print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
        }

        cl_build_status buildStatus;
        error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
        if (error != CL_SUCCESS) {
            print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
            free( devices );
            return error;
        }

        if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
            if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");

            /* 64 bytes is ample for the longest text written below */
            char statusString[64] = "";
            if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
                sprintf(statusString, "CL_BUILD_SUCCESS");
            else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
                sprintf(statusString, "CL_BUILD_NONE");
            else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
                sprintf(statusString, "CL_BUILD_ERROR");
            else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
                sprintf(statusString, "CL_BUILD_IN_PROGRESS");
            else
                sprintf(statusString, "UNKNOWN (%d)", buildStatus);

            if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);

            /* Ask for the log length first so that a log of any size can be
               retrieved instead of failing on a fixed-size buffer */
            size_t logSize = 0;
            char *buildLog = NULL;
            error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize );
            if( error == CL_SUCCESS && logSize > 0 )
            {
                buildLog = (char *) malloc( logSize + 1 );
                if( NULL == buildLog )
                {
                    log_error( "Unable to allocate memory for the build log\n" );
                    free( devices );
                    return -1;
                }
                /* Extra zeroed byte keeps the text terminated in any case */
                memset( buildLog, 0, logSize + 1 );
                error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, logSize, buildLog, NULL );
            }

            if (error != CL_SUCCESS || buildLog == NULL || buildLog[0]=='\0'){
                log_error("Device %d (%s) failed to return a build log\n", (int) z, deviceName);
                free( buildLog );
                free( devices );
                if (error) {
                    print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
                    return error;
                }
                log_error("clGetProgramBuildInfo returned an empty log.\n");
                return -1;
            }

            // In this case we've already printed out the code above.
            if (!printedSource)
            {
                unsigned int i;
                log_error( "Original source is: ------------\n" );
                for( i = 0; i < numKernelLines; i++ )
                    log_error( "%s", kernelProgram[ i ] );
                printedSource = 1;
            }
            log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
            log_error( "%s\n", buildLog );
            log_error( "\n----------\n" );
            free( buildLog );
            free( devices );
            return -1;
        }
    }

    free( devices );

    /* And create a kernel from it */
    *outKernel = clCreateKernel( *outProgram, kernelName, &error );
    if( *outKernel == NULL || error != CL_SUCCESS)
    {
        print_error( error, "Unable to create kernel" );
        /* Never report success when no kernel object came back */
        return ( error != CL_SUCCESS ) ? error : -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Parses the numeric version out of a device's CL_DEVICE_VERSION string.
 *
 *   major, minor - receive the two numbers of "OpenCL <major>.<minor> ...".
 *
 * Returns 0 on success, the OpenCL error code if the query fails, -1 if the
 * string does not start with "OpenCL " followed by a number, and 1 if the
 * major number is not followed by a decimal point.
 */
int get_device_version( cl_device_id id, size_t* major, size_t* minor)
{
    static const char versionPrefix[] = "OpenCL ";
    const size_t prefixLength = sizeof( versionPrefix ) - 1;
    cl_char buffer[ 4098 ] = { 0 };

    // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
    cl_int error = clGetDeviceInfo( id, CL_DEVICE_VERSION, sizeof( buffer ), buffer, NULL );
    test_error( error, "Unable to get device version string" );

    // Without this check a short string would let the parser walk past the
    // terminator into the unused part of the buffer.
    if( strncmp( (const char *)buffer, versionPrefix, prefixLength ) != 0 )
    {
        log_error( "ERROR: Device version string \"%s\" does not start with \"OpenCL \"!\n", (const char *)buffer );
        return -1;
    }

    char *p1 = (char *)buffer + prefixLength;
    char *p2;
    while( *p1 == ' ' )
        p1++;
    *major = strtol( p1, &p2, 10 );
    if( p2 == p1 )
    {
        // strtol consumed nothing, so there is no major number at all
        log_error( "ERROR: Device version string \"%s\" has no version number!\n", (const char *)buffer );
        return -1;
    }
    error = *p2 != '.';
    test_error(error, "ERROR: Version number must contain a decimal point!");
    *minor = strtol( ++p2, NULL, 10 );
    return error;
}
|
||||
|
||||
/*
 * Computes the largest work-group size usable on every device of a context
 * for the given kernel.
 *
 *   outMaxSize - receives the minimum, over all devices, of
 *                CL_DEVICE_MAX_WORK_GROUP_SIZE and CL_KERNEL_WORK_GROUP_SIZE.
 *   outLimits  - optional (may be NULL); receives 3 entries holding the
 *                per-dimension minimum of CL_DEVICE_MAX_WORK_ITEM_SIZES.
 *
 * Returns 0 on success, the OpenCL error code of a failing query, or -1 on
 * allocation failure.
 */
int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
{
    cl_device_id *devices = NULL;
    size_t *itemSizes = NULL;
    size_t size, maxCommonSize = 0;
    size_t outSize = 0;
    size_t dimCount, d;
    int numDevices, i, error;
    cl_uint numDims;

    /* Nothing is allocated yet, so an early return is safe here */
    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
    test_error( error, "Unable to obtain list of devices size for context" );

    devices = (cl_device_id *)malloc(outSize);
    if( NULL == devices )
    {
        log_error( "Unable to allocate memory for the context device list\n" );
        return -1;
    }

    /* From here on every failure leaves through 'cleanup' so that the
       allocations are released */
    error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to obtain list of devices for context" );
        goto cleanup;
    }

    numDevices = (int)( outSize / sizeof( cl_device_id ) );

    for( i = 0; i < numDevices; i++ )
    {
        error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "Unable to obtain max work group size for device" );
            goto cleanup;
        }
        if( size < maxCommonSize || maxCommonSize == 0)
            maxCommonSize = size;

        error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
        if( error != CL_SUCCESS )
        {
            print_error( error, "Unable to obtain max work group size for device and kernel combo" );
            goto cleanup;
        }
        if( size < maxCommonSize || maxCommonSize == 0)
            maxCommonSize = size;

        error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
        if( error != CL_SUCCESS )
        {
            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
            goto cleanup;
        }

        /* The device writes numDims entries, which may be more than 3, so
           the receiving buffer is sized from numDims (and never below the 3
           entries copied to outLimits) */
        dimCount = ( numDims < 3 ) ? 3 : (size_t)numDims;
        itemSizes = (size_t *)malloc( dimCount * sizeof( size_t ) );
        if( NULL == itemSizes )
        {
            log_error( "Unable to allocate memory for the work item sizes\n" );
            error = -1;
            goto cleanup;
        }
        for( d = 0; d < dimCount; d++ )
            itemSizes[d] = 1;

        error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), itemSizes, NULL);
        if( error != CL_SUCCESS )
        {
            print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
            goto cleanup;
        }

        if (outLimits != NULL)
        {
            /* First device seeds the limits, later ones can only lower them */
            for( d = 0; d < 3; d++ )
            {
                if( i == 0 || itemSizes[d] < outLimits[d] )
                    outLimits[d] = itemSizes[d];
            }
        }

        free( itemSizes );
        itemSizes = NULL;
    }

    *outMaxSize = maxCommonSize;
    error = 0;

cleanup:
    free( itemSizes );
    free( devices );
    return error;
}
|
||||
|
||||
|
||||
/*
 * Picks a 1D work-group size: the largest value that is no bigger than the
 * common maximum reported by get_max_allowed_work_group_size, no bigger than
 * the first per-dimension limit, and divides globalThreadSize evenly.
 * Returns 0 on success or the error from get_max_allowed_work_group_size.
 */
int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
                                   size_t globalThreadSize, size_t *outMaxSize )
{
    size_t dimLimits[3];
    const int status = get_max_allowed_work_group_size( context, kernel, outMaxSize, dimLimits );
    if( status != 0 )
        return status;

    /* Walk downwards from the common maximum; a candidate of 1 always
       divides the global size, which ends the search */
    size_t candidate = *outMaxSize;
    while( candidate > dimLimits[0] || ( globalThreadSize % candidate ) != 0 )
        candidate--;

    *outMaxSize = candidate;
    return 0;
}
|
||||
|
||||
/*
 * Picks a 2D work-group shape.  Each output size divides the matching global
 * size, respects the per-dimension limit, and is chosen greedily (dimension
 * 0 first) from what is left of the common maximum work-group size.
 * Returns 0 on success or the error from get_max_allowed_work_group_size.
 */
int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
                                        size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t dimLimits[3];
    size_t groupBudget;
    const int status = get_max_allowed_work_group_size( context, kernel, &groupBudget, dimLimits );
    if( status != 0 )
        return status;

    /* Simple case: the whole global range fits in a single work-group */
    const int fitsBudget = ( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= groupBudget );
    const int fitsLimits = ( globalThreadSizes[ 0 ] <= dimLimits[0] && globalThreadSizes[ 1 ] <= dimLimits[1] );
    if( fitsBudget && fitsLimits )
    {
        outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
        outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
        return 0;
    }

    size_t available = groupBudget;
    int dim, k;
    for( dim = 0; dim < 2; dim++ )
    {
        /* Start from the smaller of the global size and the remaining budget */
        size_t candidate = ( globalThreadSizes[dim] > available ) ? available : globalThreadSizes[dim];

        /* Step down to a divisor of the global size that honours the limit */
        while( ( globalThreadSizes[dim] % candidate ) != 0 || candidate > dimLimits[dim] )
            candidate--;
        outMaxSizes[dim] = candidate;

        /* Budget left for the next dimension */
        available = groupBudget;
        for( k = 0; k <= dim; k++ )
            available /= outMaxSizes[k];
    }

    return 0;
}
|
||||
|
||||
/*
 * Picks a 3D work-group shape.  Each output size divides the matching global
 * size, respects the per-dimension limit, and is chosen greedily (dimension
 * 0 first) from what is left of the common maximum work-group size.
 * Returns 0 on success or the error from get_max_allowed_work_group_size.
 */
int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
                                        size_t *globalThreadSizes, size_t *outMaxSizes )
{
    size_t dimLimits[3];
    size_t groupBudget;
    const int status = get_max_allowed_work_group_size( context, kernel, &groupBudget, dimLimits );
    if( status != 0 )
        return status;

    /* Simple case: the whole global range fits in a single work-group */
    const int fitsBudget = ( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= groupBudget );
    const int fitsLimits = ( globalThreadSizes[ 0 ] <= dimLimits[0] &&
                             globalThreadSizes[ 1 ] <= dimLimits[1] &&
                             globalThreadSizes[ 2 ] <= dimLimits[2] );
    if( fitsBudget && fitsLimits )
    {
        outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
        outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
        outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
        return 0;
    }

    size_t available = groupBudget;
    int dim, k;
    for( dim = 0; dim < 3; dim++ )
    {
        /* Start from the smaller of the global size and the remaining budget */
        size_t candidate = ( globalThreadSizes[dim] > available ) ? available : globalThreadSizes[dim];

        /* Step down to a divisor of the global size that honours the limit */
        while( ( globalThreadSizes[dim] % candidate ) != 0 || candidate > dimLimits[dim] )
            candidate--;
        outMaxSizes[dim] = candidate;

        /* Budget left for the next dimension */
        available = groupBudget;
        for( k = 0; k <= dim; k++ )
            available /= outMaxSizes[k];
    }

    return 0;
}
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
int is_extension_available( cl_device_id device, const char *extensionName )
|
||||
{
|
||||
char *extString;
|
||||
size_t size = 0;
|
||||
int err;
|
||||
int result = 0;
|
||||
|
||||
if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, 0, NULL, &size) ))
|
||||
{
|
||||
log_error( "Error: failed to determine size of device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( 0 == size )
|
||||
return 0;
|
||||
|
||||
extString = (char*) malloc( size );
|
||||
if( NULL == extString )
|
||||
{
|
||||
log_error( "Error: unable to allocate %ld byte buffer for extension string at %s:%d (err = %d)\n", size, __FILE__, __LINE__, err );
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(( err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, size, extString, NULL) ))
|
||||
{
|
||||
log_error( "Error: failed to obtain device extensions string at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
|
||||
free( extString );
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( strstr( extString, extensionName ) )
|
||||
result = 1;
|
||||
|
||||
free( extString );
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Helper to determine if a device supports an image format */
|
||||
int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
|
||||
{
|
||||
cl_image_format *list;
|
||||
cl_uint count = 0;
|
||||
cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count );
|
||||
if( count == 0 )
|
||||
return 0;
|
||||
|
||||
list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
|
||||
if( NULL == list )
|
||||
{
|
||||
log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__, err );
|
||||
return 0;
|
||||
}
|
||||
|
||||
cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
|
||||
if( error )
|
||||
{
|
||||
log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
|
||||
free( list );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// iterate looking for a match.
|
||||
cl_uint i;
|
||||
for( i = 0; i < count; i++ )
|
||||
{
|
||||
if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
|
||||
fmt->image_channel_order == list[ i ].image_channel_order )
|
||||
break;
|
||||
}
|
||||
|
||||
free( list );
|
||||
return ( i < count ) ? true : false;
|
||||
}
|
||||
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt )
|
||||
{
|
||||
size_t chanCount;
|
||||
switch( fmt->image_channel_order )
|
||||
{
|
||||
case CL_R:
|
||||
case CL_A:
|
||||
case CL_Rx:
|
||||
case CL_INTENSITY:
|
||||
case CL_LUMINANCE:
|
||||
chanCount = 1;
|
||||
break;
|
||||
case CL_RG:
|
||||
case CL_RA:
|
||||
case CL_RGx:
|
||||
chanCount = 2;
|
||||
break;
|
||||
case CL_RGB:
|
||||
case CL_RGBx:
|
||||
chanCount = 3;
|
||||
break;
|
||||
case CL_RGBA:
|
||||
case CL_ARGB:
|
||||
case CL_BGRA:
|
||||
#ifdef CL_1RGB_APPLE
|
||||
case CL_1RGB_APPLE:
|
||||
#endif
|
||||
#ifdef CL_BGR1_APPLE
|
||||
case CL_BGR1_APPLE:
|
||||
#endif
|
||||
chanCount = 4;
|
||||
break;
|
||||
default:
|
||||
log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
|
||||
switch( fmt->image_channel_data_type )
|
||||
{
|
||||
case CL_UNORM_SHORT_565:
|
||||
case CL_UNORM_SHORT_555:
|
||||
return 2;
|
||||
|
||||
case CL_UNORM_INT_101010:
|
||||
return 4;
|
||||
|
||||
case CL_SNORM_INT8:
|
||||
case CL_UNORM_INT8:
|
||||
case CL_SIGNED_INT8:
|
||||
case CL_UNSIGNED_INT8:
|
||||
return chanCount;
|
||||
|
||||
case CL_SNORM_INT16:
|
||||
case CL_UNORM_INT16:
|
||||
case CL_HALF_FLOAT:
|
||||
case CL_SIGNED_INT16:
|
||||
case CL_UNSIGNED_INT16:
|
||||
#ifdef CL_SFIXED14_APPLE
|
||||
case CL_SFIXED14_APPLE:
|
||||
#endif
|
||||
return chanCount * 2;
|
||||
|
||||
case CL_SIGNED_INT32:
|
||||
case CL_UNSIGNED_INT32:
|
||||
case CL_FLOAT:
|
||||
return chanCount * 4;
|
||||
|
||||
default:
|
||||
log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Returns 0 when checkForImageSupport reports success for the device.
 * Any other result is logged as missing image support and reported as
 * CL_IMAGE_FORMAT_NOT_SUPPORTED.
 */
int verifyImageSupport( cl_device_id device )
{
    const int supportStatus = checkForImageSupport( device );
    if( supportStatus == 0 )
        return 0;

    log_error( "ERROR: Device does not supported images as required by this test!\n" );
    return CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
|
||||
|
||||
/* Checks that the given device supports images, without logging a
 * "not supported" message. Returns 0 when CL_DEVICE_IMAGE_SUPPORT is non-zero
 * and CL_IMAGE_FORMAT_NOT_SUPPORTED when it is zero.
 * NOTE(review): test_error is defined elsewhere; it presumably logs and
 * returns the error code when the query fails - confirm. */
int checkForImageSupport( cl_device_id device )
{
    cl_uint imagesSupported;
    int error;

    /* Ask the device whether images are supported at all */
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( imagesSupported ), &imagesSupported, NULL );
    test_error( error, "Unable to query device for image support" );

    return ( imagesSupported != 0 ) ? 0 : CL_IMAGE_FORMAT_NOT_SUPPORTED;
}
|
||||
|
||||
/* Checks that the given device supports 3D images.
 * Returns CL_IMAGE_FORMAT_NOT_SUPPORTED when the device reports no image
 * support, or when it reports the "EMBEDDED_PROFILE" profile and all three
 * CL_DEVICE_IMAGE3D_MAX_* limits are zero. Returns 0 otherwise.
 * NOTE(review): test_error is defined elsewhere; it presumably returns the
 * error code when a query fails - confirm. */
int checkFor3DImageSupport( cl_device_id device )
{
    cl_uint imagesSupported;
    int error;

    /* Images must be supported at all before 3D images can be */
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( imagesSupported ), &imagesSupported, NULL );
    test_error( error, "Unable to query device for image support" );
    if( imagesSupported == 0 )
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    char profile[128];
    error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
    test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );

    /* Only embedded-profile devices get the extra size check */
    if( 0 != strcmp( profile, "EMBEDDED_PROFILE" ) )
        return 0;

    size_t maxWidth = -1L;
    size_t maxHeight = -1L;
    size_t maxDepth = -1L;

    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(maxWidth), &maxWidth, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(maxHeight), &maxHeight, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(maxDepth), &maxDepth, NULL );
    test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );

    /* All three limits being zero means no 3D image support */
    if( ( maxHeight | maxWidth | maxDepth ) == 0 )
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    /* So our support is good */
    return 0;
}
|
||||
|
||||
/* Helper for aligned memory allocation.
 * Returns a block of at least `size` bytes aligned to `alignment`, or NULL on
 * failure. Release the block with align_free(). */
void * align_malloc(size_t size, size_t alignment)
{
#if defined(_WIN32) && defined(_MSC_VER)
    return _aligned_malloc(size, alignment);
#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
    void * ptr = NULL;
    /* posix_memalign() fails with EINVAL when the alignment is smaller than
     * sizeof(void *); round small requests up so they still succeed. */
    if (alignment < sizeof(void *))
        alignment = sizeof(void *);
    if (0 == posix_memalign(&ptr, alignment, size))
        return ptr;
    return NULL;
#elif defined(__MINGW32__)
    return __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif
}
|
||||
|
||||
/* Releases a block obtained from align_malloc(), using the deallocator that
 * matches the allocator selected for the current platform. */
void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
    /* A void function must not `return` an expression in C */
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
|
||||
|
||||
/* Helper to obtain the min alignment for a given context, i.e. the largest
 * CL_DEVICE_MEM_BASE_ADDR_ALIGN (converted from bits to bytes) over all
 * devices attached to the context.
 * The result is cached in a function-local static after the first non-zero
 * answer, so later calls return that value whatever context is passed.
 * Returns 0 on failure. */
size_t get_min_alignment(cl_context context)
{
    static cl_uint align_size = 0;

    if( 0 == align_size )
    {
        cl_device_id * devices;
        size_t devices_size = 0;
        cl_uint result = 0;
        cl_int error;
        int i;

        /* First call: size of the device list only */
        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  0,
                                  NULL,
                                  &devices_size);
        test_error_ret(error, "clGetContextInfo failed", 0);

        devices = (cl_device_id*)malloc(devices_size);
        if (devices == NULL) {
            /* `error` is CL_SUCCESS here; report a meaningful code instead */
            print_error( CL_OUT_OF_HOST_MEMORY, "malloc failed" );
            return 0;
        }

        error = clGetContextInfo (context,
                                  CL_CONTEXT_DEVICES,
                                  devices_size,
                                  (void*)devices,
                                  NULL);
        if (error != CL_SUCCESS) {
            /* Release the list before bailing out so it does not leak */
            free(devices);
            print_error( error, "clGetContextInfo failed" );
            return 0;
        }

        for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++)
        {
            cl_uint alignment = 0;

            error = clGetDeviceInfo (devices[i],
                                     CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                                     sizeof(cl_uint),
                                     (void*)&alignment,
                                     NULL);

            if (error == CL_SUCCESS)
            {
                alignment >>= 3;    // convert bits to bytes
                result = (alignment > result) ? alignment : result;
            }
            else
                print_error( error, "clGetDeviceInfo failed" );
        }

        align_size = result;
        free(devices);
    }

    return align_size;
}
|
||||
|
||||
/* Helper to obtain the default rounding mode for single precision
 * computation. Returns CL_FP_ROUND_TO_NEAREST when the device reports it,
 * CL_FP_ROUND_TO_ZERO when only that mode is reported by an
 * "EMBEDDED_PROFILE" device, and 0 on error.
 * NOTE(review): test_error_ret is defined elsewhere; it is assumed to log and
 * return its third argument only when its first argument is non-zero. */
cl_device_fp_config get_default_rounding_mode( cl_device_id device )
{
    char profileStr[128] = "";
    cl_device_fp_config single = 0;
    int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
    if( error )
        test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );

    if( single & CL_FP_ROUND_TO_NEAREST )
        return CL_FP_ROUND_TO_NEAREST;

    if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
        test_error_ret( -1, "FAILURE: device must support either CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST", 0 );

    // Make sure we are an embedded device before allowing a pass
    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
        test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );

    // `error` is 0 here, so pass an explicit failure code (as done above)
    // to make sure the failure path is really taken.
    if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
        test_error_ret( -1, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );

    return CL_FP_ROUND_TO_ZERO;
}
|
||||
|
||||
/* Checks that a given queue property is supported on the specified device.
 * Returns 1 when any bit of `prop` is present in CL_DEVICE_QUEUE_PROPERTIES,
 * 0 when none is (0 is also the value handed to test_error_ret for a failed
 * query). */
int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
{
    cl_command_queue_properties supported;
    cl_int error;

    error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_PROPERTIES, sizeof( supported ), &supported, NULL );
    test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );

    if( supported & prop )
        return 1;

    return 0;
}
|
||||
|
||||
/* Prints the standard device header (name, vendor, version and OpenCL C
 * version) for the given device through log_info.
 * Returns CL_SUCCESS after printing.
 * NOTE(review): test_error is defined elsewhere; it presumably returns the
 * error code as soon as one of the queries fails - confirm. */
int printDeviceHeader( cl_device_id device )
{
    char name[ 512 ];
    char vendor[ 512 ];
    char version[ 512 ];
    char cVersion[ 512 ];
    const char *cVersionLabel;
    const char *cVersionText;
    int error;

    error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( name ), name, NULL );
    test_error( error, "Unable to get CL_DEVICE_NAME for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( vendor ), vendor, NULL );
    test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( version ), version, NULL );
    test_error( error, "Unable to get CL_DEVICE_VERSION for device" );

    error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cVersion ), cVersion, NULL );
    test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );

    /* The C version part is only printed when its query succeeded */
    if( error == CL_SUCCESS )
    {
        cVersionLabel = ", CL C Version = ";
        cVersionText = cVersion;
    }
    else
    {
        cVersionLabel = "";
        cVersionText = "";
    }

    log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
             name, vendor, version, cVersionLabel, cVersionText );

    return CL_SUCCESS;
}
|
||||
131
test_common/harness/kernelHelpers.h
Normal file
131
test_common/harness/kernelHelpers.h
Normal file
@@ -0,0 +1,131 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _kernelHelpers_h
|
||||
#define _kernelHelpers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if defined (__MINGW32__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
/*
|
||||
* The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
|
||||
*
|
||||
* const char *source = {
|
||||
* INIT_OPENCL_DEBUG_INFO
|
||||
* "__kernel void foo( int x )\n"
|
||||
* "{\n"
|
||||
* " ...\n"
|
||||
* "}\n"
|
||||
* };
|
||||
*/
|
||||
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
|
||||
#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
|
||||
#ifndef STRINGIFY_VALUE
|
||||
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
|
||||
#endif
|
||||
#ifndef STRINGIFY
|
||||
#define STRINGIFY(_x) #_x
|
||||
#endif
|
||||
|
||||
/* Helper that creates a single program and kernel from a single-kernel program source */
|
||||
extern int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName );
|
||||
|
||||
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
|
||||
extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
|
||||
|
||||
/* Helper to obtain the biggest fit 2D work group size for all the devices in a given group and for the given 2D global thread size */
|
||||
extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
||||
|
||||
/* Helper to obtain the biggest fit 3D work group size for all the devices in a given group and for the given 3D global thread size */
|
||||
extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
||||
|
||||
/* Helper to get major/minor number for a device */
|
||||
extern int get_device_version( cl_device_id id, size_t* major, size_t* minor);
|
||||
|
||||
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
|
||||
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
|
||||
|
||||
/* Helper to determine if an extension is supported by a device */
|
||||
extern int is_extension_available( cl_device_id device, const char *extensionName );
|
||||
|
||||
/* Helper to determine if a device supports an image format */
|
||||
extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
|
||||
|
||||
/* Helper to get pixel size for a pixel format */
|
||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||
|
||||
/* Verify the given device supports images. 0 means you're good to go, otherwise an error */
|
||||
extern int verifyImageSupport( cl_device_id device );
|
||||
|
||||
/* Checks that the given device supports images. Same as verify, but doesn't print an error */
|
||||
extern int checkForImageSupport( cl_device_id device );
|
||||
extern int checkFor3DImageSupport( cl_device_id device );
|
||||
|
||||
/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
|
||||
extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
|
||||
|
||||
/* Helper for aligned memory allocation */
|
||||
void * align_malloc(size_t size, size_t alignment);
|
||||
void align_free(void *);
|
||||
|
||||
/* Helper to obtain the min alignment for a given context, i.e the max of all min alignments for devices attached to the context*/
|
||||
size_t get_min_alignment(cl_context context);
|
||||
|
||||
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
|
||||
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
|
||||
|
||||
#define PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) \
|
||||
if( checkForImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
#define PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device ) \
|
||||
if( checkFor3DImageSupport( device ) ) \
|
||||
{ \
|
||||
log_info( "\n\tNote: device does not support 3D images. Skipping test...\n" ); \
|
||||
return 0; \
|
||||
}
|
||||
|
||||
/* Prints out the standard device header for all tests given the device to print for */
|
||||
extern int printDeviceHeader( cl_device_id device );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // _kernelHelpers_h
|
||||
59
test_common/harness/mingw_compat.c
Normal file
59
test_common/harness/mingw_compat.c
Normal file
@@ -0,0 +1,59 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#if defined(__MINGW32__)
|
||||
|
||||
#include "mingw_compat.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
//This function is unavailable on various mingw compilers,
//especially 64 bit so implementing it here
const char *basename_dot=".";

/* Returns a pointer to the last component of a Windows absolute path.
 *  - NULL yields ".".
 *  - A path without a drive letter (second character is not ':') is
 *    returned unchanged.
 *  - One trailing '\\' or '/' is removed by writing a terminator into `path`
 *    (the argument is modified in place).
 *  - Otherwise the result points just past the last separator inside `path`,
 *    or at `path` itself when no separator remains. */
char*
basename(char *path)
{
    size_t len;
    char *p, *b;

    // Check for NULL before touching the string
    if (path == NULL) {
        return (char*)basename_dot;
    }

    len = strlen(path);

    // Not absolute path on windows
    if (len < 2 || path[1] != ':') {
        return path;
    }

    // Trim trailing path separator
    if (path[len - 1] == '\\' ||
        path[len - 1] == '/' ) {
        len--;
        path[len] = '\0';
    }

    // Remember the position just past the last separator seen
    b = path;
    for (p = path; *p != '\0'; p++) {
        if (*p == '\\' || *p == '/') {
            b = p + 1;
        }
    }

    return b;
}
|
||||
|
||||
#endif
|
||||
31
test_common/harness/mingw_compat.h
Normal file
31
test_common/harness/mingw_compat.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef MINGW_COMPAT_H
|
||||
#define MINGW_COMPAT_H
|
||||
|
||||
#if defined(__MINGW32__)
|
||||
char *basename(char *path);
|
||||
#include <malloc.h>
|
||||
|
||||
#if defined(__MINGW64__)
|
||||
//mingw-w64 does not have __mingw_aligned_malloc; it provides _aligned_malloc instead
|
||||
#define __mingw_aligned_malloc _aligned_malloc
|
||||
#define __mingw_aligned_free _aligned_free
|
||||
#include <stddef.h>
|
||||
#endif //(__MINGW64__)
|
||||
|
||||
#endif //(__MINGW32__)
|
||||
#endif // MINGW_COMPAT_H
|
||||
749
test_common/harness/msvc9.c
Normal file
749
test_common/harness/msvc9.c
Normal file
@@ -0,0 +1,749 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#if defined(_WIN32) && defined (_MSC_VER)
|
||||
|
||||
#include "compat.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#include <assert.h>
|
||||
#include <CL/cl_platform.h>
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// rint, rintf
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Returns x with its sign bit replaced by the sign bit of y, by editing the
 * 32-bit representation directly. */
float copysignf( float x, float y )
{
    union{ cl_uint u; float f; } mag, sgn;

    mag.f = x;
    sgn.f = y;

    mag.u &= 0x7fffffffU;               /* drop the sign of x */
    mag.u |= sgn.u & 0x80000000U;       /* take the sign of y */

    return mag.f;
}
|
||||
|
||||
/* Returns x with its sign bit replaced by the sign bit of y, by editing the
 * 64-bit representation directly. */
double copysign( double x, double y )
{
    union{ cl_ulong u; double f; } mag, sgn;

    mag.f = x;
    sgn.f = y;

    mag.u &= 0x7fffffffffffffffULL;             /* drop the sign of x */
    mag.u |= sgn.u & 0x8000000000000000ULL;     /* take the sign of y */

    return mag.f;
}
|
||||
|
||||
/* Returns x with its sign replaced by the sign of y.
 * The bit-level path assumes the x87 80-bit extended layout (64-bit mantissa
 * followed by a 16-bit sign+exponent word). When long double has a different
 * mantissa width (e.g. it is the same as double), that layout does not apply
 * and the sign word would lie outside the value, so the operation is
 * delegated to copysign() on double instead. */
long double copysignl( long double x, long double y )
{
#if defined( LDBL_MANT_DIG ) && ( LDBL_MANT_DIG == 64 )
    union
    {
        long double f;
        struct{ cl_ulong m; cl_ushort sexp; }u;
    }ux, uy;

    ux.f = x;
    uy.f = y;

    /* keep exponent bits of x, take the sign bit of y */
    ux.u.sexp = (ux.u.sexp & 0x7fff) | (uy.u.sexp & 0x8000);

    return ux.f;
#else
    return (long double) copysign( (double) x, (double) y );
#endif
}
|
||||
|
||||
/* Rounds x to an integral value by adding and then subtracting 2^23 carrying
 * the sign of x, which lets the floating-point addition do the rounding.
 * Values whose magnitude is not below 2^23 (and NaN) are returned unchanged;
 * the sign of x is preserved on the result. */
float rintf(float x)
{
    const float twoTo23 = 8388608.0f;   /* 0x1.0p23f */
    float bias;
    float r;

    if( !( fabsf(x) < twoTo23 ) )
        return x;

    bias = copysignf( twoTo23, x );
    /* two separate statements so each step is stored as a float */
    r = x + bias;
    r -= bias;

    return copysignf( r, x );
}
|
||||
|
||||
/* Rounds x to an integral value by adding and then subtracting 2^52 carrying
 * the sign of x, which lets the floating-point addition do the rounding.
 * Values whose magnitude is not below 2^52 (and NaN) are returned unchanged;
 * the sign of x is preserved on the result. */
double rint(double x)
{
    const double twoTo52 = 4503599627370496.0;   /* 0x1.0p52 */
    double bias;
    double r;

    if( !( fabs(x) < twoTo52 ) )
        return x;

    bias = copysign( twoTo52, x );
    /* two separate statements so each step is stored as a double */
    r = x + bias;
    r -= bias;

    return copysign( r, x );
}
|
||||
|
||||
/* Rounds x to an integral value.
 * The add/subtract-2^63 trick is only valid when long double has a 64-bit
 * mantissa (x87 extended precision); with any other mantissa width the
 * constant is wrong, so the work is delegated to rint() on double.
 * The magnitude is taken with fabsl() so no precision is lost before the
 * range test. */
long double rintl(long double x)
{
#if defined( LDBL_MANT_DIG ) && ( LDBL_MANT_DIG == 64 )
    long double absx = fabsl(x);

    if( absx < 9223372036854775808.0L /* 0x1.0p63L */ )
    {
        long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
        long double rounded = x + magic;
        rounded -= magic;
        x = copysignl( rounded, x );
    }

    return x;
#else
    return (long double) rint( (double) x );
#endif
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// ilogb, ilogbf, ilogbl
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
#ifndef FP_ILOGB0
|
||||
#define FP_ILOGB0 INT_MIN
|
||||
#endif
|
||||
|
||||
#ifndef FP_ILOGBNAN
|
||||
#define FP_ILOGBNAN INT_MIN
|
||||
#endif
|
||||
|
||||
/* Returns the unbiased binary exponent of x as an int.
 * Special cases: FP_ILOGB0 for +/-0, INT_MAX for +/-infinity, FP_ILOGBNAN
 * for NaN. Subnormals are normalised first so their true exponent
 * (down to -1074) is reported. */
int ilogb (double x)
{
    union{ double f; cl_ulong u;} u;
    u.f = x;

    cl_ulong absx = u.u & CL_LONG_MAX;
    /* One unsigned compare catches everything outside the normal range
       [0x0010000000000000, 0x7ff0000000000000): zero, subnormal, inf, NaN.
       0x0010000000000000 is the smallest normal double. */
    if( absx - 0x0010000000000000ULL >= 0x7ff0000000000000ULL - 0x0010000000000000ULL)
    {
        switch( absx )
        {
            case 0:
                return FP_ILOGB0;
            case 0x7ff0000000000000ULL:
                return INT_MAX;
            default:
                if( absx > 0x7ff0000000000000ULL )
                    return FP_ILOGBNAN;

                // subnormal: graft the bits onto 1.0 and subtract 1.0 so the
                // hardware normalises the value, then read its exponent
                u.u = absx | 0x3ff0000000000000ULL;
                u.f -= 1.0;
                return (int)(u.u >> 52) - (1023 + 1022);
        }
    }

    return (int)(absx >> 52) - 1023;
}
|
||||
|
||||
|
||||
/* Returns the unbiased binary exponent of x as an int.
 * Special cases: FP_ILOGB0 for +/-0, INT_MAX for +/-infinity, FP_ILOGBNAN
 * for NaN. Subnormals are normalised first so their true exponent
 * (down to -149) is reported. */
int ilogbf (float x)
{
    union{ float f; cl_uint u;} bits;
    bits.f = x;

    cl_uint absx = bits.u & 0x7fffffff;

    /* normal numbers: a single unsigned range test */
    if( absx - 0x00800000U < 0x7f800000U - 0x00800000U )
        return (absx >> 23) - 127;

    if( absx == 0 )
        return FP_ILOGB0;

    if( absx == 0x7f800000U )
        return INT_MAX;

    if( absx > 0x7f800000 )
        return FP_ILOGBNAN;

    // subnormal: graft the bits onto 1.0f and subtract 1.0f so the value is
    // normalised, then read its exponent
    bits.u = absx | 0x3f800000U;
    bits.f -= 1.0f;
    return (bits.u >> 23) - (127 + 126);
}
|
||||
|
||||
/* Returns the unbiased binary exponent of x as an int.
 * Special cases: FP_ILOGB0 for +/-0, INT_MAX for +/-infinity, FP_ILOGBNAN
 * for NaN.
 * The bit-level path assumes the x87 80-bit extended layout (64-bit mantissa
 * with an explicit integer bit, followed by a 16-bit sign+exponent word).
 * With any other long double format the call is delegated to ilogb() on
 * double. */
int ilogbl (long double x)
{
#if defined( LDBL_MANT_DIG ) && ( LDBL_MANT_DIG == 64 )
    union
    {
        long double f;
        struct{ cl_ulong m; cl_ushort sexp; }u;
    } u;
    u.f = x;

    int exp = u.u.sexp & 0x7fff;
    if( 0 == exp )
    {
        if( 0 == u.u.m )
            return FP_ILOGB0;

        //subnormal: graft the fraction onto 1.0 and subtract 1.0 so the value
        //is normalised. The explicit integer bit must be set as well,
        //otherwise the encoding is an x87 "unnormal", which is not a valid
        //operand.
        u.u.sexp = 0x3fff;
        u.u.m |= 0x8000000000000000ULL;
        u.f -= 1.0f;
        exp = u.u.sexp & 0x7fff;

        return exp - (0x3fff + 0x3ffe);
    }
    else if( 0x7fff == exp )
    {
        // any fraction bit below the integer bit means NaN
        if( u.u.m & CL_LONG_MAX )
            return FP_ILOGBNAN;

        return INT_MAX;
    }

    return exp - 0x3fff;
#else
    return ilogb( (double) x );
#endif
}
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// fmax, fmin, fmaxf, fminf
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/* Stores the raw 32-bit representation of the float fx into *ux. */
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
    volatile union {float f; unsigned int u;} pun;
    pun.f = fx;
    *ux = pun.u;
}
|
||||
/* static void GET_BITS_SP32(float fx, unsigned int* ux) */
|
||||
/* { */
|
||||
/* volatile union {float f; unsigned int i;} _bitsy; */
|
||||
/* _bitsy.f = (fx); */
|
||||
/* *ux = _bitsy.i; */
|
||||
/* } */
|
||||
/* Stores into *fx the float whose raw 32-bit representation is ux. */
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
    volatile union {float f; unsigned int u;} pun;
    pun.u = ux;
    *fx = pun.f;
}
|
||||
/* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
|
||||
/* { */
|
||||
/* volatile union {float f; unsigned int i;} _bitsy; */
|
||||
/* _bitsy.i = (ux); */
|
||||
/* *fx = _bitsy.f; */
|
||||
/* } */
|
||||
/* Stores the raw 64-bit representation of the double dx into *lx. */
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
    volatile union {double d; unsigned __int64 l;} pun;
    pun.d = dx;
    *lx = pun.l;
}
|
||||
/* Stores into *dx the double whose raw 64-bit representation is lx. */
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
    volatile union {double d; unsigned __int64 l;} pun;
    pun.l = lx;
    *dx = pun.d;
}
|
||||
|
||||
#if 0
|
||||
int SIGNBIT_DP64(double x )
|
||||
{
|
||||
int hx;
|
||||
_GET_HIGH_WORD(hx,x);
|
||||
return((hx>>31));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* fmax(x, y) returns the larger (more positive) of x and y.
   NaNs are treated as missing values: if one argument is NaN,
   the other argument is returned. If both arguments are NaN,
   the first argument is returned. */
double fmax(double x, double y)
{
    /* y wins only when it is a number and x is not >= y
       (the comparison is also false when x is NaN) */
    if( !isnan(y) && !( x >= y ) )
        return y;

    return x;
}
|
||||
|
||||
|
||||
/* fmin(x, y) returns the smaller (more negative) of x and y.
   NaNs are treated as missing values: if one argument is NaN,
   the other argument is returned. If both arguments are NaN,
   the first argument is returned. */
double fmin(double x, double y)
{
    /* y wins only when it is a number and x is not <= y
       (the comparison is also false when x is NaN) */
    if( !isnan(y) && !( x <= y ) )
        return y;

    return x;
}
|
||||
|
||||
|
||||
/* fmaxf(x, y) returns the larger (more positive) of x and y.
   If y is NaN, x is returned; if only x is NaN, y is returned. */
float fmaxf( float x, float y )
{
    /* y wins only when it is a number and x is not >= y
       (the comparison is also false when x is NaN) */
    if( !isnan(y) && !( x >= y ) )
        return y;

    return x;
}
|
||||
|
||||
/* fminf(x, y) returns the smaller (more negative) of x and y.
   NaNs are treated as missing values: if one argument is NaN,
   the other argument is returned. If both arguments are NaN,
   the first argument is returned. */
float fminf(float x, float y)
{
    /* y wins only when it is a number and x is not <= y
       (the comparison is also false when x is NaN) */
    if( !isnan(y) && !( x <= y ) )
        return y;

    return x;
}
|
||||
|
||||
/* Returns x * 2^n.
 *  - NaN and +/-infinity are returned unchanged.
 *  - Zero, or n < -2200, yields a zero carrying the sign of x.
 *  - n > 2200 yields an infinity carrying the sign of x.
 * The scaling path builds powers of two directly in the x87 80-bit extended
 * layout; with any other long double format the scaling is delegated to
 * ldexp() on double.
 * NOTE(review): the +/-2200 cut-offs are kept from the original code; they
 * are narrower than the exponent range of the 80-bit format. */
long double scalblnl(long double x, long n)
{
    /* NaN (x != x) and infinities are unaffected by scaling */
    if( x != x || x == INFINITY || x == -INFINITY )
        return x;

    if( x == 0.0L || n < -2200)
        return copysignl( 0.0L, x );

    if( n > 2200 )
        return ( x < 0.0L ) ? -INFINITY : INFINITY;

#if defined( LDBL_MANT_DIG ) && ( LDBL_MANT_DIG == 64 )
    {
        union
        {
            long double d;
            struct{ cl_ulong m; cl_ushort sexp;}u;
        }u;
        /* mantissa 1.0: only the explicit integer bit is set */
        u.u.m = CL_LONG_MIN;

        if( n < 0 )
        {
            /* scale down in steps of 2^-1022, then by the remainder */
            u.u.sexp = 0x3fff - 1022;
            while( n <= -1022 )
            {
                x *= u.d;
                n += 1022;
            }
            u.u.sexp = 0x3fff + n;
            x *= u.d;
            return x;
        }

        if( n > 0 )
        {
            /* scale up in steps of 2^1023, then by the remainder */
            u.u.sexp = 0x3fff + 1023;
            while( n >= 1023 )
            {
                x *= u.d;
                n -= 1023;
            }
            u.u.sexp = 0x3fff + n;
            x *= u.d;
            return x;
        }

        return x;
    }
#else
    /* n is within [-2200, 2200] here, so the cast to int is safe */
    return (long double) ldexp( (double) x, (int) n );
#endif
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// log2
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
const static cl_double log_e_base2 = 1.4426950408889634074;
|
||||
const static cl_double log_10_base2 = 3.3219280948873623478;
|
||||
|
||||
//double log10(double x);
|
||||
|
||||
/* Base-2 logarithm computed as log2(e) * ln(x). */
double log2(double x)
{
    const double log2_of_e = 1.44269504088896340735992468100189214;
    return log2_of_e * log(x);
}
|
||||
|
||||
/* Base-2 logarithm computed as log2(e) * ln(x). The natural logarithm is
 * evaluated with the double-precision log(). */
long double log2l(long double x)
{
    const long double log2_of_e = 1.44269504088896340735992468100189214L;
    return log2_of_e * log(x);
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// misc functions
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
// This function is commented out because the Windows implementation should never call munmap.
|
||||
// If it is calling it, we have a bug. Please file a bugzilla.
|
||||
int munmap(void *addr, size_t len)
|
||||
{
|
||||
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
||||
|
||||
return (int)VirtualAlloc( (LPVOID)addr, len,
|
||||
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
||||
}
|
||||
*/
|
||||
|
||||
uint64_t ReadTime( void )
|
||||
{
|
||||
LARGE_INTEGER current;
|
||||
QueryPerformanceCounter(¤t);
|
||||
return (uint64_t)current.QuadPart;
|
||||
}
|
||||
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
||||
{
|
||||
static double PerformanceFrequency = 0.0;
|
||||
|
||||
if (PerformanceFrequency == 0.0) {
|
||||
LARGE_INTEGER frequency;
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
PerformanceFrequency = (double) frequency.QuadPart;
|
||||
}
|
||||
|
||||
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
||||
}
|
||||
|
||||
/* Returns a quiet NaN with the bit pattern 0x7fc00000.
 * The conversion goes through a union instead of a pointer cast so no object
 * is accessed through an incompatible pointer type. */
float make_nan()
{
    /* IEEE 754 single-precision layout, low bits first:
         unsigned int mantissa:  22;
         unsigned int quiet_nan:  1;
         unsigned int exponent:   8;
         unsigned int negative:   1;
    */
    union { int32_t i; float f; } bits;
    bits.i = 0x7fc00000;
    return bits.f;
}
|
||||
|
||||
/* Returns a positive quiet NaN whose low 22 mantissa bits carry the payload
 * atoi( str ). The payload is masked so it can never alter the sign,
 * exponent or quiet bits, and the bits are converted through a union rather
 * than a pointer cast. */
float nanf( const char* str)
{
    union { cl_uint u; float f; } bits;

    bits.u = 0x7fc00000U | ( (cl_uint) atoi( str ) & 0x003fffffU );

    return bits.f;
}
|
||||
|
||||
|
||||
/* Returns a positive quiet NaN whose low 51 mantissa bits carry the payload
 * atoi( str ). The payload is masked so it can never alter the sign,
 * exponent or quiet bits (a negative payload would otherwise sign-extend
 * into them), and the bits are converted through a union rather than a
 * pointer cast. */
double nan( const char* str)
{
    union { cl_ulong u; double f; } bits;

    bits.u = 0x7ff8000000000000ULL | ( (cl_ulong) atoi( str ) & 0x0007ffffffffffffULL );

    return bits.f;
}
|
||||
|
||||
/* Returns a positive quiet NaN carrying the payload atoi( str ).
 * x87 80-bit layout: exponent 0x7fff with the explicit integer bit (63) AND
 * the quiet bit (62) set. With only the integer bit set and a zero payload
 * the encoding is infinity, not NaN.
 * With any other long double format a double quiet NaN is built and
 * converted instead. */
long double nanl( const char* str)
{
    const cl_ulong payload = (cl_ulong) atoi( str );
#if defined( LDBL_MANT_DIG ) && ( LDBL_MANT_DIG == 64 )
    union
    {
        long double f;
        struct { cl_ulong m; cl_ushort sexp; }u;
    }u;
    u.u.sexp = 0x7fff;
    u.u.m = 0xC000000000000000ULL | ( payload & 0x3fffffffffffffffULL );

    return u.f;
#else
    union { double f; cl_ulong u; } d;
    d.u = 0x7ff8000000000000ULL | ( payload & 0x0007ffffffffffffULL );

    return (long double) d.f;
#endif
}
|
||||
|
||||
/* Rounds x toward zero to an integral value. Magnitudes below 2^52 go
 * through a 64-bit integer conversion and get the sign of x re-applied (so
 * -0.3 yields -0.0); anything else, including NaN, is returned unchanged. */
double trunc(double x)
{
    if( fabs(x) < 4503599627370496.0 /* 0x1.0p52 */ )
    {
        const cl_long whole = x;
        return copysign( (double) whole, x );
    }

    return x;
}
|
||||
|
||||
/* Rounds x toward zero to an integral value. Magnitudes below 2^23 go
 * through a 32-bit integer conversion and get the sign of x re-applied (so
 * -0.3f yields -0.0f); anything else, including NaN, is returned unchanged. */
float truncf(float x)
{
    if( fabsf(x) < 8388608.0f /* 0x1.0p23f */ )
    {
        const cl_int whole = x;
        return copysignf( (float) whole, x );
    }

    return x;
}
|
||||
|
||||
/* Rounds x to the nearest integer, halfway cases away from zero, and returns
 * it as a long. Results that do not fit are clamped to LONG_MAX / LONG_MIN,
 * so the final conversion is always in range for numeric inputs. */
long lround(double x)
{
    double absx = fabs(x);

    if( absx < 0.5 )
        return 0;

    /* below 2^52 there may be a fractional part to round away */
    if( absx < 4503599627370496.0 /* 0x1.0p52 */)
    {
        absx += 0.5;
        cl_long rounded = absx;
        absx = rounded;
        x = copysign( absx, x );
    }

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    /* converting a value below LONG_MIN would be undefined */
    if( x <= (double) LONG_MIN )
        return LONG_MIN;

    return (long) x;
}
|
||||
|
||||
/* Rounds x to the nearest integer, halfway cases away from zero, and returns
 * it as a long. Results that do not fit are clamped to LONG_MAX / LONG_MIN,
 * so the final conversion is always in range for numeric inputs. */
long lroundf(float x)
{
    float absx = fabsf(x);

    if( absx < 0.5f )
        return 0;

    /* below 2^23 there may be a fractional part to round away */
    if( absx < 8388608.0f )
    {
        absx += 0.5f;
        cl_int rounded = absx;
        absx = rounded;
        x = copysignf( absx, x );
    }

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    /* converting a value below LONG_MIN would be undefined */
    if( x <= (float) LONG_MIN )
        return LONG_MIN;

    return (long) x;
}
|
||||
|
||||
/* Rounds x to the nearest integral value, halfway cases away from zero.
 * Magnitudes below 0.5 give a zero with the sign of x; magnitudes of 2^52 or
 * more (and NaN) are returned unchanged. */
double round(double x)
{
    double mag = fabs(x);

    if( mag < 0.5 )
        return copysign( 0.0, x);

    if( !( mag < 4503599627370496.0 /* 0x1.0p52 */ ) )
        return x;

    /* add one half, drop the fraction through a 64-bit integer */
    mag += 0.5;
    mag = (double)(cl_long) mag;

    return copysign( mag, x );
}
|
||||
|
||||
/* Rounds x to the nearest integral value, halfway cases away from zero.
 * Magnitudes below 0.5 give a zero with the sign of x; magnitudes of 2^23 or
 * more (and NaN) are returned unchanged. */
float roundf(float x)
{
    float mag = fabsf(x);

    if( mag < 0.5f )
        return copysignf( 0.0f, x);

    if( !( mag < 8388608.0f ) )
        return x;

    /* add one half, drop the fraction through a 32-bit integer */
    mag += 0.5f;
    mag = (float)(cl_int) mag;

    return copysignf( mag, x );
}
|
||||
|
||||
/* Rounds x to the nearest integral value, halfway cases away from zero.
 * The add-0.5 / integer-conversion path with the 2^63 limit is only exact
 * when long double has a 64-bit mantissa (x87 extended precision). With a
 * narrower mantissa, adding 0.5 to large odd integers would round them, so
 * the work is delegated to round() on double. */
long double roundl(long double x)
{
#if defined( LDBL_MANT_DIG ) && ( LDBL_MANT_DIG == 64 )
    long double absx = fabsl(x);

    if( absx < 0.5L )
        return copysignl( 0.0L, x);

    if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
    {
        absx += 0.5L;
        cl_ulong rounded = absx;
        absx = rounded;
        x = copysignl( absx, x );
    }

    return x;
#else
    return (long double) round( (double) x );
#endif
}
|
||||
|
||||
/* Returns 1 when the sign bit of x is set (including -0.0), 0 otherwise. */
int signbit(double x)
{
    union
    {
        double f;
        cl_ulong u;
    } bits;

    bits.f = x;
    return ( bits.u & 0x8000000000000000ULL ) != 0;
}
|
||||
|
||||
/* Returns 1 when the sign bit of x is set (including -0.0f), 0 otherwise. */
int signbitf(float x)
{
    union
    {
        float f;
        cl_uint u;
    } bits;

    bits.f = x;
    return ( bits.u & 0x80000000U ) != 0;
}
|
||||
|
||||
/* Cube root of x: pow() of the magnitude (in double precision) with the sign
 * of x re-applied afterwards. */
float cbrtf( float x )
{
    const double magnitude = fabs((double) x);
    const float root = pow( magnitude, 1.0 / 3.0 );

    return copysignf( root, x );
}
|
||||
|
||||
/* Cube root of x: pow() of the magnitude with the sign of x re-applied
 * afterwards. */
double cbrt( double x )
{
    const double root = pow( fabs( x ), 1.0 / 3.0 );

    return copysign( root, x );
}
|
||||
|
||||
/* Reinterprets the 32 bits of ix as a float (bit copy, not a numeric
 * conversion). */
float int2float (int32_t ix)
{
    union {
        float   f;
        int32_t i;
    } pun;

    pun.i = ix;
    return pun.f;
}
|
||||
|
||||
/* Reinterprets the 32 bits of fx as an int32_t (bit copy, not a numeric
 * conversion). */
int32_t float2int (float fx)
{
    union {
        float   f;
        int32_t i;
    } pun;

    pun.f = fx;
    return pun.i;
}
|
||||
|
||||
#if defined(_MSC_VER) && !defined(_WIN64)
|
||||
/** Returns the number of leading 0-bits in x,
    starting at the most significant bit position.
    If x is 0, this implementation returns the bit width of int
    (callers should still not rely on that: the GCC builtin leaves it undefined).
*/
int __builtin_clz(unsigned int pattern)
{
    unsigned long index;   /* receives the position of the highest set bit */

    /* _BitScanReverse reports zero when no bit is set in pattern. */
    if (_BitScanReverse( &index, pattern)) {
        return (int)(8*sizeof(int) - 1 - index);
    }

    return (int)(8*sizeof(int));
}
|
||||
#else
|
||||
/* Portable count-leading-zeros for a 32-bit pattern; returns 32 for zero. */
int __builtin_clz(unsigned int pattern)
{
    int leading = 31;
    unsigned int step;

    if (pattern == 0u) {
        return 32;
    }

    /* Binary search for the highest set bit: whenever anything is set at or
       above bit `step`, drop the low `step` bits and credit them. */
    for (step = 16u; step != 0u; step >>= 1) {
        if ((pattern >> step) != 0u) {
            pattern >>= step;
            leading -= (int) step;
        }
    }

    return leading;
}
|
||||
|
||||
#endif //defined(_MSC_VER) && !defined(_WIN64)
|
||||
|
||||
#include <intrin.h>
|
||||
#include <emmintrin.h>
|
||||
/*
 * Rounds x to an integral value using the current rounding mode and returns
 * it as a long.
 *
 * Values at or beyond the range of long are clamped to LONG_MAX / LONG_MIN so
 * that the final conversion is never out of range (which would be undefined
 * behavior). NaN is not handled specially.
 */
long int lrint (double x)
{
    double absx = fabs(x);

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the clamp above for the negative side. */
    if( x <= (double) LONG_MIN )
        return LONG_MIN;

    if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
    {
        /* Adding and then subtracting 2^52 (with x's sign) forces the FPU to
           discard the fractional bits according to the current rounding mode. */
        double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* |x| >= 2^52: already integral. */
    return (long int) x;
}
|
||||
|
||||
/*
 * Rounds x to an integral value using the current rounding mode and returns
 * it as a long.
 *
 * Values at or beyond the range of long are clamped to LONG_MAX / LONG_MIN so
 * that the final conversion is never out of range (which would be undefined
 * behavior). NaN is not handled specially.
 */
long int lrintf (float x)
{
    float absx = fabsf(x);

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the clamp above for the negative side. */
    if( x <= (float) LONG_MIN )
        return LONG_MIN;

    if( absx < 8388608.0f /* 0x1.0p23f */ )
    {
        /* Adding and then subtracting 2^23 (with x's sign) forces the FPU to
           discard the fractional bits according to the current rounding mode. */
        float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* |x| >= 2^23: already integral. */
    return (long int) x;
}
|
||||
|
||||
/*
 * Suspends the calling thread for at least usec microseconds, rounded up to
 * whole milliseconds because Sleep() takes milliseconds. Always returns 0.
 *
 * The round-up avoids `usec + 999`, which overflows for usec near INT_MAX.
 * Non-positive requests become Sleep(0): a negative millisecond count would
 * otherwise convert to a huge unsigned duration.
 */
int usleep(int usec)
{
    int msec = 0;

    if( usec > 0 )
        msec = usec / 1000 + ( ( usec % 1000 ) != 0 );   /* ceil(usec / 1000) */

    Sleep( msec );
    return 0;
}
|
||||
|
||||
int fetestexcept(int excepts)
|
||||
{
|
||||
unsigned int status = _statusfp();
|
||||
return excepts & (
|
||||
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
||||
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
||||
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
||||
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
||||
((status & _SW_INVALID) ? FE_INVALID : 0)
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * Clears the floating-point status word via _clearfp() and returns 0.
 * The `excepts` selector is not consulted by this implementation.
 */
int feclearexcept(int excepts)
{
    (void) excepts;   /* intentionally unused */
    _clearfp();
    return 0;
}
|
||||
|
||||
#endif //defined(_WIN32)
|
||||
274
test_common/harness/mt19937.c
Normal file
274
test_common/harness/mt19937.c
Normal file
@@ -0,0 +1,274 @@
|
||||
/*
|
||||
A C-program for MT19937, with initialization improved 2002/1/26.
|
||||
Coded by Takuji Nishimura and Makoto Matsumoto.
|
||||
|
||||
Before using, initialize the state by using init_genrand(seed)
|
||||
or init_by_array(init_key, key_length).
|
||||
|
||||
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The names of its contributors may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Any feedback is very welcome.
|
||||
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
|
||||
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
|
||||
|
||||
Modifications for use in OpenCL by Ian Ollmann, Apple Inc.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "mt19937.h"
|
||||
#include "mingw_compat.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
/*
 * Allocates `size` bytes aligned to `alignment` using the platform's aligned
 * allocator. Returns NULL on failure. Release the block with align_free().
 */
static void * align_malloc(size_t size, size_t alignment)
{
    void * block = NULL;

#if defined(_WIN32) && defined(_MSC_VER)
    block = _aligned_malloc(size, alignment);
#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
    /* posix_memalign reports failure through its return value. */
    if (0 != posix_memalign(&block, alignment, size))
        block = NULL;
#elif defined(__MINGW32__)
    block = __mingw_aligned_malloc(size, alignment);
#else
    #error "Please add support OS for aligned malloc"
#endif

    return block;
}
|
||||
|
||||
/*
 * Releases a block obtained from align_malloc() with the matching platform
 * deallocator.
 *
 * Note: ISO C forbids `return expr;` in a void function, so the deallocator
 * calls are plain statements.
 */
static void align_free(void * ptr)
{
#if defined(_WIN32) && defined(_MSC_VER)
    _aligned_free(ptr);
#elif  defined(__linux__) || defined (linux) || defined(__APPLE__)
    free(ptr);
#elif defined(__MINGW32__)
    __mingw_aligned_free(ptr);
#else
    #error "Please add support OS for aligned free"
#endif
}
|
||||
|
||||
|
||||
/* Period parameters */
#define N 624   /* vector code requires multiple of 4 here */
#define M 397
/* The cast expressions are fully parenthesized so they expand safely next to
   any operator (e.g. postfix operators or sizeof). */
#define MATRIX_A   ((cl_uint) 0x9908b0dfUL)   /* constant vector a */
#define UPPER_MASK ((cl_uint) 0x80000000UL)   /* most significant w-r bits */
#define LOWER_MASK ((cl_uint) 0x7fffffffUL)   /* least significant r bits */
|
||||
|
||||
typedef struct _MTdata
|
||||
{
|
||||
cl_uint mt[N];
|
||||
#ifdef __SSE2__
|
||||
cl_uint cache[N];
|
||||
#endif
|
||||
cl_int mti;
|
||||
}_MTdata;
|
||||
|
||||
/* initializes mt[N] with a seed */
|
||||
MTdata init_genrand(cl_uint s)
|
||||
{
|
||||
MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
|
||||
if( NULL != r )
|
||||
{
|
||||
cl_uint *mt = r->mt;
|
||||
int mti = 0;
|
||||
mt[0]= s; // & 0xffffffffUL;
|
||||
for (mti=1; mti<N; mti++) {
|
||||
mt[mti] = (cl_uint)
|
||||
(1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
|
||||
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
|
||||
/* In the previous versions, MSBs of the seed affect */
|
||||
/* only MSBs of the array mt[]. */
|
||||
/* 2002/01/09 modified by Makoto Matsumoto */
|
||||
// mt[mti] &= 0xffffffffUL;
|
||||
/* for >32 bit machines */
|
||||
}
|
||||
r->mti = mti;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Releases a generator created by init_genrand(); a NULL handle is ignored. */
void    free_mtdata( MTdata d )
{
    if( !d )
        return;

    align_free(d);
}
|
||||
|
||||
/* generates a random number on [0,0xffffffff]-interval */
|
||||
cl_uint genrand_int32( MTdata d)
|
||||
{
|
||||
/* mag01[x] = x * MATRIX_A for x=0,1 */
|
||||
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
|
||||
#ifdef __SSE2__
|
||||
static volatile int init = 0;
|
||||
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
|
||||
#endif
|
||||
|
||||
|
||||
cl_uint *mt = d->mt;
|
||||
cl_uint y;
|
||||
|
||||
if (d->mti == N)
|
||||
{ /* generate N words at one time */
|
||||
int kk;
|
||||
|
||||
#ifdef __SSE2__
|
||||
if( 0 == init )
|
||||
{
|
||||
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
|
||||
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
|
||||
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
|
||||
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
|
||||
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
|
||||
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
|
||||
init = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
kk = 0;
|
||||
#ifdef __SSE2__
|
||||
// vector loop
|
||||
for( ; kk + 4 <= N-M; kk += 4 )
|
||||
{
|
||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
|
||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
||||
}
|
||||
#endif
|
||||
for ( ;kk<N-M;kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
// advance to next aligned location
|
||||
for (;kk<N-1 && (kk & 3);kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
}
|
||||
|
||||
// vector loop
|
||||
for( ; kk + 4 <= N-1; kk += 4 )
|
||||
{
|
||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||
|
||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
|
||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
||||
}
|
||||
#endif
|
||||
|
||||
for (;kk<N-1;kk++) {
|
||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
}
|
||||
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
|
||||
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
||||
|
||||
#ifdef __SSE2__
|
||||
// Do the tempering ahead of time in vector code
|
||||
for( kk = 0; kk + 4 <= N; kk += 4 )
|
||||
{
|
||||
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
|
||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
|
||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
|
||||
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
|
||||
}
|
||||
#endif
|
||||
|
||||
d->mti = 0;
|
||||
}
|
||||
#ifdef __SSE2__
|
||||
y = d->cache[d->mti++];
|
||||
#else
|
||||
y = mt[d->mti++];
|
||||
|
||||
/* Tempering */
|
||||
y ^= (y >> 11);
|
||||
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||
y ^= (y >> 18);
|
||||
#endif
|
||||
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
/*
 * Builds a 64-bit value from two consecutive 32-bit draws.
 *
 * The draws are taken in separate statements: as operands of a single `|`
 * expression their evaluation order is unspecified in C, which would make the
 * output for a given seed depend on the compiler. The first draw is the high
 * word, the second the low word.
 */
cl_ulong genrand_int64( MTdata d)
{
    const cl_ulong high = (cl_ulong) genrand_int32(d) << 32;
    const cl_uint  low  = genrand_int32(d);

    return high | low;
}
|
||||
|
||||
/* generates a random number on [0,1]-real-interval */
|
||||
double genrand_real1(MTdata d)
|
||||
{
|
||||
return genrand_int32(d)*(1.0/4294967295.0);
|
||||
/* divided by 2^32-1 */
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1)-real-interval */
|
||||
double genrand_real2(MTdata d)
|
||||
{
|
||||
return genrand_int32(d)*(1.0/4294967296.0);
|
||||
/* divided by 2^32 */
|
||||
}
|
||||
|
||||
/* generates a random number on (0,1)-real-interval */
|
||||
double genrand_real3(MTdata d)
|
||||
{
|
||||
return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
|
||||
/* divided by 2^32 */
|
||||
}
|
||||
|
||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||
double genrand_res53(MTdata d)
|
||||
{
|
||||
unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
|
||||
return(a*67108864.0+b)*(1.0/9007199254740992.0);
|
||||
}
|
||||
99
test_common/harness/mt19937.h
Normal file
99
test_common/harness/mt19937.h
Normal file
@@ -0,0 +1,99 @@
|
||||
|
||||
/*
|
||||
* mt19937.h
|
||||
*
|
||||
* Mersenne Twister.
|
||||
*
|
||||
A C-program for MT19937, with initialization improved 2002/1/26.
|
||||
Coded by Takuji Nishimura and Makoto Matsumoto.
|
||||
|
||||
Before using, initialize the state by using init_genrand(seed)
|
||||
or init_by_array(init_key, key_length).
|
||||
|
||||
Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The names of its contributors may not be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
Any feedback is very welcome.
|
||||
http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
|
||||
email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
|
||||
*/
|
||||
|
||||
#ifndef MT19937_H
|
||||
#define MT19937_H 1
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/cl_platform.h>
|
||||
#else
|
||||
#include <CL/cl_platform.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Interfaces here have been modified from original sources so that they
|
||||
* are safe to call reentrantly, so long as a different MTdata is used
|
||||
* on each thread.
|
||||
*/
|
||||
|
||||
typedef struct _MTdata *MTdata;
|
||||
|
||||
/* Create the random number generator with seed */
|
||||
MTdata init_genrand( cl_uint /*seed*/ );
|
||||
|
||||
/* release memory used by a MTdata private data */
|
||||
void free_mtdata( MTdata /*data*/ );
|
||||
|
||||
/* generates a random number on [0,0xffffffff]-interval */
|
||||
cl_uint genrand_int32( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,0xffffffffffffffffULL]-interval */
|
||||
cl_ulong genrand_int64( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1]-real-interval */
|
||||
double genrand_real1( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1)-real-interval */
|
||||
double genrand_real2( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on (0,1)-real-interval */
|
||||
double genrand_real3( MTdata /*data*/);
|
||||
|
||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||
double genrand_res53( MTdata /*data*/ );
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MT19937_H */
|
||||
49
test_common/harness/ref_counting.h
Normal file
49
test_common/harness/ref_counting.h
Normal file
@@ -0,0 +1,49 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _ref_counting_h
|
||||
#define _ref_counting_h
|
||||
|
||||
// Declares <c>_refCount in the enclosing scope and fills it with the object's
// current reference count. Expects an `error` variable to be in scope; the
// failure path is whatever test_error() does.
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
    cl_uint c##_refCount; \
    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
    test_error( error, "Unable to check reference count for " #type );
|
||||
|
||||
// Re-reads the object's reference count into <c>_refCount_new and compares it
// with <c>_refCount (declared earlier by the matching MARK_REF_COUNT_* macro).
// On a mismatch it logs both values and returns -1 from the enclosing function.
// The counts are cl_uint, so they are printed with %u.
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
    cl_uint c##_refCount_new; \
    error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
    test_error( error, "Unable to check reference count for " #type ); \
    if( c##_refCount != c##_refCount_new ) \
    { \
        log_error( "ERROR: Reference count for " #type " changed! (was %u, now %u)\n", c##_refCount, c##_refCount_new ); \
        return -1; \
    }
|
||||
|
||||
// Per-object-type wrappers: each pair selects the clGet<type>Info entry point
// and the CL_<bigType>_REFERENCE_COUNT query for one kind of OpenCL object.
#define MARK_REF_COUNT_CONTEXT( c )  MARK_REF_COUNT_BASE( c, Context, CONTEXT )
#define TEST_REF_COUNT_CONTEXT( c )  TEST_REF_COUNT_BASE( c, Context, CONTEXT )

#define MARK_REF_COUNT_DEVICE( c )   MARK_REF_COUNT_BASE( c, Device, DEVICE )
#define TEST_REF_COUNT_DEVICE( c )   TEST_REF_COUNT_BASE( c, Device, DEVICE )

#define MARK_REF_COUNT_QUEUE( c )    MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
#define TEST_REF_COUNT_QUEUE( c )    TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )

#define MARK_REF_COUNT_PROGRAM( c )  MARK_REF_COUNT_BASE( c, Program, PROGRAM )
#define TEST_REF_COUNT_PROGRAM( c )  TEST_REF_COUNT_BASE( c, Program, PROGRAM )

#define MARK_REF_COUNT_MEM( c )      MARK_REF_COUNT_BASE( c, MemObject, MEM )
#define TEST_REF_COUNT_MEM( c )      TEST_REF_COUNT_BASE( c, MemObject, MEM )
|
||||
|
||||
#endif // _ref_counting_h
|
||||
175
test_common/harness/rounding_mode.c
Normal file
175
test_common/harness/rounding_mode.c
Normal file
@@ -0,0 +1,175 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "rounding_mode.h"
|
||||
|
||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
||||
// Installs the host rounding mode that corresponds to `r` for the given
// output type and returns the mode that was active before the call.
// For kDefaultRoundingMode, floating-point outputs use round-to-nearest and
// every other output type uses round-toward-zero. Aborts if the previous
// mode is not one of the four fenv modes.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int float_modes[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
    static const int integer_modes[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
    const int *table = ( outType == kfloat || outType == kdouble ) ? float_modes : integer_modes;
    const int previous = fegetround();

    fesetround( table[r] );

    if( previous == FE_TONEAREST )
        return kRoundToNearestEven;
    if( previous == FE_UPWARD )
        return kRoundUp;
    if( previous == FE_DOWNWARD )
        return kRoundDown;
    if( previous == FE_TOWARDZERO )
        return kRoundTowardZero;

    abort();                        // unrecognised previous mode
    return kDefaultRoundingMode;    // never happens
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
int oldRound = fegetround();
|
||||
|
||||
switch( oldRound )
|
||||
{
|
||||
case FE_TONEAREST:
|
||||
return kRoundToNearestEven;
|
||||
case FE_UPWARD:
|
||||
return kRoundUp;
|
||||
case FE_DOWNWARD:
|
||||
return kRoundDown;
|
||||
case FE_TOWARDZERO:
|
||||
return kRoundTowardZero;
|
||||
}
|
||||
|
||||
return kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#else
|
||||
// MSVC variant: installs the rounding mode that corresponds to `r` for the
// given output type through _controlfp_s and returns the previously active
// mode. If the current control word cannot be read, an error is logged,
// nothing is changed and kDefaultRoundingMode is returned.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int float_modes[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    static const int integer_modes[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
    const int *table = integer_modes;
    unsigned int control;
    RoundingMode previous;

    if( outType == kfloat || outType == kdouble )
        table = float_modes;

    // Mask 0 means "read only": fetch the control word without changing it.
    if( _controlfp_s(&control, 0, 0) ) {
        vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
        return kDefaultRoundingMode;
    }

    switch( control & _MCW_RC )
    {
        case _RC_NEAR:  previous = kRoundToNearestEven;  break;
        case _RC_UP:    previous = kRoundUp;             break;
        case _RC_DOWN:  previous = kRoundDown;           break;
        case _RC_CHOP:  previous = kRoundTowardZero;     break;
        default:        previous = kDefaultRoundingMode; break;
    }

    _controlfp_s(&control, table[r], _MCW_RC);   // install the new rounding mode
    return previous;
}
|
||||
|
||||
RoundingMode get_round( void )
|
||||
{
|
||||
unsigned int oldRound;
|
||||
|
||||
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
|
||||
oldRound &= _MCW_RC;
|
||||
return
|
||||
(oldRound == _RC_NEAR)? kRoundToNearestEven :
|
||||
(oldRound == _RC_UP)? kRoundUp :
|
||||
(oldRound == _RC_DOWN)? kRoundDown :
|
||||
(oldRound == _RC_CHOP)? kRoundTowardZero:
|
||||
kDefaultRoundingMode;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//
|
||||
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
|
||||
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
|
||||
// software by testing against FLT_MIN or DBL_MIN in that file.
|
||||
//
|
||||
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
|
||||
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
|
||||
// operators do (e.g. add, subtract, multiply, divide, etc.)
|
||||
//
|
||||
// Configuring hardware to FTZ mode varies by platform.
|
||||
// CAUTION: Some C implementations may also fail to behave properly in this mode.
|
||||
//
|
||||
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
|
||||
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
|
||||
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
|
||||
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
|
||||
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
|
||||
// in which case, these functions are at liberty to do nothing.
|
||||
//
|
||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
|
||||
#include <xmmintrin.h>
|
||||
#elif defined( __PPC__ )
|
||||
#include <fpu_control.h>
|
||||
#endif
|
||||
// Puts the host FPU into flush-to-zero mode and returns an opaque token that
// UnFlushToZero() accepts. On x86 the token carries the previous MXCSR value;
// on the other supported architectures it is NULL.
void *FlushToZero( void )
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
        // Save the current MXCSR in a pointer-sized token, then set bits 0x8040.
        union{ int i;  void *p; }saved = { _mm_getcsr() };
        const int ftz_bits = 0x8040;
        _mm_setcsr( saved.i | ftz_bits );
        return saved.p;
    #elif defined( __arm__ )
        // processor is already in FTZ mode -- do nothing
        return NULL;
    #elif defined( __PPC__ )
        // Set the non-IEEE bit in the FPU control word.
        fpu_control_t flags = 0;
        _FPU_GETCW(flags);
        flags |= _FPU_MASK_NI;
        _FPU_SETCW(flags);
        return NULL;
    #else
        #error Unknown arch
    #endif
#else
    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
|
||||
|
||||
// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
// On x86, p is the token returned by FlushToZero() and holds the MXCSR value to reinstate.
void UnFlushToZero( void *p)
{
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
    #if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
        // Recover the saved MXCSR value from the token and write it back.
        union{ void *p;  int i;  }saved = { p };
        _mm_setcsr( saved.i );
    #elif defined( __arm__ )
        // processor is already in FTZ mode -- do nothing
    #elif defined( __PPC__)
        // Clear the non-IEEE bit in the FPU control word.
        fpu_control_t flags = 0;
        _FPU_GETCW(flags);
        flags &= ~_FPU_MASK_NI;
        _FPU_SETCW(flags);
    #else
        #error Unknown arch
    #endif
#else
    #error  Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
#endif
}
|
||||
73
test_common/harness/rounding_mode.h
Normal file
73
test_common/harness/rounding_mode.h
Normal file
@@ -0,0 +1,73 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef __ROUNDING_MODE_H__
|
||||
#define __ROUNDING_MODE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#if (defined(_WIN32) && defined (_MSC_VER))
|
||||
// needed for _controlfp_s and the rounding modes in RoundingMode
|
||||
#include <float.h>
|
||||
#include "errorHelpers.h"
|
||||
#include "testHarness.h"
|
||||
#else
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
// Rounding modes understood by set_round()/get_round(). The values index the
// per-mode lookup tables in rounding_mode.c, so the order must not change.
typedef enum
{
    kDefaultRoundingMode = 0,
    kRoundToNearestEven  = 1,
    kRoundUp             = 2,
    kRoundDown           = 3,
    kRoundTowardZero     = 4,

    // This goes last: number of modes above
    kRoundingModeCount
}RoundingMode;
|
||||
|
||||
// Scalar type tags. set_round() treats kfloat and kdouble as floating-point
// outputs and everything else as integer outputs.
typedef enum
{
    kuchar  = 0,
    kchar   = 1,
    kushort = 2,
    kshort  = 3,
    kuint   = 4,
    kint    = 5,
    kfloat  = 6,
    kdouble = 7,
    kulong  = 8,
    klong   = 9,

    //This goes last
    kTypeCount
}Type;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern RoundingMode set_round( RoundingMode r, Type outType );
|
||||
extern RoundingMode get_round( void );
|
||||
extern void *FlushToZero( void );
|
||||
extern void UnFlushToZero( void *p);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* __ROUNDING_MODE_H__ */
|
||||
812
test_common/harness/testHarness.c
Normal file
812
test_common/harness/testHarness.c
Normal file
@@ -0,0 +1,812 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testHarness.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "threadTesting.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "fpcontrol.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#if !defined (__APPLE__)
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "compat.h"
|
||||
|
||||
int gTestsPassed = 0;
|
||||
int gTestsFailed = 0;
|
||||
cl_uint gRandomSeed = 0;
|
||||
cl_uint gReSeed = 0;
|
||||
|
||||
int gFlushDenormsToZero = 0;
|
||||
int gInfNanSupport = 1;
|
||||
int gIsEmbedded = 0;
|
||||
int gIsOpenCL_C_1_0_Device = 0;
|
||||
int gIsOpenCL_1_0_Device = 0;
|
||||
int gHasLong = 1;
|
||||
|
||||
#define DEFAULT_NUM_ELEMENTS 0x4000
|
||||
|
||||
int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps )
|
||||
{
|
||||
return runTestHarnessWithCheck( argc, argv, num_fns, fnList, fnNames, imageSupportRequired, forceNoContextCreation, queueProps,
|
||||
( imageSupportRequired ) ? verifyImageSupport : NULL );
|
||||
}
|
||||
|
||||
int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps,
|
||||
DeviceCheckFn deviceCheckFn )
|
||||
{
|
||||
test_start();
|
||||
|
||||
cl_device_type device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
cl_uint num_platforms = 0;
|
||||
cl_platform_id *platforms;
|
||||
cl_device_id device;
|
||||
int num_elements = DEFAULT_NUM_ELEMENTS;
|
||||
cl_uint num_devices = 0;
|
||||
cl_device_id *devices = NULL;
|
||||
cl_uint choosen_device_index = 0;
|
||||
cl_uint choosen_platform_index = 0;
|
||||
|
||||
int err, ret;
|
||||
char *endPtr;
|
||||
unsigned int i;
|
||||
int based_on_env_var = 0;
|
||||
|
||||
|
||||
/* Check for environment variable to set device type */
|
||||
char *env_mode = getenv( "CL_DEVICE_TYPE" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
based_on_env_var = 1;
|
||||
if( strcmp( env_mode, "gpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( env_mode, "cpu" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( env_mode, "accelerator" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( env_mode, "default" ) == 0 || strcmp( env_mode, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
else
|
||||
{
|
||||
log_error( "Unknown CL_DEVICE_TYPE env variable setting: %s.\nAborting...\n", env_mode );
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
{
|
||||
// report on any unusual library search path indirection
|
||||
char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );
|
||||
|
||||
// report on any unusual framework search path indirection
|
||||
char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
|
||||
if( libSearchPath )
|
||||
log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
|
||||
}
|
||||
#endif
|
||||
|
||||
env_mode = getenv( "CL_DEVICE_INDEX" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
choosen_device_index = atoi(env_mode);
|
||||
}
|
||||
|
||||
env_mode = getenv( "CL_PLATFORM_INDEX" );
|
||||
if( env_mode != NULL )
|
||||
{
|
||||
choosen_platform_index = atoi(env_mode);
|
||||
}
|
||||
|
||||
/* Process the command line arguments */
|
||||
|
||||
/* Special case: just list the tests */
|
||||
if( ( argc > 1 ) && (!strcmp( argv[ 1 ], "-list" ) || !strcmp( argv[ 1 ], "-h" ) || !strcmp( argv[ 1 ], "--help" )))
|
||||
{
|
||||
log_info( "Usage: %s [<function name>*] [pid<num>] [id<num>] [<device type>]\n", argv[0] );
|
||||
log_info( "\t<function name>\tOne or more of: (wildcard character '*') (default *)\n");
|
||||
log_info( "\tpid<num>\t\tIndicates platform at index <num> should be used (default 0).\n" );
|
||||
log_info( "\tid<num>\t\tIndicates device at index <num> should be used (default 0).\n" );
|
||||
log_info( "\t<device_type>\tcpu|gpu|accelerator|<CL_DEVICE_TYPE_*> (default CL_DEVICE_TYPE_DEFAULT)\n" );
|
||||
|
||||
for( i = 0; i < num_fns - 1; i++ )
|
||||
{
|
||||
log_info( "\t\t%s\n", fnNames[ i ] );
|
||||
}
|
||||
test_finish();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* How are we supposed to seed the random # generators? */
|
||||
if( argc > 1 && strcmp( argv[ argc - 1 ], "randomize" ) == 0 )
|
||||
{
|
||||
log_info(" Initializing random seed based on the clock.\n");
|
||||
gRandomSeed = (unsigned)clock();
|
||||
gReSeed = 1;
|
||||
argc--;
|
||||
}
|
||||
else
|
||||
{
|
||||
log_info(" Initializing random seed to 0.\n");
|
||||
}
|
||||
|
||||
/* Do we have an integer to specify the number of elements to pass to tests? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
ret = (int)strtol( argv[ argc - 1 ], &endPtr, 10 );
|
||||
if( endPtr != argv[ argc - 1 ] && *endPtr == 0 )
|
||||
{
|
||||
/* By spec, this means the entire string was a valid integer, so we treat it as a num_elements spec */
|
||||
/* (hence why we stored the result in ret first) */
|
||||
num_elements = ret;
|
||||
log_info( "Testing with num_elements of %d\n", num_elements );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Do we have a CPU/GPU specification? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_GPU;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_CPU;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
argc--;
|
||||
}
|
||||
else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
{
|
||||
device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Did we choose a specific device index? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'i' && argv[ argc - 1 ][1] == 'd' )
|
||||
{
|
||||
choosen_device_index = atoi( &(argv[ argc - 1 ][2]) );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
/* Did we choose a specific platform index? */
|
||||
if( argc > 1 )
|
||||
{
|
||||
if( strlen( argv[ argc - 1 ] ) >= 3 && argv[ argc - 1 ][0] == 'p' && argv[ argc - 1 ][1] == 'i' && argv[ argc - 1 ][2] == 'd')
|
||||
{
|
||||
choosen_platform_index = atoi( &(argv[ argc - 1 ][3]) );
|
||||
argc--;
|
||||
}
|
||||
}
|
||||
|
||||
switch( device_type )
|
||||
{
|
||||
case CL_DEVICE_TYPE_GPU: log_info( "Requesting GPU device " ); break;
|
||||
case CL_DEVICE_TYPE_CPU: log_info( "Requesting CPU device " ); break;
|
||||
case CL_DEVICE_TYPE_ACCELERATOR: log_info( "Requesting Accelerator device " ); break;
|
||||
case CL_DEVICE_TYPE_DEFAULT: log_info( "Requesting Default device " ); break;
|
||||
default: log_error( "Requesting unknown device "); return -1;
|
||||
}
|
||||
log_info( based_on_env_var ? "based on environment variable " : "based on command line " );
|
||||
log_info( "for platform index %d and device index %d\n", choosen_platform_index, choosen_device_index);
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
||||
#define kHasSSE3 0x00000008
|
||||
#define kHasSupplementalSSE3 0x00000100
|
||||
#define kHasSSE4_1 0x00000400
|
||||
#define kHasSSE4_2 0x00000800
|
||||
/* check our environment for a hint to disable SSE variants */
|
||||
{
|
||||
const char *env = getenv( "CL_MAX_SSE" );
|
||||
if( env )
|
||||
{
|
||||
extern int _cpu_capabilities;
|
||||
int mask = 0;
|
||||
if( 0 == strcasecmp( env, "SSE4.1" ) )
|
||||
mask = kHasSSE4_2;
|
||||
else if( 0 == strcasecmp( env, "SSSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1;
|
||||
else if( 0 == strcasecmp( env, "SSE3" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3;
|
||||
else if( 0 == strcasecmp( env, "SSE2" ) )
|
||||
mask = kHasSSE4_2 | kHasSSE4_1 | kHasSupplementalSSE3 | kHasSSE3;
|
||||
else
|
||||
{
|
||||
log_error( "Error: Unknown CL_MAX_SSE setting: %s\n", env );
|
||||
return -2;
|
||||
}
|
||||
|
||||
log_info( "*** Environment: CL_MAX_SSE = %s ***\n", env );
|
||||
_cpu_capabilities &= ~mask;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Get the platform */
|
||||
err = clGetPlatformIDs(0, NULL, &num_platforms);
|
||||
if (err) {
|
||||
print_error(err, "clGetPlatformIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
platforms = (cl_platform_id *) malloc( num_platforms * sizeof( cl_platform_id ) );
|
||||
if (!platforms || choosen_platform_index >= num_platforms) {
|
||||
log_error( "platform index out of range -- choosen_platform_index (%d) >= num_platforms (%d)\n", choosen_platform_index, num_platforms );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clGetPlatformIDs(num_platforms, platforms, NULL);
|
||||
if (err) {
|
||||
print_error(err, "clGetPlatformIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Get the number of requested devices */
|
||||
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, 0, NULL, &num_devices );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
devices = (cl_device_id *) malloc( num_devices * sizeof( cl_device_id ) );
|
||||
if (!devices || choosen_device_index >= num_devices) {
|
||||
log_error( "device index out of range -- choosen_device_index (%d) >= num_devices (%d)\n", choosen_device_index, num_devices );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Get the requested device */
|
||||
err = clGetDeviceIDs(platforms[choosen_platform_index], device_type, num_devices, devices, NULL );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceIDs failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
device = devices[choosen_device_index];
|
||||
free(devices);
|
||||
devices = NULL;
|
||||
free(platforms);
|
||||
platforms = NULL;
|
||||
|
||||
if( printDeviceHeader( device ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_device_fp_config fpconfig = 0;
|
||||
err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( fpconfig ), &fpconfig, NULL );
|
||||
if (err) {
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
gFlushDenormsToZero = ( 0 == (fpconfig & CL_FP_DENORM));
|
||||
log_info( "Supports single precision denormals: %s\n", gFlushDenormsToZero ? "NO" : "YES" );
|
||||
log_info( "sizeof( void*) = %d (host)\n", (int) sizeof( void* ) );
|
||||
|
||||
//detect whether profile of the device is embedded
|
||||
char profile[1024] = "";
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
|
||||
if (err)
|
||||
{
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
|
||||
|
||||
//detect the floating point capabilities
|
||||
cl_device_fp_config floatCapabilities = 0;
|
||||
err = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL);
|
||||
if (err)
|
||||
{
|
||||
print_error(err, "clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed\n");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check for problems that only embedded will have
|
||||
if( gIsEmbedded )
|
||||
{
|
||||
//If the device is embedded, we need to detect if the device supports Infinity and NaN
|
||||
if ((floatCapabilities & CL_FP_INF_NAN) == 0)
|
||||
gInfNanSupport = 0;
|
||||
|
||||
// check the extensions list to see if ulong and long are supported
|
||||
size_t extensionsStringSize = 0;
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, 0, NULL, &extensionsStringSize ) ))
|
||||
{
|
||||
print_error( err, "Unable to get extensions string size for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
char *extensions_string = (char*) malloc(extensionsStringSize);
|
||||
if( NULL == extensions_string )
|
||||
{
|
||||
print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate storage for extensions string for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_EXTENSIONS, extensionsStringSize, extensions_string, NULL ) ))
|
||||
{
|
||||
print_error( err, "Unable to get extensions string for embedded device" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( extensions_string[extensionsStringSize-1] != '\0' )
|
||||
{
|
||||
log_error( "FAILURE: extensions string for embedded device is not NUL terminated" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( NULL == strstr( extensions_string, "cles_khr_int64" ))
|
||||
gHasLong = 0;
|
||||
|
||||
free(extensions_string);
|
||||
}
|
||||
|
||||
if( getenv( "OPENCL_1_0_DEVICE" ) )
|
||||
{
|
||||
char c_version[1024];
|
||||
gIsOpenCL_1_0_Device = 1;
|
||||
memset( c_version, 0, sizeof( c_version ) );
|
||||
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, NULL )) )
|
||||
{
|
||||
log_error( "FAILURE: unable to get CL_DEVICE_OPENCL_C_VERSION on 1.0 device. (%d)\n", err );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( 0 == strncmp( c_version, "OpenCL C 1.0 ", strlen( "OpenCL C 1.0 " ) ) )
|
||||
{
|
||||
gIsOpenCL_C_1_0_Device = 1;
|
||||
log_info( "Device is a OpenCL C 1.0 device\n" );
|
||||
}
|
||||
else
|
||||
log_info( "Device is a OpenCL 1.0 device, but supports OpenCL C 1.1\n" );
|
||||
}
|
||||
|
||||
cl_uint device_address_bits = 0;
|
||||
if( (err = clGetDeviceInfo( device, CL_DEVICE_ADDRESS_BITS, sizeof( device_address_bits ), &device_address_bits, NULL ) ))
|
||||
{
|
||||
print_error( err, "Unable to obtain device address bits" );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
if( device_address_bits )
|
||||
log_info( "sizeof( void*) = %d (device)\n", device_address_bits/8 );
|
||||
else
|
||||
{
|
||||
log_error("Invalid device address bit size returned by device.\n");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
/* If we have a device checking function, run it */
|
||||
if( ( deviceCheckFn != NULL ) && deviceCheckFn( device ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (num_elements <= 0)
|
||||
num_elements = DEFAULT_NUM_ELEMENTS;
|
||||
|
||||
// On most platforms which support denorm, default is FTZ off. However,
|
||||
// on some hardware where the reference is computed, default might be flush denorms to zero e.g. arm.
|
||||
// This creates issues in result verification. Since spec allows the implementation to either flush or
|
||||
// not flush denorms to zero, an implementation may choose not be flush i.e. return denorm result whereas
|
||||
// reference result may be zero (flushed denorm). Hence we need to disable denorm flushing on host side
|
||||
// where reference is being computed to make sure we get non-flushed reference result. If implementation
|
||||
// returns flushed result, we correctly take care of that in verification code.
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
FPU_mode_type oldMode;
|
||||
DisableFTZ( &oldMode );
|
||||
#endif
|
||||
|
||||
int error = parseAndCallCommandLineTests( argc, argv, device, num_fns, fnList, fnNames, forceNoContextCreation, queueProps, num_elements );
|
||||
|
||||
#if defined(__APPLE__) && defined(__arm__)
|
||||
// Restore the old FP mode before leaving.
|
||||
RestoreFPState( &oldMode );
|
||||
#endif
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
|
||||
basefn *fnList, const char *fnNames[],
|
||||
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements )
|
||||
{
|
||||
int ret, argIndex;
|
||||
unsigned int i;
|
||||
int fn_to_test = -1; // initialized to test all.
|
||||
// unsigned int threadSize;
|
||||
char partial[512] = { 0 };
|
||||
|
||||
|
||||
/* Now that we have an environment, go through our arguments and run tests that match each argument */
|
||||
if( argc == 1 )
|
||||
{
|
||||
/* No actual arguments, so just run all tests */
|
||||
ret = callTestFunctions( fnList, num_fns - 1, fnNames,
|
||||
device, forceNoContextCreation, num_elements, -1, NULL, queueProps );
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Go through each argument and use it to process a list of functions to run */
|
||||
ret = 0;
|
||||
for( argIndex = 1; argIndex < argc; argIndex++ )
|
||||
{
|
||||
/* Are we a partial test? */
|
||||
fn_to_test = -1;
|
||||
if( strchr( argv[argIndex], '*' ) != NULL )
|
||||
{
|
||||
/* Yes, store the partial test for later */
|
||||
strcpy( partial, argv[argIndex] );
|
||||
strchr( partial, '*' )[0] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Nope, loop through looking for an exact name match */
|
||||
for (i=0; i<num_fns; i++)
|
||||
{
|
||||
if (strcmp(argv[argIndex], fnNames[i]) == 0)
|
||||
{
|
||||
fn_to_test = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == num_fns)
|
||||
{
|
||||
log_error("invalid test name: %s \n", argv[argIndex]);
|
||||
ret = 1;
|
||||
continue; /* Keep processing other arguments */
|
||||
}
|
||||
else if( ( fn_to_test == (int)num_fns - 1 ) && ( strcmp( fnNames[i], "all" ) == 0 ) )
|
||||
{
|
||||
fn_to_test = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Execute this particular test loop (remember to remove 1 from the function count for the lack of "all" at the end!) */
|
||||
ret += callTestFunctions( fnList, num_fns - 1, fnNames,
|
||||
device, forceNoContextCreation, num_elements,
|
||||
fn_to_test, partial, queueProps );
|
||||
}
|
||||
}
|
||||
|
||||
if (gTestsFailed == 0) {
|
||||
if (gTestsPassed > 1)
|
||||
log_info("PASSED %d of %d tests.\n", gTestsPassed, gTestsPassed);
|
||||
else if (gTestsPassed > 0)
|
||||
log_info("PASSED test.\n");
|
||||
} else if (gTestsFailed > 0) {
|
||||
if (gTestsFailed+gTestsPassed > 1)
|
||||
log_error("FAILED %d of %d tests.\n", gTestsFailed, gTestsFailed+gTestsPassed);
|
||||
else
|
||||
log_error("FAILED test.\n");
|
||||
}
|
||||
|
||||
test_finish();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// The actual function that loops through tests and executes them
|
||||
int callTestFunctions( basefn functionList[], int numFunctions,
|
||||
const char *functionNames[],
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse,
|
||||
int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps )
|
||||
{
|
||||
int numErrors = 0, found = 0, i;
|
||||
|
||||
if( functionIndexToCall >= numFunctions )
|
||||
{
|
||||
log_error( "ERROR: Invalid function index to test!\n" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (functionIndexToCall == -1)
|
||||
{
|
||||
for (i=0; i<numFunctions; i++)
|
||||
{
|
||||
/* If we're matching partial names, skip any that don't match */
|
||||
if( partialName != NULL && strncmp( functionNames[i], partialName, strlen( partialName ) ) != 0 )
|
||||
continue;
|
||||
|
||||
/* Skip any unimplemented tests */
|
||||
if (functionList[i] == 0)
|
||||
{
|
||||
log_info("%s test currently not implemented\n", functionNames[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
found = 1;
|
||||
numErrors += callSingleTestFunction( functionList[i], functionNames[i], deviceToUse, forceNoContextCreation, numElementsToUse, queueProps );
|
||||
}
|
||||
if( found == 0 && partialName != NULL )
|
||||
{
|
||||
log_error( "ERROR: Wildcard test name does not match any tests: %s\n", partialName );
|
||||
return numErrors + 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Run a single test */
|
||||
if (functionList[functionIndexToCall])
|
||||
{
|
||||
numErrors += callSingleTestFunction( functionList[functionIndexToCall], functionNames[functionIndexToCall],
|
||||
deviceToUse, forceNoContextCreation, numElementsToUse, queueProps );
|
||||
}
|
||||
else
|
||||
log_info("%s test currently not implemented\n", functionNames[functionIndexToCall]);
|
||||
}
|
||||
return numErrors;
|
||||
}
|
||||
|
||||
/*
 * Context notification callback handed to clCreateContext() by the harness.
 * Logs the error text supplied by the implementation; the remaining
 * arguments are not used.
 */
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    (void)private_info;
    (void)cb;
    (void)user_data;

    log_info( "%s\n", errinfo );
}
|
||||
|
||||
// Actual function execution
|
||||
int callSingleTestFunction( basefn functionToCall, const char *functionName,
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps )
|
||||
{
|
||||
int numErrors = 0, ret;
|
||||
cl_int error;
|
||||
cl_context context = NULL;
|
||||
cl_command_queue queue = NULL;
|
||||
|
||||
/* Create a context to work with, unless we're told not to */
|
||||
if( !forceNoContextCreation )
|
||||
{
|
||||
context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL, &error );
|
||||
if (!context)
|
||||
{
|
||||
print_error( error, "Unable to create testing context" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
queue = clCreateCommandQueue( context, deviceToUse, queueProps, &error );
|
||||
if( queue == NULL )
|
||||
{
|
||||
print_error( error, "Unable to create testing command queue" );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Run the test and print the result */
|
||||
log_info( "%s...\n", functionName );
|
||||
fflush( stdout );
|
||||
|
||||
ret = functionToCall( deviceToUse, context, queue, numElementsToUse); //test_threaded_function( ptr_basefn_list[i], group, context, num_elements);
|
||||
if( ret == TEST_NOT_IMPLEMENTED )
|
||||
{
|
||||
/* Tests can also let us know they're not implemented yet */
|
||||
log_info("%s test currently not implemented\n\n", functionName);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Print result */
|
||||
if( ret == 0 ) {
|
||||
log_info( "%s passed\n", functionName );
|
||||
gTestsPassed++;
|
||||
}
|
||||
else
|
||||
{
|
||||
numErrors++;
|
||||
log_error( "%s FAILED\n", functionName );
|
||||
gTestsFailed++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Release the context */
|
||||
if( !forceNoContextCreation )
|
||||
{
|
||||
int error = clFinish(queue);
|
||||
if (error) {
|
||||
log_error("clFinish failed: %d", error);
|
||||
numErrors++;
|
||||
}
|
||||
clReleaseCommandQueue( queue );
|
||||
clReleaseContext( context );
|
||||
}
|
||||
|
||||
return numErrors;
|
||||
}
|
||||
|
||||
/*
 * Applies the CL_DEVICE_TYPE environment override to *inOutType and logs
 * which device type is being requested and where the choice came from.
 *
 * Recognised values are gpu, cpu, accelerator, default and the matching
 * CL_DEVICE_TYPE_* spellings.  An unrecognised value leaves *inOutType
 * untouched (the log still says "based on environment variable" because the
 * variable is set).  On Apple platforms, DYLD_LIBRARY_PATH and
 * DYLD_FRAMEWORK_PATH are also reported when set.
 */
void checkDeviceTypeOverride( cl_device_type *inOutType )
{
    /* Check if we are forced to CPU mode */
    char *force_cpu = getenv( "CL_DEVICE_TYPE" );
    if( force_cpu != NULL )
    {
        if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_GPU;
        else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_CPU;
        else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_ACCELERATOR;
        /* "default" is accepted too, matching what the harness itself accepts for this variable. */
        else if( strcmp( force_cpu, "default" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
            *inOutType = CL_DEVICE_TYPE_DEFAULT;
    }

    switch( *inOutType )
    {
        case CL_DEVICE_TYPE_GPU:            log_info( "Requesting GPU device " ); break;
        case CL_DEVICE_TYPE_CPU:            log_info( "Requesting CPU device " ); break;
        case CL_DEVICE_TYPE_ACCELERATOR:    log_info( "Requesting Accelerator device " ); break;
        case CL_DEVICE_TYPE_DEFAULT:        log_info( "Requesting Default device " ); break;
        default: break;
    }
    log_info( force_cpu != NULL ? "based on environment variable\n" : "based on command line\n" );

#if defined( __APPLE__ )
    {
        // report on any unusual library search path indirection
        char *libSearchPath = getenv( "DYLD_LIBRARY_PATH");
        if( libSearchPath )
            log_info( "*** DYLD_LIBRARY_PATH = \"%s\"\n", libSearchPath );

        // report on any unusual framework search path indirection
        // (guard on the framework path itself so NULL is never passed to %s)
        char *frameworkSearchPath = getenv( "DYLD_FRAMEWORK_PATH");
        if( frameworkSearchPath )
            log_info( "*** DYLD_FRAMEWORK_PATH = \"%s\"\n", frameworkSearchPath );
    }
#endif

}
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
/*
 * Portable stand-in for the memset_pattern4() that Apple platforms provide.
 *
 * Fills `bytes` bytes at dest with repeated copies of the 4-byte pattern at
 * src_pattern.  If bytes is not a multiple of 4, the tail receives the
 * leading bytes of the pattern.
 *
 * Copies are done with memcpy so that neither dest nor src_pattern has to be
 * 4-byte aligned and no object is accessed through an incompatible pointer
 * type.
 */
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes )
{
    unsigned char *out = (unsigned char *)dest;
    size_t whole = bytes & ~(size_t)3;   /* largest multiple of 4 <= bytes */
    size_t tail = bytes - whole;
    size_t offset;

    for( offset = 0; offset < whole; offset += 4 )
        memcpy( out + offset, src_pattern, 4 );

    if( tail )
        memcpy( out + whole, src_pattern, tail );
}
|
||||
#endif
|
||||
|
||||
/*
 * Queries CL_DEVICE_TYPE for the given device.
 *
 * On failure an error is logged and the initial value (-1 converted to
 * cl_device_type) is returned unchanged.
 */
extern cl_device_type GetDeviceType( cl_device_id d )
{
    cl_device_type deviceType = -1;

    if( clGetDeviceInfo( d, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ) != CL_SUCCESS )
        log_error( "ERROR: Unable to get device type for device %p\n", d );

    return deviceType;
}
|
||||
|
||||
|
||||
/*
 * Finds a device on the same platform that is different from `device`.
 *
 * Returns:
 *   - another device from the platform's CL_DEVICE_TYPE_ALL list, if any;
 *   - `device` itself when it is the only device on the platform;
 *   - NULL when any OpenCL query or the temporary allocation fails.
 */
cl_device_id GetOpposingDevice( cl_device_id device )
{
    cl_int error;
    cl_device_id *otherDevices;
    cl_uint actualCount;
    cl_platform_id plat;

    // Get the platform of the device to use for getting a list of devices
    error = clGetDeviceInfo( device, CL_DEVICE_PLATFORM, sizeof( plat ), &plat, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get device's platform" );
        return NULL;
    }

    // Get a list of all devices
    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, 0, NULL, &actualCount );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices size" );
        return NULL;
    }
    otherDevices = (cl_device_id *)malloc(actualCount*sizeof(cl_device_id));
    if( otherDevices == NULL )
    {
        print_error( CL_OUT_OF_HOST_MEMORY, "Unable to allocate list of devices" );
        return NULL;
    }
    error = clGetDeviceIDs( plat, CL_DEVICE_TYPE_ALL, actualCount, otherDevices, NULL );
    if( error != CL_SUCCESS )
    {
        print_error( error, "Unable to get list of devices" );
        free(otherDevices);
        return NULL;
    }

    if( actualCount == 1 )
    {
        free(otherDevices);
        return device;    // NULL means error, returning self means we couldn't find another one
    }

    // Loop and just find one that isn't the one we were given
    cl_uint i;
    for( i = 0; i < actualCount; i++ )
    {
        if( otherDevices[ i ] != device )
        {
            /* The queried type is not used for the selection; the query only
               has to succeed for the candidate to be returned. */
            cl_device_type newType;
            error = clGetDeviceInfo( otherDevices[ i ], CL_DEVICE_TYPE, sizeof( newType ), &newType, NULL );
            if( error != CL_SUCCESS )
            {
                print_error( error, "Unable to get device type for other device" );
                free(otherDevices);
                return NULL;
            }
            cl_device_id result = otherDevices[ i ];
            free(otherDevices);
            return result;
        }
    }

    // Should never get here
    free(otherDevices);
    return NULL;
}
|
||||
|
||||
|
||||
104
test_common/harness/testHarness.h
Normal file
104
test_common/harness/testHarness.h
Normal file
@@ -0,0 +1,104 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testHarness_h
|
||||
#define _testHarness_h
|
||||
|
||||
#include "threadTesting.h"
|
||||
#include "clImageHelper.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern cl_uint gReSeed;
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
|
||||
// setup work, and then call each function in turn as dictated by the passed arguments.
|
||||
extern int runTestHarness( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
|
||||
|
||||
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than CL_SUCCESS (0), the harness exits.
|
||||
typedef int (*DeviceCheckFn)( cl_device_id device );
|
||||
|
||||
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
|
||||
extern int runTestHarnessWithCheck( int argc, const char *argv[], unsigned int num_fns,
|
||||
basefn fnList[], const char *fnNames[],
|
||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps, DeviceCheckFn deviceCheckFn );
|
||||
|
||||
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
|
||||
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, unsigned int num_fns,
|
||||
basefn *fnList, const char *fnNames[],
|
||||
int forceNoContextCreation, cl_command_queue_properties queueProps, int num_elements );
|
||||
|
||||
// Call this function if you need to do all the setup work yourself, and just need the function list called/
|
||||
// managed.
|
||||
// functionIndexToCall can be a valid index into the function list, or -1 to run all of them.
|
||||
// partialName can be a string to partially match function names against and only execute functions who
|
||||
// match, or NULL to not restrict execution (ignored if functionIndexToCall is not -1)
|
||||
// functionList is the actual array of functions
|
||||
// numFunctions is the number of functions in the list (which should NOT have NULL at the end for "all")
|
||||
// functionNames is an array of strings representing the name of each function, to be used in partial matching
|
||||
// queueProps are the properties used to create the testing command queue for each test
|
||||
// deviceToUse and numElementsToUse are just passed to each test function
|
||||
|
||||
extern int callTestFunctions( basefn functionList[], int numFunctions,
|
||||
const char *functionNames[],
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse,
|
||||
int functionIndexToCall, const char *partialName, cl_command_queue_properties queueProps );
|
||||
|
||||
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
|
||||
extern int callSingleTestFunction( basefn functionToCall, const char *functionName,
|
||||
cl_device_id deviceToUse, int forceNoContextCreation,
|
||||
int numElementsToUse, cl_command_queue_properties queueProps );
|
||||
|
||||
///// Miscellaneous steps
|
||||
|
||||
// Given a pre-existing device type choice, check the environment for an override, then print what
|
||||
// choice was made and how (and return the overridden choice, if there is one)
|
||||
extern void checkDeviceTypeOverride( cl_device_type *inOutType );
|
||||
|
||||
// standard callback function for context pfn_notify
|
||||
extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
|
||||
|
||||
extern cl_device_type GetDeviceType( cl_device_id );
|
||||
|
||||
// Given a device (most likely passed in by the harness, but not required), will attempt to find
|
||||
// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
|
||||
// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
|
||||
// is the only device available, the SAME device is returned, so check!
|
||||
extern cl_device_id GetOpposingDevice( cl_device_id device );
|
||||
|
||||
|
||||
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
|
||||
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
|
||||
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
|
||||
extern int gHasLong;            // This is set to 1 if the device supports long and ulong types in OpenCL C.
|
||||
extern int gIsOpenCL_C_1_0_Device; // This is set to 1 if the device supports only OpenCL C 1.0.
|
||||
|
||||
#if ! defined( __APPLE__ )
|
||||
void memset_pattern4(void *, const void *, size_t);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _testHarness_h
|
||||
|
||||
|
||||
51
test_common/harness/test_mt19937.c
Normal file
51
test_common/harness/test_mt19937.c
Normal file
@@ -0,0 +1,51 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "mt19937.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main( void )
|
||||
{
|
||||
MTdata d = init_genrand(42);
|
||||
int i;
|
||||
const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
|
||||
0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
|
||||
0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
|
||||
0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
|
||||
int errcount = 0;
|
||||
|
||||
for( i = 0; i < 65536; i++ )
|
||||
{
|
||||
cl_uint u = genrand_int32( d );
|
||||
if( 0 == (i & 4095) )
|
||||
{
|
||||
if( u != reference[i>>12] )
|
||||
{
|
||||
printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n", reference[i>>12], i, u );
|
||||
errcount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free_mtdata(d);
|
||||
|
||||
if( errcount )
|
||||
printf("mt19937 test failed.\n");
|
||||
else
|
||||
printf("mt19937 test passed.\n");
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
106
test_common/harness/threadTesting.c
Normal file
106
test_common/harness/threadTesting.c
Normal file
@@ -0,0 +1,106 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "threadTesting.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
#if 0 // Disabled for now
|
||||
|
||||
typedef struct
|
||||
{
|
||||
basefn mFunction;
|
||||
cl_device_id mDevice;
|
||||
cl_context mContext;
|
||||
int mNumElements;
|
||||
} TestFnArgs;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Thread-based testing. Spawns a new thread to run the given test function,
|
||||
// then waits for it to complete. The entire idea is that, if the thread crashes,
|
||||
// we can catch it and report it as a failure instead of crashing the entire suite
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void *test_thread_wrapper( void *data )
|
||||
{
|
||||
TestFnArgs *args;
|
||||
int retVal;
|
||||
cl_context context;
|
||||
|
||||
args = (TestFnArgs *)data;
|
||||
|
||||
/* Create a new context to use (contexts can't cross threads) */
|
||||
context = clCreateContext(NULL, args->mDeviceGroup);
|
||||
if( context == NULL )
|
||||
{
|
||||
log_error("clCreateContext failed for new thread\n");
|
||||
return (void *)(-1);
|
||||
}
|
||||
|
||||
/* Call function */
|
||||
retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements );
|
||||
|
||||
clReleaseContext( context );
|
||||
|
||||
return (void *)retVal;
|
||||
}
|
||||
|
||||
/*
 * Runs fnToTest on a freshly spawned thread and waits for it to finish.
 *
 * fnToTest    - test function to run.
 * device      - device forwarded to the test function.
 * context     - stored in the argument bundle; the worker thread builds its own
 *               context rather than using this one.
 * queue       - not used by this function.
 * numElements - element count forwarded to the test function.
 *
 * Returns the value produced by the worker thread, or -1 if the thread could
 * not be created or joined.
 */
int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
{
    int error;
    pthread_t threadHdl;
    void *retVal;
    TestFnArgs args;

    (void)queue;

    /* args lives on this stack frame; that is safe because we join before returning */
    args.mFunction = fnToTest;
    args.mDevice = device;
    args.mContext = context;
    args.mNumElements = numElements;

    error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args );
    if( error != 0 )
    {
        log_error( "ERROR: Unable to create thread for testing!\n" );
        return -1;
    }

    /* Thread has been started, now just wait for it to complete (or crash) */
    error = pthread_join( threadHdl, &retVal );
    if( error != 0 )
    {
        log_error( "ERROR: Unable to join testing thread!\n" );
        return -1;
    }

    return (int)((intptr_t)retVal);
}
|
||||
#endif
|
||||
|
||||
|
||||
32
test_common/harness/threadTesting.h
Normal file
32
test_common/harness/threadTesting.h
Normal file
@@ -0,0 +1,32 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _threadTesting_h
|
||||
#define _threadTesting_h
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
|
||||
#define TEST_NOT_IMPLEMENTED -99
|
||||
|
||||
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
|
||||
|
||||
#endif // _threadTesting_h
|
||||
|
||||
|
||||
481
test_common/harness/typeWrappers.cpp
Normal file
481
test_common/harness/typeWrappers.cpp
Normal file
@@ -0,0 +1,481 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "typeWrappers.h"
|
||||
#include "kernelHelpers.h"
|
||||
#include "errorHelpers.h"
|
||||
#include <stdlib.h>
|
||||
#include "clImageHelper.h"
|
||||
|
||||
#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#define kPageSize 4096
|
||||
#include <sys/mman.h>
|
||||
#include <stdlib.h>
|
||||
#elif defined(__linux__)
|
||||
#include <unistd.h>
|
||||
#define kPageSize (getpagesize())
|
||||
#endif
|
||||
|
||||
// Constructs a 1D protected image.
// The members are nulled before Create() runs so that the destructor never sees
// indeterminate handles if Create() returns before assigning them (the test_error
// checks in Create() appear to return early on failure -- confirm against errorHelpers.h).
// The status from Create() is stored in *errcode_ret when errcode_ret is non-NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, mem_flags, fmt, width );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
// Creates a 1D image, guard-paged where the generic Create() supports it.
// This used to carry its own copy of the guard-page logic; it now forwards to the
// image-type-aware overload, which performs the same steps for
// CL_MEM_OBJECT_IMAGE1D (one image row, four guard rows before and after).
// The unused height/depth/arraySize arguments are passed as 0.
// Side effect of sharing the code: the one-time "CL_ALIGN_RIGHT is set" log line
// is now emitted once for all overloads instead of once per overload.
// Returns the OpenCL status of the creation.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
{
    return Create( context, CL_MEM_OBJECT_IMAGE1D, mem_flags, fmt, width, 0, 0, 0 );
}
|
||||
|
||||
|
||||
// Constructs a 2D protected image.
// The members are nulled before Create() runs so that the destructor never sees
// indeterminate handles if Create() returns before assigning them (the test_error
// checks in Create() appear to return early on failure -- confirm against errorHelpers.h).
// The status from Create() is stored in *errcode_ret when errcode_ret is non-NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, mem_flags, fmt, width, height );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
// Creates a 2D image, guard-paged where the generic Create() supports it.
// This used to carry its own copy of the guard-page logic; it now forwards to the
// image-type-aware overload, which performs the same steps for
// CL_MEM_OBJECT_IMAGE2D (height image rows, four guard rows before and after).
// The unused depth/arraySize arguments are passed as 0.
// Side effect of sharing the code: the one-time "CL_ALIGN_RIGHT is set" log line
// is now emitted once for all overloads instead of once per overload.
// Returns the OpenCL status of the creation.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
{
    return Create( context, CL_MEM_OBJECT_IMAGE2D, mem_flags, fmt, width, height, 0, 0 );
}
|
||||
|
||||
// Constructs a 3D protected image.
// The members are nulled before Create() runs so that the destructor never sees
// indeterminate handles if Create() returns before assigning them (the test_error
// checks in Create() appear to return early on failure -- confirm against errorHelpers.h).
// The status from Create() is stored in *errcode_ret when errcode_ret is non-NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, mem_flags, fmt, width, height, depth );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
// Creates a 3D image, guard-paged where the generic Create() supports it.
// This used to carry its own copy of the guard-page logic; it now forwards to the
// image-type-aware overload, which performs the same steps for
// CL_MEM_OBJECT_IMAGE3D (height * depth image rows, four guard rows before and after).
// The unused arraySize argument is passed as 0.
// Side effect of sharing the code: the one-time "CL_ALIGN_RIGHT is set" log line
// is now emitted once for all overloads instead of once per overload.
// Returns the OpenCL status of the creation.
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
{
    return Create( context, CL_MEM_OBJECT_IMAGE3D, mem_flags, fmt, width, height, depth, 0 );
}
|
||||
|
||||
|
||||
// Constructs a protected image of the given image type.
// The members are nulled before Create() runs so that the destructor never sees
// indeterminate handles if Create() returns before assigning them (the test_error
// checks in Create() appear to return early on failure -- confirm against errorHelpers.h).
// The status from Create() is stored in *errcode_ret when errcode_ret is non-NULL.
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
    : backingStore( NULL ), backingStoreSize( 0 ), image( NULL )
{
    cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
    if( errcode_ret != NULL )
        *errcode_ret = err;
}
|
||||
|
||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
|
||||
{
|
||||
cl_int error;
|
||||
#if defined( __APPLE__ )
|
||||
int protect_pages = 1;
|
||||
cl_device_id devices[16];
|
||||
size_t number_of_devices;
|
||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||
|
||||
number_of_devices /= sizeof(cl_device_id);
|
||||
for (int i=0; i<(int)number_of_devices; i++) {
|
||||
cl_device_type type;
|
||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||
if (type == CL_DEVICE_TYPE_GPU) {
|
||||
protect_pages = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (protect_pages) {
|
||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
||||
size_t rowStride = rowBytes + kPageSize;
|
||||
|
||||
// create backing store
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
backingStoreSize = rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
backingStoreSize = height * rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
backingStoreSize = height * depth * rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
backingStoreSize = arraySize * rowStride + 8 * rowStride;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
|
||||
break;
|
||||
}
|
||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
// add guard pages
|
||||
size_t row;
|
||||
char *p = (char*) backingStore;
|
||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p += rowBytes;
|
||||
size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
|
||||
for( row = 0; row < sz; row++ )
|
||||
{
|
||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
p -= rowBytes;
|
||||
for( row = 0; row < 4; row++ )
|
||||
{
|
||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
||||
}
|
||||
|
||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
||||
{
|
||||
static int spewEnv = 1;
|
||||
if(spewEnv)
|
||||
{
|
||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
||||
spewEnv = 0;
|
||||
}
|
||||
imagePtr += rowBytes - pixelBytes * width;
|
||||
}
|
||||
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
backingStore = NULL;
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
|
||||
backingStore = NULL;
|
||||
switch (imageType)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*******
|
||||
* clProtectedArray implementation
|
||||
*******/
|
||||
// Builds an empty array object: both buffer pointers start out NULL and no
// memory is reserved until Allocate() is called.
clProtectedArray::clProtectedArray()
    : mBuffer( NULL ), mValidBuffer( NULL )
{
}
|
||||
|
||||
// Builds the array object and immediately reserves sizeInBytes bytes via Allocate().
clProtectedArray::clProtectedArray( size_t sizeInBytes )
    : mBuffer( NULL ), mValidBuffer( NULL )
{
    Allocate( sizeInBytes );
}
|
||||
|
||||
// Releases the buffer, if any: munmap over the whole mapping on __APPLE__
// builds (a failure is only logged), free() everywhere else.
clProtectedArray::~clProtectedArray()
{
    if( mBuffer == NULL )
        return;

#if defined( __APPLE__ )
    if( munmap( mBuffer, mRealSize ) )
        log_error("WARNING: munmap failed in clProtectedArray.\n");
#else
    free( mBuffer );
#endif
}
|
||||
|
||||
void clProtectedArray::Allocate( size_t sizeInBytes )
|
||||
{
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
|
||||
// Allocate enough space to: round up our actual allocation to an even number of pages
|
||||
// and allocate two pages on either side
|
||||
mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
|
||||
mRealSize = mRoundedSize + kPageSize * 2;
|
||||
|
||||
// Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
|
||||
mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||
|
||||
mValidBuffer = mBuffer + kPageSize;
|
||||
|
||||
// Protect guard area from access
|
||||
mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
|
||||
mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
|
||||
#else
|
||||
mRoundedSize = mRealSize = sizeInBytes;
|
||||
mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
333
test_common/harness/typeWrappers.h
Normal file
333
test_common/harness/typeWrappers.h
Normal file
@@ -0,0 +1,333 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _typeWrappers_h
|
||||
#define _typeWrappers_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "compat.h"
|
||||
#include <stdio.h>
|
||||
#include "mt19937.h"
|
||||
#include "errorHelpers.h"
|
||||
#include "kernelHelpers.h"
|
||||
|
||||
extern "C" cl_uint gReSeed;
|
||||
extern "C" cl_uint gRandomSeed;
|
||||
|
||||
/* cl_context wrapper */
|
||||
|
||||
class clContextWrapper
|
||||
{
|
||||
public:
|
||||
clContextWrapper() { mContext = NULL; }
|
||||
clContextWrapper( cl_context program ) { mContext = program; }
|
||||
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
|
||||
|
||||
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
|
||||
operator cl_context() { return mContext; }
|
||||
|
||||
cl_context * operator&() { return &mContext; }
|
||||
|
||||
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_context mContext;
|
||||
};
|
||||
|
||||
/* cl_program wrapper */
|
||||
|
||||
class clProgramWrapper
|
||||
{
|
||||
public:
|
||||
clProgramWrapper() { mProgram = NULL; }
|
||||
clProgramWrapper( cl_program program ) { mProgram = program; }
|
||||
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
|
||||
|
||||
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
|
||||
operator cl_program() { return mProgram; }
|
||||
|
||||
cl_program * operator&() { return &mProgram; }
|
||||
|
||||
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_program mProgram;
|
||||
};
|
||||
|
||||
/* cl_kernel wrapper */
|
||||
|
||||
class clKernelWrapper
|
||||
{
|
||||
public:
|
||||
clKernelWrapper() { mKernel = NULL; }
|
||||
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
|
||||
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
|
||||
|
||||
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
|
||||
operator cl_kernel() { return mKernel; }
|
||||
|
||||
cl_kernel * operator&() { return &mKernel; }
|
||||
|
||||
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_kernel mKernel;
|
||||
};
|
||||
|
||||
/* cl_mem (stream) wrapper */
|
||||
|
||||
class clMemWrapper
|
||||
{
|
||||
public:
|
||||
clMemWrapper() { mMem = NULL; }
|
||||
clMemWrapper( cl_mem mem ) { mMem = mem; }
|
||||
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
|
||||
|
||||
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_mem() { return mMem; }
|
||||
|
||||
cl_mem * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_mem mMem;
|
||||
};
|
||||
|
||||
class clProtectedImage
|
||||
{
|
||||
public:
|
||||
clProtectedImage() { image = NULL; backingStore = NULL; }
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
|
||||
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
|
||||
~clProtectedImage()
|
||||
{
|
||||
if( image != NULL )
|
||||
clReleaseMemObject( image );
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
if(backingStore)
|
||||
munmap(backingStore, backingStoreSize);
|
||||
#endif
|
||||
}
|
||||
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
|
||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
|
||||
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
|
||||
|
||||
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
|
||||
operator cl_mem() { return image; }
|
||||
|
||||
cl_mem * operator&() { return ℑ }
|
||||
|
||||
bool operator==( const cl_mem &rhs ) { return image == rhs; }
|
||||
|
||||
protected:
|
||||
void *backingStore;
|
||||
size_t backingStoreSize;
|
||||
cl_mem image;
|
||||
};
|
||||
|
||||
/* cl_command_queue wrapper */
|
||||
|
||||
class clCommandQueueWrapper
|
||||
{
|
||||
public:
|
||||
clCommandQueueWrapper() { mMem = NULL; }
|
||||
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
|
||||
~clCommandQueueWrapper() { if( mMem != NULL ) {int error = clFinish(mMem); if (error) print_error(error, "clFinish failed"); clReleaseCommandQueue( mMem );} }
|
||||
|
||||
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_command_queue() { return mMem; }
|
||||
|
||||
cl_command_queue * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_command_queue mMem;
|
||||
};
|
||||
|
||||
/* cl_sampler wrapper */
|
||||
class clSamplerWrapper
|
||||
{
|
||||
public:
|
||||
clSamplerWrapper() { mMem = NULL; }
|
||||
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
|
||||
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
|
||||
|
||||
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_sampler() { return mMem; }
|
||||
|
||||
cl_sampler * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_sampler mMem;
|
||||
};
|
||||
|
||||
/* cl_event wrapper */
|
||||
class clEventWrapper
|
||||
{
|
||||
public:
|
||||
clEventWrapper() { mMem = NULL; }
|
||||
clEventWrapper( cl_event mem ) { mMem = mem; }
|
||||
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
|
||||
|
||||
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
|
||||
operator cl_event() { return mMem; }
|
||||
|
||||
cl_event * operator&() { return &mMem; }
|
||||
|
||||
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
|
||||
|
||||
protected:
|
||||
|
||||
cl_event mMem;
|
||||
};
|
||||
|
||||
/* Generic protected memory buffer, for verifying access within bounds */
|
||||
class clProtectedArray
{
    public:
        clProtectedArray();
        clProtectedArray( size_t sizeInBytes );
        virtual ~clProtectedArray();

        // (Re)allocates the array for the given number of bytes; the
        // implementation lives outside this header.
        void    Allocate( size_t sizeInBytes );

        // Both conversions expose mValidBuffer (not mBuffer) to callers.
        operator void *()               { return mValidBuffer; }
        operator const void *() const   { return mValidBuffer; }

    protected:

        char *  mBuffer;        // NOTE(review): presumably the start of the whole allocation including guard area -- confirm in typeWrappers.cpp
        char *  mValidBuffer;   // Pointer handed out through the conversion operators.
        size_t  mRealSize;      // NOTE(review): presumably the requested size in bytes -- confirm
        size_t  mRoundedSize;   // NOTE(review): presumably the size after rounding up -- confirm
};
|
||||
|
||||
class RandomSeed
|
||||
{
|
||||
public:
|
||||
RandomSeed( cl_uint seed ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
|
||||
~RandomSeed()
|
||||
{
|
||||
if( gReSeed )
|
||||
gRandomSeed = genrand_int32( mtData );
|
||||
free_mtdata(mtData);
|
||||
}
|
||||
|
||||
operator MTdata () {return mtData;}
|
||||
|
||||
protected:
|
||||
MTdata mtData;
|
||||
};
|
||||
|
||||
template <typename T> class BufferOwningPtr
|
||||
{
|
||||
BufferOwningPtr(BufferOwningPtr const &); // do not implement
|
||||
void operator=(BufferOwningPtr const &); // do not implement
|
||||
|
||||
void *ptr;
|
||||
void *map;
|
||||
size_t mapsize; // Bytes allocated total, pointed to by map.
|
||||
size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
|
||||
bool aligned;
|
||||
public:
|
||||
explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
|
||||
explicit BufferOwningPtr(void *p, void *m, size_t s)
|
||||
: ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
|
||||
{
|
||||
#if ! defined( __APPLE__ )
|
||||
if(m)
|
||||
{
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
~BufferOwningPtr() {
|
||||
if (map) {
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap(map, mapsize);
|
||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
|
||||
if (map){
|
||||
#if defined( __APPLE__ )
|
||||
int error = munmap(map, mapsize);
|
||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||
#else
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
|
||||
abort();
|
||||
#endif
|
||||
} else {
|
||||
if ( aligned )
|
||||
{
|
||||
align_free(ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
ptr = p;
|
||||
map = m;
|
||||
mapsize = mapsize_;
|
||||
allocsize = allocsize_;
|
||||
aligned = aligned_;
|
||||
#if ! defined( __APPLE__ )
|
||||
if(m)
|
||||
{
|
||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
||||
abort();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
operator T*() { return (T*)ptr; }
|
||||
|
||||
size_t getSize() const { return allocsize; };
|
||||
};
|
||||
|
||||
#endif // _typeWrappers_h
|
||||
|
||||
|
||||
27
test_conformance/CMakeLists.txt
Normal file
27
test_conformance/CMakeLists.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
add_subdirectory(allocations)
|
||||
add_subdirectory(api)
|
||||
add_subdirectory(atomics)
|
||||
add_subdirectory(basic)
|
||||
add_subdirectory(buffers)
|
||||
add_subdirectory(commonfns)
|
||||
add_subdirectory(compiler)
|
||||
add_subdirectory(computeinfo)
|
||||
add_subdirectory(gl)
|
||||
add_subdirectory(conversions)
|
||||
add_subdirectory(contractions)
|
||||
add_subdirectory(events)
|
||||
add_subdirectory(geometrics)
|
||||
add_subdirectory(half)
|
||||
add_subdirectory(headers)
|
||||
add_subdirectory(headers_c99)
|
||||
add_subdirectory(images)
|
||||
add_subdirectory(implicit_convert)
|
||||
add_subdirectory(integer_ops)
|
||||
add_subdirectory(math_brute_force)
|
||||
add_subdirectory(multiple_device_context)
|
||||
add_subdirectory(profiling)
|
||||
add_subdirectory(relationals)
|
||||
add_subdirectory(select)
|
||||
add_subdirectory(thread_dimensions)
|
||||
add_subdirectory(vec_align)
|
||||
add_subdirectory(vec_step)
|
||||
24
test_conformance/Jamfile
Normal file
24
test_conformance/Jamfile
Normal file
@@ -0,0 +1,24 @@
|
||||
project
|
||||
: requirements
|
||||
<library>/harness//harness
|
||||
<warnings>off
|
||||
;
|
||||
|
||||
use-project /harness : ../test_common/harness ;
|
||||
|
||||
proj_lst = allocations api atomics basic buffers commonfns compiler
|
||||
computeinfo contractions conversions events geometrics gl
|
||||
half images integer_ops math_brute_force multiple_device_context
|
||||
profiling relationals select thread_dimensions ;
|
||||
|
||||
for proj in $(proj_lst)
|
||||
{
|
||||
build-project $(proj) ;
|
||||
}
|
||||
|
||||
install data
|
||||
: [ glob *.csv ] [ glob *.py ]
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance
|
||||
;
|
||||
|
||||
53
test_conformance/Makefile
Normal file
53
test_conformance/Makefile
Normal file
@@ -0,0 +1,53 @@
|
||||
|
||||
PRODUCTS = \
|
||||
allocations/ \
|
||||
api/ \
|
||||
atomics/ \
|
||||
basic/ \
|
||||
buffers/ \
|
||||
commonfns/ \
|
||||
compiler/ \
|
||||
computeinfo/ \
|
||||
contractions/ \
|
||||
conversions/ \
|
||||
device_partition/ \
|
||||
events/ \
|
||||
geometrics/ \
|
||||
gl/ \
|
||||
half/ \
|
||||
headers/ \
|
||||
images/ \
|
||||
integer_ops/ \
|
||||
math_brute_force/ \
|
||||
mem_host_flags/ \
|
||||
multiple_device_context/ \
|
||||
printf/ \
|
||||
profiling/ \
|
||||
relationals/ \
|
||||
select/ \
|
||||
thread_dimensions/ \
|
||||
vec_align/ \
|
||||
vec_step/
|
||||
|
||||
|
||||
TOP=$(shell pwd)
|
||||
|
||||
all: $(PRODUCTS)
|
||||
|
||||
# Run "clean" in every product directory. $(MAKE) is used instead of a bare
# "make" so that command-line flags and the jobserver reach the sub-makes.
clean:
	@for testdir in $(dir $(PRODUCTS)) ; \
	do ( \
	echo "==================================================================================" ; \
	echo "Cleaning $$testdir" ; \
	echo "==================================================================================" ; \
	cd $$testdir && $(MAKE) clean \
	); \
	done

# Build one product directory; -i keeps going past failing commands in the
# sub-make, as before.
$(PRODUCTS):
	@echo "==================================================================================" ;
	@echo "(`date "+%H:%M:%S"`) Make $@" ;
	@echo "==================================================================================" ;
	cd $(dir $@) && $(MAKE) -i
|
||||
|
||||
.PHONY: clean $(PRODUCTS) all
|
||||
30
test_conformance/allocations/CMakeLists.txt
Normal file
30
test_conformance/allocations/CMakeLists.txt
Normal file
@@ -0,0 +1,30 @@
|
||||
add_executable(conformance_test_allocations
|
||||
main.cpp
|
||||
allocation_execute.cpp
|
||||
allocation_fill.cpp
|
||||
allocation_functions.cpp
|
||||
allocation_utils.cpp
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/threadTesting.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/typeWrappers.cpp
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/msvc9.c)
|
||||
|
||||
set_source_files_properties(
|
||||
main.cpp
|
||||
allocation_execute.cpp
|
||||
allocation_fill.cpp
|
||||
allocation_functions.cpp
|
||||
allocation_utils.cpp
|
||||
../../test_common/harness/errorHelpers.c
|
||||
../../test_common/harness/threadTesting.c
|
||||
../../test_common/harness/kernelHelpers.c
|
||||
../../test_common/harness/testHarness.c
|
||||
../../test_common/harness/mt19937.c
|
||||
../../test_common/harness/msvc9.c
|
||||
PROPERTIES LANGUAGE CXX)
|
||||
|
||||
TARGET_LINK_LIBRARIES(conformance_test_allocations
|
||||
${CLConform_LIBRARIES})
|
||||
19
test_conformance/allocations/Jamfile
Normal file
19
test_conformance/allocations/Jamfile
Normal file
@@ -0,0 +1,19 @@
|
||||
project
|
||||
: requirements
|
||||
# <toolset>gcc:<cflags>-xc++
|
||||
# <toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
exe test_allocations
|
||||
: allocation_execute.cpp
|
||||
allocation_fill.cpp
|
||||
allocation_functions.cpp
|
||||
allocation_utils.cpp
|
||||
main.cpp
|
||||
;
|
||||
|
||||
install dist
|
||||
: test_allocations
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/allocations
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/allocations
|
||||
;
|
||||
46
test_conformance/allocations/Makefile
Normal file
46
test_conformance/allocations/Makefile
Normal file
@@ -0,0 +1,46 @@
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
SRCS = main.cpp \
|
||||
allocation_functions.cpp \
|
||||
allocation_fill.cpp \
|
||||
allocation_utils.cpp \
|
||||
allocation_execute.cpp \
|
||||
../../test_common/harness/errorHelpers.c \
|
||||
../../test_common/harness/threadTesting.c \
|
||||
../../test_common/harness/kernelHelpers.c \
|
||||
../../test_common/harness/testHarness.c \
|
||||
../../test_common/harness/mt19937.c \
|
||||
../../test_common/harness/typeWrappers.cpp
|
||||
|
||||
DEFINES = DONT_TEST_GARBAGE_POINTERS
|
||||
|
||||
SOURCES = $(abspath $(SRCS))
|
||||
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||
LIBPATH += -L.
|
||||
FRAMEWORK = $(SOURCES)
|
||||
HEADERS =
|
||||
TARGET = test_allocations
|
||||
INCLUDE =
|
||||
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32 -Os
|
||||
CC = c++
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||
|
||||
OBJECTS := ${SOURCES:.c=.o}
|
||||
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||
|
||||
TARGETOBJECT =
|
||||
all: $(TARGET)
|
||||
|
||||
$(TARGET): $(OBJECTS)
|
||||
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
|
||||
|
||||
clean:
|
||||
rm -f $(TARGET) $(OBJECTS)
|
||||
|
||||
.DEFAULT:
|
||||
@echo The target \"$@\" does not exist in Makefile.
|
||||
333
test_conformance/allocations/allocation_execute.cpp
Normal file
333
test_conformance/allocations/allocation_execute.cpp
Normal file
@@ -0,0 +1,333 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_execute.h"
|
||||
#include "allocation_functions.h"
|
||||
|
||||
|
||||
// Kernel source template for the buffer tests.
// First %s: the generated buffer argument list; second %s: the per-buffer
// accumulation statements placed inside the loop.
const char *buffer_kernel_pattern =
    "__kernel void sample_test(%s __global uint *result, __global uint *array_sizes, uint per_item)\n"
    "{\n"
    "\tint tid = get_global_id(0);\n"
    "\tuint r = 0;\n"
    "\tulong i;\n"
    "\tfor(i=tid*per_item; i<(1+tid)*per_item; i++) {\n"
    "%s"
    "\t}\n"
    "\tresult[tid] = r;\n"
    "}\n";

// Kernel source template for the image tests.
// First %s: the generated image argument list; second %s: a declaration block
// (sampler_pattern or offset_pattern); third %s: the per-image access code.
const char *image_kernel_pattern =
    "__kernel void sample_test(%s __global uint *result)\n"
    "{\n"
    "\tuint4 color;\n"
    "\tcolor = (uint4)(0);\n"
    "%s"
    "\tint x, y;\n"
    "%s"
    "\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n"
    "}\n";

// Per-image read code. The three %d take the image index; the %s takes a
// literal "%" so the generated source contains a modulo operator.
const char *read_pattern =
    "\tfor(y=0; y<get_image_height(image%d); y++)\n"
    "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
    "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
    "\t\t\t\tcolor += read_imageui(image%d, sampler, (int2)(x,y));\n"
    "\t\t\t}\n";

// Declaration used by the image-write kernel: the per-component offset added
// to every written pixel.
const char *offset_pattern =
    "\tconst uint4 offset = (uint4)(0,1,2,3);\n";

// Declaration used by the image-read kernel: the sampler passed to read_imageui.
const char *sampler_pattern =
    "\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n";

// Per-image write code. Same placeholders as read_pattern; each pixel is set
// to x*y plus the per-component offset.
const char *write_pattern =
    "\tfor(y=0; y<get_image_height(image%d); y++)\n"
    "\t\tif (y %s get_global_size(0) == get_global_id(0))\n"
    "\t\t\tfor (x=0; x<get_image_width(image%d); x++) {\n"
    "\t\t\t\tcolor = (uint4)x*(uint4)y+offset;\n"
    "\t\t\t\twrite_imageui(image%d, (int2)(x,y), color);\n"
    "\t\t\t}\n"
    "\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
|
||||
|
||||
|
||||
/*
 * Reads a 2D image back one row at a time and checks that every pixel holds
 * the pattern produced by the image-write kernel: component j of pixel (x, y)
 * must equal x*y + j.
 *
 * queue - command queue used for the blocking row reads.
 * mem   - the image to verify; must be a CL_MEM_OBJECT_IMAGE2D.
 *
 * Returns 0 when the whole image matches. Returns -1 when the object is not a
 * 2D image, when an info query fails, or on the first mismatching component;
 * returns FAILED_ABORT when the host row buffer cannot be allocated; returns
 * the OpenCL error code when a row read fails.
 */
int check_image(cl_command_queue queue, cl_mem mem) {
    int error;
    cl_mem_object_type type;
    size_t width = 0, height = 0;
    size_t origin[3], region[3], x, j;
    cl_uint *data;

    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
    if (error) {
        print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
        return -1;
    }

    if (type == CL_MEM_OBJECT_BUFFER) {
        log_error("Expected image object, not buffer.\n");
        return -1;
    }
    if (type != CL_MEM_OBJECT_IMAGE2D) {
        // Only 2D images are handled below; anything else would leave
        // width/height unset.
        log_error("Expected 2D image object.\n");
        return -1;
    }

    error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
    if (error) {
        print_error(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
        return -1;
    }
    error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
    if (error) {
        print_error(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
        return -1;
    }

    // One row of pixels, four cl_uint components per pixel.
    data = (cl_uint*)malloc(width*4*sizeof(cl_uint));
    if (data == NULL) {
        log_error("Failed to malloc host buffer for writing into image.\n");
        return FAILED_ABORT;
    }

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;
    region[0] = width;
    region[1] = 1;
    region[2] = 1;
    for (origin[1] = 0; origin[1] < height; origin[1]++) {
        error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
        if (error) {
            print_error(error, "clEnqueueReadImage failed");
            free(data);
            return error;
        }

        for (x=0; x<width; x++) {
            for (j=0; j<4; j++) {
                cl_uint expected = (cl_uint)(x*origin[1]+j);
                if (data[x*4+j] != expected) {
                    log_error("Pixel %d, %d, component %d, expected %u, got %u.\n",
                              (int)x, (int)origin[1], (int)j, expected, data[x*4+j]);
                    // Release the row buffer on the mismatch path as well.
                    free(data);
                    return -1;
                }
            }
        }
    }

    free(data);
    return 0;
}
|
||||
|
||||
|
||||
#define NUM_OF_WORK_ITEMS 8192*2
|
||||
|
||||
int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {
|
||||
|
||||
char *argument_string;
|
||||
char *access_string;
|
||||
char *kernel_string;
|
||||
int i, error, result;
|
||||
clKernelWrapper kernel;
|
||||
clProgramWrapper program;
|
||||
clMemWrapper result_mem;
|
||||
char *ptr;
|
||||
size_t global_dims[3];
|
||||
cl_ulong per_item;
|
||||
cl_uint per_item_uint;
|
||||
cl_uint returned_results[NUM_OF_WORK_ITEMS], final_result;
|
||||
clEventWrapper event;
|
||||
cl_int event_status;
|
||||
|
||||
// Allocate memory for the kernel source
|
||||
argument_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*64);
|
||||
access_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10));
|
||||
kernel_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10+64)+1024);
|
||||
argument_string[0] = '\0';
|
||||
access_string[0] = '\0';
|
||||
kernel_string[0] = '\0';
|
||||
|
||||
// Zero the results.
|
||||
for (i=0; i<NUM_OF_WORK_ITEMS; i++)
|
||||
returned_results[i] = 0;
|
||||
|
||||
// Build the kernel source
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " __global uint *buffer%d, ", i);
|
||||
sprintf(access_string + strlen( access_string), "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
|
||||
}
|
||||
sprintf(kernel_string, buffer_kernel_pattern, argument_string, access_string);
|
||||
}
|
||||
else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " read_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen(access_string), read_pattern, i, "%", i, i);
|
||||
}
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string, sampler_pattern, access_string);
|
||||
}
|
||||
else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING) {
|
||||
for(i=0; i<number_of_mems_used; i++) {
|
||||
sprintf(argument_string + strlen(argument_string), " write_only image2d_t image%d, ", i);
|
||||
sprintf(access_string + strlen( access_string), write_pattern, i, "%", i, i);
|
||||
}
|
||||
sprintf(kernel_string, image_kernel_pattern, argument_string, offset_pattern, access_string);
|
||||
}
|
||||
ptr = kernel_string;
|
||||
|
||||
// Create the kernel
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" );
|
||||
|
||||
free(argument_string);
|
||||
free(access_string);
|
||||
free(kernel_string);
|
||||
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tCreate kernel failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
print_error(error, "Create kernel and program failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Set the arguments
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mems[i]);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
}
|
||||
|
||||
// Set the result
|
||||
result_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
// Thread dimensions for execution
|
||||
global_dims[0] = NUM_OF_WORK_ITEMS; global_dims[1] = 1; global_dims[2] = 1;
|
||||
|
||||
// We have extra arguments for the buffer kernel because we need to pass in the buffer sizes
|
||||
cl_uint *sizes = (cl_uint*)malloc(sizeof(cl_uint)*number_of_mems_used);
|
||||
cl_uint max_size = 0;
|
||||
clMemWrapper buffer_sizes;
|
||||
if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
size_t size;
|
||||
error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size), &size, NULL);
|
||||
test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
|
||||
sizes[i] = (cl_uint)(size/sizeof(cl_uint));
|
||||
if (size/sizeof(cl_uint) > max_size)
|
||||
max_size = (cl_uint)(size/sizeof(cl_uint));
|
||||
}
|
||||
buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, sizes, &error);
|
||||
test_error_abort(error, "clCreateBuffer failed");
|
||||
error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
per_item = (cl_ulong)ceil((double)max_size/global_dims[0]);
|
||||
if (per_item > CL_UINT_MAX)
|
||||
log_error("Size is too large for a uint parameter to the kernel. Expect invalid results.\n");
|
||||
per_item_uint = (cl_uint)per_item;
|
||||
error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item_uint), &per_item_uint);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
free(sizes);
|
||||
}
|
||||
|
||||
size_t local_dims[3] = {1,1,1};
|
||||
error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]);
|
||||
test_error(error, "get_max_common_work_group_size failed");
|
||||
|
||||
// Execute the kernel
|
||||
error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), global_dims[0], local_dims[0]);
|
||||
else
|
||||
print_error(error, "clEnqueueNDRangeKernel failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Finish the test
|
||||
error = clFinish(*queue);
|
||||
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
print_error(error, "clFinish failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
// Verify that the event from the execution did not have an error
|
||||
error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
|
||||
test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
|
||||
if (event_status < 0) {
|
||||
result = check_allocation_error(context, device_id, event_status, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status));
|
||||
else
|
||||
print_error(event_status, "clEnqueueNDRangeKernel failed");
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// If we are not verifying the checksum return here
|
||||
if (!verify_checksum) {
|
||||
log_info("Note: Allocations were not initialized so kernel execution can not verify correct results.\n");
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
// Verify the checksum.
|
||||
// Read back the result
|
||||
error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0, sizeof(cl_uint)*NUM_OF_WORK_ITEMS, &returned_results, 0, NULL, NULL);
|
||||
test_error_abort(error, "clEnqueueReadBuffer failed");
|
||||
final_result = 0;
|
||||
if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) {
|
||||
// For buffers or read images we are just looking at the sum of what each thread summed up
|
||||
for (i=0; i<NUM_OF_WORK_ITEMS; i++) {
|
||||
final_result += returned_results[i];
|
||||
}
|
||||
if (final_result != checksum) {
|
||||
log_error("\t\tChecksum failed to verify. Expected %u got %u.\n", checksum, final_result);
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
log_info("\t\tChecksum verified (%u == %u).\n", checksum, final_result);
|
||||
} else {
|
||||
// For write images we need to verify the values
|
||||
for (i=0; i<number_of_mems_used; i++) {
|
||||
if (check_image(*queue, mems[i])) {
|
||||
log_error("\t\tImage contents failed to verify for image %d.\n", (int)i);
|
||||
return FAILED_ABORT;
|
||||
}
|
||||
}
|
||||
log_info("\t\tImage contents verified.\n");
|
||||
}
|
||||
|
||||
// Finish the test
|
||||
error = clFinish(*queue);
|
||||
result = check_allocation_error(context, device_id, error, queue);
|
||||
if (result != SUCCEEDED) {
|
||||
if (result == FAILED_TOO_BIG)
|
||||
log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
|
||||
else
|
||||
print_error(error, "clFinish failed");
|
||||
return result;
|
||||
}
|
||||
|
||||
return SUCCEEDED;
|
||||
}
|
||||
|
||||
|
||||
22
test_conformance/allocations/allocation_execute.h
Normal file
22
test_conformance/allocations/allocation_execute.h
Normal file
@@ -0,0 +1,22 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "allocation_utils.h"
|
||||
|
||||
|
||||
// Builds and runs the "sample_test" kernel over the first number_of_mems_used
// entries of mems on *queue. `test` selects the buffer / image-read /
// image-write kernel variant; when verify_checksum is zero the results are not
// checked. Returns SUCCEEDED on success, otherwise a failure code.
int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum);
|
||||
|
||||
|
||||
312
test_conformance/allocations/allocation_fill.cpp
Normal file
312
test_conformance/allocations/allocation_fill.cpp
Normal file
@@ -0,0 +1,312 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_fill.h"
|
||||
|
||||
#define BUFFER_CHUNK_SIZE 8*1024*1024
|
||||
#define IMAGE_LINES 8
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
/*
 * Fills a buffer object with random words, writing it in chunks of at most
 * BUFFER_CHUNK_SIZE bytes, and adds the sum of all written words to the
 * global `checksum` once every write has succeeded.
 *
 * size           - number of bytes of `mem` to fill.
 * d              - random generator state the words are drawn from.
 * blocking_write - non-zero: each chunk is written with a blocking write;
 *                  zero: each chunk is written non-blocking and then waited
 *                  on through its event.
 *
 * Returns SUCCEEDED, or FAILED_ABORT when the host staging buffer cannot be
 * allocated, or the non-SUCCEEDED result of check_allocation_error. On a
 * failed write or wait, `mem` is released before returning.
 */
int fill_buffer_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t size, MTdata d, cl_bool blocking_write) {
    cl_uint running_sum = 0;

    // Never stage more than the buffer itself holds.
    size_t chunk_bytes = BUFFER_CHUNK_SIZE;
    if (chunk_bytes > size)
        chunk_bytes = size;

    cl_uint *host_data = (cl_uint*)malloc(chunk_bytes);
    if (host_data == NULL) {
        log_error("Failed to malloc host buffer for writing into buffer.\n");
        return FAILED_ABORT;
    }

    // Full chunks first; the final iteration carries whatever is left over.
    size_t offset = 0;
    while (offset < size) {
        size_t bytes_now = size - offset;
        if (bytes_now > chunk_bytes)
            bytes_now = chunk_bytes;

        // Put values in the data, and keep a checksum as we go along.
        size_t words_now = bytes_now / sizeof(cl_uint);
        for (size_t w = 0; w < words_now; w++) {
            host_data[w] = (cl_uint)genrand_int32(d);
            running_sum += host_data[w];
        }

        cl_event event;
        int error = clEnqueueWriteBuffer(*queue, mem, blocking_write ? CL_TRUE : CL_FALSE, offset, bytes_now, host_data, 0, NULL, blocking_write ? NULL : &event);
        int result = check_allocation_error(context, device_id, error, queue);

        if (result == FAILED_ABORT) {
            print_error(error, "clEnqueueWriteBuffer failed.");
        }

        if (result != SUCCEEDED) {
            free(host_data);
            clReleaseMemObject(mem);
            return result;
        }

        if (!blocking_write) {
            // The staging buffer is reused next iteration, so wait for this
            // write to complete before moving on.
            error = clWaitForEvents(1, &event);
            result = check_allocation_error(context, device_id, error, queue);

            if (result == FAILED_ABORT) {
                print_error(error, "clWaitForEvents failed.");
            }

            if (result != SUCCEEDED) {
                clReleaseEvent(event);
                free(host_data);
                clReleaseMemObject(mem);
                return result;
            }

            clReleaseEvent(event);
        }

        offset += bytes_now;
    }

    free(host_data);
    // Only update the checksum if this succeeded.
    checksum += running_sum;
    return SUCCEEDED;
}
|
||||
|
||||
|
||||
/*
 * Fills a 2D image with random values, writing it in bands of at most
 * IMAGE_LINES rows, and adds the sum of all written values to the global
 * `checksum` once every write has succeeded.
 *
 * width, height  - image dimensions in pixels; four cl_uint values are
 *                  generated per pixel (presumably a 4-channel 32-bit unsigned
 *                  format -- confirm where the image is created).
 * d              - random generator state the values are drawn from.
 * blocking_write - non-zero: each band is written with a blocking write;
 *                  zero: each band is written non-blocking and then waited on
 *                  through its event.
 *
 * Returns SUCCEEDED, or FAILED_ABORT when the host staging buffer cannot be
 * allocated, or the non-SUCCEEDED result of check_allocation_error. On a
 * failed write or wait, `mem` is released before returning.
 *
 * Images shorter than IMAGE_LINES rows are handled: the band height is
 * clamped to the rows that remain, instead of computing height - IMAGE_LINES
 * (which wraps around for unsigned sizes and led to writes past the image).
 */
int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t width, size_t height, MTdata d, cl_bool blocking_write) {
    size_t origin[3], region[3], j;
    int error, result;
    cl_uint *data;
    cl_uint checksum_delta = 0;
    cl_event event;

    // Staging buffer large enough for one full band.
    data = (cl_uint*)malloc(width*4*sizeof(cl_uint)*IMAGE_LINES);
    if (data == NULL) {
        log_error("Failed to malloc host buffer for writing into image.\n");
        return FAILED_ABORT;
    }

    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;
    region[0] = width;
    region[1] = IMAGE_LINES;
    region[2] = 1;

    // Full bands first; the final iteration carries whatever rows are left.
    while (origin[1] < height) {
        size_t rows_now = height - origin[1];
        if (rows_now > IMAGE_LINES)
            rows_now = IMAGE_LINES;
        region[1] = rows_now;

        // Put values in the data, and keep a checksum as we go along.
        for (j=0; j<width*4*rows_now; j++) {
            data[j] = (cl_uint)genrand_int32(d);
            checksum_delta += data[j];
        }

        error = clEnqueueWriteImage(*queue, mem, blocking_write ? CL_TRUE : CL_FALSE, origin, region, 0, 0, data, 0, NULL, blocking_write ? NULL : &event);
        result = check_allocation_error(context, device_id, error, queue);

        if (result == FAILED_ABORT) {
            print_error(error, "clEnqueueWriteImage failed.");
        }

        if (result != SUCCEEDED) {
            clReleaseMemObject(mem);
            free(data);
            return result;
        }

        if (!blocking_write) {
            // The staging buffer is reused next iteration, so wait for this
            // write to complete before moving on.
            error = clWaitForEvents(1, &event);
            result = check_allocation_error(context, device_id, error, queue);

            if (result == FAILED_ABORT) {
                print_error(error, "clWaitForEvents failed.");
            }

            if (result != SUCCEEDED) {
                clReleaseEvent(event);
                free(data);
                clReleaseMemObject(mem);
                return result;
            }

            clReleaseEvent(event);
        }

        origin[1] += rows_now;
    }

    free(data);
    // Only update the checksum if this succeeded.
    checksum += checksum_delta;
    return SUCCEEDED;
}
|
||||
|
||||
|
||||
|
||||
// Fill a memory object by dispatching on its CL_MEM_TYPE:
//   - buffers are handed to fill_buffer_with_data() together with their CL_MEM_SIZE,
//   - 2D images are handed to fill_image_with_data() together with their width and height.
// The generator `d` and the `blocking_write` flag are forwarded unchanged.
// Every info query is checked with test_error_abort(). Any other object type is
// logged as an error and FAILED_ABORT is returned.
int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write) {
    cl_mem_object_type type;
    int error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
    test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");

    switch (type) {
        case CL_MEM_OBJECT_BUFFER: {
            size_t size;
            error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
            test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
            return fill_buffer_with_data(context, device_id, queue, mem, size, d, blocking_write);
        }

        case CL_MEM_OBJECT_IMAGE2D: {
            size_t width, height;
            error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
            test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
            error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
            test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
            return fill_image_with_data(context, device_id, queue, mem, width, height, d, blocking_write);
        }

        default:
            break;
    }

    // Neither a buffer nor a 2D image: nothing here knows how to fill it.
    log_error("Invalid CL_MEM_TYPE: %d\n", type);
    return FAILED_ABORT;
}
|
||||
|
||||
|
||||
|
||||
19
test_conformance/allocations/allocation_fill.h
Normal file
19
test_conformance/allocations/allocation_fill.h
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "allocation_utils.h"
|
||||
|
||||
// Fills `mem` (a buffer or a 2D image) by dispatching on its CL_MEM_TYPE and
// forwarding the generator `d` and the `blocking_write` flag to the matching
// fill routine. Returns that routine's result, or FAILED_ABORT when the
// object is neither a buffer nor a 2D image.
int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write);
|
||||
246
test_conformance/allocations/allocation_functions.cpp
Normal file
246
test_conformance/allocations/allocation_functions.cpp
Normal file
@@ -0,0 +1,246 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_functions.h"
|
||||
#include "allocation_fill.h"
|
||||
|
||||
|
||||
static cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT32 };
|
||||
|
||||
// Create a read/write buffer of `size_to_allocate` bytes and store it in *mem.
// `blocking_write` is only used for the log line; nothing is written to the buffer here.
// The creation status is classified by check_allocation_error() and that
// classification is returned.
int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    const double size_in_mb = size_to_allocate/(1024.0*1024.0);
    const char *write_mode = blocking_write ? "blocking" : "non-blocking";
    int error;

    log_info("\t\tAttempting to allocate a %gMB array and fill with %s writes.\n", size_in_mb, write_mode);

    *mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size_to_allocate, NULL, &error);

    return check_allocation_error(context, device_id, error, queue);
}
|
||||
|
||||
|
||||
// Choose a width and height for a 2D image whose pixels occupy
// 4*sizeof(cl_uint) bytes each, so that the image holds about
// `size_to_allocate` bytes while staying within the device's 2D image limits.
//
// Parameters:
//   device_id        - device whose image support and image limits are queried.
//   size_to_allocate - requested image size in bytes; must be non-zero.
//   width, height    - outputs; written only when SUCCEEDED is returned.
//
// Returns:
//   SUCCEEDED      - *width and *height were set.
//   FAILED_TOO_BIG - the request needs more pixels than max_width*max_height.
//   FAILED_ABORT   - checkForImageSupport() returned non-zero, or the
//                    requested size is zero. (Device query failures are
//                    handled by test_error_abort.)
int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height) {
    size_t max_width, max_height, num_pixels, found_width, found_height;
    int error;

    // A non-zero result from checkForImageSupport() is treated as "no image support".
    if (checkForImageSupport(device_id)) {
        log_info("Can not allocate an image on this device because it does not support images.\n");
        return FAILED_ABORT;
    }

    if (size_to_allocate == 0) {
        log_error("Trying to allcoate a zero sized image.\n");
        return FAILED_ABORT;
    }

    error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
    test_error_abort(error, "clGetDeviceInfo failed.");
    error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
    test_error_abort(error, "clGetDeviceInfo failed.");

    num_pixels = size_to_allocate / (sizeof(cl_uint)*4);

    if (num_pixels > (max_width*max_height))
        return FAILED_TOO_BIG;

    // Start from a close-to-square aspect ratio. The square root is converted
    // straight to size_t so large pixel counts do not pass through a signed int.
    found_width = (size_t)sqrt( (double) num_pixels );
    if (found_width == 0)
        found_width = 1;
    if (found_width > max_width)
        found_width = max_width;

    found_height = num_pixels/found_width;
    if (found_height > max_height) {
        // The square shape is taller than the device allows. Clamp the height
        // and widen the image to make up for it, so the result still holds
        // close to the requested number of pixels. num_pixels is known to be
        // <= max_width*max_height here, so the widened width fits the limit;
        // the clamp below is only a safeguard.
        found_height = max_height;
        found_width = num_pixels/found_height;
        if (found_width > max_width)
            found_width = max_width;
    }

    *width = found_width;
    *height = found_height;

    return SUCCEEDED;
}
|
||||
|
||||
|
||||
// Create a CL_MEM_READ_ONLY 2D image of roughly `size_to_allocate` bytes in *mem.
// The dimensions come from find_good_image_size(); if that does not succeed its
// result is returned unchanged. Otherwise the image creation status is
// classified by check_allocation_error(). `blocking_write` only affects the log line.
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    size_t width, height;

    int error = find_good_image_size(device_id, size_to_allocate, &width, &height);
    if (error != SUCCEEDED) {
        return error;
    }

    const double size_in_mb = size_to_allocate/(1024.0*1024.0);
    const char *write_mode = blocking_write ? "blocking" : "non-blocking";
    log_info("\t\tAttempting to allocate a %gMB read-only image (%d x %d) and fill with %s writes.\n",
             size_in_mb, (int)width, (int)height, write_mode);

    *mem = create_image_2d(context, CL_MEM_READ_ONLY, &image_format, width, height, 0, NULL, &error);

    return check_allocation_error(context, device_id, error, queue);
}
|
||||
|
||||
|
||||
// Create a CL_MEM_WRITE_ONLY 2D image of roughly `size_to_allocate` bytes in *mem.
// The dimensions come from find_good_image_size(); if that does not succeed its
// result is returned unchanged. Otherwise the image creation status is
// classified by check_allocation_error(). `blocking_write` only affects the log line.
int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
    size_t width, height;

    int error = find_good_image_size(device_id, size_to_allocate, &width, &height);
    if (error != SUCCEEDED) {
        return error;
    }

    const double size_in_mb = size_to_allocate/(1024.0*1024.0);
    const char *write_mode = blocking_write ? "blocking" : "non-blocking";
    log_info("\t\tAttempting to allocate a %gMB write-only image (%d x %d) and fill with %s writes.\n",
             size_in_mb, (int)width, (int)height, write_mode);

    *mem = create_image_2d(context, CL_MEM_WRITE_ONLY, &image_format, width, height, 0, NULL, &error);

    return check_allocation_error(context, device_id, error, queue);
}
|
||||
|
||||
// Allocate one memory object of the requested kind into *mem.
// `type` selects both the allocator (buffer, read-only image, write-only image)
// and whether the blocking or non-blocking flag is passed to it. The table is
// searched in order and the first matching entry is used. An unrecognised
// `type` is logged and FAILED_ABORT is returned.
int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem) {
    typedef int (*allocator_fn)(cl_context, cl_command_queue *, cl_device_id, cl_mem *, size_t, cl_bool);

    const struct {
        int          type;
        allocator_fn allocate;
        cl_bool      blocking_write;
    } dispatch[] = {
        { BUFFER,                   allocate_buffer,        CL_TRUE  },
        { IMAGE_READ,               allocate_image2d_read,  CL_TRUE  },
        { IMAGE_WRITE,              allocate_image2d_write, CL_TRUE  },
        { BUFFER_NON_BLOCKING,      allocate_buffer,        CL_FALSE },
        { IMAGE_READ_NON_BLOCKING,  allocate_image2d_read,  CL_FALSE },
        { IMAGE_WRITE_NON_BLOCKING, allocate_image2d_write, CL_FALSE },
    };

    for (size_t i = 0; i < sizeof(dispatch)/sizeof(dispatch[0]); i++) {
        if (type == dispatch[i].type) {
            return dispatch[i].allocate(context, queue, device_id, mem, size_to_allocate, dispatch[i].blocking_write);
        }
    }

    log_error("Invalid allocation type: %d\n", type);
    return FAILED_ABORT;
}
|
||||
|
||||
|
||||
// Allocate `size_to_allocate` bytes on the device as one or more memory objects.
//
// Each memory object is at most CL_DEVICE_MAX_MEM_ALLOC_SIZE bytes. When a
// single allocation (or its fill) reports FAILED_TOO_BIG, the attempt is
// repeated with the size reduced by 1/16 of CL_DEVICE_MAX_MEM_ALLOC_SIZE until
// it succeeds or the size reaches zero.
//
// Parameters:
//   multiple_allocations - non-zero allows up to MAX_NUMBER_TO_ALLOCATE objects
//                          (further limited for images by the device's
//                          read/write image argument count); zero allows one.
//   type                 - allocation kind, as accepted by do_allocation().
//   mems                 - output array receiving the created objects.
//   number_of_mems       - output; number of entries of `mems` that were created.
//                          Set to 0 on entry.
//   final_size           - output; running total of bytes allocated, updated
//                          after every successful object.
//   force_fill           - non-zero fills each object via fill_mem_with_data().
//   d                    - generator forwarded to fill_mem_with_data().
//
// Returns SUCCEEDED, FAILED_TOO_BIG (an individual allocation could not reach
// the minimum allowed size) or FAILED_ABORT. An unexpected result code is
// treated as a test logic error and terminates the process.
int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
                  int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d) {

    cl_ulong max_individual_allocation_size, global_mem_size;
    int error, result;
    size_t amount_allocated;
    size_t reduction_amount;
    size_t min_allocation_allowed;
    int current_allocation;
    size_t allocation_this_time, actual_allocation;

    // Set the number of mems used to 0 so if we fail to create even a single one we don't end up returning a garbage value
    *number_of_mems = 0;

    error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
    test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
    error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
    test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");

    if (size_to_allocate > global_mem_size) {
        log_error("Can not allocate more than the global memory size.\n");
        return FAILED_ABORT;
    }

    amount_allocated = 0;
    current_allocation = 0;
    // Step by which a failing allocation is shrunk, and the smallest
    // individual allocation that is still considered acceptable.
    reduction_amount = (size_t)max_individual_allocation_size/16;
    min_allocation_allowed = (size_t)max_individual_allocation_size/4;
    if (min_allocation_allowed > size_to_allocate)
        min_allocation_allowed = size_to_allocate/4;

    if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
    else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
    else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));

    // If we are only doing a single allocation, only allow 1
    int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;

    // Make sure that the maximum number of images allocated is constrained by the
    // maximum that may be passed to a kernel
    if (type != BUFFER && type != BUFFER_NON_BLOCKING) {
        cl_device_info param_name = (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) ?
            CL_DEVICE_MAX_READ_IMAGE_ARGS : CL_DEVICE_MAX_WRITE_IMAGE_ARGS;

        cl_uint max_image_args;
        error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
        // Checked the same way as the other device queries in this function.
        test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");

        if ((int)max_image_args < max_to_allocate) {
            log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args);
            max_to_allocate = max_image_args;
        }
    }

    // Try to allocate the requested amount.
    while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {
        allocation_this_time = size_to_allocate - amount_allocated;
        if (allocation_this_time > max_individual_allocation_size)
            allocation_this_time = (size_t)max_individual_allocation_size;

        // Try to allocate a chunk of memory
        result = FAILED_TOO_BIG;
        while (result == FAILED_TOO_BIG && allocation_this_time != 0) {
            result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
            if (result == SUCCEEDED) {
                // Allocation succeeded, another memory object was added to the array
                *number_of_mems = (current_allocation+1);

                // Verify the size is correct to within 1MB. The difference is
                // taken as larger-minus-smaller so the unsigned subtraction
                // cannot wrap around when the actual size exceeds the request.
                actual_allocation = get_actual_allocation_size(mems[current_allocation]);
                size_t size_difference = (allocation_this_time > actual_allocation)
                    ? (allocation_this_time - actual_allocation)
                    : (actual_allocation - allocation_this_time);
                if ((double)size_difference > 1024.0*1024.0) {
                    log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation));
                    return FAILED_ABORT;
                }

                // If we are filling the allocation for verification do so
                if (force_fill) {
                    cl_bool blocking_write = true;
                    if (type == BUFFER_NON_BLOCKING || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE_NON_BLOCKING) {
                        blocking_write = false;
                    }
                    // The fill result replaces the allocation result, so a
                    // FAILED_TOO_BIG from the fill also triggers a smaller retry.
                    result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
                }
            }

            if (result == FAILED_TOO_BIG) {
                // Shrink the request and go around again; stop once nothing is left.
                if (allocation_this_time > reduction_amount)
                    allocation_this_time -= reduction_amount;
                else {
                    allocation_this_time = 0;
                }
            }
        }

        if (result == FAILED_ABORT) {
            log_error("\t\tAllocation failed.\n");
            return FAILED_ABORT;
        }

        // Give up when the object had to be shrunk below the minimum and
        // still does not cover what remains to be allocated.
        if (allocation_this_time < min_allocation_allowed && allocation_this_time < (size_to_allocate-amount_allocated)) {
            log_info("\t\tFailed to allocate an individual allocation of more than %gMB.\n", toMB(min_allocation_allowed));
            return FAILED_TOO_BIG;
        }

        // Otherwise we succeeded
        if (result != SUCCEEDED) {
            log_error("Test logic error.");
            test_finish();
            exit(-1);
        }
        amount_allocated += allocation_this_time;

        *final_size = amount_allocated;

        current_allocation++;
    }

    log_info("\t\tSucceeded in allocating %gMB using %d memory objects.\n", toMB(amount_allocated), current_allocation);
    return SUCCEEDED;
}
|
||||
24
test_conformance/allocations/allocation_functions.h
Normal file
24
test_conformance/allocations/allocation_functions.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "allocation_utils.h"
|
||||
|
||||
int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem);
|
||||
int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
|
||||
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
|
||||
int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
|
||||
int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
|
||||
int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d);
|
||||
87
test_conformance/allocations/allocation_utils.cpp
Normal file
87
test_conformance/allocations/allocation_utils.cpp
Normal file
@@ -0,0 +1,87 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "allocation_utils.h"
|
||||
|
||||
// Release the command queue referenced by `queue` and create a new one on the
// same context and device with no properties set. The new handle is stored in
// *queue and also returned; the creation status is written to *error.
cl_command_queue reset_queue(cl_context context, cl_device_id device_id, cl_command_queue *queue, int *error)
{
    log_info("Invalid command queue. Releasing and recreating the command queue.\n");

    clReleaseCommandQueue(*queue);

    cl_command_queue fresh_queue = clCreateCommandQueue(context, device_id, 0, error);
    *queue = fresh_queue;
    return fresh_queue;
}
|
||||
|
||||
// Classify the OpenCL status code `error` of an allocation-related call.
//
//   CL_SUCCESS                                  -> SUCCEEDED
//   CL_MEM_OBJECT_ALLOCATION_FAILURE,
//   CL_OUT_OF_RESOURCES, CL_OUT_OF_HOST_MEMORY,
//   CL_INVALID_IMAGE_SIZE                       -> FAILED_TOO_BIG
//   CL_INVALID_COMMAND_QUEUE                    -> the queue is recreated via
//                                                  reset_queue(); FAILED_TOO_BIG if
//                                                  that worked, FAILED_ABORT otherwise
//   anything else                               -> logged, FAILED_ABORT
int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue) {
    switch (error) {
        case CL_SUCCESS:
            return SUCCEEDED;

        case CL_MEM_OBJECT_ALLOCATION_FAILURE:
        case CL_OUT_OF_RESOURCES:
        case CL_OUT_OF_HOST_MEMORY:
        case CL_INVALID_IMAGE_SIZE:
            return FAILED_TOO_BIG;

        case CL_INVALID_COMMAND_QUEUE:
            // reset_queue() overwrites `error` with the status of the queue re-creation.
            *queue = reset_queue(context, device_id, queue, &error);
            if (error != CL_SUCCESS) {
                log_error("Failed to reset command queue after corrupted queue: %s\n", IGetErrorString(error));
                return FAILED_ABORT;
            }
            // Try again with smaller resources.
            return FAILED_TOO_BIG;

        default:
            log_error("Allocation failed with %s.\n", IGetErrorString(error));
            return FAILED_ABORT;
    }
}
|
||||
|
||||
|
||||
// Convert a size in bytes to megabytes (1 MB = 1024*1024 bytes).
double toMB(cl_ulong size_in) {
    const double bytes_per_mb = 1024.0*1024.0;
    return (double)size_in/bytes_per_mb;
}
|
||||
|
||||
// Report the size in bytes of a memory object.
//
// For a buffer this is its CL_MEM_SIZE. For a 2D image it is computed as
// width * height * 4 * sizeof(cl_uint) from the image's reported dimensions.
//
// Returns 0 when any query fails or when the object is neither a buffer nor a
// 2D image; the failure is logged in both cases.
size_t get_actual_allocation_size(cl_mem mem) {
    int error;
    cl_mem_object_type type;
    size_t size, width, height;

    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
    if (error) {
        print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
        return 0;
    }

    if (type == CL_MEM_OBJECT_BUFFER) {
        error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
        if (error) {
            print_error(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
            return 0;
        }
        return size;
    } else if (type == CL_MEM_OBJECT_IMAGE2D) {
        // The messages below name clGetImageInfo, the call that actually failed.
        error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
        if (error) {
            print_error(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
            return 0;
        }
        error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
        if (error) {
            print_error(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
            return 0;
        }
        return width*height*4*sizeof(cl_uint);
    }

    log_error("Invalid CL_MEM_TYPE: %d\n", type);
    return 0;
}
|
||||
|
||||
|
||||
24
test_conformance/allocations/allocation_utils.h
Normal file
24
test_conformance/allocations/allocation_utils.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
extern cl_uint checksum;
|
||||
|
||||
// Classifies an OpenCL status code as SUCCEEDED, FAILED_TOO_BIG or
// FAILED_ABORT. On CL_INVALID_COMMAND_QUEUE the queue behind `queue` is
// released and recreated before the classification is returned.
int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue);
|
||||
// Converts a size in bytes to megabytes (divides by 1024*1024).
double toMB(cl_ulong size_in);
|
||||
// Returns the size in bytes of a buffer (its CL_MEM_SIZE) or of a 2D image
// (width * height * 4 * sizeof(cl_uint)). Returns 0 if a query fails or the
// object is of any other type.
size_t get_actual_allocation_size(cl_mem mem);
|
||||
|
||||
|
||||
354
test_conformance/allocations/main.cpp
Normal file
354
test_conformance/allocations/main.cpp
Normal file
@@ -0,0 +1,354 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
#include "allocation_functions.h"
|
||||
#include "allocation_fill.h"
|
||||
#include "allocation_execute.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include <time.h>
|
||||
|
||||
cl_device_id g_device_id;
|
||||
cl_device_type g_device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
clContextWrapper g_context;
|
||||
clCommandQueueWrapper g_queue;
|
||||
int g_repetition_count = 1;
|
||||
int g_tests_to_run = 0;
|
||||
int g_reduction_percentage = 100;
|
||||
int g_write_allocations = 1;
|
||||
int g_multiple_allocations = 0;
|
||||
int g_execute_kernel = 1;
|
||||
|
||||
cl_uint checksum;
|
||||
|
||||
void printUsage( const char *execName )
|
||||
{
|
||||
const char *p = strrchr( execName, '/' );
|
||||
if( p != NULL )
|
||||
execName = p + 1;
|
||||
|
||||
log_info( "Usage: %s [single|multiple] [numReps] [reduction%%] allocType\n", execName );
|
||||
log_info( "Where:\n" );
|
||||
log_info( "\tsingle - Tests using a single allocation as large as possible\n" );
|
||||
log_info( "\tmultiple - Tests using as many allocations as possible\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tnumReps - Optional integer specifying the number of repetitions to run and average the result (defaults to 1)\n" );
|
||||
log_info( "\treduction%% - Optional integer, followed by a %% sign, that acts as a multiplier for the target amount of memory.\n" );
|
||||
log_info( "\t Example: target amount of 512MB and a reduction of 75%% will result in a target of 384MB.\n" );
|
||||
log_info( "\n" );
|
||||
log_info( "\tallocType - Allocation type to test with. Can be one of the following:\n" );
|
||||
log_info( "\t\tbuffer\n");
|
||||
log_info( "\t\timage2d_read\n");
|
||||
log_info( "\t\timage2d_write\n");
|
||||
log_info( "\t\tbuffer_non_blocking\n");
|
||||
log_info( "\t\timage2d_read_non_blocking\n");
|
||||
log_info( "\t\timage2d_write_non_blocking\n");
|
||||
log_info( "\t\tall (runs all of the above in sequence)\n" );
|
||||
log_info( "\tdo_not_force_fill - Disable explicitly write data to all memory objects after creating them.\n" );
|
||||
log_info( "\t Without this, the kernel execution can not verify its checksum.\n" );
|
||||
log_info( "\tdo_not_execute - Disable executing a kernel that accesses all of the memory objects.\n" );
|
||||
}
|
||||
|
||||
|
||||
// Set up the global OpenCL state used by the test: picks the first platform,
// the first device of type g_device_type on it (stored in g_device_id), then
// creates g_context and g_queue for that device. Each step is checked with
// test_error; the status of the last call is returned.
int init_cl() {
    int error;

    // First available platform, then one device of the requested type on it.
    cl_platform_id platform;
    error = clGetPlatformIDs(1, &platform, NULL);
    test_error(error, "clGetPlatformIDs failed");

    error = clGetDeviceIDs(platform, g_device_type, 1, &g_device_id, NULL);
    test_error(error, "clGetDeviceIDs failed");

    // Context with the harness notification callback installed.
    g_context = clCreateContext( NULL, 1, &g_device_id, notify_callback, NULL, &error );
    test_error(error, "clCreateContext failed");

    // Command queue with no properties set.
    g_queue = clCreateCommandQueue( g_context, g_device_id, 0, &error );
    test_error(error, "clCreateCommandQueue failed");

    return error;
}
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
int error;
|
||||
int count;
|
||||
cl_mem mems[MAX_NUMBER_TO_ALLOCATE];
|
||||
cl_ulong max_individual_allocation_size, global_mem_size;
|
||||
char str[ 128 ], *endPtr;
|
||||
int r;
|
||||
int number_of_mems_used;
|
||||
int failure_counts = 0;
|
||||
int test, test_to_run = 0;
|
||||
int randomize = 0;
|
||||
size_t final_size, max_size, current_test_size;
|
||||
|
||||
test_start();
|
||||
|
||||
|
||||
// Parse arguments
|
||||
checkDeviceTypeOverride( &g_device_type );
|
||||
for( int i = 1; i < argc; i++ )
|
||||
{
|
||||
strncpy( str, argv[ i ], sizeof( str ) - 1 );
|
||||
|
||||
if( strcmp( str, "cpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_CPU" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_CPU;
|
||||
else if( strcmp( str, "gpu" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_GPU" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_GPU;
|
||||
else if( strcmp( str, "accelerator" ) == 0 || strcmp( str, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_ACCELERATOR;
|
||||
else if( strcmp( str, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
|
||||
g_device_type = CL_DEVICE_TYPE_DEFAULT;
|
||||
|
||||
else if( strcmp( str, "multiple" ) == 0 )
|
||||
g_multiple_allocations = 1;
|
||||
else if( strcmp( str, "randomize" ) == 0 )
|
||||
randomize = 1;
|
||||
else if( strcmp( str, "single" ) == 0 )
|
||||
g_multiple_allocations = 0;
|
||||
|
||||
else if( ( r = (int)strtol( str, &endPtr, 10 ) ) && ( endPtr != str ) && ( *endPtr == 0 ) )
|
||||
{
|
||||
// By spec, that means the entire string was an integer, so take it as a repetition count
|
||||
g_repetition_count = r;
|
||||
}
|
||||
|
||||
else if( strcmp( str, "all" ) == 0 )
|
||||
{
|
||||
g_tests_to_run = BUFFER | IMAGE_READ | IMAGE_WRITE | BUFFER_NON_BLOCKING | IMAGE_READ_NON_BLOCKING | IMAGE_WRITE_NON_BLOCKING;
|
||||
}
|
||||
|
||||
else if( strchr( str, '%' ) != NULL )
|
||||
{
|
||||
// Reduction percentage (let strtol ignore the percentage)
|
||||
g_reduction_percentage = (int)strtol( str, NULL, 10 );
|
||||
}
|
||||
|
||||
else if( g_tests_to_run == 0 )
|
||||
{
|
||||
if( strcmp( str, "buffer" ) == 0 )
|
||||
{
|
||||
g_tests_to_run |= BUFFER;
|
||||
}
|
||||
else if( strcmp( str, "image2d_read" ) == 0 )
|
||||
{
|
||||
g_tests_to_run |= IMAGE_READ;
|
||||
}
|
||||
else if( strcmp( str, "image2d_write" ) == 0 )
|
||||
{
|
||||
g_tests_to_run |= IMAGE_WRITE;
|
||||
}
|
||||
else if( strcmp( str, "buffer_non_blocking" ) == 0 )
|
||||
{
|
||||
g_tests_to_run |= BUFFER_NON_BLOCKING;
|
||||
}
|
||||
else if( strcmp( str, "image2d_read_non_blocking" ) == 0 )
|
||||
{
|
||||
g_tests_to_run |= IMAGE_READ_NON_BLOCKING;
|
||||
}
|
||||
else if( strcmp( str, "image2d_write_non_blocking" ) == 0 )
|
||||
{
|
||||
g_tests_to_run |= IMAGE_WRITE_NON_BLOCKING;
|
||||
}
|
||||
if( g_tests_to_run == 0 )
|
||||
break; // Argument is invalid; break to print usage
|
||||
}
|
||||
|
||||
else if( strcmp( str, "do_not_force_fill" ) == 0 )
|
||||
{
|
||||
g_write_allocations = 0;
|
||||
}
|
||||
|
||||
else if( strcmp( str, "do_not_execute" ) == 0 )
|
||||
{
|
||||
g_execute_kernel = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if( randomize )
|
||||
{
|
||||
gRandomSeed = (cl_uint) clock();
|
||||
gReSeed = 1;
|
||||
}
|
||||
|
||||
if( g_tests_to_run == 0 )
|
||||
{
|
||||
// Allocation type was never specified, or one of the arguments was invalid. Print usage and bail
|
||||
printUsage( argv[ 0 ] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// All ready to go, so set up an environment
|
||||
error = init_cl();
|
||||
if (error) {
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( printDeviceHeader( g_device_id ) != CL_SUCCESS )
|
||||
{
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
error = clGetDeviceInfo(g_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
|
||||
if ( error ) {
|
||||
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
error = clGetDeviceInfo(g_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
|
||||
if ( error ) {
|
||||
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
|
||||
max_individual_allocation_size, toMB(max_individual_allocation_size),
|
||||
global_mem_size, toMB(global_mem_size));
|
||||
|
||||
if( max_individual_allocation_size > global_mem_size )
|
||||
{
|
||||
log_error( "FAILURE: CL_DEVICE_MAX_MEM_ALLOC_SIZE (%lld) is greater than the CL_DEVICE_GLOBAL_MEM_SIZE (%lld)\n", max_individual_allocation_size, global_mem_size );
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
|
||||
// We may need to back off the global_mem_size on unified memory devices to leave room for application and operating system code
|
||||
// and associated data in the working set, so we don't start pathologically paging.
|
||||
// Check to see if we are a unified memory device
|
||||
cl_bool hasUnifiedMemory = CL_FALSE;
|
||||
if( ( error = clGetDeviceInfo( g_device_id, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( hasUnifiedMemory ), &hasUnifiedMemory, NULL )))
|
||||
{
|
||||
print_error( error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
|
||||
test_finish();
|
||||
return -1;
|
||||
}
|
||||
// we share unified memory so back off to 3/4 the global memory size.
|
||||
if( CL_TRUE == hasUnifiedMemory )
|
||||
{
|
||||
global_mem_size -= global_mem_size /4;
|
||||
log_info( "Device shares memory with the host, so backing off the maximum combined allocation size to be %gMB to avoid rampant paging.\n", toMB( global_mem_size ) );
|
||||
}
|
||||
|
||||
// Pick the baseline size based on whether we are doing a single large or multiple allocations
|
||||
if (!g_multiple_allocations) {
|
||||
max_size = (size_t)max_individual_allocation_size;
|
||||
} else {
|
||||
max_size = (size_t)global_mem_size;
|
||||
}
|
||||
|
||||
|
||||
// Adjust based on the percentage
|
||||
if (g_reduction_percentage != 100) {
|
||||
log_info("NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage);
|
||||
max_size = (size_t)((double)max_size * (double)g_reduction_percentage/100.0);
|
||||
}
|
||||
|
||||
// Round down to a whole number of MB (the mask clears the low 20 bits; it truncates rather than rounding to nearest).
|
||||
max_size &= (size_t)(0xFFFFFFFFFF00000ULL);
|
||||
|
||||
log_info("** Target allocation size (rounded to nearest MB) is: %lu bytes (%gMB).\n", max_size, toMB(max_size));
|
||||
|
||||
// Run all the requested tests
|
||||
RandomSeed seed( gRandomSeed );
|
||||
for (test=0; test<6; test++) {
|
||||
if (test == 0) test_to_run = BUFFER;
|
||||
if (test == 1) test_to_run = IMAGE_READ;
|
||||
if (test == 2) test_to_run = IMAGE_WRITE;
|
||||
if (test == 3) test_to_run = BUFFER_NON_BLOCKING;
|
||||
if (test == 4) test_to_run = IMAGE_READ_NON_BLOCKING;
|
||||
if (test == 5) test_to_run = IMAGE_WRITE_NON_BLOCKING;
|
||||
if (!(g_tests_to_run & test_to_run))
|
||||
continue;
|
||||
|
||||
// Skip image tests if we don't support images on the device
|
||||
if (test > 0 && checkForImageSupport(g_device_id)) {
|
||||
log_info("Can not test image allocation because device does not support images.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (test_to_run == BUFFER || test_to_run == BUFFER_NON_BLOCKING) log_info("** Allocating buffer(s) to size %gMB.\n", toMB(max_size));
|
||||
else if (test_to_run == IMAGE_READ || test_to_run == IMAGE_READ_NON_BLOCKING) log_info("** Allocating read-only image(s) to size %gMB.\n", toMB(max_size));
|
||||
else if (test_to_run == IMAGE_WRITE || test_to_run == IMAGE_WRITE_NON_BLOCKING) log_info("** Allocating write-only image(s) to size %gMB.\n", toMB(max_size));
|
||||
else {log_error("Test logic error.\n"); return -1;}
|
||||
|
||||
// Run the test the requested number of times
|
||||
for (count = 0; count < g_repetition_count; count++) {
|
||||
current_test_size = max_size;
|
||||
error = FAILED_TOO_BIG;
|
||||
log_info(" => Allocation %d\n", count+1);
|
||||
|
||||
while (error == FAILED_TOO_BIG && current_test_size > max_size/8) {
|
||||
// Reset our checksum for each allocation
|
||||
checksum = 0;
|
||||
|
||||
// Do the allocation
|
||||
error = allocate_size(g_context, &g_queue, g_device_id, g_multiple_allocations, current_test_size, test_to_run, mems, &number_of_mems_used, &final_size, g_write_allocations, seed);
|
||||
|
||||
// If we succeeded and we're supposed to execute a kernel, do so.
|
||||
if (error == SUCCEEDED && g_execute_kernel) {
|
||||
log_info("\tExecuting kernel with memory objects.\n");
|
||||
error = execute_kernel(g_context, &g_queue, g_device_id, test_to_run, mems, number_of_mems_used, g_write_allocations);
|
||||
}
|
||||
|
||||
// If we failed to allocate more than 1/8th of the requested amount return a failure.
|
||||
if (final_size < (size_t)max_size/8) {
|
||||
// log_error("===> Allocation %d failed to allocate more than 1/8th of the requested size.\n", count+1);
|
||||
failure_counts++;
|
||||
}
|
||||
// Clean up.
|
||||
for (int i=0; i<number_of_mems_used; i++)
|
||||
clReleaseMemObject(mems[i]);
|
||||
|
||||
if (error == FAILED_ABORT) {
|
||||
log_error(" => Allocation %d failed.\n", count+1);
|
||||
failure_counts++;
|
||||
}
|
||||
|
||||
if (error == FAILED_TOO_BIG) {
|
||||
current_test_size -= max_size/16;
|
||||
log_info("\tFailed at this size; trying a smaller size of %gMB.\n", toMB(current_test_size));
|
||||
}
|
||||
}
|
||||
if (error == SUCCEEDED && current_test_size == max_size)
|
||||
log_info("\tPASS: Allocation succeeded.\n");
|
||||
else if (error == SUCCEEDED && current_test_size > max_size/8)
|
||||
log_info("\tPASS: Allocation succeeded at reduced size.\n");
|
||||
else {
|
||||
log_error("\tFAIL: Allocation failed.\n");
|
||||
failure_counts++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (failure_counts)
|
||||
log_error("FAILED allocations test.\n");
|
||||
else
|
||||
log_info("PASSED allocations test.\n");
|
||||
|
||||
test_finish();
|
||||
return failure_counts;
|
||||
}
|
||||
|
||||
|
||||
62
test_conformance/allocations/testBase.h
Normal file
62
test_conformance/allocations/testBase.h
Normal file
@@ -0,0 +1,62 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testBase_h
|
||||
#define _testBase_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
|
||||
#define MAX_NUMBER_TO_ALLOCATE 100
|
||||
|
||||
// Status codes for the allocation test. main() compares the results of
// allocate_size()/execute_kernel() against these values.
// Negative values are parenthesized so that a typo such as `x FAILED_ABORT`
// is a syntax error instead of silently compiling as the subtraction `x - 1`.
#define FAILED_CORRUPTED_QUEUE (-2)
// Unrecoverable failure; main() counts it as a failed allocation.
#define FAILED_ABORT (-1)
// The requested size could not be allocated; main() retries with a smaller size.
#define FAILED_TOO_BIG 1
#define SUCCEEDED 0
|
||||
|
||||
// Test-type selectors. Each value is a distinct power of two so that several
// can be OR-ed into one mask; main() ANDs each selector against g_tests_to_run
// to decide whether that test variant should run.
#define BUFFER 1
|
||||
#define IMAGE_READ 2
|
||||
#define IMAGE_WRITE 4
|
||||
#define BUFFER_NON_BLOCKING 8
|
||||
#define IMAGE_READ_NON_BLOCKING 16
|
||||
#define IMAGE_WRITE_NON_BLOCKING 32
|
||||
|
||||
#define test_error_abort(errCode,msg) test_error_ret_abort(errCode,msg,errCode)
|
||||
// If errCode is not CL_SUCCESS, reports it through print_error( errCode, msg )
// and returns FAILED_ABORT from the *enclosing* function.
// NOTE(review): retValue is accepted but never used -- the expansion always
// returns FAILED_ABORT; confirm that this is intended before relying on it.
// errCode is expanded twice (test and report), so pass a side-effect-free
// expression. The expansion is a bare { } block, not do { } while (0).
#define test_error_ret_abort(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return FAILED_ABORT ; } }
|
||||
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
|
||||
|
||||
51
test_conformance/api/CMakeLists.txt
Normal file
51
test_conformance/api/CMakeLists.txt
Normal file
@@ -0,0 +1,51 @@
|
||||
# API conformance test executable.
# The source list must provide every test_* function registered in
# basefn_list (main.c). test_kernel_arg_info.c, test_null_buffer_arg.c and
# test_mem_object_info.cpp (plus the imageHelpers harness source) are listed
# in the Makefile build and are required here too, otherwise the link fails
# with undefined references to the tests they implement.
add_executable(conformance_test_api
    main.c
    test_bool.c
    test_retain.cpp
    test_retain_program.c
    test_queries.cpp
    test_create_kernels.c
    test_kernels.c
    test_kernel_arg_info.c
    test_api_min_max.c
    test_kernel_arg_changes.cpp
    test_kernel_arg_multi_setup.cpp
    test_binary.cpp
    test_native_kernel.cpp
    test_mem_objects.cpp
    test_create_context_from_type.cpp
    test_device_min_data_type_align_size_alignment.cpp
    test_platform.cpp
    test_null_buffer_arg.c
    test_mem_object_info.cpp
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/imageHelpers.cpp
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/conversions.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
)

# The .c sources are written as C++ (they use the harness wrappers), so force
# the C++ compiler for them.
set_source_files_properties(
    main.c
    test_bool.c
    test_retain.cpp
    test_retain_program.c
    test_create_kernels.c
    test_kernels.c
    test_kernel_arg_info.c
    test_api_min_max.c
    test_native_kernel.cpp
    test_mem_objects.cpp
    test_create_context_from_type.cpp
    test_device_min_data_type_align_size_alignment.cpp
    test_platform.cpp
    test_null_buffer_arg.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/msvc9.c
    PROPERTIES LANGUAGE CXX)
|
||||
|
||||
TARGET_LINK_LIBRARIES(conformance_test_api
|
||||
${CLConform_LIBRARIES})
|
||||
27
test_conformance/api/Jamfile
Normal file
27
test_conformance/api/Jamfile
Normal file
@@ -0,0 +1,27 @@
|
||||
project
|
||||
: requirements
|
||||
<toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
|
||||
# API conformance test executable.
# Every test_* function registered in basefn_list (main.c) must be compiled
# in; the sources below that were previously missing are the ones the
# Makefile/CMake builds already list for those tests.
# NOTE(review): no harness sources are listed here -- presumably they are
# supplied by the enclosing Jam project; confirm.
exe test_api
    : main.c
      test_api_min_max.c
      test_binary.cpp
      test_bool.c
      test_create_kernels.c
      test_create_context_from_type.cpp
      test_device_min_data_type_align_size_alignment.cpp
      test_kernel_arg_changes.cpp
      test_kernel_arg_info.c
      test_kernel_arg_multi_setup.cpp
      test_kernels.c
      test_mem_object_info.cpp
      test_mem_objects.cpp
      test_native_kernel.cpp
      test_null_buffer_arg.c
      test_queries.cpp
      test_retain.cpp
      test_retain_program.c
      test_platform.cpp
    ;
|
||||
|
||||
install dist
|
||||
: test_api #test.lst
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/api
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/api
|
||||
;
|
||||
61
test_conformance/api/Makefile
Normal file
61
test_conformance/api/Makefile
Normal file
@@ -0,0 +1,61 @@
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
SRCS = main.c \
|
||||
test_retain_program.c \
|
||||
test_queries.cpp \
|
||||
test_create_kernels.c \
|
||||
test_kernels.c \
|
||||
test_kernel_arg_info.c \
|
||||
test_api_min_max.c \
|
||||
test_kernel_arg_changes.cpp \
|
||||
test_kernel_arg_multi_setup.cpp \
|
||||
test_binary.cpp \
|
||||
test_native_kernel.cpp \
|
||||
test_create_context_from_type.cpp \
|
||||
test_platform.cpp \
|
||||
test_retain.cpp \
|
||||
test_device_min_data_type_align_size_alignment.cpp \
|
||||
test_mem_objects.cpp \
|
||||
test_bool.c \
|
||||
test_null_buffer_arg.c \
|
||||
test_mem_object_info.cpp \
|
||||
../../test_common/harness/errorHelpers.c \
|
||||
../../test_common/harness/threadTesting.c \
|
||||
../../test_common/harness/testHarness.c \
|
||||
../../test_common/harness/imageHelpers.cpp \
|
||||
../../test_common/harness/kernelHelpers.c \
|
||||
../../test_common/harness/typeWrappers.cpp \
|
||||
../../test_common/harness/mt19937.c \
|
||||
../../test_common/harness/conversions.c
|
||||
|
||||
DEFINES = DONT_TEST_GARBAGE_POINTERS
|
||||
|
||||
SOURCES = $(abspath $(SRCS))
|
||||
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||
LIBPATH += -L.
|
||||
HEADERS =
|
||||
TARGET = test_api
|
||||
INCLUDE =
|
||||
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
|
||||
CC = c++
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||
|
||||
OBJECTS := ${SOURCES:.c=.o}
|
||||
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||
|
||||
TARGETOBJECT =
|
||||
# `all` and `clean` name actions, not files: declare them phony so a stray
# file called "all" or "clean" in this directory cannot make them look
# up to date.
.PHONY: all clean
all: $(TARGET)
|
||||
|
||||
$(TARGET): $(OBJECTS)
|
||||
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
|
||||
|
||||
clean:
|
||||
rm -f $(TARGET) $(OBJECTS)
|
||||
|
||||
.DEFAULT:
|
||||
@echo The target \"$@\" does not exist in Makefile.
|
||||
215
test_conformance/api/main.c
Normal file
215
test_conformance/api/main.c
Normal file
@@ -0,0 +1,215 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// Table of test entry points handed to runTestHarness() by main().
// Entry i here pairs with basefn_names[i]. The ct_assert that follows
// basefn_names only checks that the two arrays have matching lengths
// (the name table carries one extra trailing "all" entry), so the ordering
// of the two tables has to be kept in sync by hand.
basefn basefn_list[] = {
|
||||
test_get_platform_info,
|
||||
test_get_sampler_info,
|
||||
test_get_command_queue_info,
|
||||
test_get_context_info,
|
||||
test_get_device_info,
|
||||
test_enqueue_task,
|
||||
test_binary_get,
|
||||
test_program_binary_create,
|
||||
test_kernel_required_group_size,
|
||||
|
||||
test_release_kernel_order,
|
||||
test_release_during_execute,
|
||||
|
||||
test_load_single_kernel,
|
||||
test_load_two_kernels,
|
||||
test_load_two_kernels_in_one,
|
||||
test_load_two_kernels_manually,
|
||||
test_get_program_info_kernel_names,
|
||||
test_get_kernel_arg_info,
|
||||
test_create_kernels_in_program,
|
||||
test_get_kernel_info,
|
||||
test_execute_kernel_local_sizes,
|
||||
test_set_kernel_arg_by_index,
|
||||
test_set_kernel_arg_constant,
|
||||
test_set_kernel_arg_struct_array,
|
||||
test_kernel_global_constant,
|
||||
|
||||
test_min_max_thread_dimensions,
|
||||
test_min_max_work_items_sizes,
|
||||
test_min_max_work_group_size,
|
||||
test_min_max_read_image_args,
|
||||
test_min_max_write_image_args,
|
||||
test_min_max_mem_alloc_size,
|
||||
test_min_max_image_2d_width,
|
||||
test_min_max_image_2d_height,
|
||||
test_min_max_image_3d_width,
|
||||
test_min_max_image_3d_height,
|
||||
test_min_max_image_3d_depth,
|
||||
test_min_max_image_array_size,
|
||||
test_min_max_image_buffer_size,
|
||||
test_min_max_parameter_size,
|
||||
test_min_max_samplers,
|
||||
test_min_max_constant_buffer_size,
|
||||
test_min_max_constant_args,
|
||||
test_min_max_compute_units,
|
||||
test_min_max_address_bits,
|
||||
test_min_max_single_fp_config,
|
||||
test_min_max_double_fp_config,
|
||||
test_min_max_local_mem_size,
|
||||
test_min_max_kernel_preferred_work_group_size_multiple,
|
||||
test_min_max_execution_capabilities,
|
||||
test_min_max_queue_properties,
|
||||
test_min_max_device_version,
|
||||
test_min_max_language_version,
|
||||
|
||||
test_kernel_arg_changes,
|
||||
test_kernel_arg_multi_setup_random,
|
||||
|
||||
test_native_kernel,
|
||||
|
||||
test_create_context_from_type,
|
||||
|
||||
test_platform_extensions,
|
||||
test_get_platform_ids,
|
||||
test_for_bool_type,
|
||||
|
||||
test_repeated_setup_cleanup,
|
||||
|
||||
test_retain_queue_single,
|
||||
test_retain_queue_multiple,
|
||||
test_retain_mem_object_single,
|
||||
test_retain_mem_object_multiple,
|
||||
test_min_data_type_align_size_alignment,
|
||||
|
||||
test_mem_object_destructor_callback,
|
||||
test_null_buffer_arg,
|
||||
test_get_buffer_info,
|
||||
test_get_image2d_info,
|
||||
test_get_image3d_info,
|
||||
test_get_image1d_info,
|
||||
test_get_image1d_array_info,
|
||||
test_get_image2d_array_info,
|
||||
};
|
||||
|
||||
|
||||
// Names for the tests in basefn_list, in the same order, followed by one
// extra final entry "all" that has no counterpart in basefn_list.
// The whole table, including "all", is what num_fns counts.
const char *basefn_names[] = {
|
||||
"get_platform_info",
|
||||
"get_sampler_info",
|
||||
"get_command_queue_info",
|
||||
"get_context_info",
|
||||
"get_device_info",
|
||||
"enqueue_task",
|
||||
"binary_get",
|
||||
"binary_create",
|
||||
"kernel_required_group_size",
|
||||
|
||||
"release_kernel_order",
|
||||
"release_during_execute",
|
||||
|
||||
"load_single_kernel",
|
||||
"load_two_kernels",
|
||||
"load_two_kernels_in_one",
|
||||
"load_two_kernels_manually",
|
||||
"get_program_info_kernel_names",
|
||||
"get_kernel_arg_info",
|
||||
"create_kernels_in_program",
|
||||
"get_kernel_info",
|
||||
"execute_kernel_local_sizes",
|
||||
"set_kernel_arg_by_index",
|
||||
"set_kernel_arg_constant",
|
||||
"set_kernel_arg_struct_array",
|
||||
"kernel_global_constant",
|
||||
|
||||
"min_max_thread_dimensions",
|
||||
"min_max_work_items_sizes",
|
||||
"min_max_work_group_size",
|
||||
"min_max_read_image_args",
|
||||
"min_max_write_image_args",
|
||||
"min_max_mem_alloc_size",
|
||||
"min_max_image_2d_width",
|
||||
"min_max_image_2d_height",
|
||||
"min_max_image_3d_width",
|
||||
"min_max_image_3d_height",
|
||||
"min_max_image_3d_depth",
|
||||
"min_max_image_array_size",
|
||||
"min_max_image_buffer_size",
|
||||
"min_max_parameter_size",
|
||||
"min_max_samplers",
|
||||
"min_max_constant_buffer_size",
|
||||
"min_max_constant_args",
|
||||
"min_max_compute_units",
|
||||
"min_max_address_bits",
|
||||
"min_max_single_fp_config",
|
||||
"min_max_double_fp_config",
|
||||
"min_max_local_mem_size",
|
||||
"min_max_kernel_preferred_work_group_size_multiple",
|
||||
"min_max_execution_capabilities",
|
||||
"min_max_queue_properties",
|
||||
"min_max_device_version",
|
||||
"min_max_language_version",
|
||||
|
||||
"kernel_arg_changes",
|
||||
"kernel_arg_multi_setup_random",
|
||||
|
||||
"native_kernel",
|
||||
|
||||
"create_context_from_type",
|
||||
"platform_extensions",
|
||||
|
||||
"get_platform_ids",
|
||||
"bool_type",
|
||||
|
||||
"repeated_setup_cleanup",
|
||||
|
||||
"retain_queue_single",
|
||||
"retain_queue_multiple",
|
||||
"retain_mem_object_single",
|
||||
"retain_mem_object_multiple",
|
||||
|
||||
"min_data_type_align_size_alignment",
|
||||
|
||||
"mem_object_destructor_callback",
|
||||
"null_buffer_arg",
|
||||
"get_buffer_info",
|
||||
"get_image2d_info",
|
||||
"get_image3d_info",
|
||||
"get_image1d_info",
|
||||
"get_image1d_array_info",
|
||||
"get_image2d_array_info",
|
||||
|
||||
"all",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
// Number of entries in basefn_names (this count includes the trailing "all").
int num_fns = (int)( sizeof(basefn_names) / sizeof(basefn_names[0]) );
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
108
test_conformance/api/procs.h
Normal file
108
test_conformance/api/procs.h
Normal file
@@ -0,0 +1,108 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/clImageHelper.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
extern float calculate_ulperror(float a, float b);
|
||||
|
||||
// Prototypes for the API conformance test entry points. The ones listed in
// basefn_list (main.c) are run through runTestHarness(). Each takes the
// device, context and command queue to test with, plus an element count.
extern int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_load_two_kernels_manually(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_for_bool_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_platform_extensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_release_during_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_kernel_arg_changes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_arg_multi_setup_random(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_null_buffer_arg( cl_device_id device_id, cl_context context, cl_command_queue queue, int num_elements );
|
||||
extern int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||
|
||||
36
test_conformance/api/testBase.h
Normal file
36
test_conformance/api/testBase.h
Normal file
@@ -0,0 +1,36 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testBase_h
|
||||
#define _testBase_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
|
||||
|
||||
2033
test_conformance/api/test_api_min_max.c
Normal file
2033
test_conformance/api/test_api_min_max.c
Normal file
File diff suppressed because it is too large
Load Diff
236
test_conformance/api/test_binary.cpp
Normal file
236
test_conformance/api/test_binary.cpp
Normal file
@@ -0,0 +1,236 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
/*
 * OpenCL C source shared by the program-binary tests in this file.
 * The adjacent literals concatenate into a single source string, so the
 * array has exactly one entry.  The kernel stores (int)src[tid] + 1 in dst[tid].
 */
static const char *sample_binary_kernel_source[] = {
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid] + 1;\n"
    "\n"
    "}\n"
};
|
||||
|
||||
|
||||
/*
 * Builds sample_binary_kernel_source for deviceID and checks that the
 * resulting program binary can be queried with clGetProgramInfo.
 *
 * The test queries CL_PROGRAM_BINARY_SIZES (which must be non-zero), checks
 * that the size reported for CL_PROGRAM_BINARIES equals the size of a
 * one-entry pointer array, and then fetches the binary.  The contents of the
 * binary are not validated.
 *
 * queue and num_elements are not used by this function.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_binary_get(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Owns a malloc'ed block and frees it when the scope is left, so that no
    // exit path (including the early returns test_error() is expected to take
    // on failure) can leak the buffer.
    struct ScopedMalloc
    {
        void *ptr;
        explicit ScopedMalloc( size_t bytes ) : ptr( malloc( bytes ) ) {}
        ~ScopedMalloc() { free( ptr ); }
    };

    int error;
    clProgramWrapper program;
    size_t binarySize;


    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // Create a buffer to receive the actual binary
    ScopedMalloc binaryMem( sizeof(unsigned char) * binarySize );
    unsigned char *binary = (unsigned char *)binaryMem.ptr;
    if( binary == NULL )
    {
        log_error( "ERROR: Unable to allocate %d bytes for the program binary\n", (int)binarySize );
        return -1;
    }
    unsigned char *buffers[ 1 ] = { binary };

    // Do another sanity check here first: the query must report the size of
    // the pointer array, not the size of the binary itself
    size_t size;
    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
    test_error( error, "Unable to get expected size of binaries array" );
    if( size != sizeof( buffers ) )
    {
        log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d)\n", (int)sizeof( buffers ), (int)size );
        return -1;
    }

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary" );

    // No way to verify the binary is correct, so just be good with that
    return 0;
}
|
||||
|
||||
|
||||
/*
 * Round-trips a program through its binary form.
 *
 * To test this in a self-contained fashion, the function:
 *   1. builds sample_binary_kernel_source from source and reads back its binary;
 *   2. creates and builds a second program from that binary (with a
 *      binary_status array) and checks its binary is byte-identical;
 *   3. repeats step 2 with a NULL binary_status pointer;
 *   4. runs the source-built kernel and the binary-built kernel on the same
 *      1000-element input and compares their outputs.
 *
 * num_elements is not used by this function.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_program_binary_create(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    // Owns a malloc'ed block and frees it when the scope is left, so that no
    // exit path (including the early returns test_error() is expected to take
    // on failure) can leak host memory.
    struct ScopedMalloc
    {
        void *ptr;
        explicit ScopedMalloc( size_t bytes ) : ptr( malloc( bytes ) ) {}
        ~ScopedMalloc() { free( ptr ); }
    };

    int error;
    clProgramWrapper program, program_from_binary;
    size_t binarySize;


    program = clCreateProgramWithSource( context, 1, sample_binary_kernel_source, NULL, &error );
    test_error( error, "Unable to create program from source" );

    // Build so we have a binary to get
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    // Get the size of the resulting binary (only one device)
    error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
    test_error( error, "Unable to get binary size" );

    // Sanity check
    if( binarySize == 0 )
    {
        log_error( "ERROR: Binary size of program is zero\n" );
        return -1;
    }

    // One buffer per program whose binary is read back
    ScopedMalloc binaryMem( sizeof(unsigned char) * binarySize );
    ScopedMalloc binary2Mem( sizeof(unsigned char) * binarySize );
    ScopedMalloc binary3Mem( sizeof(unsigned char) * binarySize );
    unsigned char *binary = (unsigned char *)binaryMem.ptr;
    unsigned char *binary2 = (unsigned char *)binary2Mem.ptr;
    unsigned char *binary3 = (unsigned char *)binary3Mem.ptr;
    if( binary == NULL || binary2 == NULL || binary3 == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the program binaries\n" );
        return -1;
    }

    // Get the actual binary of the source-built program
    const unsigned char *buffers[ 1 ] = { binary };

    error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary" );

    cl_int loadErrors[ 1 ];
    program_from_binary = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, loadErrors, &error );
    test_error( error, "Unable to load valid program binary" );
    test_error( loadErrors[ 0 ], "Unable to load valid device binary into program" );

    error = clBuildProgram( program_from_binary, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program" );

    // Now get the binary one more time and verify it loaded the right binary
    buffers[ 0 ] = binary2;
    error = clGetProgramInfo( program_from_binary, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary second time" );

    if( memcmp( binary, binary2, binarySize ) != 0 )
    {
        log_error( "ERROR: Program binary is different when loaded from binary!\n" );
        return -1;
    }

    // Try again, this time without passing the status ptr in, to make sure we still
    // get a valid binary (buffers[0] points at binary2, which was just verified
    // to be identical to binary)
    clProgramWrapper programWithoutStatus = clCreateProgramWithBinary( context, 1, &deviceID, &binarySize, buffers, NULL, &error );
    test_error( error, "Unable to load valid program binary when binary_status pointer is NULL" );

    error = clBuildProgram( programWithoutStatus, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build binary program" );

    // Read the binary back from the program created WITHOUT a status pointer;
    // querying program_from_binary here would leave this program unverified
    buffers[ 0 ] = binary3;
    error = clGetProgramInfo( programWithoutStatus, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
    test_error( error, "Unable to get program binary third time" );

    if( memcmp( binary, binary3, binarySize ) != 0 )
    {
        log_error( "ERROR: Program binary is different when status pointer is NULL!\n" );
        return -1;
    }

    // Now execute them both to see that they both do the same thing.
    clMemWrapper in, out, out_binary;
    clKernelWrapper kernel, kernel_binary;
    size_t size_to_run = 1000;

    // Allocate some data
    ScopedMalloc inMem( sizeof(cl_float) * size_to_run );
    ScopedMalloc outMem( sizeof(cl_int) * size_to_run );
    ScopedMalloc outBinaryMem( sizeof(cl_int) * size_to_run );
    cl_float *in_data = (cl_float *)inMem.ptr;
    cl_int *out_data = (cl_int *)outMem.ptr;
    cl_int *out_data_binary = (cl_int *)outBinaryMem.ptr;
    if( in_data == NULL || out_data == NULL || out_data_binary == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for the kernel data\n" );
        return -1;
    }
    memset(out_data, 0, sizeof(cl_int)*size_to_run);
    memset(out_data_binary, 0, sizeof(cl_int)*size_to_run);
    for (size_t i=0; i<size_to_run; i++)
        in_data[i] = (cl_float)i;

    // Create the buffers
    in = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*size_to_run, in_data, &error);
    test_error( error, "clCreateBuffer failed");
    out = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data, &error);
    test_error( error, "clCreateBuffer failed");
    out_binary = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*size_to_run, out_data_binary, &error);
    test_error( error, "clCreateBuffer failed");

    // Create the kernels
    kernel = clCreateKernel(program, "sample_test", &error);
    test_error( error, "clCreateKernel failed");
    kernel_binary = clCreateKernel(program_from_binary, "sample_test", &error);
    test_error( error, "clCreateKernel from binary failed");

    // Set the arguments
    error = clSetKernelArg(kernel, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel, 1, sizeof(out), &out);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 0, sizeof(in), &in);
    test_error( error, "clSetKernelArg failed");
    error = clSetKernelArg(kernel_binary, 1, sizeof(out_binary), &out_binary);
    test_error( error, "clSetKernelArg failed");

    // Execute the kernels
    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel failed");
    error = clEnqueueNDRangeKernel(queue, kernel_binary, 1, NULL, &size_to_run, NULL, 0, NULL, NULL);
    test_error( error, "clEnqueueNDRangeKernel for binary kernel failed");

    // Finish up
    error = clFinish(queue);
    test_error( error, "clFinish failed");

    // Get the results back
    error = clEnqueueReadBuffer(queue, out, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");
    error = clEnqueueReadBuffer(queue, out_binary, CL_TRUE, 0, sizeof(cl_int)*size_to_run, out_data_binary, 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed");

    // Compare the results
    if( memcmp( out_data, out_data_binary, sizeof(cl_int)*size_to_run ) != 0 )
    {
        log_error( "ERROR: Results from executing binary and regular kernel differ.\n" );
        return -1;
    }

    // All done! The ScopedMalloc objects release every host buffer.
    return 0;
}
|
||||
|
||||
|
||||
52
test_conformance/api/test_bool.c
Normal file
52
test_conformance/api/test_bool.c
Normal file
@@ -0,0 +1,52 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
|
||||
/*
 * OpenCL C source for a kernel that declares and branches on a local `bool`.
 * The adjacent literals concatenate into one source string (one array entry).
 */
const char *kernel_with_bool[] = {
    "__kernel void kernel_with_bool(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " bool myBool = (src[tid] < 0.5f) && (src[tid] > -0.5f);\n"
    " if(myBool)\n"
    " {\n"
    " dst[tid] = (int)src[tid];\n"
    " }\n"
    " else\n"
    " {\n"
    " dst[tid] = 0;\n"
    " }\n"
    "\n"
    "}\n"
};
|
||||
|
||||
int test_for_bool_type(cl_device_id deviceID, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
int err = create_single_kernel_helper(context,
|
||||
&program,
|
||||
&kernel,
|
||||
1, kernel_with_bool,
|
||||
"kernel_with_bool" );
|
||||
return err;
|
||||
}
|
||||
|
||||
135
test_conformance/api/test_create_context_from_type.cpp
Normal file
135
test_conformance/api/test_create_context_from_type.cpp
Normal file
@@ -0,0 +1,135 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
/*
 * Callback passed to clCreateContextFromType in this file.
 * It writes the errinfo string, followed by a newline, to the info log.
 * private_info, cb and user_data are ignored.
 */
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
{
    (void)private_info;
    (void)cb;
    (void)user_data;

    log_info( "%s\n", errinfo );
}
|
||||
|
||||
|
||||
/*
 * Creates a context with clCreateContextFromType, using the device type and
 * platform queried from deviceID, and checks that the context is usable:
 * a command queue is created on it for deviceID, a float->int conversion
 * kernel is built and run on 10 random floats, and the results are compared
 * with the host-side (int) conversion of the same inputs.
 *
 * The context and queue passed in by the caller, and num_elements, are not
 * used; the test operates on the context and queue it creates itself.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_create_context_from_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    clContextWrapper context_to_test;
    clCommandQueueWrapper queue_to_test;
    size_t threads[1], localThreads[1];
    cl_float inputData[10];
    cl_int outputData[10];
    int i;
    RandomSeed seed( gRandomSeed );

    const char *sample_single_test_kernel[] = {
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n" };

    cl_device_type type;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed\n");

    cl_platform_id platform;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed\n");

    // The property list is a sequence of integer-typed values terminated by 0;
    // NULL is a pointer constant and does not portably convert to that type.
    cl_context_properties properties[3] = {
        (cl_context_properties)CL_CONTEXT_PLATFORM,
        (cl_context_properties)platform,
        0
    };

    context_to_test = clCreateContextFromType(properties, type, notify_callback, NULL, &error);
    test_error(error, "clCreateContextFromType failed");
    if (context_to_test == NULL) {
        log_error("clCreateContextFromType returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    // The queue properties argument is a bitfield, so "no properties" is 0
    queue_to_test = clCreateCommandQueue(context_to_test, deviceID, 0, &error);
    test_error(error, "clCreateCommandQueue failed");
    if (queue_to_test == NULL) {
        log_error("clCreateCommandQueue returned NULL, but error was CL_SUCCESS.\n");
        return -1;
    }

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context_to_test, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    streams[0] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context_to_test, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Write some test data */
    memset( outputData, 0, sizeof( outputData ) );

    for (i=0; i<10; i++)
        inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);

    error = clEnqueueWriteBuffer(queue_to_test, streams[0], CL_TRUE, 0, sizeof(cl_float)*10, (void *)inputData, 0, NULL, NULL);
    test_error( error, "Unable to set testing kernel data" );

    /* Test setting the arguments by index manually */
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );


    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context_to_test, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue_to_test, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue_to_test, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != (int)inputData[i])
        {
            log_error( "ERROR: Data did not verify on first pass!\n" );
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
|
||||
643
test_conformance/api/test_create_kernels.c
Normal file
643
test_conformance/api/test_create_kernels.c
Normal file
@@ -0,0 +1,643 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
|
||||
/* ---- Kernel source fixtures used by the kernel-creation tests ---- */

/* One source string containing one kernel, sample_test. */
const char *sample_single_kernel[] = {
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n"
};

/* Length of each entry of sample_single_kernel; filled in by the test at run time. */
size_t sample_single_kernel_lengths[1];

/* Two source strings, one kernel each: sample_test and sample_test2.
   The comma after the first "}\n" is what separates the two entries. */
const char *sample_two_kernels[] = {
    /* entry 0 */
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n",
    /* entry 1 */
    "__kernel void sample_test2(__global int *src, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (float)src[tid];\n"
    "\n"
    "}\n"
};

/* Length of each entry of sample_two_kernels; filled in by the test at run time. */
size_t sample_two_kernel_lengths[2];

/* The same two kernels as sample_two_kernels, but with no comma between them,
   so the literals concatenate into a single source string. */
const char *sample_two_kernels_in_1[] = {
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n"
    "__kernel void sample_test2(__global int *src, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = (float)src[tid];\n"
    "\n"
    "}\n"
};

/* Length of the single entry of sample_two_kernels_in_1; filled in by the test at run time. */
size_t sample_two_kernels_in_1_lengths[1];

/* Single-string source for test_kernel, which writes src[gid] + 1 to dst[gid]. */
const char *repeate_test_kernel =
    "__kernel void test_kernel(__global int *src, __global int *dst)\n"
    "{\n"
    " dst[get_global_id(0)] = src[get_global_id(0)]+1;\n"
    "}\n";
|
||||
|
||||
|
||||
|
||||
/*
 * Builds the one-kernel program in sample_single_kernel, creates its kernel
 * with clCreateKernelsInProgram and validates the values clGetKernelInfo
 * reports for it: owning program, context, argument count (2) and function
 * name ("sample_test"), together with the returned sizes.
 *
 * queue and num_elements are not used by this function.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_load_single_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    unsigned int numKernels;
    size_t reportedSize;
    int error;

    /* Preprocess: calc the length of each source file line */
    sample_single_kernel_lengths[ 0 ] = strlen( sample_single_kernel[ 0 ] );

    /* Create and build the program, then pull its kernel out */
    program = clCreateProgramWithSource( context, 1, sample_single_kernel, sample_single_kernel_lengths, &error );
    if( error != CL_SUCCESS || program == NULL )
    {
        print_error( error, "Unable to create single kernel program" );
        return -1;
    }

    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build single kernel program" );

    error = clCreateKernelsInProgram(program, 1, &kernel, &numKernels);
    test_error( error, "Unable to create single kernel program" );

    /* The kernel must report the program it was created from... */
    cl_program reportedProgram;
    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( cl_program ), &reportedProgram, &reportedSize );
    test_error( error, "Unable to get kernel's program" );
    if( (cl_program)reportedProgram != (cl_program)program )
    {
        log_error( "ERROR: Returned kernel's program does not match program used to create it! (Got %p, expected %p)\n", (cl_program)reportedProgram, (cl_program)program );
        return -1;
    }
    if( reportedSize != sizeof( cl_program ) )
    {
        log_error( "ERROR: Returned size of kernel's program does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_program ), (int)reportedSize );
        return -1;
    }

    /* ...and the context that program lives in */
    cl_context reportedContext;
    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( cl_context ), &reportedContext, &reportedSize );
    test_error( error, "Unable to get kernel's context" );
    if( (cl_context)reportedContext != (cl_context)context )
    {
        log_error( "ERROR: Returned kernel's context does not match program used to create it! (Got %p, expected %p)\n", (cl_context)reportedContext, (cl_context)context );
        return -1;
    }
    if( reportedSize != sizeof( cl_context ) )
    {
        log_error( "ERROR: Returned size of kernel's context does not match expected size (expected %d, got %d)\n", (int)sizeof( cl_context ), (int)reportedSize );
        return -1;
    }

    /* Argument count: first ask only for the size, then for the value */
    cl_uint argCount;
    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &reportedSize );
    test_error( error, "Unable to get size of arg count info from kernel" );
    if( reportedSize != sizeof( argCount ) )
    {
        log_error( "ERROR: size of arg count not valid! %d\n", (int)reportedSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( argCount ), &argCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );
    if( argCount != 2 )
    {
        log_error( "ERROR: Kernel arg count does not match!\n" );
        return -1;
    }

    /* Function name, and the size returned for it (string length plus terminator) */
    cl_char nameBuffer[512];
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( nameBuffer ), nameBuffer, &reportedSize );
    test_error( error, "Unable to get name from kernel" );
    if( strcmp( (char *)nameBuffer, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel names do not match!\n" );
        return -1;
    }
    if( reportedSize != strlen( (char *)nameBuffer ) + 1 )
    {
        log_error( "ERROR: Length of kernel name returned does not validate (expected %d, got %d)\n", (int)strlen( (char *)nameBuffer ) + 1, (int)reportedSize );
        return -1;
    }

    /* All done */
    return 0;
}
|
||||
|
||||
/*
 * Builds a program from the two separate source strings in sample_two_kernels
 * and checks that clCreateKernelsInProgram returns exactly two kernels, named
 * "sample_test" and "sample_test2" in either order with no duplicate, and
 * that the first returned kernel reports two arguments.
 *
 * queue and num_elements are not used by this function.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_load_two_kernels(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[ 512 ];
    cl_uint testArgCount;


    /* Preprocess: calc the length of each source file line */
    sample_two_kernel_lengths[ 0 ] = strlen( sample_two_kernels[ 0 ] );
    sample_two_kernel_lengths[ 1 ] = strlen( sample_two_kernels[ 1 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 2, sample_two_kernels, sample_two_kernel_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program!" );
        return -1;
    }
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );
    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    /* The order in which the kernels come back is not assumed, so track
       which of the two expected names has been seen */
    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    /* Check second kernel */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from second kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Name the kernel that is actually missing */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* All done */
    return 0;
}
|
||||
|
||||
/*
 * Builds a program from the single source string in sample_two_kernels_in_1,
 * which contains two kernels, and checks that clCreateKernelsInProgram returns
 * exactly two kernels, named "sample_test" and "sample_test2" in either order
 * with no duplicate, and that the first returned kernel reports two arguments.
 *
 * queue and num_elements are not used by this function.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_load_two_kernels_in_one(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel[2];
    unsigned int numKernels;
    cl_char testName[512];
    cl_uint testArgCount;


    /* Preprocess: calc the length of each source file line */
    sample_two_kernels_in_1_lengths[ 0 ] = strlen( sample_two_kernels_in_1[ 0 ] );

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, sample_two_kernels_in_1_lengths, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build dual kernel program" );
    error = clCreateKernelsInProgram(program, 2, &kernel[0], &numKernels);
    test_error( error, "Unable to create dual kernel program" );

    if( numKernels != 2 )
    {
        log_error( "ERROR: wrong # of kernels! (%d)\n", numKernels );
        return -1;
    }

    /* Check first kernel */
    error = clGetKernelInfo( kernel[0], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    /* The order in which the kernels come back is not assumed, so track
       which of the two expected names has been seen */
    int found_kernel1 = 0, found_kernel2 = 0;

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    error = clGetKernelInfo( kernel[0], CL_KERNEL_NUM_ARGS, sizeof( testArgCount ), &testArgCount, NULL );
    test_error( error, "Unable to get arg count from kernel" );

    if( testArgCount != 2 )
    {
        log_error( "ERROR: wrong # of args for kernel\n" );
        return -1;
    }

    /* Check second kernel */
    error = clGetKernelInfo( kernel[1], CL_KERNEL_FUNCTION_NAME, sizeof( testName ), testName, NULL );
    test_error( error, "Unable to get function name from kernel" );

    if( strcmp( (char *)testName, "sample_test" ) == 0 ) {
        if (found_kernel1) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel1 = 1;
    } else if( strcmp( (char *)testName, "sample_test2" ) == 0 ) {
        if (found_kernel2) {
            log_error("Kernel \"%s\" returned twice.\n", (char *)testName);
            return -1;
        }
        found_kernel2 = 1;
    } else {
        log_error( "ERROR: Invalid kernel name returned: \"%s\" expected \"%s\" or \"%s\".\n", testName, "sample_test", "sample_test2");
        return -1;
    }

    if( !found_kernel1 || !found_kernel2 )
    {
        log_error( "ERROR: Kernel names do not match.\n" );
        if (!found_kernel1)
            log_error("Kernel \"%s\" not returned.\n", "sample_test");
        /* Name the kernel that is actually missing */
        if (!found_kernel2)
            log_error("Kernel \"%s\" not returned.\n", "sample_test2");
        return -1;
    }

    /* All done */
    return 0;
}
|
||||
|
||||
/*
 * Builds the two-kernel program in sample_two_kernels_in_1 and checks that
 * each kernel can be created individually by name with clCreateKernel.
 * The kernels are requested in the reverse of their order in the source.
 *
 * queue and num_elements are not used by this function.
 * Returns 0 on success and a non-zero value on failure.
 */
int test_load_two_kernels_manually( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;


    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Try manually creating kernels (backwards just in case) */
    kernel1 = clCreateKernel( program, "sample_test2", &error );

    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    kernel2 = clCreateKernel( program, "sample_test", &error );

    /* Check the error code as well as the handle, exactly as for kernel 1 */
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Builds the two-kernel sample program and checks the program-level kernel
 * queries:
 *   - CL_PROGRAM_NUM_KERNELS must report 2;
 *   - CL_PROGRAM_KERNEL_NAMES must report a NUL-terminated, semicolon
 *     separated list naming "sample_test" and "sample_test2" in either order.
 * Finally both kernels are created by name.
 *
 * Returns 0 on success, non-zero on failure. The queue and num_elements
 * parameters are not used by this test.
 */
int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel1, kernel2;
    int error;
    size_t i;

    /* Now create a test program */
    program = clCreateProgramWithSource( context, 1, sample_two_kernels_in_1, NULL, &error );
    if( program == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Unable to create dual kernel program" );
        return -1;
    }

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build kernel program" );

    /* Lookup the number of kernels in the program. */
    size_t total_kernels = 0;
    error = clGetProgramInfo(program, CL_PROGRAM_NUM_KERNELS, sizeof(size_t),&total_kernels,NULL);
    test_error( error, "Unable to get program info num kernels");

    if (total_kernels != 2)
    {
        print_error( error, "Program did not contain two kernels" );
        return -1;
    }

    /* Lookup the kernel names. Either ordering of the two names is accepted. */
    const char* actual_names[] = { "sample_test;sample_test2", "sample_test2;sample_test"} ;
    const size_t num_actual_names = sizeof( actual_names ) / sizeof( actual_names[0] );

    size_t kernel_names_len = 0;
    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,0,NULL,&kernel_names_len);
    test_error( error, "Unable to get length of kernel names list." );

    /* Both accepted strings have the same length, so comparing against the
       first one (plus its terminator) is sufficient. */
    if (kernel_names_len != (strlen(actual_names[0])+1))
    {
        print_error( error, "Kernel names length did not match");
        return -1;
    }

    const size_t len = (kernel_names_len+1)*sizeof(char);
    char* kernel_names = (char*)malloc(len);
    if (kernel_names == NULL)
    {
        log_error( "ERROR: Unable to allocate %d bytes for the kernel names list.\n", (int)len );
        return -1;
    }

    error = clGetProgramInfo(program,CL_PROGRAM_KERNEL_NAMES,len,kernel_names,&kernel_names_len);
    if (error != CL_SUCCESS)
    {
        /* Release the buffer before bailing out so the failure path does not leak. */
        print_error( error, "Unable to get kernel names list." );
        free(kernel_names);
        return error;
    }

    /* Check to see if the kernel name array is null terminated. The returned
       length is validated first so the index below stays inside the buffer. */
    if (kernel_names_len == 0 || kernel_names_len > len || kernel_names[kernel_names_len-1] != '\0')
    {
        free(kernel_names);
        print_error( error, "Kernel name list was not null terminated");
        return -1;
    }

    /* Check to see if the correct kernel name string was returned. */
    for( i = 0; i < num_actual_names; i++ )
        if( 0 == strcmp(actual_names[i],kernel_names) )
            break;

    if (i == num_actual_names)
    {
        /* Log the returned string before freeing it, and list every accepted spelling. */
        log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
        for( i = 0; i < num_actual_names; i++ )
            log_error( "\t\t\"%s\"\n", actual_names[i] );
        free(kernel_names);
        return -1;
    }
    free(kernel_names);

    /* Try manually creating both kernels by name */
    kernel1 = clCreateKernel( program, "sample_test", &error );
    if( kernel1 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 1" );
        return -1;
    }

    kernel2 = clCreateKernel( program, "sample_test2", &error );
    if( kernel2 == NULL || error != CL_SUCCESS )
    {
        print_error( error, "Could not get kernel 2" );
        return -1;
    }

    return 0;
}
|
||||
|
||||
/* OpenCL C source for the clEnqueueTask test: fills dst[0..count) with tid + i. */
static const char *single_task_kernel[] = {
    "__kernel void sample_test(__global int *dst, int count)\n{\n"
    " int tid = get_global_id(0);\n\n"
    " for( int i = 0; i < count; i++ )\n"
    " dst[i] = tid + i;\n\n"
    "}\n"
};
|
||||
|
||||
/*
 * Runs the single_task_kernel source through clEnqueueTask and validates the
 * output buffer. The kernel stores tid + i into dst[i]; the validation below
 * expects dst[i] == i, i.e. the task must have run with get_global_id(0) == 0.
 *
 * Returns 0 on success, non-zero on failure. The deviceID and num_elements
 * parameters are not used by this test.
 */
int test_enqueue_task(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper output;
    cl_int count;

    if( create_single_kernel_helper( context, &program, &kernel, 1, single_task_kernel, "sample_test" ) )
        return -1;

    // Create args
    count = 100;
    output = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * count, NULL, &error );
    test_error( error, "Unable to create output buffer" );

    error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &output );
    test_error( error, "Unable to set kernel argument" );
    error = clSetKernelArg( kernel, 1, sizeof( cl_int ), &count );
    test_error( error, "Unable to set kernel argument" );

    // Run task
    error = clEnqueueTask( queue, kernel, 0, NULL, NULL );
    test_error( error, "Unable to run task" );

    // Read results
    cl_int *results = (cl_int*)malloc(sizeof(cl_int)*count);
    if( results == NULL )
    {
        log_error( "ERROR: Unable to allocate host memory for %d results\n", (int)count );
        return -1;
    }

    error = clEnqueueReadBuffer( queue, output, CL_TRUE, 0, sizeof( cl_int ) * count, results, 0, NULL, NULL );
    if( error != CL_SUCCESS )
    {
        // Free the host buffer on this path too; a bare early return would leak it
        print_error( error, "Unable to read results" );
        free(results);
        return error;
    }

    // Validate
    for( cl_int i = 0; i < count; i++ )
    {
        if( results[ i ] != i )
        {
            log_error( "ERROR: Task result value %d did not validate! Expected %d, got %d\n", (int)i, (int)i, (int)results[ i ] );
            free(results);
            return -1;
        }
    }

    /* All done */
    free(results);
    return 0;
}
|
||||
|
||||
|
||||
|
||||
#define TEST_SIZE 1000
|
||||
int test_repeated_setup_cleanup(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
|
||||
cl_context local_context;
|
||||
cl_command_queue local_queue;
|
||||
cl_program local_program;
|
||||
cl_kernel local_kernel;
|
||||
cl_mem local_mem_in, local_mem_out;
|
||||
cl_event local_event;
|
||||
size_t global_dim[3];
|
||||
int i, j, error;
|
||||
global_dim[0] = TEST_SIZE;
|
||||
global_dim[1] = 1; global_dim[2] = 1;
|
||||
cl_int *inData, *outData;
|
||||
cl_int status;
|
||||
|
||||
inData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
|
||||
outData = (cl_int*)malloc(sizeof(cl_int)*TEST_SIZE);
|
||||
for (i=0; i<TEST_SIZE; i++) {
|
||||
inData[i] = i;
|
||||
}
|
||||
|
||||
|
||||
for (i=0; i<100; i++) {
|
||||
memset(outData, 0, sizeof(cl_int)*TEST_SIZE);
|
||||
|
||||
local_context = clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &error);
|
||||
test_error( error, "clCreateContext failed");
|
||||
|
||||
local_queue = clCreateCommandQueue(local_context, deviceID, 0, &error);
|
||||
test_error( error, "clCreateCommandQueue failed");
|
||||
|
||||
local_program = clCreateProgramWithSource(local_context, 1, &repeate_test_kernel, NULL, &error);
|
||||
test_error( error, "clCreateProgramWithSource failed");
|
||||
|
||||
error = clBuildProgram(local_program, 0, NULL, NULL, NULL, NULL);
|
||||
test_error( error, "clBuildProgram failed");
|
||||
|
||||
local_kernel = clCreateKernel(local_program, "test_kernel", &error);
|
||||
test_error( error, "clCreateKernel failed");
|
||||
|
||||
local_mem_in = clCreateBuffer(local_context, CL_MEM_READ_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
|
||||
local_mem_out = clCreateBuffer(local_context, CL_MEM_WRITE_ONLY, TEST_SIZE*sizeof(cl_int), NULL, &error);
|
||||
test_error( error, "clCreateBuffer failed");
|
||||
|
||||
error = clEnqueueWriteBuffer(local_queue, local_mem_in, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), inData, 0, NULL, NULL);
|
||||
test_error( error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
error = clEnqueueWriteBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
|
||||
test_error( error, "clEnqueueWriteBuffer failed");
|
||||
|
||||
error = clSetKernelArg(local_kernel, 0, sizeof(local_mem_in), &local_mem_in);
|
||||
test_error( error, "clSetKernelArg failed");
|
||||
|
||||
error = clSetKernelArg(local_kernel, 1, sizeof(local_mem_out), &local_mem_out);
|
||||
test_error( error, "clSetKernelArg failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(local_queue, local_kernel, 1, NULL, global_dim, NULL, 0, NULL, &local_event);
|
||||
test_error( error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clWaitForEvents(1, &local_event);
|
||||
test_error( error, "clWaitForEvents failed");
|
||||
|
||||
error = clGetEventInfo(local_event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), &status, NULL);
|
||||
test_error( error, "clGetEventInfo failed");
|
||||
|
||||
if (status != CL_COMPLETE) {
|
||||
log_error( "Kernel execution not complete: status %d.\n", status);
|
||||
free(inData);
|
||||
free(outData);
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clEnqueueReadBuffer(local_queue, local_mem_out, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), outData, 0, NULL, NULL);
|
||||
test_error( error, "clEnqueueReadBuffer failed");
|
||||
|
||||
clReleaseEvent(local_event);
|
||||
clReleaseMemObject(local_mem_in);
|
||||
clReleaseMemObject(local_mem_out);
|
||||
clReleaseKernel(local_kernel);
|
||||
clReleaseProgram(local_program);
|
||||
clReleaseCommandQueue(local_queue);
|
||||
clReleaseContext(local_context);
|
||||
|
||||
for (j=0; j<TEST_SIZE; j++) {
|
||||
if (outData[j] != inData[j] + 1) {
|
||||
log_error("Results failed to validate at iteration %d. %d != %d.\n", i, outData[j], inData[j] + 1);
|
||||
free(inData);
|
||||
free(outData);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(inData);
|
||||
free(outData);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
/*
 * Returns 1 when x is a power of two (exactly one bit set), 0 otherwise.
 * Zero has no bit set and is therefore rejected explicitly; without that
 * guard the bit trick below would report 0 as a power of two.
 */
int IsAPowerOfTwo( unsigned long x )
{
    /* x & (x-1) clears the lowest set bit; the result is 0 only when there
       was at most one bit to clear. */
    return x != 0 && 0 == (x & (x-1));
}
|
||||
|
||||
|
||||
/*
 * Checks the value the device reports for CL_DEVICE_MEM_BASE_ADDR_ALIGN
 * (treated as a bit count by this test):
 *   - it must be at least the size in bits of a 16-element vector of long
 *     (when gHasLong is set) or of int (otherwise);
 *   - it must be a power of two.
 *
 * Returns 0 on success, non-zero on failure. The context, queue and n_elems
 * parameters are not used by this test.
 */
int test_min_data_type_align_size_alignment(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
    /* Minimum required alignment in bytes: the largest 16-wide vector type */
    cl_uint min_alignment;

    if (gHasLong)
        min_alignment = sizeof(cl_long)*16;
    else
        min_alignment = sizeof(cl_int)*16;

    int error = 0;
    cl_uint alignment;

    error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(alignment), &alignment, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN failed");
    log_info("Device reported CL_DEVICE_MEM_BASE_ADDR_ALIGN = %lu bits.\n", (unsigned long)alignment);

    // Verify the size is large enough (min_alignment is in bytes, hence the *8)
    if (alignment < min_alignment*8) {
        log_error("ERROR: alignment too small. Minimum alignment for %s16 is %lu bits, device reported %lu bits.\n",
                  (gHasLong) ? "long" : "int",
                  (unsigned long)(min_alignment*8), (unsigned long)alignment);
        return -1;
    }

    // Verify the size is a power of two
    if (!IsAPowerOfTwo((unsigned long)alignment)) {
        log_error("ERROR: alignment is not a power of two.\n");
        return -1;
    }

    return 0;
}
|
||||
141
test_conformance/api/test_kernel_arg_changes.cpp
Normal file
141
test_conformance/api/test_kernel_arg_changes.cpp
Normal file
@@ -0,0 +1,141 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
extern "C" { extern cl_uint gRandomSeed;}
|
||||
|
||||
// This test is designed to stress changing kernel arguments between execute calls (that are asynchronous and thus
|
||||
// potentially overlapping) to make sure each kernel gets the right arguments
|
||||
|
||||
// Note: put a delay loop in the kernel to make sure we have time to queue the next kernel before this one finishes
|
||||
/* OpenCL C source: after a delay loop, each work-item stores the image width
   and height, each multiplied by its global id, into outDimensions. */
const char *inspect_image_kernel_source[] = {
    "__kernel void sample_test(read_only image2d_t src, __global int *outDimensions )\n{\n"
    " int tid = get_global_id(0), i;\n"
    " for( i = 0; i < 100000; i++ ); \n"
    " outDimensions[tid * 2] = get_image_width(src) * tid;\n"
    " outDimensions[tid * 2 + 1] = get_image_height(src) * tid;\n\n"
    "}\n"
};
|
||||
|
||||
#define NUM_TRIES 100
|
||||
#define NUM_THREADS 2048
|
||||
|
||||
/*
 * Stress test for changing kernel arguments between enqueues of the same
 * kernel object. NUM_TRIES images of random size and NUM_TRIES result buffers
 * are created; the kernel is then enqueued NUM_TRIES times, with arguments 0
 * and 1 re-set to a different image/buffer pair before each enqueue and no
 * wait in between. Afterwards every result buffer is read back and checked
 * against the dimensions of the image that was bound for that enqueue.
 *
 * Returns 0 on success, non-zero on failure. num_elements is not used.
 */
int test_kernel_arg_changes(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error, i;
    clMemWrapper images[ NUM_TRIES ];
    size_t sizes[ NUM_TRIES ][ 2 ];     // [i][0] = width, [i][1] = height of image i
    clMemWrapper results[ NUM_TRIES ];
    cl_image_format imageFormat;
    size_t maxWidth, maxHeight;
    size_t threads[1], localThreads[1];
    cl_int resultArray[ NUM_THREADS * 2 ];
    char errStr[ 128 ];
    RandomSeed seed( gRandomSeed );

    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // Just get any ol format to test with
    error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &imageFormat );
    test_error( error, "Unable to obtain suitable image format to test with!" );

    // Create our testing kernel
    error = create_single_kernel_helper( context, &program, &kernel, 1, inspect_image_kernel_source, "sample_test" );
    test_error( error, "Unable to create testing kernel" );

    // Get max dimensions for each of our images
    error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
    error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
    test_error( error, "Unable to get max image dimensions for device" );

    // Get the number of threads we'll be able to run
    threads[0] = NUM_THREADS;
    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size for kernel" );

    // Create a variety of images and output arrays
    for( i = 0; i < NUM_TRIES; i++ )
    {
        // Random dimensions in [1, max/32]
        sizes[ i ][ 0 ] = genrand_int32(seed) % (maxWidth/32) + 1;
        sizes[ i ][ 1 ] = genrand_int32(seed) % (maxHeight/32) + 1;

        images[ i ] = create_image_2d( context, (cl_mem_flags)(CL_MEM_READ_ONLY),
                                       &imageFormat, sizes[ i ][ 0], sizes[ i ][ 1 ], 0, NULL, &error );
        if( images[i] == NULL )
        {
            log_error("Failed to create image %d of size %d x %d (%s).\n", i, (int)sizes[i][0], (int)sizes[i][1], IGetErrorString( error ));
            return -1;
        }

        results[ i ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( cl_int ) * threads[0] * 2, NULL, &error );
        if( results[i] == NULL)
        {
            log_error("Failed to create array %d of size %d.\n", i, (int)threads[0]*2);
            return -1;
        }
    }

    // Start setting arguments and executing kernels
    for( i = 0; i < NUM_TRIES; i++ )
    {
        // Set the arguments for this try
        error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &images[ i ] );
        sprintf( errStr, "Unable to set argument 0 for kernel try %d", i );
        test_error( error, errStr );

        error = clSetKernelArg( kernel, 1, sizeof( cl_mem ), &results[ i ] );
        sprintf( errStr, "Unable to set argument 1 for kernel try %d", i );
        test_error( error, errStr );

        // Queue up execution
        error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
        sprintf( errStr, "Unable to execute kernel try %d", i );
        test_error( error, errStr );
    }

    // Read the results back out, one at a time, and verify
    for( i = 0; i < NUM_TRIES; i++ )
    {
        error = clEnqueueReadBuffer( queue, results[ i ], CL_TRUE, 0, sizeof( cl_int ) * threads[0] * 2, resultArray, 0, NULL, NULL );
        sprintf( errStr, "Unable to read results for kernel try %d", i );
        test_error( error, errStr );

        // Verify. Each entry should be n * the (width/height) of image i
        for( int j = 0; j < NUM_THREADS; j++ )
        {
            if( resultArray[ j * 2 + 0 ] != (int)sizes[ i ][ 0 ] * j )
            {
                log_error( "ERROR: Verification for kernel try %d, sample %d FAILED, expected a width of %d, got %d\n",
                           i, j, (int)sizes[ i ][ 0 ] * j, resultArray[ j * 2 + 0 ] );
                return -1;
            }
            if( resultArray[ j * 2 + 1 ] != (int)sizes[ i ][ 1 ] * j )
            {
                log_error( "ERROR: Verification for kernel try %d, sample %d FAILED, expected a height of %d, got %d\n",
                           i, j, (int)sizes[ i ][ 1 ] * j, resultArray[ j * 2 + 1 ] );
                return -1;
            }
        }
    }

    // If we got here, everything verified successfully
    return 0;
}
|
||||
|
||||
|
||||
5191
test_conformance/api/test_kernel_arg_info.c
Normal file
5191
test_conformance/api/test_kernel_arg_info.c
Normal file
File diff suppressed because it is too large
Load Diff
277
test_conformance/api/test_kernel_arg_multi_setup.cpp
Normal file
277
test_conformance/api/test_kernel_arg_multi_setup.cpp
Normal file
@@ -0,0 +1,277 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
// This test is designed to stress passing multiple vector parameters to kernels and verifying access between them all
|
||||
|
||||
/* printf-style template for the multi-argument kernel. The six %s slots take
   the element type names of src1..src3 followed by dst1..dst3; the kernel
   copies each source stream into the matching destination stream. */
const char *multi_arg_kernel_source_pattern =
    "__kernel void sample_test(__global %s *src1, __global %s *src2, __global %s *src3, __global %s *dst1, __global %s *dst2, __global %s *dst3 )\n{\n"
    " int tid = get_global_id(0);\n"
    " dst1[tid] = src1[tid];\n"
    " dst2[tid] = src2[tid];\n"
    " dst3[tid] = src3[tid];\n"
    "}\n";
|
||||
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
#define MAX_ERROR_TOLERANCE 0.0005f
|
||||
|
||||
int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
|
||||
ExplicitType vec1Type, int vec1Size,
|
||||
ExplicitType vec2Type, int vec2Size,
|
||||
ExplicitType vec3Type, int vec3Size, MTdata d)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
int error, i, j;
|
||||
clMemWrapper streams[ 6 ];
|
||||
size_t threads[1], localThreads[1];
|
||||
char programSrc[ 10248 ], vec1Name[ 64 ], vec2Name[ 64 ], vec3Name[ 64 ];
|
||||
char sizeNames[][ 4 ] = { "", "2", "3", "4", "", "", "", "8" };
|
||||
const char *ptr;
|
||||
void *initData[3], *resultData[3];
|
||||
|
||||
|
||||
// Create the program source
|
||||
sprintf( vec1Name, "%s%s", get_explicit_type_name( vec1Type ), sizeNames[ vec1Size - 1 ] );
|
||||
sprintf( vec2Name, "%s%s", get_explicit_type_name( vec2Type ), sizeNames[ vec2Size - 1 ] );
|
||||
sprintf( vec3Name, "%s%s", get_explicit_type_name( vec3Type ), sizeNames[ vec3Size - 1 ] );
|
||||
|
||||
sprintf( programSrc, multi_arg_kernel_source_pattern,
|
||||
vec1Name, vec2Name, vec3Name, vec1Name, vec2Name, vec3Name,
|
||||
vec1Size, vec1Size, vec2Size, vec2Size, vec3Size, vec3Size );
|
||||
ptr = programSrc;
|
||||
|
||||
// Create our testing kernel
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" );
|
||||
test_error( error, "Unable to create testing kernel" );
|
||||
|
||||
// Get thread dimensions
|
||||
threads[0] = 1024;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size for kernel" );
|
||||
|
||||
// Create input streams
|
||||
initData[ 0 ] = create_random_data( vec1Type, d, (unsigned int)threads[ 0 ] * vec1Size );
|
||||
streams[ 0 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, initData[ 0 ], &error );
|
||||
test_error( error, "Unable to create testing stream" );
|
||||
|
||||
initData[ 1 ] = create_random_data( vec2Type, d, (unsigned int)threads[ 0 ] * vec2Size );
|
||||
streams[ 1 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, initData[ 1 ], &error );
|
||||
test_error( error, "Unable to create testing stream" );
|
||||
|
||||
initData[ 2 ] = create_random_data( vec3Type, d, (unsigned int)threads[ 0 ] * vec3Size );
|
||||
streams[ 2 ] = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, initData[ 2 ], &error );
|
||||
test_error( error, "Unable to create testing stream" );
|
||||
|
||||
streams[ 3 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec1Type ) * threads[0] * vec1Size, NULL, &error );
|
||||
test_error( error, "Unable to create testing stream" );
|
||||
|
||||
streams[ 4 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec2Type ) * threads[0] * vec2Size, NULL, &error );
|
||||
test_error( error, "Unable to create testing stream" );
|
||||
|
||||
streams[ 5 ] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( vec3Type ) * threads[0] * vec3Size, NULL, &error );
|
||||
test_error( error, "Unable to create testing stream" );
|
||||
|
||||
// Set the arguments
|
||||
error = 0;
|
||||
for( i = 0; i < 6; i++ )
|
||||
error |= clSetKernelArg( kernel, i, sizeof( cl_mem ), &streams[ i ] );
|
||||
test_error( error, "Unable to set arguments for kernel" );
|
||||
|
||||
// Execute!
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Unable to execute kernel" );
|
||||
|
||||
// Read results
|
||||
resultData[0] = malloc( get_explicit_type_size( vec1Type ) * vec1Size * threads[0] );
|
||||
resultData[1] = malloc( get_explicit_type_size( vec2Type ) * vec2Size * threads[0] );
|
||||
resultData[2] = malloc( get_explicit_type_size( vec3Type ) * vec3Size * threads[0] );
|
||||
error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, get_explicit_type_size( vec1Type ) * vec1Size * threads[ 0 ], resultData[0], 0, NULL, NULL );
|
||||
error |= clEnqueueReadBuffer( queue, streams[ 4 ], CL_TRUE, 0, get_explicit_type_size( vec2Type ) * vec2Size * threads[ 0 ], resultData[1], 0, NULL, NULL );
|
||||
error |= clEnqueueReadBuffer( queue, streams[ 5 ], CL_TRUE, 0, get_explicit_type_size( vec3Type ) * vec3Size * threads[ 0 ], resultData[2], 0, NULL, NULL );
|
||||
test_error( error, "Unable to read result stream" );
|
||||
|
||||
// Verify
|
||||
char *ptr1 = (char *)initData[ 0 ], *ptr2 = (char *)resultData[ 0 ];
|
||||
size_t span = get_explicit_type_size( vec1Type );
|
||||
for( i = 0; i < (int)threads[0]; i++ )
|
||||
{
|
||||
for( j = 0; j < vec1Size; j++ )
|
||||
{
|
||||
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Value did not validate for component %d of item %d of stream 0!\n", j, i );
|
||||
free( initData[ 0 ] );
|
||||
free( initData[ 1 ] );
|
||||
free( initData[ 2 ] );
|
||||
free( resultData[ 0 ] );
|
||||
free( resultData[ 1 ] );
|
||||
free( resultData[ 2 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ptr1 += span * vec1Size;
|
||||
ptr2 += span * vec1Size;
|
||||
}
|
||||
|
||||
ptr1 = (char *)initData[ 1 ];
|
||||
ptr2 = (char *)resultData[ 1 ];
|
||||
span = get_explicit_type_size( vec2Type );
|
||||
for( i = 0; i < (int)threads[0]; i++ )
|
||||
{
|
||||
for( j = 0; j < vec2Size; j++ )
|
||||
{
|
||||
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Value did not validate for component %d of item %d of stream 1!\n", j, i );
|
||||
free( initData[ 0 ] );
|
||||
free( initData[ 1 ] );
|
||||
free( initData[ 2 ] );
|
||||
free( resultData[ 0 ] );
|
||||
free( resultData[ 1 ] );
|
||||
free( resultData[ 2 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ptr1 += span * vec2Size;
|
||||
ptr2 += span * vec2Size;
|
||||
}
|
||||
|
||||
ptr1 = (char *)initData[ 2 ];
|
||||
ptr2 = (char *)resultData[ 2 ];
|
||||
span = get_explicit_type_size( vec3Type );
|
||||
for( i = 0; i < (int)threads[0]; i++ )
|
||||
{
|
||||
for( j = 0; j < vec3Size; j++ )
|
||||
{
|
||||
if( memcmp( ptr1 + span * j , ptr2 + span * j, span ) != 0 )
|
||||
{
|
||||
log_error( "ERROR: Value did not validate for component %d of item %d of stream 2!\n", j, i );
|
||||
free( initData[ 0 ] );
|
||||
free( initData[ 1 ] );
|
||||
free( initData[ 2 ] );
|
||||
free( resultData[ 0 ] );
|
||||
free( resultData[ 1 ] );
|
||||
free( resultData[ 2 ] );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
ptr1 += span * vec3Size;
|
||||
ptr2 += span * vec3Size;
|
||||
}
|
||||
|
||||
// If we got here, everything verified successfully
|
||||
free( initData[ 0 ] );
|
||||
free( initData[ 1 ] );
|
||||
free( initData[ 2 ] );
|
||||
free( resultData[ 0 ] );
|
||||
free( resultData[ 1 ] );
|
||||
free( resultData[ 2 ] );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs test_multi_arg_set for every ordered triple of the element types
 * char/short/int/float combined with every ordered triple of the vector
 * sizes 2, 4 and 8. Stops and returns -1 at the first failing combination;
 * returns 0 when all of them pass. num_elements is not used.
 */
int test_kernel_arg_multi_setup_exhaustive(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
{
    // kNumExplicitTypes terminates the list
    ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes };
    RandomSeed seed( gRandomSeed );

    log_info( "\n" ); // for formatting

    for( int a = 0; types[ a ] != kNumExplicitTypes; a++ )
    {
        for( int b = 0; types[ b ] != kNumExplicitTypes; b++ )
        {
            for( int c = 0; types[ c ] != kNumExplicitTypes; c++ )
            {
                log_info( "\n\ttesting %s, %s, %s...", get_explicit_type_name( types[ a ] ), get_explicit_type_name( types[ b ] ), get_explicit_type_name( types[ c ] ) );

                // One progress dot per vector-size combination
                for( int width1 = 2; width1 <= 8; width1 *= 2 )
                {
                    for( int width2 = 2; width2 <= 8; width2 *= 2 )
                    {
                        for( int width3 = 2; width3 <= 8; width3 *= 2 )
                        {
                            log_info(".");
                            fflush( stdout);

                            int failed = test_multi_arg_set( device, context, queue,
                                                             types[ a ], width1,
                                                             types[ b ], width2,
                                                             types[ c ], width3, seed );
                            if( failed != 0 )
                                return -1;
                        }
                    }
                }
            }
        }
    }

    log_info( "\n" );
    return 0;
}
|
||||
|
||||
int test_kernel_arg_multi_setup_random(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
// Loop through a selection of combinations
|
||||
ExplicitType types[] = { kChar, kShort, kInt, kFloat, kNumExplicitTypes };
|
||||
int type1, type2, type3;
|
||||
int size1, size2, size3;
|
||||
RandomSeed seed( gRandomSeed );
|
||||
|
||||
num_elements = 3*3*3*4;
|
||||
log_info( "Testing %d random configurations\n", num_elements );
|
||||
|
||||
// Loop through every combination of vector size
|
||||
for( size1 = 2; size1 <= 8; size1 <<= 1 )
|
||||
{
|
||||
for( size2 = 2; size2 <= 8; size2 <<= 1 )
|
||||
{
|
||||
for( size3 = 2; size3 <= 8; size3 <<= 1 )
|
||||
{
|
||||
// Loop through 4 type combinations for each size combination
|
||||
int n;
|
||||
for (n=0; n<4; n++) {
|
||||
type1 = (int)get_random_float(0,4, seed);
|
||||
type2 = (int)get_random_float(0,4, seed);
|
||||
type3 = (int)get_random_float(0,4, seed);
|
||||
|
||||
|
||||
log_info( "\ttesting %s%d, %s%d, %s%d...\n",
|
||||
get_explicit_type_name( types[ type1 ] ), size1,
|
||||
get_explicit_type_name( types[ type2 ] ), size2,
|
||||
get_explicit_type_name( types[ type3 ] ), size3 );
|
||||
|
||||
if( test_multi_arg_set( device, context, queue,
|
||||
types[ type1 ], size1,
|
||||
types[ type2 ], size2,
|
||||
types[ type3 ], size3, seed ) )
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
704
test_conformance/api/test_kernels.c
Normal file
704
test_conformance/api/test_kernels.c
Normal file
@@ -0,0 +1,704 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
const char *sample_single_test_kernel[] = {
|
||||
"__kernel void sample_test(__global float *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = (int)src[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
const char *sample_struct_test_kernel[] = {
|
||||
"typedef struct {\n"
|
||||
"__global int *A;\n"
|
||||
"__global int *B;\n"
|
||||
"} input_pair_t;\n"
|
||||
"\n"
|
||||
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = src->A[tid] + src->B[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
const char *sample_struct_array_test_kernel[] = {
|
||||
"typedef struct {\n"
|
||||
"int A;\n"
|
||||
"int B;\n"
|
||||
"} input_pair_t;\n"
|
||||
"\n"
|
||||
"__kernel void sample_test(__global input_pair_t *src, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = src[tid].A + src[tid].B;\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
const char *sample_const_test_kernel[] = {
|
||||
"__kernel void sample_test(__constant int *src1, __constant int *src2, __global int *dst)\n"
|
||||
"{\n"
|
||||
" int tid = get_global_id(0);\n"
|
||||
"\n"
|
||||
" dst[tid] = src1[tid] + src2[tid];\n"
|
||||
"\n"
|
||||
"}\n" };
|
||||
|
||||
/* Program with a program-scope __constant (addFactor = 1024) that the kernel
 * adds to every element of src1 before storing the result in dst. */
const char *sample_const_global_test_kernel[] = {
    "__constant int addFactor = 1024;\n"
    "__kernel void sample_test(__global int *src1, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = src1[tid] + addFactor;\n\n"
    "}\n"
};
|
||||
|
||||
/* Two source strings forming one program with two kernels:
 *   [0] sample_test  - float -> int conversion
 *   [1] sample_test2 - int -> float conversion */
const char *sample_two_kernel_program[] = {
    /* [0] */
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = (int)src[tid];\n\n"
    "}\n",
    /* [1] */
    "__kernel void sample_test2(__global int *src, __global float *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n\n"
    " dst[tid] = (float)src[tid];\n\n"
    "}\n"
};
|
||||
|
||||
|
||||
|
||||
|
||||
/*
 * Builds sample_single_test_kernel and checks what clGetKernelInfo reports
 * for the resulting kernel:
 *   - CL_KERNEL_FUNCTION_NAME  (size and value "sample_test")
 *   - CL_KERNEL_NUM_ARGS       (size and value 2)
 *   - CL_KERNEL_REFERENCE_COUNT(size only; the value is not validated)
 *   - CL_KERNEL_PROGRAM        (size and value)
 *   - CL_KERNEL_CONTEXT        (value)
 *
 * Returns 0 on success, non-zero on the first failed query or mismatch.
 * The program and kernel are held in wrappers so they are released on every
 * return path, including the early error returns.
 */
int test_get_kernel_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    cl_program testProgram;
    cl_context testContext;
    cl_char name[ 512 ];
    cl_uint numArgs, numInstances;
    size_t paramSize;

    /* Create reference */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Raw handle used for comparison and for printing with %p. */
    cl_program expectedProgram = program;

    /* Size-only queries pass param_value_size = 0 and param_value = NULL. */
    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel function name param size" );
    if( paramSize != strlen( "sample_test" ) + 1 )
    {
        log_error( "ERROR: Kernel function name param returns invalid size (expected %d, got %d)\n", (int)strlen( "sample_test" ) + 1, (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_FUNCTION_NAME, sizeof( name ), name, NULL );
    test_error( error, "Unable to get kernel function name" );
    if( strcmp( (char *)name, "sample_test" ) != 0 )
    {
        log_error( "ERROR: Kernel function name returned invalid value (expected sample_test, got %s)\n", (char *)name );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel arg count param size" );
    if( paramSize != sizeof( numArgs ) )
    {
        log_error( "ERROR: Kernel arg count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numArgs ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_NUM_ARGS, sizeof( numArgs ), &numArgs, NULL );
    test_error( error, "Unable to get kernel arg count" );
    if( numArgs != 2 )
    {
        log_error( "ERROR: Kernel arg count returned invalid value (expected %d, got %d)\n", 2, (int)numArgs );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel reference count param size" );
    if( paramSize != sizeof( numInstances ) )
    {
        log_error( "ERROR: Kernel reference count param returns invalid size (expected %d, got %d)\n", (int)sizeof( numInstances ), (int)paramSize );
        return -1;
    }

    /* Only the success of the query is checked; the count itself is not. */
    error = clGetKernelInfo( kernel, CL_KERNEL_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL );
    test_error( error, "Unable to get kernel reference count" );

    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, 0, NULL, &paramSize );
    test_error( error, "Unable to get kernel program param size" );
    if( paramSize != sizeof( testProgram ) )
    {
        log_error( "ERROR: Kernel program param returns invalid size (expected %d, got %d)\n", (int)sizeof( testProgram ), (int)paramSize );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_PROGRAM, sizeof( testProgram ), &testProgram, NULL );
    test_error( error, "Unable to get kernel program" );
    if( testProgram != expectedProgram )
    {
        log_error( "ERROR: Kernel program returned invalid value (expected %p, got %p)\n", (void *)expectedProgram, (void *)testProgram );
        return -1;
    }

    error = clGetKernelInfo( kernel, CL_KERNEL_CONTEXT, sizeof( testContext ), &testContext, NULL );
    test_error( error, "Unable to get kernel context" );
    if( testContext != context )
    {
        log_error( "ERROR: Kernel context returned invalid value (expected %p, got %p)\n", (void *)context, (void *)testContext );
        return -1;
    }

    /* kernel and program are released by their wrappers. */
    return 0;
}
|
||||
|
||||
int test_execute_kernel_local_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper streams[2];
|
||||
size_t threads[1], localThreads[1];
|
||||
cl_float inputData[100];
|
||||
cl_int outputData[100];
|
||||
RandomSeed seed( gRandomSeed );
|
||||
int i;
|
||||
|
||||
/* Create a kernel to test with */
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) != 0 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Create some I/O streams */
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 100, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 100, NULL, &error);
|
||||
test_error( error, "Creating test array failed" );
|
||||
|
||||
/* Write some test data */
|
||||
memset( outputData, 0, sizeof( outputData ) );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
inputData[i] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
|
||||
|
||||
error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*100, (void *)inputData, 0, NULL, NULL);
|
||||
test_error( error, "Unable to set testing kernel data" );
|
||||
|
||||
/* Set the arguments */
|
||||
error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
|
||||
test_error( error, "Unable to set kernel arguments" );
|
||||
|
||||
/* Test running the kernel and verifying it */
|
||||
threads[0] = (size_t)100;
|
||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||
test_error( error, "Unable to get work group size to use" );
|
||||
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Try again */
|
||||
if( localThreads[0] > 1 )
|
||||
localThreads[0] /= 2;
|
||||
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
|
||||
localThreads[0]--;
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* And again */
|
||||
if( localThreads[0] > 1 )
|
||||
localThreads[0] /= 2;
|
||||
while( localThreads[0] > 1 && 0 != threads[0] % localThreads[0] )
|
||||
localThreads[0]--;
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* One more time */
|
||||
localThreads[0] = (unsigned int)1;
|
||||
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
|
||||
test_error( error, "Kernel execution failed" );
|
||||
|
||||
error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*100, (void *)outputData, 0, NULL, NULL );
|
||||
test_error( error, "Unable to get result data" );
|
||||
|
||||
for (i=0; i<100; i++)
|
||||
{
|
||||
if (outputData[i] != (int)inputData[i])
|
||||
{
|
||||
log_error( "ERROR: Data did not verify on first pass!\n" );
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Sets the two arguments of the float->int sample kernel out of order
 * (index 1 first, then index 0), runs the kernel over 10 work-items and
 * verifies the converted output.
 *
 * Returns 0 on success, non-zero on failure.
 */
int test_set_kernel_arg_by_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kElementCount = 10 };

    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t globalSize[1], localSize[1];
    cl_float hostInput[kElementCount];
    cl_int hostOutput[kElementCount];
    RandomSeed seed( gRandomSeed );
    int idx;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_single_test_kernel, "sample_test" ) )
        return -1;

    /* Input and output buffers */
    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( hostInput ), NULL, &error );
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( hostOutput ), NULL, &error );
    test_error( error, "Creating test array failed" );

    /* Generate and upload the input data */
    memset( hostOutput, 0, sizeof( hostOutput ) );

    for( idx = 0; idx < kElementCount; ++idx )
    {
        hostInput[idx] = get_random_float(-(float) 0x7fffffff, (float) 0x7fffffff, seed);
    }

    error = clEnqueueWriteBuffer( queue, streams[0], CL_TRUE, 0, sizeof( hostInput ), (void *)hostInput, 0, NULL, NULL );
    test_error( error, "Unable to set testing kernel data" );

    /* Arguments are deliberately set in reverse index order */
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Run the kernel */
    globalSize[0] = (size_t)kElementCount;

    error = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    /* Read back and verify */
    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof( hostOutput ), (void *)hostOutput, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for( idx = 0; idx < kElementCount; ++idx )
    {
        if( hostOutput[idx] == (int)hostInput[idx] )
            continue;

        log_error( "ERROR: Data did not verify on first pass!\n" );
        return -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Passes a host struct holding two buffer handles as kernel argument 0 and
 * an output buffer as argument 1, runs sample_struct_test_kernel over 10
 * work-items and verifies that each output element is the sum of the two
 * corresponding inputs.
 *
 * Returns 0 on success, non-zero on failure. All CL objects are held in
 * wrappers so they are released on every return path.
 *
 * NOTE(review): argument 0 of the kernel is declared as a __global pointer,
 * yet the host passes sizeof(image_pair) bytes containing two cl_mem handles.
 * Whether an implementation must accept this looks questionable - confirm
 * against the clSetKernelArg specification before relying on this test.
 */
int test_set_kernel_arg_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streamA, streamB, outStream;
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    cl_int randomTestDataA[10], randomTestDataB[10];
    MTdata d;

    struct img_pair_t
    {
        cl_mem streamA;
        cl_mem streamB;
    } image_pair;

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        /* Keep values small and non-negative so the sum cannot overflow a
         * signed 32-bit int during verification. */
        randomTestDataA[i] = (cl_int)genrand_int32(d) & 0xffffff;
        randomTestDataB[i] = (cl_int)genrand_int32(d) & 0xffffff;
    }
    free_mtdata(d); d = NULL;

    streamA = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataA, &error);
    test_error( error, "Creating test array failed" );
    streamB = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(cl_int) * 10, randomTestDataB, &error);
    test_error( error, "Creating test array failed" );
    outStream = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* The struct carries raw handles; ownership stays with the wrappers. */
    image_pair.streamA = streamA;
    image_pair.streamB = streamB;

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( image_pair ), &image_pair);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), &outStream);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, outStream, CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != randomTestDataA[i] + randomTestDataB[i])
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    /* Buffers, kernel and program are released by their wrappers. */
    return 0;
}
|
||||
|
||||
/*
 * Runs sample_const_test_kernel with two __constant input buffers and one
 * __global output buffer (10 ints each) and verifies that every output
 * element equals the sum of the two inputs. Fails up front if the device
 * reports a maximum constant buffer size smaller than the test buffers.
 *
 * Returns 0 on success, non-zero on failure.
 */
int test_set_kernel_arg_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kElementCount = 10 };

    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[3];
    size_t globalSize[1], localSize[1];
    cl_int results[kElementCount];
    cl_int inputA[kElementCount], inputB[kElementCount];
    cl_ulong maxConstantSize;
    MTdata rng;
    int idx;

    /* The constant buffers must fit within the device limit */
    error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxConstantSize ), &maxConstantSize, NULL );
    test_error( error, "Unable to get max constant buffer size" );
    if( maxConstantSize < sizeof( cl_int ) * kElementCount )
    {
        log_error( "ERROR: Unable to test constant argument to kernel: max size of constant buffer is reported as %d!\n", (int)maxConstantSize );
        return -1;
    }

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_test_kernel, "sample_test" ) )
        return -1;

    /* Generate inputs; masking keeps them non-negative and small enough that
     * the sum cannot overflow during verification. */
    rng = init_genrand( gRandomSeed );
    for( idx = 0; idx < kElementCount; ++idx )
    {
        inputA[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
        inputB[idx] = (cl_int)genrand_int32(rng) & 0xffffff;
    }
    free_mtdata(rng);
    rng = NULL;

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( inputA ), inputA, &error );
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( inputB ), inputB, &error );
    test_error( error, "Creating test array failed" );
    streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( results ), NULL, &error );
    test_error( error, "Creating test array failed" );

    /* Bind the three buffers to arguments 0..2 */
    for( idx = 0; idx < 3; ++idx )
    {
        error = clSetKernelArg( kernel, (cl_uint)idx, sizeof( streams[idx] ), &streams[idx] );
        test_error( error, "Unable to set indexed kernel arguments" );
    }

    /* Run the kernel */
    globalSize[0] = (size_t)kElementCount;

    error = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    /* Read back and verify */
    error = clEnqueueReadBuffer( queue, streams[2], CL_TRUE, 0, sizeof( results ), (void *)results, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for( idx = 0; idx < kElementCount; ++idx )
    {
        if( results[idx] == inputA[idx] + inputB[idx] )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + %d (%d)\n", idx, results[idx], inputA[idx], inputB[idx], ( inputA[idx] + inputB[idx] ) );
        return -1;
    }

    return 0;
}
|
||||
|
||||
/*
 * Uploads an array of 10 {A, B} integer pairs, runs
 * sample_struct_array_test_kernel over it and verifies that each output
 * element equals A + B of the corresponding pair.
 *
 * Returns 0 on success, non-zero on failure.
 */
int test_set_kernel_arg_struct_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t threads[1], localThreads[1];
    cl_int outputData[10];
    int i;
    MTdata d;

    /* Host mirror of the kernel's input_pair_t; cl_int matches the 32-bit
     * int used on the device side. */
    typedef struct img_pair_type
    {
        cl_int A;
        cl_int B;
    } image_pair_t;

    image_pair_t image_pair[ 10 ];

    /* Create a kernel to test with */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_struct_array_test_kernel, "sample_test" ) != 0 )
    {
        return -1;
    }

    /* Create some I/O streams */
    d = init_genrand( gRandomSeed );
    for( i = 0; i < 10; i++ )
    {
        /* Both members must be initialised: they are uploaded to the device
         * and read again during verification. Masking keeps the values
         * non-negative and small so A + B cannot overflow. */
        image_pair[i].A = (cl_int)genrand_int32(d) & 0xffffff;
        image_pair[i].B = (cl_int)genrand_int32(d) & 0xffffff;
    }
    free_mtdata(d); d = NULL;

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof(image_pair_t) * 10, (void *)image_pair, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
    test_error( error, "Unable to set indexed kernel arguments" );

    /* Test running the kernel and verifying it */
    threads[0] = (size_t)10;

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_int)*10, (void *)outputData, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for (i=0; i<10; i++)
    {
        if (outputData[i] != image_pair[i].A + image_pair[i].B)
        {
            log_error( "ERROR: Data did not verify!\n" );
            return -1;
        }
    }

    return 0;
}
|
||||
|
||||
/*
 * Builds the two-kernel sample program and exercises
 * clCreateKernelsInProgram twice: once to query the number of kernels
 * (expected 2) and once to actually create them.
 *
 * Returns 0 on success, non-zero on failure. The program is held in a
 * wrapper so it is released on every return path.
 */
int test_create_kernels_in_program(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kExpectedKernels = 2 };

    int error;
    clProgramWrapper program;
    cl_kernel kernel[kExpectedKernels] = { NULL, NULL };
    cl_uint kernelCount = 0;
    int i, result = 0;

    /* Create a test program */
    program = clCreateProgramWithSource( context, 2, sample_two_kernel_program, NULL, &error);
    if( (cl_program)program == NULL || error != CL_SUCCESS )
    {
        log_error( "ERROR: Unable to create test program!\n" );
        return -1;
    }

    /* Build */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build test program" );

    /* Try getting the kernel count */
    error = clCreateKernelsInProgram( program, 0, NULL, &kernelCount );
    test_error( error, "Unable to get kernel count for built program" );
    if( kernelCount != kExpectedKernels )
    {
        log_error( "ERROR: Returned kernel count from clCreateKernelsInProgram is incorrect! (got %d, expected 2)\n", (int)kernelCount );
        return -1;
    }

    /* Try actually getting the kernels */
    error = clCreateKernelsInProgram( program, kExpectedKernels, kernel, NULL );
    test_error( error, "Unable to get kernels for built program" );

    /* Every slot must hold a kernel; release whatever was returned. */
    for( i = 0; i < kExpectedKernels; i++ )
    {
        if( kernel[i] == NULL )
        {
            log_error( "ERROR: clCreateKernelsInProgram returned a NULL kernel at index %d!\n", i );
            result = -1;
            continue;
        }
        clReleaseKernel( kernel[i] );
    }

    /* The program is released by its wrapper. */
    return result;
}
|
||||
|
||||
/*
 * Runs sample_const_global_test_kernel, which adds a program-scope
 * __constant (1024) to each of 10 input ints, and verifies the output.
 *
 * Returns 0 on success, non-zero on failure.
 */
int test_kernel_global_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    enum { kElementCount = 10 };

    int error;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper streams[2];
    size_t globalSize[1], localSize[1];
    cl_int results[kElementCount];
    cl_int inputs[kElementCount];
    MTdata rng;
    int idx;

    /* Build the kernel under test */
    if( 0 != create_single_kernel_helper( context, &program, &kernel, 1, sample_const_global_test_kernel, "sample_test" ) )
        return -1;

    /* Generate inputs; masking keeps them non-negative and small so adding
     * the constant cannot overflow during verification. */
    rng = init_genrand( gRandomSeed );
    for( idx = 0; idx < kElementCount; ++idx )
    {
        inputs[idx] = (cl_int)genrand_int32(rng) & 0xffff;
    }
    free_mtdata(rng);
    rng = NULL;

    streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_COPY_HOST_PTR), sizeof( inputs ), inputs, &error );
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof( results ), NULL, &error );
    test_error( error, "Creating test array failed" );

    /* Bind input and output buffers to arguments 0 and 1 */
    for( idx = 0; idx < 2; ++idx )
    {
        error = clSetKernelArg( kernel, (cl_uint)idx, sizeof( streams[idx] ), &streams[idx] );
        test_error( error, "Unable to set indexed kernel arguments" );
    }

    /* Run the kernel */
    globalSize[0] = (size_t)kElementCount;

    error = get_max_common_work_group_size( context, kernel, globalSize[0], &localSize[0] );
    test_error( error, "Unable to get work group size to use" );

    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, globalSize, localSize, 0, NULL, NULL );
    test_error( error, "Kernel execution failed" );

    /* Read back and verify */
    error = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof( results ), (void *)results, 0, NULL, NULL );
    test_error( error, "Unable to get result data" );

    for( idx = 0; idx < kElementCount; ++idx )
    {
        if( results[idx] == inputs[idx] + 1024 )
            continue;

        log_error( "ERROR: Data sample %d did not verify! %d does not match %d + 1024 (%d)\n", idx, results[idx], inputs[idx], ( inputs[idx] + 1024 ) );
        return -1;
    }

    return 0;
}
|
||||
|
||||
|
||||
|
||||
751
test_conformance/api/test_mem_object_info.cpp
Normal file
751
test_conformance/api/test_mem_object_info.cpp
Normal file
@@ -0,0 +1,751 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../images/image_helpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
|
||||
/*
 * Queries one clGetMemObjectInfo parameter into `val` and validates both the
 * returned value and the returned size.
 *
 *   mem       - memory object to query
 *   paramName - cl_mem_info enumerant
 *   val       - lvalue that receives the result (its sizeof is the query size)
 *   expected  - value `val` must compare equal to
 *   name      - string literal naming the parameter in log messages
 *   type      - printf conversion (string literal) used to print the values
 *   cast      - type both values are cast to before being printed
 *
 * Relies on `error` (int) and `size` (size_t) being declared in the calling
 * scope, and returns from the calling function (error code or -1) on failure.
 * The expansion is a single braced block, so it behaves as one statement and
 * can be used with or without a trailing semicolon.
 */
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast )    \
{ \
    error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &(val), &size );   \
    test_error( error, "Unable to get mem object " name );  \
    if( (val) != (expected) )   \
    {   \
        log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type " from %s:%d)\n",   \
                   (cast)(expected), (cast)(val), __FILE__, __LINE__ );   \
        return -1;  \
    }   \
    if( size != sizeof( val ) ) \
    {   \
        log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d from %s:%d)\n", \
                   (int)sizeof( val ), (int)size , __FILE__, __LINE__ );   \
        return -1;  \
    } \
}
|
||||
|
||||
/* Destructor callback for memory objects: frees the pointer that was passed
 * as user data when the callback was registered. The memory object handle
 * itself is not used. */
static void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* memobj */, void * data )
{
    void *hostAllocation = data;

    free( hostAllocation );
}
|
||||
|
||||
static unsigned int
|
||||
get_image_dim(MTdata *d, unsigned int mod)
|
||||
{
|
||||
unsigned int val = 0;
|
||||
|
||||
do
|
||||
{
|
||||
val = (unsigned int)genrand_int32(*d) % mod;
|
||||
} while (val == 0);
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
|
||||
{
|
||||
int error;
|
||||
size_t size;
|
||||
void * buffer = NULL;
|
||||
|
||||
clMemWrapper bufferObject;
|
||||
clMemWrapper subBufferObject;
|
||||
|
||||
cl_mem_flags bufferFlags[] = {
|
||||
CL_MEM_READ_WRITE,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_READ_ONLY,
|
||||
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
};
|
||||
|
||||
cl_mem_flags subBufferFlags[] = {
|
||||
CL_MEM_READ_WRITE,
|
||||
CL_MEM_READ_ONLY,
|
||||
CL_MEM_WRITE_ONLY,
|
||||
0,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_READ_ONLY | 0,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_WRITE_ONLY | 0,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_NO_ACCESS | 0,
|
||||
};
|
||||
|
||||
|
||||
// Get the address alignment, so we can make sure the sub-buffer test later works properly.
|
||||
cl_uint addressAlignBits;
|
||||
error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(addressAlignBits), &addressAlignBits, NULL );
|
||||
|
||||
size_t addressAlign = addressAlignBits/8;
|
||||
if ( addressAlign < 128 )
|
||||
{
|
||||
addressAlign = 128;
|
||||
}
|
||||
|
||||
for ( unsigned int i = 0; i < sizeof(bufferFlags) / sizeof(cl_mem_flags); ++i )
|
||||
{
|
||||
//printf("@@@ bufferFlags[%u]=0x%x\n", i, bufferFlags[ i ]);
|
||||
if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
// Create a buffer object to test against.
|
||||
buffer = malloc( addressAlign * 4 );
|
||||
bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( buffer );
|
||||
test_error( error, "Unable to create buffer (CL_MEM_USE_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
|
||||
void * ptr;
|
||||
TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_HOST_PTR, ptr, buffer, "host pointer", "%p", void * )
|
||||
}
|
||||
else if ( (bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR) && (bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR) )
|
||||
{
|
||||
// Create a buffer object to test against.
|
||||
buffer = malloc( addressAlign * 4 );
|
||||
bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( buffer );
|
||||
test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
}
|
||||
else if ( bufferFlags[ i ] & CL_MEM_ALLOC_HOST_PTR )
|
||||
{
|
||||
// Create a buffer object to test against.
|
||||
bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
|
||||
test_error( error, "Unable to create buffer (CL_MEM_ALLOC_HOST_PTR) to test with" );
|
||||
}
|
||||
else if ( bufferFlags[ i ] & CL_MEM_COPY_HOST_PTR )
|
||||
{
|
||||
// Create a buffer object to test against.
|
||||
buffer = malloc( addressAlign * 4 );
|
||||
bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, buffer, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( buffer );
|
||||
test_error( error, "Unable to create buffer (CL_MEM_COPY_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure buffer is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( bufferObject, mem_obj_destructor_callback, buffer );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create a buffer object to test against.
|
||||
bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
|
||||
test_error( error, "Unable to create buffer to test with" );
|
||||
}
|
||||
|
||||
// Perform buffer object queries.
|
||||
cl_mem_object_type type;
|
||||
TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )
|
||||
|
||||
cl_mem_flags flags;
|
||||
TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_FLAGS, flags, (unsigned int)bufferFlags[ i ], "flags", "%d", unsigned int )
|
||||
|
||||
size_t sz;
|
||||
TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign * 4 ), "size", "%ld", size_t )
|
||||
|
||||
cl_uint mapCount;
|
||||
error = clGetMemObjectInfo( bufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
|
||||
test_error( error, "Unable to get mem object map count" );
|
||||
if( size != sizeof( mapCount ) )
|
||||
{
|
||||
log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
|
||||
(int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_uint refCount;
|
||||
error = clGetMemObjectInfo( bufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
|
||||
test_error( error, "Unable to get mem object reference count" );
|
||||
if( size != sizeof( refCount ) )
|
||||
{
|
||||
log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
|
||||
(int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_context otherCtx;
|
||||
TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )
|
||||
|
||||
cl_mem origObj;
|
||||
TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (void *)NULL, "associated mem object", "%p", void * )
|
||||
|
||||
size_t offset;
|
||||
TEST_MEM_OBJECT_PARAM( bufferObject, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )
|
||||
|
||||
cl_buffer_region region;
|
||||
region.origin = addressAlign;
|
||||
region.size = addressAlign;
|
||||
|
||||
// Loop over possible sub-buffer objects to create.
|
||||
for ( unsigned int j = 0; j < sizeof(subBufferFlags) / sizeof(cl_mem_flags); ++j )
|
||||
{
|
||||
if ( subBufferFlags[ j ] & CL_MEM_READ_WRITE )
|
||||
{
|
||||
if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) )
|
||||
continue; // Buffer must be read_write for sub-buffer to be read_write.
|
||||
}
|
||||
if ( subBufferFlags[ j ] & CL_MEM_READ_ONLY )
|
||||
{
|
||||
if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_READ_ONLY) )
|
||||
continue; // Buffer must be read_write or read_only for sub-buffer to be read_only
|
||||
}
|
||||
if ( subBufferFlags[ j ] & CL_MEM_WRITE_ONLY )
|
||||
{
|
||||
if ( !(bufferFlags[ i ] & CL_MEM_READ_WRITE) && !(bufferFlags[ i ] & CL_MEM_WRITE_ONLY) )
|
||||
continue; // Buffer must be read_write or write_only for sub-buffer to be write_only
|
||||
}
|
||||
if ( subBufferFlags[ j ] & CL_MEM_HOST_READ_ONLY )
|
||||
{
|
||||
if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_WRITE_ONLY) )
|
||||
continue; // Buffer must be host all access or host read_only for sub-buffer to be host read_only
|
||||
}
|
||||
if ( subBufferFlags[ j ] & CL_MEM_HOST_WRITE_ONLY )
|
||||
{
|
||||
if ( (bufferFlags[ i ] & CL_MEM_HOST_NO_ACCESS) || (bufferFlags[ i ] & CL_MEM_HOST_READ_ONLY) )
|
||||
continue; // Buffer must be host all access or host write_only for sub-buffer to be host write_only
|
||||
}
|
||||
//printf("@@@ bufferFlags[%u]=0x%x subBufferFlags[%u]=0x%x\n", i, bufferFlags[ i ], j, subBufferFlags[ j ]);
|
||||
|
||||
subBufferObject = clCreateSubBuffer( bufferObject, subBufferFlags[ j ], CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error );
|
||||
test_error( error, "Unable to create sub-buffer to test against" );
|
||||
|
||||
// Perform sub-buffer object queries.
|
||||
cl_mem_object_type type;
|
||||
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_TYPE, type, CL_MEM_OBJECT_BUFFER, "type", "%d", int )
|
||||
|
||||
cl_mem_flags flags;
|
||||
cl_mem_flags inheritedFlags = subBufferFlags[ j ];
|
||||
if ( (subBufferFlags[ j ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)) == 0 )
|
||||
{
|
||||
inheritedFlags |= bufferFlags[ i ] & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
|
||||
}
|
||||
inheritedFlags |= bufferFlags[ i ] & (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR);
|
||||
if ( (subBufferFlags[ j ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)) == 0)
|
||||
{
|
||||
inheritedFlags |= bufferFlags[ i ] & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS);
|
||||
}
|
||||
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_FLAGS, flags, (unsigned int)inheritedFlags, "flags", "%d", unsigned int )
|
||||
|
||||
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_SIZE, sz, (size_t)( addressAlign ), "size", "%ld", size_t )
|
||||
|
||||
if ( bufferFlags[ i ] & CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
void * ptr;
|
||||
void * offsetInBuffer = (char *)buffer + addressAlign;
|
||||
|
||||
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_HOST_PTR, ptr, offsetInBuffer, "host pointer", "%p", void * )
|
||||
}
|
||||
|
||||
cl_uint mapCount;
|
||||
error = clGetMemObjectInfo( subBufferObject, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
|
||||
test_error( error, "Unable to get mem object map count" );
|
||||
if( size != sizeof( mapCount ) )
|
||||
{
|
||||
log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
|
||||
(int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_uint refCount;
|
||||
error = clGetMemObjectInfo( subBufferObject, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
|
||||
test_error( error, "Unable to get mem object reference count" );
|
||||
if( size != sizeof( refCount ) )
|
||||
{
|
||||
log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
|
||||
(int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_context otherCtx;
|
||||
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )
|
||||
|
||||
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * )
|
||||
|
||||
TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t )
|
||||
|
||||
clReleaseMemObject( subBufferObject );
|
||||
subBufferObject = NULL;
|
||||
|
||||
}
|
||||
|
||||
clReleaseMemObject( bufferObject );
|
||||
bufferObject = NULL;
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
// Validates the generic clGetMemObjectInfo queries for an already-created image.
//
//   image        - pointer to the image mem object to query
//   objectFlags  - flags the image was created with; CL_MEM_FLAGS must report them
//   imageInfo    - descriptor used at creation; only image_type is read here
//   imageFormat  - not used by this function; kept so existing callers still compile
//   pixelSize    - not used by this function; kept so existing callers still compile
//   context      - context expected back from the CL_MEM_CONTEXT query
//
// Returns CL_SUCCESS when every query validates. A size mismatch returns -1;
// TEST_MEM_OBJECT_PARAM and test_error are defined elsewhere and presumably
// return from this function on failure as well.
int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_image_desc *imageInfo, cl_image_format *imageFormat, size_t pixelSize, cl_context context )
{
    // 'error' and 'size' are presumably also referenced by TEST_MEM_OBJECT_PARAM,
    // so they must stay in scope under exactly these names.
    int error;
    size_t size;
    cl_mem_object_type type;
    cl_mem_flags flags;
    cl_uint mapCount;
    cl_uint refCount;
    cl_context otherCtx;
    size_t offset;
    size_t sz;

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_TYPE, type, imageInfo->image_type, "type", "%d", int )

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_FLAGS, flags, (unsigned int)objectFlags, "flags", "%d", unsigned int )

    // The size returned is not constrained by the spec, so only the query itself is checked.
    error = clGetMemObjectInfo( *image, CL_MEM_SIZE, sizeof( sz ), &sz, NULL );
    test_error( error, "Unable to get mem size" );

    // Map count: the value is not validated, only the reported parameter size.
    error = clGetMemObjectInfo( *image, CL_MEM_MAP_COUNT, sizeof( mapCount ), &mapCount, &size );
    test_error( error, "Unable to get mem object map count" );
    if( size != sizeof( mapCount ) )
    {
        log_error( "ERROR: Returned size of mem object map count does not validate! (expected %d, got %d from %s:%d)\n",
                   (int)sizeof( mapCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    // Reference count: likewise only the reported parameter size is validated.
    error = clGetMemObjectInfo( *image, CL_MEM_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get mem object reference count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of mem object reference count does not validate! (expected %d, got %d from %s:%d)\n",
                   (int)sizeof( refCount ), (int)size, __FILE__, __LINE__ );
        return -1;
    }

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    TEST_MEM_OBJECT_PARAM( *image, CL_MEM_OFFSET, offset, 0L, "offset", "%ld", size_t )

    return CL_SUCCESS;
}
|
||||
|
||||
|
||||
int test_get_image_info( cl_device_id deviceID, cl_context context, cl_mem_object_type type )
|
||||
{
|
||||
int error;
|
||||
size_t size;
|
||||
void * image = NULL;
|
||||
|
||||
cl_mem imageObject;
|
||||
cl_image_desc imageInfo;
|
||||
|
||||
cl_mem_flags imageFlags[] = {
|
||||
CL_MEM_READ_WRITE,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_READ_ONLY,
|
||||
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_READ_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_HOST_NO_ACCESS | CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
};
|
||||
MTdata d;
|
||||
|
||||
PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
|
||||
|
||||
cl_image_format imageFormat;
|
||||
size_t pixelSize = 4;
|
||||
|
||||
imageFormat.image_channel_order = CL_RGBA;
|
||||
imageFormat.image_channel_data_type = CL_UNORM_INT8;
|
||||
|
||||
imageInfo.image_width = imageInfo.image_height = imageInfo.image_depth = 1;
|
||||
imageInfo.image_array_size = 0;
|
||||
imageInfo.num_mip_levels = imageInfo.num_samples = 0;
|
||||
imageInfo.buffer = NULL;
|
||||
|
||||
d = init_genrand( gRandomSeed );
|
||||
|
||||
for ( unsigned int i = 0; i < sizeof(imageFlags) / sizeof(cl_mem_flags); ++i )
|
||||
{
|
||||
imageInfo.image_row_pitch = 0;
|
||||
imageInfo.image_slice_pitch = 0;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
imageInfo.image_width = get_image_dim(&d, 1023);
|
||||
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D;
|
||||
break;
|
||||
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
imageInfo.image_width = get_image_dim(&d, 1023);
|
||||
imageInfo.image_height = get_image_dim(&d, 1023);
|
||||
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
break;
|
||||
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
imageInfo.image_width = get_image_dim(&d, 127);
|
||||
imageInfo.image_height = get_image_dim(&d, 127);
|
||||
imageInfo.image_depth = get_image_dim(&d, 127);
|
||||
imageInfo.image_type = CL_MEM_OBJECT_IMAGE3D;
|
||||
break;
|
||||
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
imageInfo.image_width = get_image_dim(&d, 1023);
|
||||
imageInfo.image_array_size = get_image_dim(&d, 1023);
|
||||
imageInfo.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
|
||||
break;
|
||||
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
imageInfo.image_width = get_image_dim(&d, 255);
|
||||
imageInfo.image_height = get_image_dim(&d, 255);
|
||||
imageInfo.image_array_size = get_image_dim(&d, 255);
|
||||
imageInfo.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
|
||||
break;
|
||||
}
|
||||
|
||||
if ( imageFlags[i] & CL_MEM_USE_HOST_PTR )
|
||||
{
|
||||
// Create an image object to test against.
|
||||
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
|
||||
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( image );
|
||||
test_error( error, "Unable to create image with (CL_MEM_USE_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
|
||||
void * ptr;
|
||||
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
|
||||
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
// release image object
|
||||
clReleaseMemObject(imageObject);
|
||||
|
||||
// Try again with non-zero rowPitch.
|
||||
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
|
||||
switch (type)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
|
||||
break;
|
||||
}
|
||||
|
||||
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
|
||||
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( image );
|
||||
test_error( error, "Unable to create image2d (CL_MEM_USE_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure image2d is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
|
||||
TEST_MEM_OBJECT_PARAM( imageObject, CL_MEM_HOST_PTR, ptr, image, "host pointer", "%p", void * )
|
||||
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
}
|
||||
else if ( (imageFlags[i] & CL_MEM_ALLOC_HOST_PTR) && (imageFlags[i] & CL_MEM_COPY_HOST_PTR) )
|
||||
{
|
||||
// Create an image object to test against.
|
||||
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
|
||||
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( image );
|
||||
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
int ret = test_get_imageObject_info( &imageObject, imageFlags[ i ], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
// release image object
|
||||
clReleaseMemObject(imageObject);
|
||||
|
||||
// Try again with non-zero rowPitch.
|
||||
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
|
||||
switch (type)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
|
||||
break;
|
||||
}
|
||||
|
||||
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
|
||||
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( image );
|
||||
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
}
|
||||
else if ( imageFlags[i] & CL_MEM_ALLOC_HOST_PTR )
|
||||
{
|
||||
// Create an image object to test against.
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
|
||||
test_error( error, "Unable to create image with (CL_MEM_ALLOC_HOST_PTR) to test with" );
|
||||
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
}
|
||||
else if ( imageFlags[i] & CL_MEM_COPY_HOST_PTR )
|
||||
{
|
||||
// Create an image object to test against.
|
||||
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
|
||||
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( image );
|
||||
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
clReleaseMemObject(imageObject);
|
||||
|
||||
// Try again with non-zero rowPitch.
|
||||
imageInfo.image_row_pitch = imageInfo.image_width * pixelSize;
|
||||
switch (type)
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
imageInfo.image_slice_pitch = imageInfo.image_row_pitch * imageInfo.image_height;
|
||||
break;
|
||||
}
|
||||
|
||||
image = malloc( imageInfo.image_width * imageInfo.image_height * imageInfo.image_depth * pixelSize *
|
||||
((imageInfo.image_array_size == 0) ? 1 : imageInfo.image_array_size) );
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, image, &error );
|
||||
if ( error )
|
||||
{
|
||||
free( image );
|
||||
test_error( error, "Unable to create image with (CL_MEM_COPY_HOST_PTR) to test with" );
|
||||
}
|
||||
|
||||
// Make sure image is cleaned up appropriately if we encounter an error in the rest of the calls.
|
||||
error = clSetMemObjectDestructorCallback( imageObject, mem_obj_destructor_callback, image );
|
||||
test_error( error, "Unable to set mem object destructor callback" );
|
||||
ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create an image object to test against.
|
||||
imageObject = clCreateImage( context, imageFlags[i], &imageFormat, &imageInfo, NULL, &error );
|
||||
test_error( error, "Unable to create image to test with" );
|
||||
int ret = test_get_imageObject_info( &imageObject, imageFlags[i], &imageInfo, &imageFormat, pixelSize, context );
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
clReleaseMemObject( imageObject );
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
// Test entry point: runs the shared image-info checks for 2D images.
// ignoreQueue and num_elements are part of the entry-point signature but are not used.
int test_get_image2d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D;

    return test_get_image_info( deviceID, context, imageType );
}
|
||||
|
||||
// Test entry point: runs the shared image-info checks for 3D images.
// ignoreQueue and num_elements are part of the entry-point signature but are not used.
int test_get_image3d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE3D;

    return test_get_image_info( deviceID, context, imageType );
}
|
||||
|
||||
// Test entry point: runs the shared image-info checks for 1D images.
// ignoreQueue and num_elements are part of the entry-point signature but are not used.
int test_get_image1d_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D;

    return test_get_image_info( deviceID, context, imageType );
}
|
||||
|
||||
// Test entry point: runs the shared image-info checks for 1D image arrays.
// ignoreQueue and num_elements are part of the entry-point signature but are not used.
int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;

    return test_get_image_info( deviceID, context, imageType );
}
|
||||
|
||||
// Test entry point: runs the shared image-info checks for 2D image arrays.
// ignoreQueue and num_elements are part of the entry-point signature but are not used.
int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements )
{
    const cl_mem_object_type imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;

    return test_get_image_info( deviceID, context, imageType );
}
|
||||
|
||||
|
||||
108
test_conformance/api/test_mem_objects.cpp
Normal file
108
test_conformance/api/test_mem_objects.cpp
Normal file
@@ -0,0 +1,108 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
static volatile cl_int sDestructorIndex;
|
||||
|
||||
// Destructor callback used by the ordering test below.
// userData points at the caller's slot for this registration; the callback
// stores the running invocation count there (1 for the first callback fired,
// 2 for the second, ...). memObject is not used.
void CL_CALLBACK mem_destructor_callback( cl_mem memObject, void * userData )
{
    // The slots are declared 'volatile int' by the caller, which spins on them,
    // so the qualifier is kept rather than writing through a plain int pointer.
    volatile int * orderSlot = (volatile int *)userData;

    // ordering of callbacks is guaranteed, meaning we don't need to do atomic operation here
    *orderSlot = ++sDestructorIndex;
}
|
||||
|
||||
#ifndef ABS
// Absolute value of x. Every use of the argument is parenthesised so that
// compound expressions such as ABS( a - b ) expand correctly.
// Note: x is still evaluated more than once, so avoid arguments with side effects.
#define ABS( x ) ( ( ( x ) < 0 ) ? -( x ) : ( x ) )
#endif
|
||||
|
||||
// Registers three destructor callbacks on memObject, releases the object and
// verifies that the callbacks fired in reverse order of registration
// (the callback registered last must run first).
//
//   memObject - wrapper around the object under test; it is released here and
//               reset to NULL so the wrapper does not release it a second time.
//
// Returns 0 when all three callbacks ran in the expected order, -1 otherwise.
// NOTE: the wait below has no timeout; if a callback is never invoked this
// function does not return.
int test_mem_object_destructor_callback_single( clMemWrapper &memObject )
{
    const int kNumCallbacks = 3;
    cl_int error;

    // Set up some variables to catch the order in which callbacks are called
    volatile int callbackOrders[ 3 ] = { 0, 0, 0 };
    sDestructorIndex = 0;

    // Set up the callbacks; slot i belongs to the (i+1)-th registration
    for( int i = 0; i < kNumCallbacks; i++ )
    {
        error = clSetMemObjectDestructorCallback( memObject, mem_destructor_callback, (void*) &callbackOrders[ i ] );
        test_error( error, "Unable to set destructor callback" );
    }

    // Now release the buffer, which SHOULD call the callbacks
    error = clReleaseMemObject( memObject );
    test_error( error, "Unable to release test buffer" );

    // Note: since we manually released the mem wrapper, we need to set it to NULL to prevent a double-release
    memObject = NULL;

    // At this point, all three callbacks should have already been called
    int numErrors = 0;
    for( int i = 0; i < kNumCallbacks; i++ )
    {
        // Spin waiting for the release to finish. If you don't call the mem_destructor_callback, you will not
        // pass the test. bugzilla 6316
        while( 0 == callbackOrders[ i ] )
        {}

        // Read the volatile slot once and take its magnitude locally.
        const int recorded = callbackOrders[ i ];
        const int actualOrder = ( recorded < 0 ) ? -recorded : recorded;

        // Callbacks run in reverse registration order: slot 0 must hold 3, slot 2 must hold 1.
        const int expectedOrder = kNumCallbacks - i;

        if( actualOrder != expectedOrder )
        {
            log_error( "\tERROR: Callback %d was called in the wrong order! (Was called order %d, should have been order %d)\n",
                       i+1, actualOrder, expectedOrder );
            numErrors++;
        }
    }

    return ( numErrors > 0 ) ? -1 : 0;
}
|
||||
|
||||
// Test entry point: checks destructor-callback ordering on a buffer and, when
// checkForImageSupport reports 0 for the device, on a 2D image as well.
// queue and num_elements are not used.
// Returns 0 on success and -1 when either object's callbacks misbehave.
int test_mem_object_destructor_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_int error;
    clMemWrapper bufferUnderTest, imageUnderTest;

    // Buffer half of the test.
    bufferUnderTest = clCreateBuffer( context, CL_MEM_READ_WRITE, 1024, NULL, &error );
    test_error( error, "Unable to create testing buffer" );

    if( 0 != test_mem_object_destructor_callback_single( bufferUnderTest ) )
    {
        log_error( "ERROR: Destructor callbacks for buffer object FAILED\n" );
        return -1;
    }

    // A non-zero result skips the image half of the test.
    if( checkForImageSupport( deviceID ) != 0 )
    {
        return 0;
    }

    // Image half of the test: a small 16x16 read-only image.
    cl_image_format rgbaSigned8 = { CL_RGBA, CL_SIGNED_INT8 };
    imageUnderTest = create_image_2d( context, CL_MEM_READ_ONLY, &rgbaSigned8, 16, 16, 0, NULL, &error );
    test_error( error, "Unable to create testing image" );

    if( 0 != test_mem_object_destructor_callback_single( imageUnderTest ) )
    {
        log_error( "ERROR: Destructor callbacks for image object FAILED\n" );
        return -1;
    }

    return 0;
}
|
||||
121
test_conformance/api/test_native_kernel.cpp
Normal file
121
test_conformance/api/test_native_kernel.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
// Native-kernel entry point: copies `count` cl_int values from `source` to `dest`.
// userData is interpreted as a {cl_int *source, cl_int *dest, cl_int count} record.
static void CL_CALLBACK test_native_kernel_fn( void *userData )
{
    struct arg_struct {
        cl_int * source;
        cl_int * dest;
        cl_int count;
    };

    arg_struct *params = (arg_struct *)userData;

    cl_int idx = 0;
    while( idx < params->count )
    {
        params->dest[ idx ] = params->source[ idx ];
        ++idx;
    }
}
|
||||
|
||||
int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
|
||||
{
|
||||
int error;
|
||||
RandomSeed seed( gRandomSeed );
|
||||
// Check if we support native kernels
|
||||
cl_device_exec_capabilities capabilities;
|
||||
error = clGetDeviceInfo(device, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof(capabilities), &capabilities, NULL);
|
||||
if (!(capabilities & CL_EXEC_NATIVE_KERNEL)) {
|
||||
log_info("Device does not support CL_EXEC_NATIVE_KERNEL.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
clMemWrapper streams[ 2 ];
|
||||
#if !(defined (_WIN32) && defined (_MSC_VER))
|
||||
cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
|
||||
#else
|
||||
cl_int* inBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
|
||||
cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
|
||||
#endif
|
||||
clEventWrapper finishEvent;
|
||||
|
||||
struct arg_struct
|
||||
{
|
||||
cl_mem inputStream;
|
||||
cl_mem outputStream;
|
||||
cl_int count;
|
||||
} args;
|
||||
|
||||
|
||||
// Create some input values
|
||||
generate_random_data( kInt, n_elems, seed, inBuffer );
|
||||
|
||||
|
||||
// Create I/O streams
|
||||
streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
|
||||
test_error( error, "Unable to create I/O stream" );
|
||||
streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
|
||||
test_error( error, "Unable to create I/O stream" );
|
||||
|
||||
|
||||
// Set up the arrays to call with
|
||||
args.inputStream = streams[ 0 ];
|
||||
args.outputStream = streams[ 1 ];
|
||||
args.count = n_elems;
|
||||
|
||||
void * memLocs[ 2 ] = { &args.inputStream, &args.outputStream };
|
||||
|
||||
|
||||
// Run the kernel
|
||||
error = clEnqueueNativeKernel( queue, test_native_kernel_fn,
|
||||
&args, sizeof( args ),
|
||||
2, &streams[ 0 ],
|
||||
(const void **)memLocs,
|
||||
0, NULL, &finishEvent );
|
||||
test_error( error, "Unable to queue native kernel" );
|
||||
|
||||
// Finish and wait for the kernel to complete
|
||||
error = clFinish( queue );
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
error = clWaitForEvents( 1, &finishEvent );
|
||||
test_error(error, "clWaitForEvents failed");
|
||||
|
||||
// Now read the results and verify
|
||||
error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
|
||||
test_error( error, "Unable to read results" );
|
||||
|
||||
for( int i = 0; i < n_elems; i++ )
|
||||
{
|
||||
if( inBuffer[ i ] != outBuffer[ i ] )
|
||||
{
|
||||
log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
|
||||
i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
162
test_conformance/api/test_null_buffer_arg.c
Normal file
162
test_conformance/api/test_null_buffer_arg.c
Normal file
@@ -0,0 +1,162 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#if defined(__APPLE__)
|
||||
#include <OpenCL/opencl.h>
|
||||
#include <OpenCL/cl_platform.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#include <CL/cl_platform.h>
|
||||
#endif
|
||||
#include "procs.h"
|
||||
|
||||
|
||||
/* Result codes returned by the test helpers in this file. */
enum {
    SUCCESS,
    FAILURE
};

/* Selects how kernel argument 0 (src) is handed to clSetKernelArg. */
typedef enum {
    NON_NULL_PATH,    /* address of a real cl_mem handle */
    ADDROF_NULL_PATH, /* address of a cl_mem variable that holds NULL */
    NULL_PATH         /* a NULL arg_value pointer */
} test_type;

/* Number of work-items launched and of elements in each test buffer. */
#define NITEMS 4096

/* Every work-item stores the src pointer, cast to long, into its slot of dst,
 * so the host can check whether the kernel really received a NULL pointer. */
const char *kernel_string =
    "kernel void test_kernel(global float *src, global long *dst)\n"
    "{\n"
    "    uint tid = get_global_id(0);\n"
    "    dst[tid] = (long)src;\n"
    "}\n";
|
||||
|
||||
/*
|
||||
* The guts of the test:
|
||||
* call setKernelArgs with a regular buffer, &NULL, or NULL depending on
|
||||
* the value of 'test_type'
|
||||
*/
|
||||
/*
 * Sets kernel argument 0 as selected by 'type', runs the kernel over NITEMS
 * work-items and checks the values it wrote to result_buf:
 *   NON_NULL_PATH                 - every element must be non-zero
 *   ADDROF_NULL_PATH / NULL_PATH  - every element must be zero
 *
 * Returns SUCCESS or FAILURE. Failures of the enqueue/read calls are reported
 * through test_error. The host copy of the results is released on every path.
 */
static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
    cl_mem test_buf, cl_mem result_buf, test_type type)
{
    int test_success = SUCCESS;
    int i;
    cl_int status;
    const char *typestr;
    size_t global = NITEMS;
    cl_long *host_result;

    switch (type) {
    case NON_NULL_PATH:
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "non-NULL";
        break;
    case ADDROF_NULL_PATH:
        /* test_buf is a by-value copy, so clearing it does not affect the caller */
        test_buf = NULL;
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
        typestr = "&NULL";
        break;
    case NULL_PATH:
        status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
        typestr = "NULL";
        break;
    default:
        /* previously fell through with status and typestr uninitialised */
        log_error("Unknown test_type %d passed to test_setargs_and_execution\n", (int)type);
        return FAILURE;
    }

    log_info("Testing setKernelArgs with %s buffer.\n", typestr);

    if (status != CL_SUCCESS) {
        log_error("clSetKernelArg failed with status: %d\n", status);
        return FAILURE; // no point in continuing *this* test
    }

    status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global,
                                    NULL, 0, NULL, NULL);
    test_error(status, "NDRangeKernel failed.");

    host_result = (cl_long*)malloc(NITEMS*sizeof(cl_long));
    if (host_result == NULL) {
        log_error("Unable to allocate host memory for the result buffer.\n");
        return FAILURE;
    }

    status = clEnqueueReadBuffer(queue, result_buf, CL_TRUE, 0,
                                 sizeof(cl_long)*NITEMS, host_result, 0, NULL, NULL);
    if (status != CL_SUCCESS) {
        /* release the host copy before test_error reports the failure */
        free(host_result);
        test_error(status, "ReadBuffer failed.");
        return FAILURE; /* safety net: test_error is expected to have returned already */
    }

    for (i = 0; i < NITEMS; i++) {
        if (type == NON_NULL_PATH) {
            // in the non-null case, we expect NONZERO values:
            if (host_result[i] == 0) {
                log_error("failure: item %d in the result buffer was unexpectedly NULL.\n", i);
                test_success = FAILURE;
                break;
            }
        } else {
            // ADDROF_NULL_PATH and NULL_PATH: the kernel must have seen a NULL pointer
            if (host_result[i] != 0) {
                log_error("failure: item %d in the result buffer was unexpectedly non-NULL.\n", i);
                test_success = FAILURE;
                break;
            }
        }
    }

    free(host_result);

    if (test_success == SUCCESS) {
        log_info("\t%s ok.\n", typestr);
    }

    return test_success;
}
|
||||
|
||||
int test_null_buffer_arg(cl_device_id device, cl_context context,
|
||||
cl_command_queue queue, int num_elements)
|
||||
{
|
||||
unsigned int test_success = 0;
|
||||
unsigned int i;
|
||||
cl_int status;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
|
||||
// prep kernel:
|
||||
program = clCreateProgramWithSource(context, 1, &kernel_string, NULL, &status);
|
||||
test_error(status, "CreateProgramWithSource failed.");
|
||||
|
||||
status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
|
||||
test_error(status, "BuildProgram failed.");
|
||||
|
||||
kernel = clCreateKernel(program, "test_kernel", &status);
|
||||
test_error(status, "CreateKernel failed.");
|
||||
|
||||
cl_mem dev_src = clCreateBuffer(context, CL_MEM_READ_ONLY, NITEMS*sizeof(cl_float),
|
||||
NULL, NULL);
|
||||
|
||||
cl_mem dev_dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, NITEMS*sizeof(cl_long),
|
||||
NULL, NULL);
|
||||
|
||||
// set the destination buffer normally:
|
||||
status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &dev_dst);
|
||||
test_error(status, "SetKernelArg failed.");
|
||||
|
||||
//
|
||||
// we test three cases:
|
||||
//
|
||||
// - typical case, used everyday: non-null buffer
|
||||
// - the case of src as &NULL (the spec-compliance test)
|
||||
// - the case of src as NULL (the backwards-compatibility test, Apple only)
|
||||
//
|
||||
|
||||
test_success = test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NON_NULL_PATH);
|
||||
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, ADDROF_NULL_PATH);
|
||||
|
||||
#ifdef __APPLE__
|
||||
test_success |= test_setargs_and_execution(queue, kernel, dev_src, dev_dst, NULL_PATH);
|
||||
#endif
|
||||
|
||||
// clean up:
|
||||
if (dev_src) clReleaseMemObject(dev_src);
|
||||
clReleaseMemObject(dev_dst);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
|
||||
return test_success;
|
||||
}
|
||||
289
test_conformance/api/test_platform.cpp
Normal file
289
test_conformance/api/test_platform.cpp
Normal file
@@ -0,0 +1,289 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define EXTENSION_NAME_BUF_SIZE 4096
|
||||
|
||||
#define PRINT_EXTENSION_INFO 0
|
||||
|
||||
// Returns true when `name` appears in the space-separated extension string `list`
// as a complete token. A plain strstr() would also accept `name` when it is only
// part of a longer extension name.
static bool platform_ext_list_contains( const char *list, const char *name )
{
    const size_t nameLen = strlen( name );
    if( nameLen == 0 )
        return false;

    for( const char *hit = strstr( list, name ); hit != NULL; hit = strstr( hit + 1, name ) )
    {
        const bool startsToken = ( hit == list ) || ( hit[ -1 ] == ' ' );
        const bool endsToken = ( hit[ nameLen ] == ' ' ) || ( hit[ nameLen ] == '\0' );
        if( startsToken && endsToken )
            return true;
    }
    return false;
}

// Checks that every extension from a fixed list that the platform of deviceID
// advertises in CL_PLATFORM_EXTENSIONS is also advertised by the device in
// CL_DEVICE_EXTENSIONS.
//
// Returns 0 on success, -1 when a query fails or the device lacks an extension
// that its platform reports.
int test_platform_extensions(cl_device_id deviceID, cl_context context,
                             cl_command_queue queue, int num_elements)
{
    const char * extensions[] = {
        "cl_khr_byte_addressable_store",
        // "cl_APPLE_SetMemObjectDestructor",
        "cl_khr_global_int32_base_atomics",
        "cl_khr_global_int32_extended_atomics",
        "cl_khr_local_int32_base_atomics",
        "cl_khr_local_int32_extended_atomics",
        "cl_khr_int64_base_atomics",
        "cl_khr_int64_extended_atomics",
        "cl_khr_3d_image_writes",
        "cl_khr_fp16",
        "cl_khr_fp64",
        NULL
    };

    // One flag per entry of `extensions`, sized from that table so that the two
    // can never get out of step.
    bool extensionsSupported[ sizeof( extensions ) / sizeof( extensions[ 0 ] ) ] = { false };

    int extensionIndex;

    cl_platform_id platformID;
    cl_int err;

    char platform_extensions[EXTENSION_NAME_BUF_SIZE];
    char device_extensions[EXTENSION_NAME_BUF_SIZE];

    // The device indicated by deviceID is checked against the platform that
    // contains it: CL_DEVICE_PLATFORM yields that cl_platform_id.
    err = clGetDeviceInfo(deviceID,
                          CL_DEVICE_PLATFORM,
                          sizeof(cl_platform_id),
                          (void *)(&platformID),
                          NULL);

    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get platformID from device\n");
        return -1;
    }

    // now we grab the set of extensions specified by the platform
    err = clGetPlatformInfo(platformID,
                            CL_PLATFORM_EXTENSIONS,
                            sizeof(platform_extensions),
                            (void *)(&platform_extensions[0]),
                            NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get extension string from platform\n");
        return -1;
    }

#if PRINT_EXTENSION_INFO
    log_info("Platform extensions include \"%s\"\n\n", platform_extensions);
#endif

    // here we parse the platform extensions, to look for the "important" ones
    for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
    {
        if(platform_ext_list_contains(platform_extensions, extensions[extensionIndex]))
        {
            // we found it
#if PRINT_EXTENSION_INFO
            log_info("Found \"%s\" in platform extensions\n",
                     extensions[extensionIndex]);
#endif
            extensionsSupported[extensionIndex] = true;
        }
    }

    // and then we grab the set of extensions specified by the device
    // (this can be turned into a "loop over all devices in this platform")
    err = clGetDeviceInfo(deviceID,
                          CL_DEVICE_EXTENSIONS,
                          sizeof(device_extensions),
                          (void *)(&device_extensions[0]),
                          NULL);
    if(err != CL_SUCCESS)
    {
        vlog_error("test_platform_extensions : could not get extension string from device\n");
        return -1;
    }

#if PRINT_EXTENSION_INFO
    log_info("Device extensions include \"%s\"\n\n", device_extensions);
#endif

    for(extensionIndex=0; extensions[extensionIndex] != NULL; ++extensionIndex)
    {
        if(!extensionsSupported[extensionIndex])
        {
            continue; // the platform does not report it, nothing to compare
        }

        if(!platform_ext_list_contains(device_extensions, extensions[extensionIndex]))
        {
            // device does not support it
            vlog_error("Platform supports extension \"%s\" but device does not\n",
                       extensions[extensionIndex]);
            return -1;
        }
    }
    return 0;
}
|
||||
|
||||
int test_get_platform_ids(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
|
||||
cl_platform_id platforms[16];
|
||||
cl_uint num_platforms;
|
||||
char *string_returned;
|
||||
|
||||
string_returned = (char*)malloc(8192);
|
||||
|
||||
int total_errors = 0;
|
||||
int err = CL_SUCCESS;
|
||||
|
||||
|
||||
err = clGetPlatformIDs(16, platforms, &num_platforms);
|
||||
test_error(err, "clGetPlatformIDs failed");
|
||||
|
||||
if (num_platforms <= 16) {
|
||||
// Try with NULL
|
||||
err = clGetPlatformIDs(num_platforms, platforms, NULL);
|
||||
test_error(err, "clGetPlatformIDs failed with NULL for return size");
|
||||
}
|
||||
|
||||
if (num_platforms < 1) {
|
||||
log_error("Found 0 platforms.\n");
|
||||
return -1;
|
||||
}
|
||||
log_info("Found %d platforms.\n", num_platforms);
|
||||
|
||||
|
||||
for (int p=0; p<(int)num_platforms; p++) {
|
||||
cl_device_id *devices;
|
||||
cl_uint num_devices;
|
||||
size_t size;
|
||||
|
||||
|
||||
log_info("Platform %d (%p):\n", p, platforms[p]);
|
||||
|
||||
memset(string_returned, 0, 8192);
|
||||
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_PROFILE, 8192, string_returned, &size);
|
||||
test_error(err, "clGetPlatformInfo for CL_PLATFORM_PROFILE failed");
|
||||
log_info("\tCL_PLATFORM_PROFILE: %s\n", string_returned);
|
||||
if (strlen(string_returned)+1 != size) {
|
||||
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
memset(string_returned, 0, 8192);
|
||||
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VERSION, 8192, string_returned, &size);
|
||||
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VERSION failed");
|
||||
log_info("\tCL_PLATFORM_VERSION: %s\n", string_returned);
|
||||
if (strlen(string_returned)+1 != size) {
|
||||
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
memset(string_returned, 0, 8192);
|
||||
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_NAME, 8192, string_returned, &size);
|
||||
test_error(err, "clGetPlatformInfo for CL_PLATFORM_NAME failed");
|
||||
log_info("\tCL_PLATFORM_NAME: %s\n", string_returned);
|
||||
if (strlen(string_returned)+1 != size) {
|
||||
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
memset(string_returned, 0, 8192);
|
||||
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_VENDOR, 8192, string_returned, &size);
|
||||
test_error(err, "clGetPlatformInfo for CL_PLATFORM_VENDOR failed");
|
||||
log_info("\tCL_PLATFORM_VENDOR: %s\n", string_returned);
|
||||
if (strlen(string_returned)+1 != size) {
|
||||
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
memset(string_returned, 0, 8192);
|
||||
err = clGetPlatformInfo(platforms[p], CL_PLATFORM_EXTENSIONS, 8192, string_returned, &size);
|
||||
test_error(err, "clGetPlatformInfo for CL_PLATFORM_EXTENSIONS failed");
|
||||
log_info("\tCL_PLATFORM_EXTENSIONS: %s\n", string_returned);
|
||||
if (strlen(string_returned)+1 != size) {
|
||||
log_error("Returned string length %ld does not equal reported one %ld.\n", strlen(string_returned)+1, size);
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
|
||||
test_error(err, "clGetDeviceIDs size failed.\n");
|
||||
devices = (cl_device_id *)malloc(num_devices*sizeof(cl_device_id));
|
||||
memset(devices, 0, sizeof(cl_device_id)*num_devices);
|
||||
err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
|
||||
test_error(err, "clGetDeviceIDs failed.\n");
|
||||
|
||||
log_info("\tPlatform has %d devices.\n", (int)num_devices);
|
||||
for (int d=0; d<(int)num_devices; d++) {
|
||||
size_t returned_size;
|
||||
cl_platform_id returned_platform;
|
||||
cl_context context;
|
||||
cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[p], 0 };
|
||||
|
||||
err = clGetDeviceInfo(devices[d], CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &returned_platform, &returned_size);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM\n");
|
||||
if (returned_size != sizeof(cl_platform_id)) {
|
||||
log_error("Reported return size (%ld) does not match expected size (%ld).\n", returned_size, sizeof(cl_platform_id));
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
memset(string_returned, 0, 8192);
|
||||
err = clGetDeviceInfo(devices[d], CL_DEVICE_NAME, 8192, string_returned, NULL);
|
||||
test_error(err, "clGetDeviceInfo failed for CL_DEVICE_NAME\n");
|
||||
|
||||
log_info("\t\tPlatform for device %d (%s) is %p.\n", d, string_returned, returned_platform);
|
||||
|
||||
log_info("\t\t\tTesting clCreateContext for the platform/device...\n");
|
||||
// Try creating a context for the platform
|
||||
context = clCreateContext(properties, 1, &devices[d], NULL, NULL, &err);
|
||||
test_error(err, "\t\tclCreateContext failed for device with platform properties\n");
|
||||
|
||||
memset(properties, 0, sizeof(cl_context_properties)*3);
|
||||
|
||||
err = clGetContextInfo(context, CL_CONTEXT_PROPERTIES, sizeof(cl_context_properties)*3, properties, &returned_size);
|
||||
test_error(err, "clGetContextInfo for CL_CONTEXT_PROPERTIES failed");
|
||||
if (returned_size != sizeof(cl_context_properties)*3) {
|
||||
log_error("Invalid size returned from clGetContextInfo for CL_CONTEXT_PROPERTIES. Got %ld, expected %ld.\n",
|
||||
returned_size, sizeof(cl_context_properties)*3);
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
if (properties[0] != (cl_context_properties)CL_CONTEXT_PLATFORM || properties[1] != (cl_context_properties)platforms[p]) {
|
||||
log_error("Wrong properties returned. Expected: [%p %p], got [%p %p]\n",
|
||||
(void*)CL_CONTEXT_PLATFORM, platforms[p], (void*)properties[0], (void*)properties[1]);
|
||||
total_errors++;
|
||||
}
|
||||
|
||||
err = clReleaseContext(context);
|
||||
test_error(err, "clReleaseContext failed");
|
||||
}
|
||||
free(devices);
|
||||
}
|
||||
|
||||
free(string_returned);
|
||||
|
||||
return total_errors;
|
||||
}
|
||||
635
test_conformance/api/test_queries.cpp
Normal file
635
test_conformance/api/test_queries.cpp
Normal file
@@ -0,0 +1,635 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/imageHelpers.h"
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
// Validates CL_PLATFORM_PROFILE and CL_PLATFORM_VERSION of the platform that owns
// deviceID:
//  - the profile must be "FULL_PROFILE" or "EMBEDDED_PROFILE";
//  - the version must look like "OpenCL <major>.<minor> ..." and be at least 1.2;
//  - the reported sizes must equal strlen + 1, both with and without a buffer.
//
// Returns 0 on success, -1 on a validation failure. API failures are reported
// through test_error.
int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_platform_id platform;
    cl_int error;
    char buffer[ 4098 ];
    size_t length;

    // Use the platform of the device under test rather than whichever platform
    // happens to be enumerated first on the system.
    error = clGetDeviceInfo( deviceID, CL_DEVICE_PLATFORM, sizeof( platform ), &platform, NULL );
    test_error( error, "Unable to get platform" );

    // Platform profile should either be FULL_PROFILE or EMBEDDED_PROFILE
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform profile string" );

    log_info("Returned CL_PLATFORM_PROFILE %s.\n", buffer);

    if( strcmp( buffer, "FULL_PROFILE" ) != 0 && strcmp( buffer, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned platform profile string is not a valid string by OpenCL 1.2! (Returned: %s)\n", buffer );
        return -1;
    }
    if( strlen( buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Check just length return
    error = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &length );
    test_error( error, "Unable to get platform profile length" );
    if( strlen( buffer )+1 != length )
    {
        log_error( "ERROR: Returned length of profile string is incorrect (actual length: %d, returned length: %d)\n",
                   (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Platform version should fit the regex "OpenCL *[0-9]+\.[0-9]+"
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof( buffer ), buffer, &length );
    test_error( error, "Unable to get platform version string" );

    log_info("Returned CL_PLATFORM_VERSION %s.\n", buffer);

    const size_t prefixLen = strlen( "OpenCL " );
    if( strncmp( buffer, "OpenCL ", prefixLen ) != 0 )
    {
        log_error( "ERROR: Initial part of platform version string does not match required format! (returned: %s)\n", buffer );
        return -1;
    }

    // majorStart .. dot is the major number, dot + 1 .. minorEnd the minor number.
    // The casts to unsigned char keep isdigit() defined for bytes above 0x7f.
    char *majorStart = buffer + prefixLen;
    while( *majorStart == ' ' )
        majorStart++;

    char *dot = majorStart;
    while( isdigit( (unsigned char)*dot ) )
        dot++;

    char *minorEnd = ( *dot == '.' ) ? dot + 1 : dot;
    while( isdigit( (unsigned char)*minorEnd ) )
        minorEnd++;

    // Both numbers need at least one digit, as the "[0-9]+" in the regex demands.
    if( *dot != '.' || dot == majorStart || minorEnd == dot + 1 )
    {
        log_error( "ERROR: Numeric part of platform version string does not match required format! (returned: %s)\n", buffer );
        return -1;
    }
    if( *minorEnd != ' ' )
    {
        log_error( "ERROR: space expected after minor version number! (returned: %s)\n", buffer );
        return -1;
    }

    // make sure it is null terminated within the reported length
    char *scan = minorEnd;
    while( scan != buffer + length && *scan != '\0' )
        scan++;
    if( scan == buffer + length )
    {
        log_error( "ERROR: platform version string is not NUL terminated!\n" );
        return -1;
    }

    // atoi() stops at the first non-digit ('.' and ' ' respectively), so the
    // string does not have to be modified to split the two numbers.
    int major = atoi( majorStart );
    int minor = atoi( dot + 1 );
    int minor_revision = 2;
    if( major * 10 + minor < 10 + minor_revision )
    {
        log_error( "ERROR: OpenCL profile version returned is less than 1.%d!\n", minor_revision );
        return -1;
    }

    // Sanity checks on the returned values
    if( length != strlen( buffer ) + 1 )
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    // Check just length
    error = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &length );
    test_error( error, "Unable to get platform version length" );
    if( length != strlen( buffer )+1 )
    {
        log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( buffer )+1, (int)length );
        return -1;
    }

    return 0;
}
|
||||
|
||||
// Creates a sampler (normalized coords, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR) and
// checks that clGetSamplerInfo returns the creation values and the expected size
// for each parameter. For the reference count only the returned size is checked.
//
// Returns 0 on success, -1 on a validation failure. API failures are reported
// through test_error.
int test_get_sampler_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    size_t size;

    PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )

    clSamplerWrapper sampler = clCreateSampler( context, CL_TRUE, CL_ADDRESS_CLAMP, CL_FILTER_LINEAR, &error );
    test_error( error, "Unable to create sampler to test with" );

    // Reference count
    cl_uint refCount;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get sampler ref count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of sampler refcount does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
        return -1;
    }

    // Context
    cl_context otherCtx;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_CONTEXT, sizeof( otherCtx ), &otherCtx, &size );
    test_error( error, "Unable to get sampler context" );
    if( otherCtx != context )
    {
        log_error( "ERROR: Sampler context does not validate! (expected %p, got %p)\n", context, otherCtx );
        return -1;
    }
    if( size != sizeof( otherCtx ) )
    {
        log_error( "ERROR: Returned size of sampler context does not validate! (expected %d, got %d)\n", (int)sizeof( otherCtx ), (int)size );
        return -1;
    }

    // Addressing mode
    cl_addressing_mode mode;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_ADDRESSING_MODE, sizeof( mode ), &mode, &size );
    test_error( error, "Unable to get sampler addressing mode" );
    if( mode != CL_ADDRESS_CLAMP )
    {
        log_error( "ERROR: Sampler addressing mode does not validate! (expected %d, got %d)\n", (int)CL_ADDRESS_CLAMP, (int)mode );
        return -1;
    }
    if( size != sizeof( mode ) )
    {
        log_error( "ERROR: Returned size of sampler addressing mode does not validate! (expected %d, got %d)\n", (int)sizeof( mode ), (int)size );
        return -1;
    }

    // Filter mode
    cl_filter_mode fmode;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_FILTER_MODE, sizeof( fmode ), &fmode, &size );
    test_error( error, "Unable to get sampler filter mode" );
    if( fmode != CL_FILTER_LINEAR )
    {
        log_error( "ERROR: Sampler filter mode does not validate! (expected %d, got %d)\n", (int)CL_FILTER_LINEAR, (int)fmode );
        return -1;
    }
    if( size != sizeof( fmode ) )
    {
        log_error( "ERROR: Returned size of sampler filter mode does not validate! (expected %d, got %d)\n", (int)sizeof( fmode ), (int)size );
        return -1;
    }

    // Normalized-coordinates flag. The query returns a cl_bool, so that is the
    // type used to receive it and to validate the returned size.
    cl_bool norm;
    error = clGetSamplerInfo( sampler, CL_SAMPLER_NORMALIZED_COORDS, sizeof( norm ), &norm, &size );
    test_error( error, "Unable to get sampler normalized flag" );
    if( norm != CL_TRUE )
    {
        log_error( "ERROR: Sampler normalized flag does not validate! (expected %d, got %d)\n", (int)CL_TRUE, (int)norm );
        return -1;
    }
    if( size != sizeof( norm ) )
    {
        log_error( "ERROR: Returned size of sampler normalized flag does not validate! (expected %d, got %d)\n", (int)sizeof( norm ), (int)size );
        return -1;
    }

    return 0;
}
|
||||
|
||||
// Queries one command-queue parameter into `val` and validates it.
//   queue     - command queue to query
//   paramName - cl_command_queue_info selector
//   val       - lvalue that receives the result
//   expected  - value `val` must compare equal to
//   name      - string literal naming the parameter in messages
//   type      - string literal printf conversion used to print the values
//   cast      - type both values are cast to for printing
// Relies on `error` and `size` variables in the enclosing scope and makes the
// enclosing function return -1 on a mismatch (test_error handles API failures).
// The expansion is a single braced block, so it is one statement wherever it is
// used and works with or without a trailing semicolon. Arguments are
// parenthesised so that expressions can be passed safely.
#define TEST_COMMAND_QUEUE_PARAM( queue, paramName, val, expected, name, type, cast ) \
    { \
        error = clGetCommandQueueInfo( (queue), (paramName), sizeof( val ), &(val), &size ); \
        test_error( error, "Unable to get command queue " name ); \
        if( (val) != (expected) ) \
        { \
            log_error( "ERROR: Command queue " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)(val) ); \
            return -1; \
        } \
        if( size != sizeof( val ) ) \
        { \
            log_error( "ERROR: Returned size of command queue " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
            return -1; \
        } \
    }
|
||||
|
||||
// Creates a command queue with every property the device reports and checks
// that clGetCommandQueueInfo returns consistent values and sizes for the
// reference count, context, device and properties queries.
// Returns 0 on success and a non-zero value on the first failed check.
int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    int error;
    size_t size;

    // The queue is created with the device's own property mask, so the query
    // must succeed before the mask can be used.
    cl_command_queue_properties device_props;
    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
    test_error( error, "Unable to get device CL_DEVICE_QUEUE_PROPERTIES" );
    log_info("CL_DEVICE_QUEUE_PROPERTIES is %d\n", (int)device_props);

    clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, device_props, &error );
    test_error( error, "Unable to create command queue to test with" );

    // Only the returned size of the reference count is validated here.
    cl_uint refCount;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_REFERENCE_COUNT, sizeof( refCount ), &refCount, &size );
    test_error( error, "Unable to get command queue reference count" );
    if( size != sizeof( refCount ) )
    {
        log_error( "ERROR: Returned size of command queue reference count does not validate! (expected %d, got %d)\n", (int)sizeof( refCount ), (int)size );
        return -1;
    }

    cl_context otherCtx;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_CONTEXT, otherCtx, context, "context", "%p", cl_context )

    cl_device_id otherDevice;
    error = clGetCommandQueueInfo( queue, CL_QUEUE_DEVICE, sizeof(otherDevice), &otherDevice, &size);
    test_error(error, "clGetCommandQueue failed.");

    if (size != sizeof(cl_device_id)) {
        log_error( " ERROR: Returned size of command queue CL_QUEUE_DEVICE does not validate! (expected %d, got %d)\n", (int)sizeof( otherDevice ), (int)size );
        return -1;
    }

    /* Since the device IDs are opaque types we check the CL_DEVICE_VENDOR_ID which is unique for identical hardware. */
    cl_uint otherDevice_vid, deviceID_vid;
    error = clGetDeviceInfo(otherDevice, CL_DEVICE_VENDOR_ID, sizeof(otherDevice_vid), &otherDevice_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );
    error = clGetDeviceInfo(deviceID, CL_DEVICE_VENDOR_ID, sizeof(deviceID_vid), &deviceID_vid, NULL );
    test_error( error, "Unable to get device CL_DEVICE_VENDOR_ID" );

    if( otherDevice_vid != deviceID_vid )
    {
        log_error( "ERROR: Incorrect device returned for queue! (Expected vendor ID 0x%x, got 0x%x)\n", deviceID_vid, otherDevice_vid );
        return -1;
    }

    // The queue must report exactly the properties it was created with.
    cl_command_queue_properties props;
    TEST_COMMAND_QUEUE_PARAM( queue, CL_QUEUE_PROPERTIES, props, (unsigned int)( device_props ), "properties", "%d", unsigned int )

    return 0;
}
|
||||
|
||||
// Queries CL_CONTEXT_PROPERTIES on the supplied context and accepts either of
// two results: a returned size of 0, or a returned size of one
// cl_context_properties entry whose value is 0 (the list terminator).
// Returns 0 on success, non-zero otherwise.
int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    int error;
    size_t size;
    // Zero-initialized so the value is defined even if nothing is written.
    cl_context_properties props = 0;

    error = clGetContextInfo( context, CL_CONTEXT_PROPERTIES, sizeof( props ), &props, &size );
    test_error( error, "Unable to get context props" );

    if (size == 0) {
        // Valid size
        return 0;
    } else if (size == sizeof(cl_context_properties)) {
        // A single entry was returned; it must be the 0 terminator.
        if (props != 0) {
            log_error("ERROR: Returned properties is not NULL.\n");
            return -1;
        }
        // Valid data and size
        return 0;
    }

    // Size was neither 0 nor sizeof(cl_context_properties)
    log_error( "ERROR: Returned size of context props is not valid! (expected 0 or %d, got %d)\n",
               (int)sizeof(cl_context_properties), (int)size );
    return -1;
}
|
||||
|
||||
// Queries one mem-object parameter with clGetMemObjectInfo and validates both
// the returned value and the returned size.
//
// The expansion is a sequence of statements that uses the caller's local
// `error` and `size` variables and returns from the enclosing function when a
// check fails (test_error on a failed query, `return -1` on a mismatch).
//   val      - lvalue that receives the queried value
//   expected - value `val` must compare equal to
//   name     - string literal spliced into the log messages
//   type     - printf conversion (string literal) used to log the two values
//   cast     - type both values are converted to before logging
// Every argument is parenthesized so compound expressions can be passed.
#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast ) \
    error = clGetMemObjectInfo( (mem), (paramName), sizeof( val ), &(val), &size ); \
    test_error( error, "Unable to get mem object " name ); \
    if( (val) != (expected) ) \
    { \
        log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)(val) ); \
        return -1; \
    } \
    if( size != sizeof( val ) ) \
    { \
        log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
        return -1; \
    }
|
||||
|
||||
// Mem-object destructor callback: frees the heap block handed over as user
// data. The mem object argument itself is not used.
// NOTE(review): presumably registered with clSetMemObjectDestructorCallback by
// code outside this view - confirm against the callers.
void CL_CALLBACK mem_obj_destructor_callback( cl_mem /* memobj */, void *data )
{
    free( data );
}
|
||||
|
||||
// All possible combinations of valid cl_mem_flags.
|
||||
static cl_mem_flags all_flags[16] = {
|
||||
0,
|
||||
CL_MEM_READ_WRITE,
|
||||
CL_MEM_READ_ONLY,
|
||||
CL_MEM_WRITE_ONLY,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
|
||||
};
|
||||
|
||||
// Queries one device parameter with clGetDeviceInfo, validates the returned
// size against sizeof( val ) and logs the value.
//
// The expansion is a sequence of statements that uses the caller's local
// `error` and `size` variables and returns from the enclosing function when
// the query fails (test_error) or the size mismatches (`return -1`).
//   val  - lvalue that receives the queried value
//   name - string literal spliced into the log messages
//   type - printf conversion (string literal) used to log the value
//   cast - type the value is converted to before logging
// Every argument is parenthesized so compound expressions can be passed.
#define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \
    error = clGetDeviceInfo( (device), (paramName), sizeof( val ), &(val), &size ); \
    test_error( error, "Unable to get device " name ); \
    if( size != sizeof( val ) ) \
    { \
        log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
        return -1; \
    } \
    log_info( "\tReported device " name " : " type "\n", (cast)(val) );
|
||||
|
||||
// Variant of the device-parameter check for memory sizes: queries the value
// with clGetDeviceInfo, validates the returned size against sizeof( val ) and
// logs the value divided by `div` (converted to int) using the `type` format.
//
// The expansion is a sequence of statements that uses the caller's local
// `error` and `size` variables and returns from the enclosing function when
// the query fails (test_error) or the size mismatches (`return -1`).
// `val` and `div` are parenthesized so that an unparenthesized divisor such as
// `1024 * 1024` still divides by the whole expression.
#define TEST_DEVICE_PARAM_MEM( device, paramName, val, name, type, div ) \
    error = clGetDeviceInfo( (device), (paramName), sizeof( val ), &(val), &size ); \
    test_error( error, "Unable to get device " name ); \
    if( size != sizeof( val ) ) \
    { \
        log_error( "ERROR: Returned size of device " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size ); \
        return -1; \
    } \
    log_info( "\tReported device " name " : " type "\n", (int)( (val) / (div) ) );
|
||||
|
||||
// Queries a series of clGetDeviceInfo parameters on deviceID, checks that each
// query succeeds and reports the expected size, logs every value, and verifies
// that the device profile is either FULL_PROFILE or EMBEDDED_PROFILE.
// Returns 0 on success and a non-zero value on the first failed check.
int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements)
{
    int error;
    size_t size;

    cl_uint vendorID;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_VENDOR_ID, vendorID, "vendor ID", "0x%08x", int )

    // String queries: the reported size must include the NUL terminator.
    char extensions[ 10240 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_EXTENSIONS, sizeof( extensions ), extensions, &size );
    test_error( error, "Unable to get device extensions" );
    if( size != strlen( extensions ) + 1 )
    {
        log_error( "ERROR: Returned size of device extensions does not validate! (expected %d, got %d)\n", (int)( strlen( extensions ) + 1 ), (int)size );
        return -1;
    }
    log_info( "\tReported device extensions: %s \n", extensions );

    cl_uint preferred;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, preferred, "preferred vector char width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, preferred, "preferred vector short width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, preferred, "preferred vector int width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, preferred, "preferred vector long width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, preferred, "preferred vector float width", "%d", int )
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, preferred, "preferred vector double width", "%d", int )

    // Note that even without cl_khr_fp64, the preferred width for double can be non-zero. For example, vendor
    // extensions can support double but may not support cl_khr_fp64, which implies math library support.
    // The double width is therefore only logged, not validated.

    cl_uint baseAddrAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, baseAddrAlign, "base address alignment", "%d bytes", int )

    cl_uint minDataAlign;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, minDataAlign, "min data type alignment", "%d bytes", int )

    cl_device_mem_cache_type cacheType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof( cacheType ), &cacheType, &size );
    test_error( error, "Unable to get device global mem cache type" );
    if( size != sizeof( cacheType ) )
    {
        log_error( "ERROR: Returned size of device global mem cache type does not validate! (expected %d, got %d)\n", (int)sizeof( cacheType ), (int)size );
        return -1;
    }
    const char *cacheTypeName = ( cacheType == CL_NONE ) ? "CL_NONE"
                              : ( cacheType == CL_READ_ONLY_CACHE ) ? "CL_READ_ONLY_CACHE"
                              : ( cacheType == CL_READ_WRITE_CACHE ) ? "CL_READ_WRITE_CACHE"
                              : "<unknown>";
    log_info( "\tReported device global mem cache type: %s \n", cacheTypeName );

    cl_uint cachelineSize;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cachelineSize, "global mem cacheline size", "%d bytes", int )

    cl_ulong cacheSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cacheSize, "global mem cache size", "%d KB", 1024 )

    cl_ulong memSize;
    TEST_DEVICE_PARAM_MEM( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, memSize, "global mem size", "%d MB", ( 1024 * 1024 ) )

    // Both the size check and the name lookup must use localMemType, the
    // variable this query actually filled in.
    cl_device_local_mem_type localMemType;
    error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_TYPE, sizeof( localMemType ), &localMemType, &size );
    test_error( error, "Unable to get device local mem type" );
    if( size != sizeof( localMemType ) )
    {
        log_error( "ERROR: Returned size of device local mem type does not validate! (expected %d, got %d)\n", (int)sizeof( localMemType ), (int)size );
        return -1;
    }
    const char *localMemTypeName = ( localMemType == CL_LOCAL ) ? "CL_LOCAL"
                                 : ( localMemType == CL_GLOBAL ) ? "CL_GLOBAL"
                                 : "<unknown>";
    log_info( "\tReported device local mem type: %s \n", localMemTypeName );

    cl_bool errSupport;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ERROR_CORRECTION_SUPPORT, errSupport, "error correction support", "%d", int )

    size_t timerResolution;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, timerResolution, "profiling timer resolution", "%ld nanoseconds", long )

    cl_bool endian;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_ENDIAN_LITTLE, endian, "little endian flag", "%d", int )

    cl_bool avail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_AVAILABLE, avail, "available flag", "%d", int )

    cl_bool compilerAvail;
    TEST_DEVICE_PARAM( deviceID, CL_DEVICE_COMPILER_AVAILABLE, compilerAvail, "compiler available flag", "%d", int )

    char profile[ 1024 ];
    error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof( profile ), profile, &size );
    test_error( error, "Unable to get device profile" );
    if( size != strlen( profile ) + 1 )
    {
        log_error( "ERROR: Returned size of device profile does not validate! (expected %d, got %d)\n", (int)( strlen( profile ) + 1 ), (int)size );
        return -1;
    }
    if( strcmp( profile, "FULL_PROFILE" ) != 0 && strcmp( profile, "EMBEDDED_PROFILE" ) != 0 )
    {
        log_error( "ERROR: Returned profile of device not FULL or EMBEDDED as required by OpenCL 1.2! (Returned %s)\n", profile );
        return -1;
    }
    log_info( "\tReported device profile: %s \n", profile );

    return 0;
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Kernel sources for the required-work-group-size test.
//   [0] plain copy kernel with no reqd_work_group_size attribute
//   [1] printf-style template for the same kernel with a
//       reqd_work_group_size(%d,%d,%d) attribute; the caller fills in the
//       three dimensions before building it
static const char *sample_compile_size[2] = {
    /* [0] no required size */
    "__kernel void sample_test(__global int *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " dst[tid] = src[tid];\n"
    "\n"
    "}\n",

    /* [1] required size supplied through the three %d conversions */
    "__kernel __attribute__((reqd_work_group_size(%d,%d,%d))) "
    "void sample_test(__global int *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " dst[tid] = src[tid];\n"
    "\n"
    "}\n"
};
|
||||
|
||||
int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
int error;
|
||||
size_t realSize;
|
||||
size_t kernel_max_workgroup_size;
|
||||
size_t global[] = {64,14,10};
|
||||
size_t local[] = {0,0,0};
|
||||
|
||||
cl_uint max_dimensions;
|
||||
|
||||
error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dimensions), &max_dimensions, NULL);
|
||||
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
|
||||
log_info("Device reported CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS = %d.\n", (int)max_dimensions);
|
||||
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, &sample_compile_size[ 0 ], "sample_test" );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
|
||||
error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(kernel_max_workgroup_size), &kernel_max_workgroup_size, NULL);
|
||||
test_error( error, "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
|
||||
log_info("The CL_KERNEL_WORK_GROUP_SIZE for the kernel is %d.\n", (int)kernel_max_workgroup_size);
|
||||
|
||||
size_t size[ 3 ];
|
||||
error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
|
||||
test_error( error, "Unable to get work group info" );
|
||||
|
||||
if( size[ 0 ] != 0 || size[ 1 ] != 0 || size[ 2 ] != 0 )
|
||||
{
|
||||
log_error( "ERROR: Nonzero compile work group size returned for nonspecified size! (returned %d,%d,%d)\n", (int)size[0], (int)size[1], (int)size[2] );
|
||||
return -1;
|
||||
}
|
||||
|
||||
if( realSize != sizeof( size ) )
|
||||
{
|
||||
log_error( "ERROR: Returned size of compile work group size not valid! (Expected %d, got %d)\n", (int)sizeof( size ), (int)realSize );
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Determine some local dimensions to use for the test.
|
||||
if (max_dimensions == 1) {
|
||||
error = get_max_common_work_group_size(context, kernel, global[0], &local[0]);
|
||||
test_error( error, "get_max_common_work_group_size failed");
|
||||
log_info("For global dimension %d, kernel will require local dimension %d.\n", (int)global[0], (int)local[0]);
|
||||
} else if (max_dimensions == 2) {
|
||||
error = get_max_common_2D_work_group_size(context, kernel, global, local);
|
||||
test_error( error, "get_max_common_2D_work_group_size failed");
|
||||
log_info("For global dimension %d x %d, kernel will require local dimension %d x %d.\n", (int)global[0], (int)global[1], (int)local[0], (int)local[1]);
|
||||
} else {
|
||||
error = get_max_common_3D_work_group_size(context, kernel, global, local);
|
||||
test_error( error, "get_max_common_3D_work_group_size failed");
|
||||
log_info("For global dimension %d x %d x %d, kernel will require local dimension %d x %d x %d.\n",
|
||||
(int)global[0], (int)global[1], (int)global[2], (int)local[0], (int)local[1], (int)local[2]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper in, out;
|
||||
//char source[1024];
|
||||
char *source = (char*)malloc(1024);
|
||||
source[0] = '\0';
|
||||
|
||||
sprintf(source, sample_compile_size[1], local[0], local[1], local[2]);
|
||||
|
||||
error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&source, "sample_test" );
|
||||
if( error != 0 )
|
||||
return error;
|
||||
|
||||
size_t size[ 3 ];
|
||||
error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, sizeof( size ), size, &realSize );
|
||||
test_error( error, "Unable to get work group info" );
|
||||
|
||||
if( size[ 0 ] != local[0] || size[ 1 ] != local[1] || size[ 2 ] != local[2] )
|
||||
{
|
||||
log_error( "ERROR: Incorrect compile work group size returned for specified size! (returned %d,%d,%d, expected %d,%d,%d)\n",
|
||||
(int)size[0], (int)size[1], (int)size[2], (int)local[0], (int)local[1], (int)local[2]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Verify that the kernel will only execute with that size.
|
||||
in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*global[0], NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*global[0], NULL, &error);
|
||||
test_error(error, "clCreateBuffer failed");
|
||||
|
||||
error = clSetKernelArg(kernel, 0, sizeof(in), &in);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
error = clSetKernelArg(kernel, 1, sizeof(out), &out);
|
||||
test_error(error, "clSetKernelArg failed");
|
||||
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
|
||||
test_error(error, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
log_info("kernel_required_group_size may report spurious ERRORS in the conformance log.\n");
|
||||
|
||||
local[0]++;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
|
||||
if (error != CL_INVALID_WORK_GROUP_SIZE) {
|
||||
log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
|
||||
(int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2] );
|
||||
print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
if (max_dimensions == 1) {
|
||||
free(source);
|
||||
return 0;
|
||||
}
|
||||
|
||||
local[0]--; local[1]++;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
|
||||
if (error != CL_INVALID_WORK_GROUP_SIZE) {
|
||||
log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
|
||||
(int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]);
|
||||
print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
|
||||
if (max_dimensions == 2) {
|
||||
return 0;
|
||||
free(source);
|
||||
}
|
||||
|
||||
local[1]--; local[2]++;
|
||||
error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, local, 0, NULL, NULL);
|
||||
if (error != CL_INVALID_WORK_GROUP_SIZE) {
|
||||
log_error("Incorrect error returned for executing a kernel with the wrong required local work group size. (used %d,%d,%d, required %d,%d,%d)\n",
|
||||
(int)local[0], (int)local[1], (int)local[2], (int)local[0]-1, (int)local[1], (int)local[2]);
|
||||
print_error(error, "Expected: CL_INVALID_WORK_GROUP_SIZE.");
|
||||
return -1;
|
||||
}
|
||||
|
||||
error = clFinish(queue);
|
||||
test_error(error, "clFinish failed");
|
||||
free(source);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
234
test_conformance/api/test_retain.cpp
Normal file
234
test_conformance/api/test_retain.cpp
Normal file
@@ -0,0 +1,234 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif // !_WIN32
|
||||
|
||||
// Note: According to spec, the various functions to get instance counts should return an error when passed in an object
|
||||
// that has already been released. However, the spec is out of date. If it gets re-updated to allow such action, re-enable
|
||||
// this define.
|
||||
//#define VERIFY_AFTER_RELEASE 1
|
||||
|
||||
// Read the reference count of a command queue / mem object into the caller's
// local `numInstances`, storing the API status in the caller's local `err`.
// When the query fails, `numInstances` is set to 0.
#define GET_QUEUE_INSTANCE_COUNT(p) numInstances = ( (err = clGetCommandQueueInfo((p), CL_QUEUE_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )
#define GET_MEM_INSTANCE_COUNT(p) numInstances = ( (err = clGetMemObjectInfo((p), CL_MEM_REFERENCE_COUNT, sizeof( numInstances ), &numInstances, NULL)) == CL_SUCCESS ? numInstances : 0 )

// Fails the enclosing test function (return -1) when the observed count `c`
// differs from `rightValue`. Wrapped in do/while(0) so it behaves as a single
// statement; invoke it with a trailing semicolon.
#define VERIFY_INSTANCE_COUNT(c,rightValue) do { if( (c) != (rightValue) ) { \
    log_error( "ERROR: Instance count for test object is not valid! (should be %d, really is %d)\n", (int)(rightValue), (int)(c) ); \
    return -1; } } while( 0 )
|
||||
|
||||
// Creates a command queue, checks that its reference count is 1, and releases
// it again. Returns 0 on success, non-zero on the first failure.
int test_retain_queue_single(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the queue; the release itself must succeed */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );
#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
|
||||
|
||||
// Exercises clRetainCommandQueue / clReleaseCommandQueue in several rounds and
// checks the reported reference count after each one (10, 5, 8, 1), then
// performs the final release. Every retain and release must itself succeed.
// Returns 0 on success, non-zero on the first failure.
int test_retain_queue_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queueNotUsed, int num_elements)
{
    cl_command_queue queue;
    cl_uint numInstances;
    unsigned int i;
    int err;

    /* Create a test queue */
    queue = clCreateCommandQueue( context, deviceID, 0, &err );
    test_error( err, "Unable to create command queue to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        err = clRetainCommandQueue( queue );
        test_error( err, "Unable to retain command queue" );
    }

    /* Test the instance count */
    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        err = clReleaseCommandQueue( queue );
        test_error( err, "Unable to release command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        err = clRetainCommandQueue( queue );
        test_error( err, "Unable to retain command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        err = clReleaseCommandQueue( queue );
        test_error( err, "Unable to release command queue" );
    }

    GET_QUEUE_INSTANCE_COUNT( queue );
    test_error( err, "Unable to get queue instance count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    err = clReleaseCommandQueue( queue );
    test_error( err, "Unable to release command queue" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_QUEUE_INSTANCE_COUNT( queue );
    if( err != CL_INVALID_COMMAND_QUEUE )
    {
        print_error( err, "Command queue was not properly released" );
        return -1;
    }
#endif

    return 0;
}
|
||||
|
||||
// Creates a 32-byte buffer, checks that its reference count is 1, and releases
// it again. Returns 0 on success, non-zero on the first failure.
int test_retain_mem_object_single(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* Now release the mem object; the release itself must succeed */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );
#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}
|
||||
|
||||
// Exercises clRetainMemObject / clReleaseMemObject in several rounds and
// checks the reported reference count after each one (10, 5, 8, 1), then
// performs the final release. Every retain and release must itself succeed.
// Returns 0 on success, non-zero on the first failure.
int test_retain_mem_object_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_mem object;
    cl_uint numInstances;
    unsigned int i;
    int err;

    /* Create a test object */
    object = clCreateBuffer( context, CL_MEM_READ_ONLY, 32, NULL, &err );
    test_error( err, "Unable to create buffer to test with" );

    /* Increment 9 times, which should bring the count to 10 */
    for( i = 0; i < 9; i++ )
    {
        err = clRetainMemObject( object );
        test_error( err, "Unable to retain mem object" );
    }

    /* Test the instance count */
    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 10 );

    /* Now release 5 times, which should take us to 5 */
    for( i = 0; i < 5; i++ )
    {
        err = clReleaseMemObject( object );
        test_error( err, "Unable to release mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 5 );

    /* Retain again three times, which should take us to 8 */
    for( i = 0; i < 3; i++ )
    {
        err = clRetainMemObject( object );
        test_error( err, "Unable to retain mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 8 );

    /* Release 7 times, which should take it to 1 */
    for( i = 0; i < 7; i++ )
    {
        err = clReleaseMemObject( object );
        test_error( err, "Unable to release mem object" );
    }

    GET_MEM_INSTANCE_COUNT( object );
    test_error( err, "Unable to get mem object count" );
    VERIFY_INSTANCE_COUNT( numInstances, 1 );

    /* And one last one */
    err = clReleaseMemObject( object );
    test_error( err, "Unable to release mem object" );

#ifdef VERIFY_AFTER_RELEASE
    /* We're not allowed to get the instance count after the object has been completely released. But that's
       exactly how we can tell the release worked--by making sure getting the instance count fails! */
    GET_MEM_INSTANCE_COUNT( object );
    if( err != CL_INVALID_MEM_OBJECT )
    {
        print_error( err, "Mem object was not properly released" );
        return -1;
    }
#endif

    return 0;
}
|
||||
|
||||
109
test_conformance/api/test_retain_program.c
Normal file
109
test_conformance/api/test_retain_program.c
Normal file
@@ -0,0 +1,109 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
|
||||
/* Builds a trivial program, creates a kernel from it, then releases the
   program BEFORE the kernel. Both releases must succeed.
   Returns 0 on success, non-zero on the first failure. */
int test_release_kernel_order(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    cl_program program;
    cl_kernel kernel;
    int error;
    const char *testProgram[] = { "__kernel void sample_test(__global int *data){}" };

    /* Create a test program */
    program = clCreateProgramWithSource( context, 1, testProgram, NULL, &error);
    test_error( error, "Unable to create program to test with" );

    /* Compile the program */
    error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
    test_error( error, "Unable to build sample program to test with" );

    /* And create a kernel from it */
    kernel = clCreateKernel( program, "sample_test", &error );
    test_error( error, "Unable to create kernel" );

    /* Now try freeing the program first, then the kernel. If refcounts are right, this should work just fine */
    error = clReleaseProgram( program );
    test_error( error, "Unable to release program before its kernel" );
    error = clReleaseKernel( kernel );
    test_error( error, "Unable to release kernel after its program" );

    /* Both releases reported success. A crash inside the runtime would still prevent us from returning an error. */
    return 0;
}
|
||||
|
||||
/* Kernel source used by the release-during-execute test: each work-item runs
   an empty one-million-iteration loop and then writes src[tid], converted to
   int, into dst[tid]. */
const char *sample_delay_kernel[] = {
    "__kernel void sample_test(__global float *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " for( int i = 0; i < 1000000; i++ ); \n"
    " dst[tid] = (int)src[tid];\n"
    "\n"
    "}\n"
};
|
||||
|
||||
/*
 * Releases the buffers, kernel and program used by an enqueued kernel
 * before waiting for the queue to finish.
 *
 * The kernel from sample_delay_kernel is enqueued on `queue`; immediately
 * afterwards both buffers, the kernel and the program are released, and only
 * then is clFinish called. The result of every release call is recorded and
 * reported after the queue has been drained, so a failing release fails the
 * test without leaving work pending on the queue.
 *
 * deviceID and num_elements are unused; they are part of the common test
 * signature. Returns 0 on success and -1 if the kernel cannot be created;
 * other failures are reported through test_error.
 */
int test_release_during_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    int error;
    int releaseErrors[4];
    cl_program program;
    cl_kernel kernel;
    cl_mem streams[2];
    size_t threads[1] = { 10 }, localThreadSize;

    /* Build the delay kernel that will be in flight while we release things */
    if( create_single_kernel_helper( context, &program, &kernel, 1, sample_delay_kernel, "sample_test" ) )
    {
        return -1;
    }

    streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );
    streams[1] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_int) * 10, NULL, &error);
    test_error( error, "Creating test array failed" );

    /* Set the arguments */
    error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[ 0 ]);
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[ 1 ]);
    test_error( error, "Unable to set indexed kernel arguments" );

    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreadSize );
    test_error( error, "Unable to calc local thread size" );

    /* Execute the kernel */
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &localThreadSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    /* The kernel may still be executing, but we should still be able to
       release everything it uses. It's not terribly useful, but it must work
       if the internal refcounting is indeed correct. Record each result and
       report it only after the queue has been drained. */
    releaseErrors[0] = clReleaseMemObject( streams[ 1 ] );
    releaseErrors[1] = clReleaseMemObject( streams[ 0 ] );
    releaseErrors[2] = clReleaseKernel( kernel );
    releaseErrors[3] = clReleaseProgram( program );

    /* Now make sure we're really finished before we go on. */
    error = clFinish(queue);
    test_error( error, "Unable to finish context.");

    test_error( releaseErrors[0], "Unable to release output buffer while kernel is enqueued" );
    test_error( releaseErrors[1], "Unable to release input buffer while kernel is enqueued" );
    test_error( releaseErrors[2], "Unable to release kernel while it is enqueued" );
    test_error( releaseErrors[3], "Unable to release program while its kernel is enqueued" );

    return 0;
}
|
||||
|
||||
|
||||
28
test_conformance/atomics/CMakeLists.txt
Normal file
28
test_conformance/atomics/CMakeLists.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
# Sources of the atomics conformance test, including the shared harness files.
set(ATOMICS_SOURCES
    main.c
    test_atomics.cpp
    test_indexed_cases.c
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/msvc9.c
)

add_executable(conformance_test_atomics ${ATOMICS_SOURCES})

# Compile every listed source as C++, whatever its file extension.
set_source_files_properties(${ATOMICS_SOURCES} PROPERTIES LANGUAGE CXX)

TARGET_LINK_LIBRARIES(conformance_test_atomics
    ${CLConform_LIBRARIES})
|
||||
17
test_conformance/atomics/Jamfile
Normal file
17
test_conformance/atomics/Jamfile
Normal file
@@ -0,0 +1,17 @@
|
||||
# Compile every source as C++ (gcc: -xc++, msvc: /TP).
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;

# The atomics test source is test_atomics.cpp, matching the CMake and
# Makefile builds in this directory.
exe test_atomics
    : main.c
      test_atomics.cpp
      test_indexed_cases.c
    ;

install dist
    : test_atomics
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/atomics
      <variant>release:<location>$(DIST)/release/tests/test_conformance/atomics
    ;
|
||||
44
test_conformance/atomics/Makefile
Normal file
44
test_conformance/atomics/Makefile
Normal file
@@ -0,0 +1,44 @@
|
||||
# Builds test_atomics against the OpenCL framework.
# Define BUILD_WITH_ATF to link the ATF framework and compile with -DUSE_ATF.
ifdef BUILD_WITH_ATF
ATF = -framework ATF
USE_ATF = -DUSE_ATF
endif

SRCS = main.c \
	test_atomics.cpp \
	test_indexed_cases.c \
	../../test_common/harness/errorHelpers.c \
	../../test_common/harness/threadTesting.c \
	../../test_common/harness/testHarness.c \
	../../test_common/harness/mt19937.c \
	../../test_common/harness/conversions.c \
	../../test_common/harness/kernelHelpers.c

DEFINES =

SOURCES = $(abspath $(SRCS))
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
LIBPATH += -L.
FRAMEWORK = $(SOURCES)
HEADERS =
TARGET = test_atomics
INCLUDE =
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
# The C++ driver is used for both .c and .cpp sources.
CC = c++
CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
CXXFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}

# Map every .c and .cpp source to its object file.
OBJECTS := ${SOURCES:.c=.o}
OBJECTS := ${OBJECTS:.cpp=.o}

TARGETOBJECT =

# all and clean name actions, not files; keep them working even if a file
# with one of those names exists.
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

clean:
	rm -f $(TARGET) $(OBJECTS)

.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
78
test_conformance/atomics/main.c
Normal file
78
test_conformance/atomics/main.c
Normal file
@@ -0,0 +1,78 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "procs.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_atomic_add,
|
||||
test_atomic_sub,
|
||||
test_atomic_xchg,
|
||||
test_atomic_min,
|
||||
test_atomic_max,
|
||||
test_atomic_inc,
|
||||
test_atomic_dec,
|
||||
test_atomic_cmpxchg,
|
||||
test_atomic_and,
|
||||
test_atomic_or,
|
||||
test_atomic_xor,
|
||||
|
||||
test_atomic_add_index,
|
||||
test_atomic_add_index_bin
|
||||
};
|
||||
|
||||
const char *basefn_names[] = {
|
||||
"atomic_add",
|
||||
"atomic_sub",
|
||||
"atomic_xchg",
|
||||
"atomic_min",
|
||||
"atomic_max",
|
||||
"atomic_inc",
|
||||
"atomic_dec",
|
||||
"atomic_cmpxchg",
|
||||
"atomic_and",
|
||||
"atomic_or",
|
||||
"atomic_xor",
|
||||
|
||||
"atomic_add_index",
|
||||
"atomic_add_index_bin",
|
||||
|
||||
"all",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
int num_fns = sizeof(basefn_names) / sizeof(char *);
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
return runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
}
|
||||
|
||||
|
||||
39
test_conformance/atomics/procs.h
Normal file
39
test_conformance/atomics/procs.h
Normal file
@@ -0,0 +1,39 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Declarations of the atomics conformance test entry points.
// Guarded against multiple inclusion, following the convention used by testBase.h.
#ifndef _procs_h
#define _procs_h

#include "../../test_common/harness/errorHelpers.h"
#include "../../test_common/harness/kernelHelpers.h"
#include "../../test_common/harness/threadTesting.h"
#include "../../test_common/harness/typeWrappers.h"

extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);

// One entry point per atomic operation; all share the common test signature.
extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

// Indexed use cases of atom_add.
extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);

#endif // _procs_h
|
||||
|
||||
|
||||
|
||||
36
test_conformance/atomics/testBase.h
Normal file
36
test_conformance/atomics/testBase.h
Normal file
@@ -0,0 +1,36 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef _testBase_h
|
||||
#define _testBase_h
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
#endif // _testBase_h
|
||||
|
||||
|
||||
|
||||
1124
test_conformance/atomics/test_atomics.cpp
Normal file
1124
test_conformance/atomics/test_atomics.cpp
Normal file
File diff suppressed because it is too large
Load Diff
380
test_conformance/atomics/test_indexed_cases.c
Normal file
380
test_conformance/atomics/test_indexed_cases.c
Normal file
@@ -0,0 +1,380 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "testBase.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
|
||||
extern cl_uint gRandomSeed;
|
||||
|
||||
// OpenCL C source of the "add_index_test" kernel: each work item obtains a
// slot number with atom_add(counter, 1) and writes its global ID into that
// slot of counts.
const char * atomic_index_source =
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "// Counter keeps track of which index in counts we are using.\n"
    "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
    "// At the end of this we should have all thread IDs in some random location in counts\n"
    "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
    "// will be missing some.\n"
    "\n"
    "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
    " int tid = get_global_id(0);\n"
    " \n"
    " int counter_to_use = atom_add(counter, 1);\n"
    " counts[counter_to_use] = tid;\n"
    "}";
|
||||
|
||||
int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
|
||||
{
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
clMemWrapper counter, counters;
|
||||
size_t numGlobalThreads, numLocalThreads;
|
||||
int fail = 0, succeed = 0, err;
|
||||
|
||||
/* Check if atomics are supported. */
|
||||
if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
|
||||
log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
//===== add_index test
|
||||
// The index test replicates what particles does.
|
||||
// It uses one memory location to keep track of the current index and then each thread
|
||||
// does an atomic add to it to get its new location. The threads then write to their
|
||||
// assigned location. At the end we check to make sure that each thread's ID shows up
|
||||
// exactly once in the output.
|
||||
|
||||
numGlobalThreads = 2048;
|
||||
|
||||
if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
|
||||
return -1;
|
||||
|
||||
if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
|
||||
return -1;
|
||||
|
||||
log_info("Execute global_threads:%d local_threads:%d\n",
|
||||
(int)numGlobalThreads, (int)numLocalThreads);
|
||||
|
||||
// Create the counter that will keep track of where each thread writes.
|
||||
counter = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * 1, NULL, NULL);
|
||||
// Create the counters that will hold the results of each thread writing
|
||||
// its ID into a (hopefully) unique location.
|
||||
counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * numGlobalThreads, NULL, NULL);
|
||||
|
||||
// Reset all those locations to -1 to indciate they have not been used.
|
||||
cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
|
||||
if (values == NULL) {
|
||||
log_error("add_index_test FAILED to allocate memory for initial values.\n");
|
||||
fail = 1; succeed = -1;
|
||||
} else {
|
||||
memset(values, -1, numLocalThreads);
|
||||
unsigned int i=0;
|
||||
for (i=0; i<numGlobalThreads; i++)
|
||||
values[i] = -1;
|
||||
int init=0;
|
||||
err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
|
||||
err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to execute kernel: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
} else {
|
||||
err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_test FAILED to read back results: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
} else {
|
||||
unsigned int looking_for, index;
|
||||
for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
|
||||
int instances_found=0;
|
||||
for (index=0; index<numGlobalThreads; index++) {
|
||||
if (values[index]==(int)looking_for)
|
||||
instances_found++;
|
||||
}
|
||||
if (instances_found != 1) {
|
||||
log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!fail) {
|
||||
log_info("add_index_test passed. Each thread used exactly one index.\n");
|
||||
}
|
||||
free(values);
|
||||
}
|
||||
return fail;
|
||||
}
|
||||
|
||||
// OpenCL C source (a single string) of the "add_index_bin_test" kernel: each
// work item looks up its bin in bin_assignments, reserves a slot in that bin
// with atom_add on the bin's counter, and stores its global ID in the slot.
const char *add_index_bin_kernel[] = {
    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
    "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
    "// using an atomic add to keep track of the current location to write into in each bin.\n"
    "// This is the same as the memory update for the particles demo.\n"
    "\n"
    "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " int location = bin_assignments[tid];\n"
    " int counter = atom_add(&bin_counters[location], 1);\n"
    " bins[location*max_counts_per_bin + counter] = tid;\n"
    "}"
};
|
||||
|
||||
// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
|
||||
// using an atomic add to keep track of the current location to write into in each bin.
|
||||
// This is the same as the memory update for the particles demo.
|
||||
int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
|
||||
{
|
||||
int number_of_items = (int)global_threads[0];
|
||||
size_t local_threads[1];
|
||||
int divisor = 12;
|
||||
int number_of_bins = number_of_items/divisor;
|
||||
int max_counts_per_bin = divisor*2;
|
||||
|
||||
int fail = 0;
|
||||
int succeed = 0;
|
||||
int err;
|
||||
|
||||
clProgramWrapper program;
|
||||
clKernelWrapper kernel;
|
||||
|
||||
// log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
|
||||
// number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
|
||||
|
||||
//===== add_index_bin test
|
||||
// The index test replicates what particles does.
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
|
||||
test_error( err, "Unable to create testing kernel" );
|
||||
|
||||
if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
|
||||
return -1;
|
||||
|
||||
log_info("Execute global_threads:%d local_threads:%d\n",
|
||||
(int)global_threads[0], (int)local_threads[0]);
|
||||
|
||||
// Allocate our storage
|
||||
cl_mem bin_counters = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * number_of_bins, NULL, NULL);
|
||||
cl_mem bins = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),
|
||||
sizeof(cl_int) * number_of_bins*max_counts_per_bin, NULL, NULL);
|
||||
cl_mem bin_assignments = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_ONLY),
|
||||
sizeof(cl_int) * number_of_items, NULL, NULL);
|
||||
|
||||
if (bin_counters == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
|
||||
return -1;
|
||||
}
|
||||
if (bins == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bins.\n");
|
||||
return -1;
|
||||
}
|
||||
if (bin_assignments == NULL) {
|
||||
log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialize our storage
|
||||
cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
|
||||
if (!l_bin_counts) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
|
||||
return -1;
|
||||
}
|
||||
int i;
|
||||
for (i=0; i<number_of_bins; i++)
|
||||
l_bin_counts[i] = 0;
|
||||
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
|
||||
cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
|
||||
if (!values) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<number_of_bins*max_counts_per_bin; i++)
|
||||
values[i] = -1;
|
||||
err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
free(values);
|
||||
|
||||
cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
|
||||
if (!l_bin_assignments) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
for (i=0; i<number_of_items; i++) {
|
||||
int bin = random_in_range(0, number_of_bins-1, d);
|
||||
while (l_bin_counts[bin] >= max_counts_per_bin) {
|
||||
bin = random_in_range(0, number_of_bins-1, d);
|
||||
}
|
||||
if (bin >= number_of_bins)
|
||||
log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
|
||||
if (l_bin_counts[bin]+1 > max_counts_per_bin)
|
||||
log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
|
||||
l_bin_counts[bin]++;
|
||||
l_bin_assignments[i] = bin;
|
||||
// log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
|
||||
}
|
||||
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
|
||||
return -1;
|
||||
}
|
||||
// Setup the kernel
|
||||
err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
|
||||
err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
|
||||
err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
|
||||
fail=1; succeed=-1;
|
||||
}
|
||||
|
||||
cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
|
||||
if (!final_bin_assignments) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
|
||||
cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
|
||||
if (!final_bin_counts) {
|
||||
log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
|
||||
return -1;
|
||||
}
|
||||
err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
|
||||
if (err) {
|
||||
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
|
||||
fail = 1; succeed=-1;
|
||||
}
|
||||
|
||||
// Verification.
|
||||
int errors=0;
|
||||
int current_bin;
|
||||
int search;
|
||||
// Print out all the contents of the bins.
|
||||
// for (current_bin=0; current_bin<number_of_bins; current_bin++)
|
||||
// for (search=0; search<max_counts_per_bin; search++)
|
||||
// log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
|
||||
|
||||
// First verify that there are the correct number in each bin.
|
||||
for (current_bin=0; current_bin<number_of_bins; current_bin++) {
|
||||
int expected_number = l_bin_counts[current_bin];
|
||||
int actual_number = final_bin_counts[current_bin];
|
||||
if (expected_number != actual_number) {
|
||||
log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
|
||||
errors++;
|
||||
}
|
||||
for (search=0; search<expected_number; search++) {
|
||||
if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
|
||||
log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
for (search=expected_number; search<max_counts_per_bin; search++) {
|
||||
if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
|
||||
log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Now verify that the correct ones are in each bin
|
||||
int index;
|
||||
for (index=0; index<number_of_items; index++) {
|
||||
int expected_bin = l_bin_assignments[index];
|
||||
int found_it = 0;
|
||||
for (search=0; search<l_bin_counts[expected_bin]; search++) {
|
||||
if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
|
||||
found_it = 1;
|
||||
}
|
||||
}
|
||||
if (found_it == 0) {
|
||||
log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
free(l_bin_counts);
|
||||
free(l_bin_assignments);
|
||||
free(final_bin_assignments);
|
||||
free(final_bin_counts);
|
||||
clReleaseMemObject(bin_counters);
|
||||
clReleaseMemObject(bins);
|
||||
clReleaseMemObject(bin_assignments);
|
||||
if (errors == 0) {
|
||||
log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
|
||||
return 0;
|
||||
} else {
|
||||
log_error("add_index_bin_test FAILED: %d errors.\n", errors);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Runs add_index_bin_test up to ten times, starting with 2048 work items and
// doubling the count after every successful pass. Stops at the first failing
// pass. Returns 0 if all passes succeed or if
// cl_khr_global_int32_base_atomics is not available (test skipped), and 1
// otherwise. context and queue are forwarded; num_elements is unused.
int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    //===== add_index_bin test
    size_t workItems = 2048;
    int failures = 0;
    int pass = 0;
    MTdata rng = init_genrand( gRandomSeed );

    /* Check if atomics are supported. */
    if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
        log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
        free_mtdata( rng );
        return 0;
    }

    while (pass < 10) {
        log_info("add_index_bin_test with %d elements:\n", (int)workItems);
        if (add_index_bin_test(&workItems, queue, context, rng)) {
            failures = 1;
            break;
        }
        workItems *= 2;
        pass++;
    }

    free_mtdata( rng );
    return failures;
}
|
||||
|
||||
|
||||
121
test_conformance/basic/CMakeLists.txt
Normal file
121
test_conformance/basic/CMakeLists.txt
Normal file
@@ -0,0 +1,121 @@
|
||||
# Sources of the basic conformance test, including the shared harness files.
# A single list feeds both add_executable and set_source_files_properties so
# the two can no longer drift apart.
set(BASIC_SOURCES
    main.c
    test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c
    test_intmath_int.c test_intmath_int2.c test_intmath_int4.c
    test_intmath_long.c test_intmath_long2.c test_intmath_long4.c
    test_hiloeo.c test_local.c test_pointercast.c
    test_if.c test_loop.c
    test_readimage.c test_readimage_int16.c test_readimage_fp32.c
    test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c
    test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c
    test_multireadimageonefmt.c test_multireadimagemultifmt.c
    test_imagedim.c
    test_vloadstore.c
    test_int2float.c test_float2int.c
    test_createkernelsinprogram.c
    test_hostptr.c
    test_explicit_s2v.cpp
    test_constant.c
    test_image_multipass.c
    test_imagereadwrite.c test_imagereadwrite3d.c
    test_image_param.c
    test_imagenpot.c
    test_image_r8.c
    test_barrier.c
    test_basic_parameter_types.c
    test_arrayreadwrite.c
    test_arraycopy.c
    test_imagearraycopy.c
    test_imagearraycopy3d.c
    test_imagecopy.c
    test_imagerandomcopy.c
    test_arrayimagecopy.c
    test_arrayimagecopy3d.c
    test_imagecopy3d.c
    test_enqueue_map.cpp
    test_work_item_functions.cpp
    test_astype.cpp
    test_async_copy.cpp
    test_sizeof.c
    test_vector_creation.cpp
    test_vec_type_hint.c
    test_numeric_constants.cpp
    test_constant_source.cpp
    test_bufferreadwriterect.c
    test_async_strided_copy.cpp
    test_preprocessors.cpp
    test_kernel_memory_alignment.cpp
    test_global_work_offsets.cpp
    test_kernel_call_kernel_function.cpp
    test_local_kernel_scope.cpp
    ../../test_common/harness/errorHelpers.c
    ../../test_common/harness/threadTesting.c
    ../../test_common/harness/testHarness.c
    ../../test_common/harness/kernelHelpers.c
    ../../test_common/harness/typeWrappers.cpp
    ../../test_common/harness/imageHelpers.cpp
    ../../test_common/harness/mt19937.c
    ../../test_common/harness/conversions.c
    ../../test_common/harness/rounding_mode.c
    ../../test_common/harness/msvc9.c
)

add_executable(conformance_test_basic ${BASIC_SOURCES})

# Compile every listed source as C++, whatever its file extension.
set_source_files_properties(${BASIC_SOURCES} PROPERTIES LANGUAGE CXX)

TARGET_LINK_LIBRARIES(conformance_test_basic
    ${CLConform_LIBRARIES})
|
||||
75
test_conformance/basic/Jamfile
Normal file
75
test_conformance/basic/Jamfile
Normal file
@@ -0,0 +1,75 @@
|
||||
# Project-wide requirements: pass the "treat input as C++" flag to the
# compiler (gcc: -xc++, msvc: /TP), so .c sources are compiled as C++.
project
    : requirements
      <toolset>gcc:<cflags>-xc++
      <toolset>msvc:<cflags>"/TP"
    ;
|
||||
|
||||
# Basic conformance test executable.  Each test entry point registered in
# main.c needs its source file listed here; a missing file leaves the
# corresponding symbol unresolved at link time.
exe test_basic
    : main.c
      test_arraycopy.c
      test_arrayimagecopy3d.c
      test_arrayimagecopy.c
      test_arrayreadwrite.c
      test_astype.cpp
      test_async_copy.cpp
      test_barrier.c
      test_basic_parameter_types.c
      test_constant.c
      test_createkernelsinprogram.c
      test_enqueue_map.cpp
      test_explicit_s2v.cpp
      test_float2int.c
      test_fpmath_float2.c
      test_fpmath_float4.c
      test_fpmath_float.c
      test_hiloeo.c
      test_hostptr.c
      test_if.c
      test_imagearraycopy3d.c
      test_imagearraycopy.c
      test_imagecopy3d.c
      test_imagecopy.c
      test_imagedim.c
      test_image_multipass.c
      test_imagenpot.c
      test_image_param.c
      test_image_r8.c
      test_imagerandomcopy.c
      test_imagereadwrite3d.c
      test_imagereadwrite.c
      test_int2float.c
      test_intmath_int2.c
      test_intmath_int4.c
      test_intmath_int.c
      test_intmath_long2.c
      test_intmath_long4.c
      test_intmath_long.c
      test_local.c
      test_loop.c
      test_multireadimagemultifmt.c
      test_multireadimageonefmt.c
      test_pointercast.c
      test_readimage3d.c
      test_readimage3d_fp32.c
      test_readimage3d_int16.c
      test_readimage.c
      test_readimage_fp32.c
      test_readimage_int16.c
      test_sizeof.c
      test_vec_type_hint.c
      test_vector_creation.cpp
      test_vloadstore.c
      test_work_item_functions.cpp
      test_writeimage.c
      test_writeimage_fp32.c
      test_writeimage_int16.c
      test_numeric_constants.cpp
      test_kernel_call_kernel_function.cpp
      # Sources that the Makefile for this directory builds but that were
      # absent from this list.
      test_async_strided_copy.cpp
      test_bufferreadwriterect.c
      test_constant_source.cpp
      test_global_work_offsets.cpp
      test_kernel_memory_alignment.cpp
      test_local_kernel_scope.cpp
      test_preprocessors.cpp
    ;
|
||||
|
||||
# Install test_basic into the debug or release tree under $(DIST),
# selected by the build variant.
install dist
    : test_basic
    : <variant>debug:<location>$(DIST)/debug/tests/test_conformance/basic
      <variant>release:<location>$(DIST)/release/tests/test_conformance/basic
    ;
|
||||
|
||||
94
test_conformance/basic/Makefile
Normal file
94
test_conformance/basic/Makefile
Normal file
@@ -0,0 +1,94 @@
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
SRCS = main.c \
|
||||
test_fpmath_float.c test_fpmath_float2.c test_fpmath_float4.c \
|
||||
test_intmath_int.c test_intmath_int2.c test_intmath_int4.c \
|
||||
test_intmath_long.c test_intmath_long2.c test_intmath_long4.c \
|
||||
test_hiloeo.c test_local.c test_local_kernel_scope.cpp test_pointercast.c \
|
||||
test_if.c test_sizeof.c test_loop.c \
|
||||
test_readimage.c test_readimage_int16.c test_readimage_fp32.c \
|
||||
test_readimage3d.c test_readimage3d_int16.c test_readimage3d_fp32.c \
|
||||
test_writeimage.c test_writeimage_int16.c test_writeimage_fp32.c \
|
||||
test_multireadimageonefmt.c test_multireadimagemultifmt.c \
|
||||
test_imagedim.c \
|
||||
test_vloadstore.c \
|
||||
test_int2float.c test_float2int.c \
|
||||
test_createkernelsinprogram.c \
|
||||
test_hostptr.c \
|
||||
test_explicit_s2v.cpp \
|
||||
test_constant.c \
|
||||
test_constant_source.cpp \
|
||||
test_image_multipass.c \
|
||||
test_imagereadwrite.c test_imagereadwrite3d.c \
|
||||
test_bufferreadwriterect.c \
|
||||
test_image_param.c \
|
||||
test_imagenpot.c \
|
||||
test_image_r8.c \
|
||||
test_barrier.c \
|
||||
test_arrayreadwrite.c \
|
||||
test_arraycopy.c \
|
||||
test_imagearraycopy.c \
|
||||
test_imagearraycopy3d.c \
|
||||
test_imagecopy.c \
|
||||
test_imagerandomcopy.c \
|
||||
test_arrayimagecopy.c \
|
||||
test_arrayimagecopy3d.c\
|
||||
test_imagecopy3d.c \
|
||||
test_enqueue_map.cpp \
|
||||
test_work_item_functions.cpp \
|
||||
test_astype.cpp \
|
||||
test_async_copy.cpp \
|
||||
test_async_strided_copy.cpp \
|
||||
test_numeric_constants.cpp \
|
||||
test_kernel_call_kernel_function.cpp \
|
||||
test_basic_parameter_types.c \
|
||||
test_vector_creation.cpp \
|
||||
test_vec_type_hint.c \
|
||||
test_preprocessors.cpp \
|
||||
test_kernel_memory_alignment.cpp \
|
||||
test_global_work_offsets.cpp \
|
||||
../../test_common/harness/errorHelpers.c \
|
||||
../../test_common/harness/threadTesting.c \
|
||||
../../test_common/harness/testHarness.c \
|
||||
../../test_common/harness/rounding_mode.c \
|
||||
../../test_common/harness/kernelHelpers.c \
|
||||
../../test_common/harness/typeWrappers.cpp \
|
||||
../../test_common/harness/imageHelpers.cpp \
|
||||
../../test_common/harness/mt19937.c \
|
||||
../../test_common/harness/conversions.c
|
||||
|
||||
DEFINES =
|
||||
|
||||
SOURCES = $(abspath $(SRCS))
|
||||
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||
LIBPATH += -L.
|
||||
FRAMEWORK = $(SOURCES)
|
||||
HEADERS =
|
||||
TARGET = test_basic
|
||||
INCLUDE =
|
||||
COMPILERFLAGS = -c -Wall -g -O0 -Wshorten-64-to-32
|
||||
CC = c++
|
||||
CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
CXXFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} $(DEFINES:%=-D%) $(INCLUDE)
|
||||
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||
|
||||
OBJECTS := ${SOURCES:.c=.o}
|
||||
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||
|
||||
TARGETOBJECT =
|
||||
# "all" and "clean" name actions, not files.  Declaring them phony keeps a
# stray file called "all" or "clean" from making the rule look up to date.
.PHONY: all clean

all: $(TARGET)

# Link every object file into the test executable.
$(TARGET): $(OBJECTS)
	$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)

# Remove the executable and all object files.
clean:
	rm -f $(TARGET) $(OBJECTS)

# Fallback recipe for any requested target that has no rule of its own.
.DEFAULT:
	@echo The target \"$@\" does not exist in Makefile.
|
||||
|
||||
|
||||
|
||||
264
test_conformance/basic/main.c
Normal file
264
test_conformance/basic/main.c
Normal file
@@ -0,0 +1,264 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "procs.h"
|
||||
|
||||
basefn basefn_list[] = {
|
||||
test_hostptr,
|
||||
test_fpmath_float,
|
||||
test_fpmath_float2,
|
||||
test_fpmath_float4,
|
||||
test_intmath_int,
|
||||
test_intmath_int2,
|
||||
test_intmath_int4,
|
||||
test_intmath_long,
|
||||
test_intmath_long2,
|
||||
test_intmath_long4,
|
||||
test_hiloeo,
|
||||
test_if,
|
||||
test_sizeof,
|
||||
test_loop,
|
||||
test_pointer_cast,
|
||||
test_local_arg_def,
|
||||
test_local_kernel_def,
|
||||
test_local_kernel_scope,
|
||||
test_constant,
|
||||
test_constant_source,
|
||||
test_readimage,
|
||||
test_readimage_int16,
|
||||
test_readimage_fp32,
|
||||
test_writeimage,
|
||||
test_writeimage_int16,
|
||||
test_writeimage_fp32,
|
||||
test_multireadimageonefmt,
|
||||
|
||||
test_multireadimagemultifmt,
|
||||
test_image_r8,
|
||||
test_barrier,
|
||||
test_int2float,
|
||||
test_float2int,
|
||||
test_imagereadwrite,
|
||||
test_imagereadwrite3d,
|
||||
test_readimage3d,
|
||||
test_readimage3d_int16,
|
||||
test_readimage3d_fp32,
|
||||
test_bufferreadwriterect,
|
||||
test_arrayreadwrite,
|
||||
test_arraycopy,
|
||||
test_imagearraycopy,
|
||||
test_imagearraycopy3d,
|
||||
test_imagecopy,
|
||||
test_imagecopy3d,
|
||||
test_imagerandomcopy,
|
||||
test_arrayimagecopy,
|
||||
test_arrayimagecopy3d,
|
||||
test_imagenpot,
|
||||
|
||||
test_vload_global,
|
||||
test_vload_local,
|
||||
test_vload_constant,
|
||||
test_vload_private,
|
||||
test_vstore_global,
|
||||
test_vstore_local,
|
||||
test_vstore_private,
|
||||
|
||||
test_createkernelsinprogram,
|
||||
test_imagedim_pow2,
|
||||
test_imagedim_non_pow2,
|
||||
test_image_param,
|
||||
test_image_multipass_integer_coord,
|
||||
test_image_multipass_float_coord,
|
||||
test_explicit_s2v_bool,
|
||||
test_explicit_s2v_char,
|
||||
test_explicit_s2v_uchar,
|
||||
test_explicit_s2v_short,
|
||||
test_explicit_s2v_ushort,
|
||||
test_explicit_s2v_int,
|
||||
test_explicit_s2v_uint,
|
||||
test_explicit_s2v_long,
|
||||
test_explicit_s2v_ulong,
|
||||
test_explicit_s2v_float,
|
||||
test_explicit_s2v_double,
|
||||
|
||||
test_enqueue_map_buffer,
|
||||
test_enqueue_map_image,
|
||||
|
||||
test_work_item_functions,
|
||||
|
||||
test_astype,
|
||||
|
||||
test_async_copy_global_to_local,
|
||||
test_async_copy_local_to_global,
|
||||
test_async_strided_copy_global_to_local,
|
||||
test_async_strided_copy_local_to_global,
|
||||
test_prefetch,
|
||||
|
||||
test_kernel_call_kernel_function,
|
||||
test_host_numeric_constants,
|
||||
test_kernel_numeric_constants,
|
||||
test_kernel_limit_constants,
|
||||
test_kernel_preprocessor_macros,
|
||||
|
||||
test_basic_parameter_types,
|
||||
test_vector_creation,
|
||||
test_vec_type_hint,
|
||||
test_kernel_memory_alignment_local,
|
||||
test_kernel_memory_alignment_global,
|
||||
test_kernel_memory_alignment_constant,
|
||||
test_kernel_memory_alignment_private,
|
||||
|
||||
test_global_work_offsets,
|
||||
test_get_global_offset
|
||||
};
|
||||
|
||||
const char *basefn_names[] = {
|
||||
"hostptr",
|
||||
"fpmath_float",
|
||||
"fpmath_float2",
|
||||
"fpmath_float4",
|
||||
"intmath_int",
|
||||
"intmath_int2",
|
||||
"intmath_int4",
|
||||
"intmath_long",
|
||||
"intmath_long2",
|
||||
"intmath_long4",
|
||||
"hiloeo",
|
||||
"if",
|
||||
"sizeof",
|
||||
"loop",
|
||||
"pointer_cast",
|
||||
"local_arg_def",
|
||||
"local_kernel_def",
|
||||
"local_kernel_scope",
|
||||
"constant",
|
||||
"constant_source",
|
||||
"readimage",
|
||||
"readimage_int16",
|
||||
"readimage_fp32",
|
||||
"writeimage",
|
||||
"writeimage_int16",
|
||||
"writeimage_fp32",
|
||||
"mri_one",
|
||||
|
||||
"mri_multiple",
|
||||
"image_r8",
|
||||
"barrier",
|
||||
"int2float",
|
||||
"float2int",
|
||||
"imagereadwrite",
|
||||
"imagereadwrite3d",
|
||||
"readimage3d",
|
||||
"readimage3d_int16",
|
||||
"readimage3d_fp32",
|
||||
"bufferreadwriterect",
|
||||
"arrayreadwrite",
|
||||
"arraycopy",
|
||||
"imagearraycopy",
|
||||
"imagearraycopy3d",
|
||||
"imagecopy",
|
||||
"imagecopy3d",
|
||||
"imagerandomcopy",
|
||||
"arrayimagecopy",
|
||||
"arrayimagecopy3d",
|
||||
"imagenpot",
|
||||
|
||||
"vload_global",
|
||||
"vload_local",
|
||||
"vload_constant",
|
||||
"vload_private",
|
||||
"vstore_global",
|
||||
"vstore_local",
|
||||
"vstore_private",
|
||||
|
||||
"createkernelsinprogram",
|
||||
"imagedim_pow2",
|
||||
"imagedim_non_pow2",
|
||||
"image_param",
|
||||
"image_multipass_integer_coord",
|
||||
"image_multipass_float_coord",
|
||||
"explicit_s2v_bool",
|
||||
"explicit_s2v_char",
|
||||
"explicit_s2v_uchar",
|
||||
"explicit_s2v_short",
|
||||
"explicit_s2v_ushort",
|
||||
"explicit_s2v_int",
|
||||
"explicit_s2v_uint",
|
||||
"explicit_s2v_long",
|
||||
"explicit_s2v_ulong",
|
||||
"explicit_s2v_float",
|
||||
"explicit_s2v_double",
|
||||
|
||||
"enqueue_map_buffer",
|
||||
"enqueue_map_image",
|
||||
|
||||
"work_item_functions",
|
||||
|
||||
"astype",
|
||||
|
||||
"async_copy_global_to_local",
|
||||
"async_copy_local_to_global",
|
||||
"async_strided_copy_global_to_local",
|
||||
"async_strided_copy_local_to_global",
|
||||
"prefetch",
|
||||
|
||||
"kernel_call_kernel_function",
|
||||
"host_numeric_constants",
|
||||
"kernel_numeric_constants",
|
||||
"kernel_limit_constants",
|
||||
"kernel_preprocessor_macros",
|
||||
|
||||
"parameter_types",
|
||||
|
||||
"vector_creation",
|
||||
"vec_type_hint",
|
||||
|
||||
"kernel_memory_alignment_local",
|
||||
"kernel_memory_alignment_global",
|
||||
"kernel_memory_alignment_constant",
|
||||
"kernel_memory_alignment_private",
|
||||
|
||||
"global_work_offsets",
|
||||
"get_global_offset",
|
||||
|
||||
"all",
|
||||
};
|
||||
|
||||
ct_assert((sizeof(basefn_names) / sizeof(basefn_names[0]) - 1) == (sizeof(basefn_list) / sizeof(basefn_list[0])));
|
||||
|
||||
// Number of entries in basefn_names; the count includes the trailing "all" entry.
int num_fns = sizeof(basefn_names) / sizeof(basefn_names[0]);
|
||||
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
int err = runTestHarness( argc, argv, num_fns, basefn_list, basefn_names, false, false, 0 );
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
142
test_conformance/basic/procs.h
Normal file
142
test_conformance/basic/procs.h
Normal file
@@ -0,0 +1,142 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "../../test_common/harness/kernelHelpers.h"
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#include "../../test_common/harness/errorHelpers.h"
|
||||
#include "../../test_common/harness/typeWrappers.h"
|
||||
#include "../../test_common/harness/conversions.h"
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
|
||||
extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
|
||||
|
||||
extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_long2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_intmath_long4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_hiloeo(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_if(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sizeof(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_loop(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_pointer_cast(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_arg_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_def(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_local_kernel_scope(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_constant_source(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_writeimage(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_writeimage_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_writeimage_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_multireadimageonefmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_multireadimagemultifmt(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagereadwrite3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage3d_int16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_readimage3d_fp32(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_bufferreadwriterect(cl_device_id device, cl_context context, cl_command_queue queue_, int num_elements);
|
||||
extern int test_imagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagerandomcopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
|
||||
extern int test_arrayimagecopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_arrayimagecopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagenpot(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sampler_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_sampler_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_createkernelsinprogram(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_single_large_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_multiple_max_allocation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_arrayreadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagedim_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_imagedim_non_pow2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_param(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_multipass_integer_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_image_multipass_float_coord(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_vload_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vload_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vload_constant(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vload_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vstore_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vstore_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vstore_private(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_explicit_s2v_bool(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_explicit_s2v_double(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_astype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_native_kernel(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_async_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_async_strided_copy_local_to_global(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_prefetch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_host_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_numeric_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_kernel_limit_constants(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_kernel_call_kernel_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
extern int test_basic_parameter_types(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
extern int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
||||
|
||||
|
||||
extern int test_kernel_memory_alignment_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_kernel_memory_alignment_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_kernel_memory_alignment_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_kernel_memory_alignment_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
extern int test_global_work_offsets(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
extern int test_get_global_offset(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems );
|
||||
|
||||
|
||||
|
||||
3
test_conformance/basic/run_array
Normal file
3
test_conformance/basic/run_array
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Run the array read/write, array copy and rectangular buffer tests from the
# directory that contains this script.  Extra command-line arguments are
# forwarded to test_basic unchanged.

# Quote the path so a directory name containing spaces survives, and stop if
# the directory cannot be entered rather than running from the wrong place.
cd "$(dirname "$0")" || exit 1

# "$@" (quoted) keeps each forwarded argument as a single word.
./test_basic arrayreadwrite arraycopy bufferreadwriterect "$@"
|
||||
3
test_conformance/basic/run_array_image_copy
Normal file
3
test_conformance/basic/run_array_image_copy
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/bin/sh
# Run the array<->image copy tests from the directory that contains this
# script.  Extra command-line arguments are forwarded to test_basic unchanged.

# Quote the path so a directory name containing spaces survives, and stop if
# the directory cannot be entered rather than running from the wrong place.
cd "$(dirname "$0")" || exit 1

# With no extra arguments "$@" expands to nothing, so the default invocation
# is the same as before.
./test_basic arrayimagecopy arrayimagecopy3d imagearraycopy "$@"
|
||||
17
test_conformance/basic/run_image
Normal file
17
test_conformance/basic/run_image
Normal file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Run the image-related basic tests from the directory that contains this
# script.  Extra command-line arguments are forwarded to test_basic unchanged.

# Quote the path so a directory name containing spaces survives, and stop if
# the directory cannot be entered rather than running from the wrong place.
cd "$(dirname "$0")" || exit 1

# "$@" (quoted) keeps each forwarded argument as a single word.
./test_basic \
    imagecopy imagerandomcopy \
    imagearraycopy imagearraycopy3d \
    image_r8 \
    readimage readimage_int16 readimage_fp32 \
    writeimage writeimage_int16 writeimage_fp32 \
    imagenpot \
    image_param \
    image_multipass_integer_coord \
    readimage3d \
    readimage3d_int16 \
    readimage3d_fp32 \
    imagereadwrite3d \
    imagereadwrite \
    "$@"
|
||||
4
test_conformance/basic/run_multi_read_image
Normal file
4
test_conformance/basic/run_multi_read_image
Normal file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh
# Run the two multi-read-image tests (mri_one, mri_multiple) from the
# directory that contains this script.  Extra command-line arguments are
# forwarded to test_basic unchanged.

# Quote the path so a directory name containing spaces survives, and stop if
# the directory cannot be entered rather than running from the wrong place.
cd "$(dirname "$0")" || exit 1

# With no extra arguments "$@" expands to nothing, so the default invocation
# is the same as before.
./test_basic mri_one mri_multiple "$@"
|
||||
|
||||
206
test_conformance/basic/test_arraycopy.c
Normal file
206
test_conformance/basic/test_arraycopy.c
Normal file
@@ -0,0 +1,206 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
|
||||
#include "procs.h"
|
||||
|
||||
// OpenCL C source of the copy kernel: each work-item reads the element of
// src at its global id and stores it at the same index in dst.
const char *copy_kernel_code =
    "__kernel void test_copy(__global unsigned int *src, __global unsigned int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = src[tid];\n"
    "}\n";
|
||||
|
||||
int
|
||||
test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
|
||||
{
|
||||
cl_uint *input_ptr, *output_ptr;
|
||||
cl_mem streams[4], results;
|
||||
cl_program program;
|
||||
cl_kernel kernel;
|
||||
unsigned num_elements = 128 * 1024;
|
||||
cl_uint num_copies = 1;
|
||||
size_t delta_offset;
|
||||
unsigned i;
|
||||
cl_int err;
|
||||
MTdata d;
|
||||
|
||||
int error_count = 0;
|
||||
|
||||
input_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
output_ptr = (cl_uint*)malloc(sizeof(cl_uint) * num_elements);
|
||||
|
||||
// results
|
||||
results = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
/*****************************************************************************************************************************************/
|
||||
#pragma mark client backing
|
||||
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
d = init_genrand( gRandomSeed );
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
// client backing
|
||||
streams[0] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
delta_offset = num_elements * sizeof(cl_uint) / num_copies;
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
err = clEnqueueCopyBuffer(queue, streams[0], results, offset, offset, delta_offset, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyBuffer failed");
|
||||
}
|
||||
|
||||
// Try upload from client backing
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with clEnqueueCopyBuffer passed\n");
|
||||
|
||||
|
||||
|
||||
#pragma mark framework backing (no client data)
|
||||
|
||||
log_info("Testing with clEnqueueWriteBuffer and clEnqueueCopyBuffer\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
|
||||
// no backing
|
||||
streams[2] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE) , sizeof(cl_uint) * num_elements, NULL, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
for (i=0; i<num_copies; i++)
|
||||
{
|
||||
size_t offset = i * delta_offset;
|
||||
|
||||
// Copy the array up from host ptr
|
||||
err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_uint)*num_elements, input_ptr, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueWriteBuffer failed");
|
||||
|
||||
err = clEnqueueCopyBuffer(queue, streams[2], results, offset, offset, delta_offset, 0, NULL, NULL);
|
||||
test_error(err, "clEnqueueCopyBuffer failed");
|
||||
}
|
||||
|
||||
err = clEnqueueReadBuffer( queue, results, true, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err)
|
||||
log_error("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer FAILED\n");
|
||||
else
|
||||
log_info("\tclEnqueueWriteBuffer and clEnqueueCopyBuffer passed\n");
|
||||
|
||||
/*****************************************************************************************************************************************/
|
||||
#pragma mark kernel copy test
|
||||
|
||||
log_info("Testing CL_MEM_USE_HOST_PTR buffer with kernel copy\n");
|
||||
// randomize data
|
||||
for (i=0; i<num_elements; i++)
|
||||
input_ptr[i] = (cl_uint)(genrand_int32(d) & 0x7FFFFFFF);
|
||||
free_mtdata(d); d= NULL;
|
||||
|
||||
// client backing
|
||||
streams[3] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_USE_HOST_PTR), sizeof(cl_uint) * num_elements, input_ptr, &err);
|
||||
test_error(err, "clCreateBuffer failed");
|
||||
|
||||
err = create_single_kernel_helper(context, &program, &kernel, 1, &copy_kernel_code, "test_copy" );
|
||||
test_error(err, "create_single_kernel_helper failed");
|
||||
|
||||
err = clSetKernelArg(kernel, 0, sizeof streams[3], &streams[3]);
|
||||
err |= clSetKernelArg(kernel, 1, sizeof results, &results);
|
||||
test_error(err, "clSetKernelArg failed");
|
||||
|
||||
size_t threads[3] = {num_elements, 0, 0};
|
||||
|
||||
err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueNDRangeKernel failed");
|
||||
|
||||
err = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, num_elements*sizeof(cl_uint), output_ptr, 0, NULL, NULL );
|
||||
test_error(err, "clEnqueueReadBuffer failed");
|
||||
|
||||
for (i=0; i<num_elements; i++)
|
||||
{
|
||||
if (input_ptr[i] != output_ptr[i])
|
||||
{
|
||||
err = -1;
|
||||
error_count++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Keep track of multiple errors.
|
||||
if (error_count != 0)
|
||||
err = error_count;
|
||||
|
||||
if (err)
|
||||
log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n");
|
||||
else
|
||||
log_info("\tCL_MEM_USE_HOST_PTR buffer with kernel copy passed\n");
|
||||
|
||||
|
||||
clReleaseProgram(program);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseMemObject(results);
|
||||
clReleaseMemObject(streams[0]);
|
||||
clReleaseMemObject(streams[2]);
|
||||
clReleaseMemObject(streams[3]);
|
||||
|
||||
free(input_ptr);
|
||||
free(output_ptr);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user