mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-19 06:09:01 +00:00
Reformat test harness code (#940)
* Reformat common help text Signed-off-by: Stuart Brady <stuart.brady@arm.com> * Reformat test harness code This goes part of the way to fixing issue #625. Signed-off-by: Stuart Brady <stuart.brady@arm.com>
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -16,53 +16,54 @@
|
|||||||
#ifndef THREAD_POOL_H
|
#ifndef THREAD_POOL_H
|
||||||
#define THREAD_POOL_H
|
#define THREAD_POOL_H
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
#include <OpenCL/opencl.h>
|
#include <OpenCL/opencl.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/cl.h>
|
#include <CL/cl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//
|
//
|
||||||
// An atomic add operator
|
// An atomic add operator
|
||||||
cl_int ThreadPool_AtomicAdd( volatile cl_int *a, cl_int b ); // returns old value
|
cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b); // returns old value
|
||||||
|
|
||||||
// Your function prototype
|
// Your function prototype
|
||||||
//
|
//
|
||||||
// A function pointer to the function you want to execute in a multithreaded context. No
|
// A function pointer to the function you want to execute in a multithreaded
|
||||||
// synchronization primitives are provided, other than the atomic add above. You may not
|
// context. No synchronization primitives are provided, other than the atomic
|
||||||
// call ThreadPool_Do from your function. ThreadPool_AtomicAdd() and GetThreadCount() should
|
// add above. You may not call ThreadPool_Do from your function.
|
||||||
// work, however.
|
// ThreadPool_AtomicAdd() and GetThreadCount() should work, however.
|
||||||
//
|
//
|
||||||
// job ids and thread ids are 0 based. If number of jobs or threads was 8, they will be numbered 0 through 7.
|
// job ids and thread ids are 0 based. If number of jobs or threads was 8, they
|
||||||
// Note that while every job will be run, it is not guaranteed that every thread will wake up before
|
// will be numbered 0 through 7. Note that while every job will be run, it is
|
||||||
// the work is done.
|
// not guaranteed that every thread will wake up before the work is done.
|
||||||
typedef cl_int (*TPFuncPtr)( cl_uint /*job_id*/, cl_uint /* thread_id */, void *userInfo );
|
typedef cl_int (*TPFuncPtr)(cl_uint /*job_id*/, cl_uint /* thread_id */,
|
||||||
|
void *userInfo);
|
||||||
|
|
||||||
// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
|
// returns first non-zero result from func_ptr, or CL_SUCCESS if all are zero.
|
||||||
// Some workitems may not run if a non-zero result is returned from func_ptr().
|
// Some workitems may not run if a non-zero result is returned from func_ptr().
|
||||||
// This function may not be called from a TPFuncPtr.
|
// This function may not be called from a TPFuncPtr.
|
||||||
cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo);
|
||||||
cl_uint count,
|
|
||||||
void *userInfo );
|
|
||||||
|
|
||||||
// Returns the number of worker threads that underlie the threadpool. The value passed
|
// Returns the number of worker threads that underlie the threadpool. The value
|
||||||
// as the TPFuncPtrs thread_id will be between 0 and this value less one, inclusive.
|
// passed as the TPFuncPtrs thread_id will be between 0 and this value less one,
|
||||||
// This is safe to call from a TPFuncPtr.
|
// inclusive. This is safe to call from a TPFuncPtr.
|
||||||
cl_uint GetThreadCount( void );
|
cl_uint GetThreadCount(void);
|
||||||
|
|
||||||
// SetThreadCount() may be used to artificially set the number of worker threads
|
// SetThreadCount() may be used to artificially set the number of worker threads
|
||||||
// If the value is 0 (the default) the number of threads will be determined based on
|
// If the value is 0 (the default) the number of threads will be determined
|
||||||
// the number of CPU cores. If it is a unicore machine, then 2 will be used, so
|
// based on the number of CPU cores. If it is a unicore machine, then 2 will be
|
||||||
// that we still get some testing for thread safety.
|
// used, so that we still get some testing for thread safety.
|
||||||
//
|
//
|
||||||
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then the
|
// If count < 2 or the CL_TEST_SINGLE_THREADED environment variable is set then
|
||||||
// code will run single threaded, but will report an error to indicate that the test
|
// the code will run single threaded, but will report an error to indicate that
|
||||||
// is invalid. This option is intended for debugging purposes only. It is suggested
|
// the test is invalid. This option is intended for debugging purposes only. It
|
||||||
// as a convention that test apps set the thread count to 1 in response to the -m flag.
|
// is suggested as a convention that test apps set the thread count to 1 in
|
||||||
|
// response to the -m flag.
|
||||||
//
|
//
|
||||||
// SetThreadCount() must be called before the first call to GetThreadCount() or ThreadPool_Do(),
|
// SetThreadCount() must be called before the first call to GetThreadCount() or
|
||||||
// otherwise the behavior is undefined. It may not be called from a TPFuncPtr.
|
// ThreadPool_Do(), otherwise the behavior is undefined. It may not be called
|
||||||
void SetThreadCount( int count );
|
// from a TPFuncPtr.
|
||||||
|
void SetThreadCount(int count);
|
||||||
|
|
||||||
|
|
||||||
#endif /* THREAD_POOL_H */
|
#endif /* THREAD_POOL_H */
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
#ifndef HARNESS_ALLOC_H_
|
#ifndef HARNESS_ALLOC_H_
|
||||||
#define HARNESS_ALLOC_H_
|
#define HARNESS_ALLOC_H_
|
||||||
|
|
||||||
#if defined(__linux__) || defined (linux) || defined(__APPLE__)
|
#if defined(__linux__) || defined(linux) || defined(__APPLE__)
|
||||||
#if defined(__ANDROID__)
|
#if defined(__ANDROID__)
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#else
|
#else
|
||||||
@@ -29,43 +29,41 @@
|
|||||||
#include "mingw_compat.h"
|
#include "mingw_compat.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void * align_malloc(size_t size, size_t alignment)
|
static void* align_malloc(size_t size, size_t alignment)
|
||||||
{
|
{
|
||||||
#if defined(_WIN32) && defined(_MSC_VER)
|
#if defined(_WIN32) && defined(_MSC_VER)
|
||||||
return _aligned_malloc(size, alignment);
|
return _aligned_malloc(size, alignment);
|
||||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
#elif defined(__linux__) || defined(linux) || defined(__APPLE__)
|
||||||
void * ptr = NULL;
|
void* ptr = NULL;
|
||||||
#if defined(__ANDROID__)
|
#if defined(__ANDROID__)
|
||||||
ptr = memalign(alignment, size);
|
ptr = memalign(alignment, size);
|
||||||
if ( ptr )
|
if (ptr) return ptr;
|
||||||
return ptr;
|
|
||||||
#else
|
#else
|
||||||
if (alignment < sizeof(void*)) {
|
if (alignment < sizeof(void*))
|
||||||
|
{
|
||||||
alignment = sizeof(void*);
|
alignment = sizeof(void*);
|
||||||
}
|
}
|
||||||
if (0 == posix_memalign(&ptr, alignment, size))
|
if (0 == posix_memalign(&ptr, alignment, size)) return ptr;
|
||||||
return ptr;
|
|
||||||
#endif
|
#endif
|
||||||
return NULL;
|
return NULL;
|
||||||
#elif defined(__MINGW32__)
|
#elif defined(__MINGW32__)
|
||||||
return __mingw_aligned_malloc(size, alignment);
|
return __mingw_aligned_malloc(size, alignment);
|
||||||
#else
|
#else
|
||||||
#error "Please add support OS for aligned malloc"
|
#error "Please add support OS for aligned malloc"
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void align_free(void * ptr)
|
static void align_free(void* ptr)
|
||||||
{
|
{
|
||||||
#if defined(_WIN32) && defined(_MSC_VER)
|
#if defined(_WIN32) && defined(_MSC_VER)
|
||||||
_aligned_free(ptr);
|
_aligned_free(ptr);
|
||||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
#elif defined(__linux__) || defined(linux) || defined(__APPLE__)
|
||||||
return free(ptr);
|
return free(ptr);
|
||||||
#elif defined(__MINGW32__)
|
#elif defined(__MINGW32__)
|
||||||
return __mingw_aligned_free(ptr);
|
return __mingw_aligned_free(ptr);
|
||||||
#else
|
#else
|
||||||
#error "Please add support OS for aligned free"
|
#error "Please add support OS for aligned free"
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // #ifndef HARNESS_ALLOC_H_
|
#endif // #ifndef HARNESS_ALLOC_H_
|
||||||
|
|
||||||
|
|||||||
@@ -26,18 +26,15 @@
|
|||||||
#include "errorHelpers.h"
|
#include "errorHelpers.h"
|
||||||
|
|
||||||
|
|
||||||
// helper function to replace clCreateImage2D, to make the existing code use
|
// helper function to replace clCreateImage2D, to make the existing code use
|
||||||
// the functions of version 1.2 and version 1.1 respectively
|
// the functions of version 1.2 and version 1.1 respectively
|
||||||
|
|
||||||
static inline cl_mem create_image_2d (cl_context context,
|
static inline cl_mem create_image_2d(cl_context context, cl_mem_flags flags,
|
||||||
cl_mem_flags flags,
|
|
||||||
const cl_image_format *image_format,
|
const cl_image_format *image_format,
|
||||||
size_t image_width,
|
size_t image_width, size_t image_height,
|
||||||
size_t image_height,
|
size_t image_row_pitch, void *host_ptr,
|
||||||
size_t image_row_pitch,
|
|
||||||
void *host_ptr,
|
|
||||||
cl_int *errcode_ret)
|
cl_int *errcode_ret)
|
||||||
{
|
{
|
||||||
cl_mem mImage = NULL;
|
cl_mem mImage = NULL;
|
||||||
|
|
||||||
#ifdef CL_VERSION_1_2
|
#ifdef CL_VERSION_1_2
|
||||||
@@ -45,80 +42,81 @@
|
|||||||
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
|
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||||
image_desc_dest.image_width = image_width;
|
image_desc_dest.image_width = image_width;
|
||||||
image_desc_dest.image_height = image_height;
|
image_desc_dest.image_height = image_height;
|
||||||
image_desc_dest.image_depth= 0;// not usedfor 2d
|
image_desc_dest.image_depth = 0; // not usedfor 2d
|
||||||
image_desc_dest.image_array_size = 0;// not used for 2d
|
image_desc_dest.image_array_size = 0; // not used for 2d
|
||||||
image_desc_dest.image_row_pitch = image_row_pitch;
|
image_desc_dest.image_row_pitch = image_row_pitch;
|
||||||
image_desc_dest.image_slice_pitch = 0;
|
image_desc_dest.image_slice_pitch = 0;
|
||||||
image_desc_dest.num_mip_levels = 0;
|
image_desc_dest.num_mip_levels = 0;
|
||||||
image_desc_dest.num_samples = 0;
|
image_desc_dest.num_samples = 0;
|
||||||
image_desc_dest.mem_object = NULL;// no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
image_desc_dest.mem_object =
|
||||||
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, host_ptr, errcode_ret );
|
NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
// CL_VERSION_1_1, so always is NULL
|
||||||
// Log an info message and rely on the calling function to produce an error
|
mImage = clCreateImage(context, flags, image_format, &image_desc_dest,
|
||||||
// if necessary.
|
host_ptr, errcode_ret);
|
||||||
|
if (errcode_ret && (*errcode_ret))
|
||||||
|
{
|
||||||
|
// Log an info message and rely on the calling function to produce an
|
||||||
|
// error if necessary.
|
||||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
mImage = clCreateImage2D( context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret );
|
mImage =
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
clCreateImage2D(context, flags, image_format, image_width, image_height,
|
||||||
// Log an info message and rely on the calling function to produce an error
|
image_row_pitch, host_ptr, errcode_ret);
|
||||||
// if necessary.
|
if (errcode_ret && (*errcode_ret))
|
||||||
|
{
|
||||||
|
// Log an info message and rely on the calling function to produce an
|
||||||
|
// error if necessary.
|
||||||
log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage2D failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return mImage;
|
return mImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
// helper function to replace clCreateImage2D, to make the existing code use
|
// helper function to replace clCreateImage2D, to make the existing code use
|
||||||
// the functions of version 1.2 and version 1.1 respectively
|
// the functions of version 1.2 and version 1.1 respectively
|
||||||
|
|
||||||
static inline cl_mem create_image_2d_buffer (cl_context context,
|
static inline cl_mem
|
||||||
cl_mem_flags flags,
|
create_image_2d_buffer(cl_context context, cl_mem_flags flags,
|
||||||
const cl_image_format *image_format,
|
const cl_image_format *image_format, size_t image_width,
|
||||||
size_t image_width,
|
size_t image_height, size_t image_row_pitch,
|
||||||
size_t image_height,
|
cl_mem buffer, cl_int *errcode_ret)
|
||||||
size_t image_row_pitch,
|
{
|
||||||
cl_mem buffer,
|
|
||||||
cl_int *errcode_ret)
|
|
||||||
{
|
|
||||||
cl_mem mImage = NULL;
|
cl_mem mImage = NULL;
|
||||||
|
|
||||||
cl_image_desc image_desc_dest;
|
cl_image_desc image_desc_dest;
|
||||||
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
|
image_desc_dest.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||||
image_desc_dest.image_width = image_width;
|
image_desc_dest.image_width = image_width;
|
||||||
image_desc_dest.image_height = image_height;
|
image_desc_dest.image_height = image_height;
|
||||||
image_desc_dest.image_depth= 0;// not usedfor 2d
|
image_desc_dest.image_depth = 0; // not usedfor 2d
|
||||||
image_desc_dest.image_array_size = 0;// not used for 2d
|
image_desc_dest.image_array_size = 0; // not used for 2d
|
||||||
image_desc_dest.image_row_pitch = image_row_pitch;
|
image_desc_dest.image_row_pitch = image_row_pitch;
|
||||||
image_desc_dest.image_slice_pitch = 0;
|
image_desc_dest.image_slice_pitch = 0;
|
||||||
image_desc_dest.num_mip_levels = 0;
|
image_desc_dest.num_mip_levels = 0;
|
||||||
image_desc_dest.num_samples = 0;
|
image_desc_dest.num_samples = 0;
|
||||||
image_desc_dest.mem_object = buffer;
|
image_desc_dest.mem_object = buffer;
|
||||||
mImage = clCreateImage( context, flags, image_format, &image_desc_dest, NULL, errcode_ret );
|
mImage = clCreateImage(context, flags, image_format, &image_desc_dest, NULL,
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
errcode_ret);
|
||||||
// Log an info message and rely on the calling function to produce an error
|
if (errcode_ret && (*errcode_ret))
|
||||||
// if necessary.
|
{
|
||||||
|
// Log an info message and rely on the calling function to produce an
|
||||||
|
// error if necessary.
|
||||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
return mImage;
|
return mImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline cl_mem create_image_3d(cl_context context, cl_mem_flags flags,
|
||||||
static inline cl_mem create_image_3d (cl_context context,
|
|
||||||
cl_mem_flags flags,
|
|
||||||
const cl_image_format *image_format,
|
const cl_image_format *image_format,
|
||||||
size_t image_width,
|
size_t image_width, size_t image_height,
|
||||||
size_t image_height,
|
size_t image_depth, size_t image_row_pitch,
|
||||||
size_t image_depth,
|
size_t image_slice_pitch, void *host_ptr,
|
||||||
size_t image_row_pitch,
|
|
||||||
size_t image_slice_pitch,
|
|
||||||
void *host_ptr,
|
|
||||||
cl_int *errcode_ret)
|
cl_int *errcode_ret)
|
||||||
{
|
{
|
||||||
cl_mem mImage;
|
cl_mem mImage;
|
||||||
|
|
||||||
#ifdef CL_VERSION_1_2
|
#ifdef CL_VERSION_1_2
|
||||||
@@ -127,55 +125,45 @@
|
|||||||
image_desc.image_width = image_width;
|
image_desc.image_width = image_width;
|
||||||
image_desc.image_height = image_height;
|
image_desc.image_height = image_height;
|
||||||
image_desc.image_depth = image_depth;
|
image_desc.image_depth = image_depth;
|
||||||
image_desc.image_array_size = 0;// not used for one image
|
image_desc.image_array_size = 0; // not used for one image
|
||||||
image_desc.image_row_pitch = image_row_pitch;
|
image_desc.image_row_pitch = image_row_pitch;
|
||||||
image_desc.image_slice_pitch = image_slice_pitch;
|
image_desc.image_slice_pitch = image_slice_pitch;
|
||||||
image_desc.num_mip_levels = 0;
|
image_desc.num_mip_levels = 0;
|
||||||
image_desc.num_samples = 0;
|
image_desc.num_samples = 0;
|
||||||
image_desc.mem_object = NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in CL_VERSION_1_1, so always is NULL
|
image_desc.mem_object =
|
||||||
mImage = clCreateImage( context,
|
NULL; // no image type of CL_MEM_OBJECT_IMAGE1D_BUFFER in
|
||||||
flags,
|
// CL_VERSION_1_1, so always is NULL
|
||||||
image_format,
|
mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
|
||||||
&image_desc,
|
errcode_ret);
|
||||||
host_ptr,
|
if (errcode_ret && (*errcode_ret))
|
||||||
errcode_ret );
|
{
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
// Log an info message and rely on the calling function to produce an
|
||||||
// Log an info message and rely on the calling function to produce an error
|
// error if necessary.
|
||||||
// if necessary.
|
|
||||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
mImage = clCreateImage3D( context,
|
mImage = clCreateImage3D(context, flags, image_format, image_width,
|
||||||
flags, image_format,
|
image_height, image_depth, image_row_pitch,
|
||||||
image_width,
|
image_slice_pitch, host_ptr, errcode_ret);
|
||||||
image_height,
|
if (errcode_ret && (*errcode_ret))
|
||||||
image_depth,
|
{
|
||||||
image_row_pitch,
|
// Log an info message and rely on the calling function to produce an
|
||||||
image_slice_pitch,
|
// error if necessary.
|
||||||
host_ptr,
|
|
||||||
errcode_ret );
|
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
|
||||||
// Log an info message and rely on the calling function to produce an error
|
|
||||||
// if necessary.
|
|
||||||
log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage3D failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return mImage;
|
return mImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline cl_mem create_image_2d_array (cl_context context,
|
static inline cl_mem
|
||||||
cl_mem_flags flags,
|
create_image_2d_array(cl_context context, cl_mem_flags flags,
|
||||||
const cl_image_format *image_format,
|
const cl_image_format *image_format, size_t image_width,
|
||||||
size_t image_width,
|
size_t image_height, size_t image_array_size,
|
||||||
size_t image_height,
|
size_t image_row_pitch, size_t image_slice_pitch,
|
||||||
size_t image_array_size,
|
void *host_ptr, cl_int *errcode_ret)
|
||||||
size_t image_row_pitch,
|
{
|
||||||
size_t image_slice_pitch,
|
|
||||||
void *host_ptr,
|
|
||||||
cl_int *errcode_ret)
|
|
||||||
{
|
|
||||||
cl_mem mImage;
|
cl_mem mImage;
|
||||||
|
|
||||||
cl_image_desc image_desc;
|
cl_image_desc image_desc;
|
||||||
@@ -189,31 +177,23 @@
|
|||||||
image_desc.num_mip_levels = 0;
|
image_desc.num_mip_levels = 0;
|
||||||
image_desc.num_samples = 0;
|
image_desc.num_samples = 0;
|
||||||
image_desc.mem_object = NULL;
|
image_desc.mem_object = NULL;
|
||||||
mImage = clCreateImage( context,
|
mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
|
||||||
flags,
|
errcode_ret);
|
||||||
image_format,
|
if (errcode_ret && (*errcode_ret))
|
||||||
&image_desc,
|
{
|
||||||
host_ptr,
|
// Log an info message and rely on the calling function to produce an
|
||||||
errcode_ret );
|
// error if necessary.
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
|
||||||
// Log an info message and rely on the calling function to produce an error
|
|
||||||
// if necessary.
|
|
||||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
return mImage;
|
return mImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline cl_mem create_image_1d_array (cl_context context,
|
static inline cl_mem create_image_1d_array(
|
||||||
cl_mem_flags flags,
|
cl_context context, cl_mem_flags flags, const cl_image_format *image_format,
|
||||||
const cl_image_format *image_format,
|
size_t image_width, size_t image_array_size, size_t image_row_pitch,
|
||||||
size_t image_width,
|
size_t image_slice_pitch, void *host_ptr, cl_int *errcode_ret)
|
||||||
size_t image_array_size,
|
{
|
||||||
size_t image_row_pitch,
|
|
||||||
size_t image_slice_pitch,
|
|
||||||
void *host_ptr,
|
|
||||||
cl_int *errcode_ret)
|
|
||||||
{
|
|
||||||
cl_mem mImage;
|
cl_mem mImage;
|
||||||
|
|
||||||
cl_image_desc image_desc;
|
cl_image_desc image_desc;
|
||||||
@@ -227,34 +207,29 @@
|
|||||||
image_desc.num_mip_levels = 0;
|
image_desc.num_mip_levels = 0;
|
||||||
image_desc.num_samples = 0;
|
image_desc.num_samples = 0;
|
||||||
image_desc.mem_object = NULL;
|
image_desc.mem_object = NULL;
|
||||||
mImage = clCreateImage( context,
|
mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
|
||||||
flags,
|
errcode_ret);
|
||||||
image_format,
|
if (errcode_ret && (*errcode_ret))
|
||||||
&image_desc,
|
{
|
||||||
host_ptr,
|
// Log an info message and rely on the calling function to produce an
|
||||||
errcode_ret );
|
// error if necessary.
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
|
||||||
// Log an info message and rely on the calling function to produce an error
|
|
||||||
// if necessary.
|
|
||||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
return mImage;
|
return mImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline cl_mem create_image_1d (cl_context context,
|
static inline cl_mem create_image_1d(cl_context context, cl_mem_flags flags,
|
||||||
cl_mem_flags flags,
|
|
||||||
const cl_image_format *image_format,
|
const cl_image_format *image_format,
|
||||||
size_t image_width,
|
size_t image_width, size_t image_row_pitch,
|
||||||
size_t image_row_pitch,
|
void *host_ptr, cl_mem buffer,
|
||||||
void *host_ptr,
|
|
||||||
cl_mem buffer,
|
|
||||||
cl_int *errcode_ret)
|
cl_int *errcode_ret)
|
||||||
{
|
{
|
||||||
cl_mem mImage;
|
cl_mem mImage;
|
||||||
|
|
||||||
cl_image_desc image_desc;
|
cl_image_desc image_desc;
|
||||||
image_desc.image_type = buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER: CL_MEM_OBJECT_IMAGE1D;
|
image_desc.image_type =
|
||||||
|
buffer ? CL_MEM_OBJECT_IMAGE1D_BUFFER : CL_MEM_OBJECT_IMAGE1D;
|
||||||
image_desc.image_width = image_width;
|
image_desc.image_width = image_width;
|
||||||
image_desc.image_height = 1;
|
image_desc.image_height = 1;
|
||||||
image_desc.image_depth = 1;
|
image_desc.image_depth = 1;
|
||||||
@@ -263,20 +238,17 @@
|
|||||||
image_desc.num_mip_levels = 0;
|
image_desc.num_mip_levels = 0;
|
||||||
image_desc.num_samples = 0;
|
image_desc.num_samples = 0;
|
||||||
image_desc.mem_object = buffer;
|
image_desc.mem_object = buffer;
|
||||||
mImage = clCreateImage( context,
|
mImage = clCreateImage(context, flags, image_format, &image_desc, host_ptr,
|
||||||
flags,
|
errcode_ret);
|
||||||
image_format,
|
if (errcode_ret && (*errcode_ret))
|
||||||
&image_desc,
|
{
|
||||||
host_ptr,
|
// Log an info message and rely on the calling function to produce an
|
||||||
errcode_ret );
|
// error if necessary.
|
||||||
if (errcode_ret && (*errcode_ret)) {
|
|
||||||
// Log an info message and rely on the calling function to produce an error
|
|
||||||
// if necessary.
|
|
||||||
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
log_info("clCreateImage failed (%d)\n", *errcode_ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
return mImage;
|
return mImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -16,14 +16,14 @@
|
|||||||
#ifndef _COMPAT_H_
|
#ifndef _COMPAT_H_
|
||||||
#define _COMPAT_H_
|
#define _COMPAT_H_
|
||||||
|
|
||||||
#if defined(_WIN32) && defined (_MSC_VER)
|
#if defined(_WIN32) && defined(_MSC_VER)
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
#define EXTERN_C extern "C"
|
#define EXTERN_C extern "C"
|
||||||
#else
|
#else
|
||||||
#define EXTERN_C
|
#define EXTERN_C
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@@ -34,8 +34,8 @@
|
|||||||
#include <stdlib.h> // On Windows, _MAX_PATH defined there.
|
#include <stdlib.h> // On Windows, _MAX_PATH defined there.
|
||||||
|
|
||||||
// llabs appeared in MS C v16 (VS 10/2010).
|
// llabs appeared in MS C v16 (VS 10/2010).
|
||||||
#if defined( _MSC_VER ) && _MSC_VER <= 1500
|
#if defined(_MSC_VER) && _MSC_VER <= 1500
|
||||||
EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
|
EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@@ -44,16 +44,15 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
// stdbool.h appeared in MS C v18 (VS 12/2013).
|
// stdbool.h appeared in MS C v18 (VS 12/2013).
|
||||||
#if defined( _MSC_VER ) && MSC_VER <= 1700
|
#if defined(_MSC_VER) && MSC_VER <= 1700
|
||||||
#if !defined(__cplusplus)
|
#if !defined(__cplusplus)
|
||||||
typedef char bool;
|
typedef char bool;
|
||||||
#define true 1
|
#define true 1
|
||||||
#define false 0
|
#define false 0
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#include <stdbool.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
#else
|
||||||
|
#include <stdbool.h>
|
||||||
|
#endif // defined(_MSC_VER) && MSC_VER <= 1700
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -61,7 +60,9 @@ typedef char bool;
|
|||||||
//
|
//
|
||||||
|
|
||||||
// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
|
// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
|
||||||
#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
|
#if defined(_MSC_VER) \
|
||||||
|
&& (!defined(__INTEL_COMPILER) && _MSC_VER <= 1500 \
|
||||||
|
|| defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1200)
|
||||||
typedef unsigned char uint8_t;
|
typedef unsigned char uint8_t;
|
||||||
typedef char int8_t;
|
typedef char int8_t;
|
||||||
typedef unsigned short uint16_t;
|
typedef unsigned short uint16_t;
|
||||||
@@ -74,11 +75,10 @@ typedef long long int64_t;
|
|||||||
#ifndef __STDC_LIMIT_MACROS
|
#ifndef __STDC_LIMIT_MACROS
|
||||||
#define __STDC_LIMIT_MACROS
|
#define __STDC_LIMIT_MACROS
|
||||||
#endif
|
#endif
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// float.h
|
// float.h
|
||||||
//
|
//
|
||||||
@@ -86,24 +86,23 @@ typedef long long int64_t;
|
|||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// fenv.h
|
// fenv.h
|
||||||
//
|
//
|
||||||
|
|
||||||
// fenv.h appeared in MS C v18 (VS 12/2013).
|
// fenv.h appeared in MS C v18 (VS 12/2013).
|
||||||
#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
|
#if defined(_MSC_VER) && _MSC_VER <= 1700 && !defined(__INTEL_COMPILER)
|
||||||
// reimplement fenv.h because windows doesn't have it
|
// reimplement fenv.h because windows doesn't have it
|
||||||
#define FE_INEXACT 0x0020
|
#define FE_INEXACT 0x0020
|
||||||
#define FE_UNDERFLOW 0x0010
|
#define FE_UNDERFLOW 0x0010
|
||||||
#define FE_OVERFLOW 0x0008
|
#define FE_OVERFLOW 0x0008
|
||||||
#define FE_DIVBYZERO 0x0004
|
#define FE_DIVBYZERO 0x0004
|
||||||
#define FE_INVALID 0x0001
|
#define FE_INVALID 0x0001
|
||||||
#define FE_ALL_EXCEPT 0x003D
|
#define FE_ALL_EXCEPT 0x003D
|
||||||
int fetestexcept(int excepts);
|
int fetestexcept(int excepts);
|
||||||
int feclearexcept(int excepts);
|
int feclearexcept(int excepts);
|
||||||
#else
|
#else
|
||||||
#include <fenv.h>
|
#include <fenv.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@@ -111,138 +110,137 @@ typedef long long int64_t;
|
|||||||
// math.h
|
// math.h
|
||||||
//
|
//
|
||||||
|
|
||||||
#if defined( __INTEL_COMPILER )
|
#if defined(__INTEL_COMPILER)
|
||||||
#include <mathimf.h>
|
#include <mathimf.h>
|
||||||
#else
|
#else
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef M_PI
|
#ifndef M_PI
|
||||||
#define M_PI 3.14159265358979323846264338327950288
|
#define M_PI 3.14159265358979323846264338327950288
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined( _MSC_VER )
|
#if defined(_MSC_VER)
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef NAN
|
|
||||||
#define NAN (INFINITY - INFINITY)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef HUGE_VALF
|
|
||||||
#define HUGE_VALF (float)HUGE_VAL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef INFINITY
|
|
||||||
#define INFINITY (FLT_MAX + FLT_MAX)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef isfinite
|
|
||||||
#define isfinite(x) _finite(x)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef isnan
|
|
||||||
#define isnan( x ) ((x) != (x))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef isinf
|
|
||||||
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
|
|
||||||
|
|
||||||
double rint( double x);
|
|
||||||
float rintf( float x);
|
|
||||||
long double rintl( long double x);
|
|
||||||
|
|
||||||
float cbrtf( float );
|
|
||||||
double cbrt( double );
|
|
||||||
|
|
||||||
int ilogb( double x);
|
|
||||||
int ilogbf (float x);
|
|
||||||
int ilogbl(long double x);
|
|
||||||
|
|
||||||
double fmax(double x, double y);
|
|
||||||
double fmin(double x, double y);
|
|
||||||
float fmaxf( float x, float y );
|
|
||||||
float fminf(float x, float y);
|
|
||||||
|
|
||||||
double log2(double x);
|
|
||||||
long double log2l(long double x);
|
|
||||||
|
|
||||||
double exp2(double x);
|
|
||||||
long double exp2l(long double x);
|
|
||||||
|
|
||||||
double fdim(double x, double y);
|
|
||||||
float fdimf(float x, float y);
|
|
||||||
long double fdiml(long double x, long double y);
|
|
||||||
|
|
||||||
double remquo( double x, double y, int *quo);
|
|
||||||
float remquof( float x, float y, int *quo);
|
|
||||||
long double remquol( long double x, long double y, int *quo);
|
|
||||||
|
|
||||||
long double scalblnl(long double x, long n);
|
|
||||||
|
|
||||||
float hypotf(float x, float y);
|
|
||||||
long double hypotl(long double x, long double y) ;
|
|
||||||
double lgamma(double x);
|
|
||||||
float lgammaf(float x);
|
|
||||||
|
|
||||||
double trunc(double x);
|
|
||||||
float truncf(float x);
|
|
||||||
|
|
||||||
double log1p(double x);
|
|
||||||
float log1pf(float x);
|
|
||||||
long double log1pl(long double x);
|
|
||||||
|
|
||||||
double copysign(double x, double y);
|
|
||||||
float copysignf(float x, float y);
|
|
||||||
long double copysignl(long double x, long double y);
|
|
||||||
|
|
||||||
long lround(double x);
|
|
||||||
long lroundf(float x);
|
|
||||||
//long lroundl(long double x)
|
|
||||||
|
|
||||||
double round(double x);
|
|
||||||
float roundf(float x);
|
|
||||||
long double roundl(long double x);
|
|
||||||
|
|
||||||
int cf_signbit(double x);
|
|
||||||
int cf_signbitf(float x);
|
|
||||||
|
|
||||||
// Added in _MSC_VER == 1800 (Visual Studio 2013)
|
|
||||||
#if _MSC_VER < 1800
|
|
||||||
static int signbit(double x) { return cf_signbit(x); }
|
|
||||||
#endif
|
|
||||||
static int signbitf(float x) { return cf_signbitf(x); }
|
|
||||||
|
|
||||||
long int lrint (double flt);
|
|
||||||
long int lrintf (float flt);
|
|
||||||
|
|
||||||
float int2float (int32_t ix);
|
|
||||||
int32_t float2int (float fx);
|
|
||||||
|
|
||||||
#endif // _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
|
|
||||||
|
|
||||||
#if _MSC_VER < 1900 && ( ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 )
|
|
||||||
// These functions appeared in Intel C v13 and Visual Studio 2015
|
|
||||||
float nanf( const char* str);
|
|
||||||
double nan( const char* str);
|
|
||||||
long double nanl( const char* str);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined( __ANDROID__ )
|
#ifndef NAN
|
||||||
#define log2(X) (log(X)/log(2))
|
#define NAN (INFINITY - INFINITY)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef HUGE_VALF
|
||||||
|
#define HUGE_VALF (float)HUGE_VAL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef INFINITY
|
||||||
|
#define INFINITY (FLT_MAX + FLT_MAX)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef isfinite
|
||||||
|
#define isfinite(x) _finite(x)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef isnan
|
||||||
|
#define isnan(x) ((x) != (x))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef isinf
|
||||||
|
#define isinf(_x) ((_x) == INFINITY || (_x) == -INFINITY)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
|
||||||
|
|
||||||
|
double rint(double x);
|
||||||
|
float rintf(float x);
|
||||||
|
long double rintl(long double x);
|
||||||
|
|
||||||
|
float cbrtf(float);
|
||||||
|
double cbrt(double);
|
||||||
|
|
||||||
|
int ilogb(double x);
|
||||||
|
int ilogbf(float x);
|
||||||
|
int ilogbl(long double x);
|
||||||
|
|
||||||
|
double fmax(double x, double y);
|
||||||
|
double fmin(double x, double y);
|
||||||
|
float fmaxf(float x, float y);
|
||||||
|
float fminf(float x, float y);
|
||||||
|
|
||||||
|
double log2(double x);
|
||||||
|
long double log2l(long double x);
|
||||||
|
|
||||||
|
double exp2(double x);
|
||||||
|
long double exp2l(long double x);
|
||||||
|
|
||||||
|
double fdim(double x, double y);
|
||||||
|
float fdimf(float x, float y);
|
||||||
|
long double fdiml(long double x, long double y);
|
||||||
|
|
||||||
|
double remquo(double x, double y, int* quo);
|
||||||
|
float remquof(float x, float y, int* quo);
|
||||||
|
long double remquol(long double x, long double y, int* quo);
|
||||||
|
|
||||||
|
long double scalblnl(long double x, long n);
|
||||||
|
|
||||||
|
float hypotf(float x, float y);
|
||||||
|
long double hypotl(long double x, long double y);
|
||||||
|
double lgamma(double x);
|
||||||
|
float lgammaf(float x);
|
||||||
|
|
||||||
|
double trunc(double x);
|
||||||
|
float truncf(float x);
|
||||||
|
|
||||||
|
double log1p(double x);
|
||||||
|
float log1pf(float x);
|
||||||
|
long double log1pl(long double x);
|
||||||
|
|
||||||
|
double copysign(double x, double y);
|
||||||
|
float copysignf(float x, float y);
|
||||||
|
long double copysignl(long double x, long double y);
|
||||||
|
|
||||||
|
long lround(double x);
|
||||||
|
long lroundf(float x);
|
||||||
|
// long lroundl(long double x)
|
||||||
|
|
||||||
|
double round(double x);
|
||||||
|
float roundf(float x);
|
||||||
|
long double roundl(long double x);
|
||||||
|
|
||||||
|
int cf_signbit(double x);
|
||||||
|
int cf_signbitf(float x);
|
||||||
|
|
||||||
|
// Added in _MSC_VER == 1800 (Visual Studio 2013)
|
||||||
|
#if _MSC_VER < 1800
|
||||||
|
static int signbit(double x) { return cf_signbit(x); }
|
||||||
|
#endif
|
||||||
|
static int signbitf(float x) { return cf_signbitf(x); }
|
||||||
|
|
||||||
|
long int lrint(double flt);
|
||||||
|
long int lrintf(float flt);
|
||||||
|
|
||||||
|
float int2float(int32_t ix);
|
||||||
|
int32_t float2int(float fx);
|
||||||
|
|
||||||
|
#endif // _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
|
||||||
|
|
||||||
|
#if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
|
||||||
|
// These functions appeared in Intel C v13 and Visual Studio 2015
|
||||||
|
float nanf(const char* str);
|
||||||
|
double nan(const char* str);
|
||||||
|
long double nanl(const char* str);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // defined(_MSC_VER)
|
||||||
|
|
||||||
|
#if defined(__ANDROID__)
|
||||||
|
#define log2(X) (log(X) / log(2))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -250,12 +248,11 @@ typedef long long int64_t;
|
|||||||
//
|
//
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
|
// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
|
||||||
#if _MSC_VER < 1900
|
#if _MSC_VER < 1900
|
||||||
#define snprintf sprintf_s
|
#define snprintf sprintf_s
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
#endif // defined(_MSC_VER)
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
@@ -263,35 +260,32 @@ typedef long long int64_t;
|
|||||||
//
|
//
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#define strtok_r strtok_s
|
#define strtok_r strtok_s
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// unistd.h
|
// unistd.h
|
||||||
//
|
//
|
||||||
|
|
||||||
#if defined( _MSC_VER )
|
#if defined(_MSC_VER)
|
||||||
EXTERN_C unsigned int sleep( unsigned int sec );
|
EXTERN_C unsigned int sleep(unsigned int sec);
|
||||||
EXTERN_C int usleep( int usec );
|
EXTERN_C int usleep(int usec);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// syscall.h
|
// syscall.h
|
||||||
//
|
//
|
||||||
|
|
||||||
#if defined( __ANDROID__ )
|
#if defined(__ANDROID__)
|
||||||
// Android's bionic doesn't provide SYS_sysctl wrappers.
|
// Android's bionic doesn't provide SYS_sysctl wrappers.
|
||||||
#define SYS__sysctl __NR__sysctl
|
#define SYS__sysctl __NR__sysctl
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Some tests use _malloca, which is defined in malloc.h.
|
// Some tests use _malloca, which is defined in malloc.h.
|
||||||
#if !defined (__APPLE__)
|
#if !defined(__APPLE__)
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -300,32 +294,32 @@ typedef long long int64_t;
|
|||||||
// ???
|
// ???
|
||||||
//
|
//
|
||||||
|
|
||||||
#if defined( _MSC_VER )
|
#if defined(_MSC_VER)
|
||||||
|
|
||||||
#define MAXPATHLEN _MAX_PATH
|
#define MAXPATHLEN _MAX_PATH
|
||||||
|
|
||||||
EXTERN_C uint64_t ReadTime( void );
|
EXTERN_C uint64_t ReadTime(void);
|
||||||
EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
|
EXTERN_C double SubtractTime(uint64_t endTime, uint64_t startTime);
|
||||||
|
|
||||||
/** Returns the number of leading 0-bits in x,
|
/** Returns the number of leading 0-bits in x,
|
||||||
starting at the most significant bit position.
|
starting at the most significant bit position.
|
||||||
If x is 0, the result is undefined.
|
If x is 0, the result is undefined.
|
||||||
*/
|
*/
|
||||||
EXTERN_C int __builtin_clz(unsigned int pattern);
|
EXTERN_C int __builtin_clz(unsigned int pattern);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef MIN
|
#ifndef MIN
|
||||||
#define MIN(x,y) (((x)<(y))?(x):(y))
|
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
|
||||||
#endif
|
#endif
|
||||||
#ifndef MAX
|
#ifndef MAX
|
||||||
#define MAX(x,y) (((x)>(y))?(x):(y))
|
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*-----------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
WARNING: DO NOT USE THESE MACROS:
|
||||||
WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
|
MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
|
||||||
|
|
||||||
This is a typical usage of the macros:
|
This is a typical usage of the macros:
|
||||||
|
|
||||||
@@ -334,70 +328,81 @@ typedef long long int64_t;
|
|||||||
(taken from math_brute_force/reference_math.c). There are two problems:
|
(taken from math_brute_force/reference_math.c). There are two problems:
|
||||||
|
|
||||||
1. There is an error here. On Windows it will produce an incorrect result
|
1. There is an error here. On Windows it will produce an incorrect result
|
||||||
`0x1.5555555555555p+50'. To have a correct result it should be written as
|
`0x1.5555555555555p+50'.
|
||||||
`MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
|
To have a correct result it should be written as:
|
||||||
third argument is not obvious -- sometimes it should be the same as exponent of the
|
MAKE_HEX_DOUBLE(0x1.5555555555555p-2, 0x15555555555555LL, -54)
|
||||||
first argument, but sometimes not.
|
A proper value of the third argument is not obvious -- sometimes it
|
||||||
|
should be the same as exponent of the first argument, but sometimes
|
||||||
|
not.
|
||||||
|
|
||||||
2. Information is duplicated. It is easy to make a mistake.
|
2. Information is duplicated. It is easy to make a mistake.
|
||||||
|
|
||||||
Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
|
Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead
|
||||||
------------------------------------------------------------------------------------------------
|
(see them in the bottom of the file).
|
||||||
*/
|
-----------------------------------------------------------------------------*/
|
||||||
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||||
|
|
||||||
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
#define MAKE_HEX_FLOAT(x, y, z) ((float)ldexp((float)(y), z))
|
||||||
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
#define MAKE_HEX_DOUBLE(x, y, z) ldexp((double)(y), z)
|
||||||
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
#define MAKE_HEX_LONG(x, y, z) ((long double)ldexp((long double)(y), z))
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
|
// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
|
||||||
#define MAKE_HEX_FLOAT(x,y,z) x
|
#define MAKE_HEX_FLOAT(x, y, z) x
|
||||||
#define MAKE_HEX_DOUBLE(x,y,z) x
|
#define MAKE_HEX_DOUBLE(x, y, z) x
|
||||||
#define MAKE_HEX_LONG(x,y,z) x
|
#define MAKE_HEX_LONG(x, y, z) x
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*-----------------------------------------------------------------------------
|
||||||
------------------------------------------------------------------------------------------------
|
HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type
|
||||||
HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
|
float, double, long double respectively. Arguments:
|
||||||
double respectively. Arguments:
|
|
||||||
|
|
||||||
sm -- sign of number,
|
sm -- sign of number,
|
||||||
int -- integer part of mantissa (without `0x' prefix),
|
int -- integer part of mantissa (without `0x' prefix),
|
||||||
fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
|
fract -- fractional part of mantissa (without decimal point and `L' or
|
||||||
|
`LL' suffixes),
|
||||||
se -- sign of exponent,
|
se -- sign of exponent,
|
||||||
exp -- absolute value of (binary) exponent.
|
exp -- absolute value of (binary) exponent.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
|
double yhi = HEX_DBL(+, 1, 5555555555555, -, 2); // 0x1.5555555555555p-2
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
|
|
||||||
We have to pass signs as separate arguments because gcc passes negative integer values
|
We have to pass signs as separate arguments because gcc passes negative
|
||||||
(e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
|
integer values (e. g. `-2') into a macro as two separate tokens, so
|
||||||
`0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
|
`HEX_FLT(1, 0, -2)' produces result `0x1.0p- 2' (note a space between minus
|
||||||
literal.
|
and two) which is not a correct floating point literal.
|
||||||
------------------------------------------------------------------------------------------------
|
-----------------------------------------------------------------------------*/
|
||||||
*/
|
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
|
||||||
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
// If compiler does not support hex floating point literals:
|
||||||
// If compiler does not support hex floating point literals:
|
#define HEX_FLT(sm, int, fract, se, exp) \
|
||||||
#define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) )
|
sm ldexpf((float)(0x##int##fract##UL), \
|
||||||
#define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) )
|
se exp + ilogbf((float)0x##int) \
|
||||||
#define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
|
- ilogbf((float)(0x##int##fract##UL)))
|
||||||
|
#define HEX_DBL(sm, int, fract, se, exp) \
|
||||||
|
sm ldexp((double)(0x##int##fract##ULL), \
|
||||||
|
se exp + ilogb((double)0x##int) \
|
||||||
|
- ilogb((double)(0x##int##fract##ULL)))
|
||||||
|
#define HEX_LDBL(sm, int, fract, se, exp) \
|
||||||
|
sm ldexpl((long double)(0x##int##fract##ULL), \
|
||||||
|
se exp + ilogbl((long double)0x##int) \
|
||||||
|
- ilogbl((long double)(0x##int##fract##ULL)))
|
||||||
#else
|
#else
|
||||||
// If compiler supports hex floating point literals: just concatenate all the parts into a literal.
|
// If compiler supports hex floating point literals: just concatenate all the
|
||||||
#define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
|
// parts into a literal.
|
||||||
#define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
|
#define HEX_FLT(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##F
|
||||||
#define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
|
#define HEX_DBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp
|
||||||
|
#define HEX_LDBL(sm, int, fract, se, exp) sm 0x##int##.##fract##p##se##exp##L
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__MINGW32__)
|
#if defined(__MINGW32__)
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
#define sleep(sec) Sleep((sec) * 1000)
|
#define sleep(sec) Sleep((sec)*1000)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif // _COMPAT_H_
|
#endif // _COMPAT_H_
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -65,60 +65,70 @@ enum RoundingTypes
|
|||||||
|
|
||||||
typedef enum RoundingTypes RoundingType;
|
typedef enum RoundingTypes RoundingType;
|
||||||
|
|
||||||
extern void print_type_to_string(ExplicitType type, void *data, char* string);
|
extern void print_type_to_string(ExplicitType type, void *data, char *string);
|
||||||
extern size_t get_explicit_type_size( ExplicitType type );
|
extern size_t get_explicit_type_size(ExplicitType type);
|
||||||
extern const char * get_explicit_type_name( ExplicitType type );
|
extern const char *get_explicit_type_name(ExplicitType type);
|
||||||
extern void convert_explicit_value( void *inRaw, void *outRaw, ExplicitType inType, bool saturate, RoundingType roundType, ExplicitType outType );
|
extern void convert_explicit_value(void *inRaw, void *outRaw,
|
||||||
|
ExplicitType inType, bool saturate,
|
||||||
|
RoundingType roundType,
|
||||||
|
ExplicitType outType);
|
||||||
|
|
||||||
extern void generate_random_data( ExplicitType type, size_t count, MTdata d, void *outData );
|
extern void generate_random_data(ExplicitType type, size_t count, MTdata d,
|
||||||
extern void * create_random_data( ExplicitType type, MTdata d, size_t count );
|
void *outData);
|
||||||
|
extern void *create_random_data(ExplicitType type, MTdata d, size_t count);
|
||||||
|
|
||||||
extern cl_long read_upscale_signed( void *inRaw, ExplicitType inType );
|
extern cl_long read_upscale_signed(void *inRaw, ExplicitType inType);
|
||||||
extern cl_ulong read_upscale_unsigned( void *inRaw, ExplicitType inType );
|
extern cl_ulong read_upscale_unsigned(void *inRaw, ExplicitType inType);
|
||||||
extern float read_as_float( void *inRaw, ExplicitType inType );
|
extern float read_as_float(void *inRaw, ExplicitType inType);
|
||||||
|
|
||||||
extern float get_random_float(float low, float high, MTdata d);
|
extern float get_random_float(float low, float high, MTdata d);
|
||||||
extern double get_random_double(double low, double high, MTdata d);
|
extern double get_random_double(double low, double high, MTdata d);
|
||||||
extern float any_float( MTdata d );
|
extern float any_float(MTdata d);
|
||||||
extern double any_double( MTdata d );
|
extern double any_double(MTdata d);
|
||||||
|
|
||||||
extern int random_in_range( int minV, int maxV, MTdata d );
|
extern int random_in_range(int minV, int maxV, MTdata d);
|
||||||
|
|
||||||
size_t get_random_size_t(size_t low, size_t high, MTdata d);
|
size_t get_random_size_t(size_t low, size_t high, MTdata d);
|
||||||
|
|
||||||
// Note: this takes a float and is for use with single precision tests
|
// Note: this takes a float and is for use with single precision tests
|
||||||
static inline int IsFloatSubnormal( float x )
|
static inline int IsFloatSubnormal(float x)
|
||||||
{
|
{
|
||||||
#if 2 == FLT_RADIX
|
#if 2 == FLT_RADIX
|
||||||
// Do this in integer to avoid problems with FTZ behavior
|
// Do this in integer to avoid problems with FTZ behavior
|
||||||
union{ float d; uint32_t u;}u;
|
union {
|
||||||
|
float d;
|
||||||
|
uint32_t u;
|
||||||
|
} u;
|
||||||
u.d = fabsf(x);
|
u.d = fabsf(x);
|
||||||
return (u.u-1) < 0x007fffffU;
|
return (u.u - 1) < 0x007fffffU;
|
||||||
#else
|
#else
|
||||||
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
|
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
|
||||||
return fabs(x) < (double) FLT_MIN && x != 0.0;
|
// will fail if you flush subnormals to zero
|
||||||
|
return fabs(x) < (double)FLT_MIN && x != 0.0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int IsDoubleSubnormal( double x )
|
static inline int IsDoubleSubnormal(double x)
|
||||||
{
|
{
|
||||||
#if 2 == FLT_RADIX
|
#if 2 == FLT_RADIX
|
||||||
// Do this in integer to avoid problems with FTZ behavior
|
// Do this in integer to avoid problems with FTZ behavior
|
||||||
union{ double d; uint64_t u;}u;
|
union {
|
||||||
u.d = fabs( x);
|
double d;
|
||||||
return (u.u-1) < 0x000fffffffffffffULL;
|
uint64_t u;
|
||||||
|
} u;
|
||||||
|
u.d = fabs(x);
|
||||||
|
return (u.u - 1) < 0x000fffffffffffffULL;
|
||||||
#else
|
#else
|
||||||
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
|
// rely on floating point hardware for non-radix2 non-IEEE-754 hardware --
|
||||||
return fabs(x) < (double) DBL_MIN && x != 0.0;
|
// will fail if you flush subnormals to zero
|
||||||
|
return fabs(x) < (double)DBL_MIN && x != 0.0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int IsHalfSubnormal( cl_half x )
|
static inline int IsHalfSubnormal(cl_half x)
|
||||||
{
|
{
|
||||||
// this relies on integer overflow to exclude 0 as a subnormal
|
// this relies on integer overflow to exclude 0 as a subnormal
|
||||||
return ( ( x & 0x7fffU ) - 1U ) < 0x03ffU;
|
return ((x & 0x7fffU) - 1U) < 0x03ffU;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // _conversions_h
|
#endif // _conversions_h
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -24,30 +24,36 @@
|
|||||||
|
|
||||||
#include <CL/cl_half.h>
|
#include <CL/cl_half.h>
|
||||||
|
|
||||||
const char *IGetErrorString( int clErrorCode )
|
const char *IGetErrorString(int clErrorCode)
|
||||||
{
|
{
|
||||||
switch( clErrorCode )
|
switch (clErrorCode)
|
||||||
{
|
{
|
||||||
case CL_SUCCESS: return "CL_SUCCESS";
|
case CL_SUCCESS: return "CL_SUCCESS";
|
||||||
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
|
||||||
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
|
||||||
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
|
||||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
|
||||||
|
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||||
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
|
||||||
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
|
||||||
case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
case CL_PROFILING_INFO_NOT_AVAILABLE:
|
||||||
|
return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
||||||
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
|
||||||
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
|
||||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
|
||||||
|
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||||
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
|
case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
|
||||||
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
|
case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
|
||||||
case CL_MISALIGNED_SUB_BUFFER_OFFSET: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
|
||||||
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
||||||
|
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
|
||||||
|
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
||||||
case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
|
case CL_COMPILE_PROGRAM_FAILURE: return "CL_COMPILE_PROGRAM_FAILURE";
|
||||||
case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
|
case CL_LINKER_NOT_AVAILABLE: return "CL_LINKER_NOT_AVAILABLE";
|
||||||
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
|
case CL_LINK_PROGRAM_FAILURE: return "CL_LINK_PROGRAM_FAILURE";
|
||||||
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
|
case CL_DEVICE_PARTITION_FAILED: return "CL_DEVICE_PARTITION_FAILED";
|
||||||
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
|
||||||
|
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
||||||
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
|
||||||
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
|
case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
|
||||||
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
|
||||||
@@ -56,15 +62,18 @@ const char *IGetErrorString( int clErrorCode )
|
|||||||
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
|
||||||
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
|
||||||
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
|
||||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
|
||||||
|
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||||
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
|
||||||
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
|
||||||
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
|
||||||
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
|
||||||
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
|
||||||
case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
|
case CL_INVALID_PROGRAM_EXECUTABLE:
|
||||||
|
return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||||
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
|
||||||
case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
|
case CL_INVALID_KERNEL_DEFINITION:
|
||||||
|
return "CL_INVALID_KERNEL_DEFINITION";
|
||||||
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
|
||||||
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
|
||||||
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
|
||||||
@@ -96,9 +105,9 @@ const char *IGetErrorString( int clErrorCode )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *GetChannelOrderName( cl_channel_order order )
|
const char *GetChannelOrderName(cl_channel_order order)
|
||||||
{
|
{
|
||||||
switch( order )
|
switch (order)
|
||||||
{
|
{
|
||||||
case CL_R: return "CL_R";
|
case CL_R: return "CL_R";
|
||||||
case CL_A: return "CL_A";
|
case CL_A: return "CL_A";
|
||||||
@@ -133,9 +142,9 @@ const char *GetChannelOrderName( cl_channel_order order )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int IsChannelOrderSupported( cl_channel_order order )
|
int IsChannelOrderSupported(cl_channel_order order)
|
||||||
{
|
{
|
||||||
switch( order )
|
switch (order)
|
||||||
{
|
{
|
||||||
case CL_R:
|
case CL_R:
|
||||||
case CL_A:
|
case CL_A:
|
||||||
@@ -155,24 +164,20 @@ int IsChannelOrderSupported( cl_channel_order order )
|
|||||||
case CL_sRGBx:
|
case CL_sRGBx:
|
||||||
case CL_sBGRA:
|
case CL_sBGRA:
|
||||||
case CL_sRGBA:
|
case CL_sRGBA:
|
||||||
case CL_DEPTH:
|
case CL_DEPTH: return 1;
|
||||||
return 1;
|
|
||||||
#if defined CL_1RGB_APPLE
|
#if defined CL_1RGB_APPLE
|
||||||
case CL_1RGB_APPLE:
|
case CL_1RGB_APPLE: return 1;
|
||||||
return 1;
|
|
||||||
#endif
|
#endif
|
||||||
#if defined CL_BGR1_APPLE
|
#if defined CL_BGR1_APPLE
|
||||||
case CL_BGR1_APPLE:
|
case CL_BGR1_APPLE: return 1;
|
||||||
return 1;
|
|
||||||
#endif
|
#endif
|
||||||
default:
|
default: return 0;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *GetChannelTypeName( cl_channel_type type )
|
const char *GetChannelTypeName(cl_channel_type type)
|
||||||
{
|
{
|
||||||
switch( type )
|
switch (type)
|
||||||
{
|
{
|
||||||
case CL_SNORM_INT8: return "CL_SNORM_INT8";
|
case CL_SNORM_INT8: return "CL_SNORM_INT8";
|
||||||
case CL_SNORM_INT16: return "CL_SNORM_INT16";
|
case CL_SNORM_INT16: return "CL_SNORM_INT16";
|
||||||
@@ -197,9 +202,9 @@ const char *GetChannelTypeName( cl_channel_type type )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int IsChannelTypeSupported( cl_channel_type type )
|
int IsChannelTypeSupported(cl_channel_type type)
|
||||||
{
|
{
|
||||||
switch( type )
|
switch (type)
|
||||||
{
|
{
|
||||||
case CL_SNORM_INT8:
|
case CL_SNORM_INT8:
|
||||||
case CL_SNORM_INT16:
|
case CL_SNORM_INT16:
|
||||||
@@ -216,20 +221,17 @@ int IsChannelTypeSupported( cl_channel_type type )
|
|||||||
case CL_UNSIGNED_INT16:
|
case CL_UNSIGNED_INT16:
|
||||||
case CL_UNSIGNED_INT32:
|
case CL_UNSIGNED_INT32:
|
||||||
case CL_HALF_FLOAT:
|
case CL_HALF_FLOAT:
|
||||||
case CL_FLOAT:
|
case CL_FLOAT: return 1;
|
||||||
return 1;
|
|
||||||
#ifdef CL_SFIXED14_APPLE
|
#ifdef CL_SFIXED14_APPLE
|
||||||
case CL_SFIXED14_APPLE:
|
case CL_SFIXED14_APPLE: return 1;
|
||||||
return 1;
|
|
||||||
#endif
|
#endif
|
||||||
default:
|
default: return 0;
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *GetAddressModeName( cl_addressing_mode mode )
|
const char *GetAddressModeName(cl_addressing_mode mode)
|
||||||
{
|
{
|
||||||
switch( mode )
|
switch (mode)
|
||||||
{
|
{
|
||||||
case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
|
case CL_ADDRESS_NONE: return "CL_ADDRESS_NONE";
|
||||||
case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
|
case CL_ADDRESS_CLAMP_TO_EDGE: return "CL_ADDRESS_CLAMP_TO_EDGE";
|
||||||
@@ -240,9 +242,9 @@ const char *GetAddressModeName( cl_addressing_mode mode )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *GetDeviceTypeName( cl_device_type type )
|
const char *GetDeviceTypeName(cl_device_type type)
|
||||||
{
|
{
|
||||||
switch( type )
|
switch (type)
|
||||||
{
|
{
|
||||||
case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
|
case CL_DEVICE_TYPE_GPU: return "CL_DEVICE_TYPE_GPU";
|
||||||
case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
|
case CL_DEVICE_TYPE_CPU: return "CL_DEVICE_TYPE_CPU";
|
||||||
@@ -252,34 +254,34 @@ const char *GetDeviceTypeName( cl_device_type type )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer )
|
const char *GetDataVectorString(void *dataBuffer, size_t typeSize,
|
||||||
|
size_t vecSize, char *buffer)
|
||||||
{
|
{
|
||||||
static char scratch[ 1024 ];
|
static char scratch[1024];
|
||||||
size_t i, j;
|
size_t i, j;
|
||||||
|
|
||||||
if( buffer == NULL )
|
if (buffer == NULL) buffer = scratch;
|
||||||
buffer = scratch;
|
|
||||||
|
|
||||||
unsigned char *p = (unsigned char *)dataBuffer;
|
unsigned char *p = (unsigned char *)dataBuffer;
|
||||||
char *bPtr;
|
char *bPtr;
|
||||||
|
|
||||||
buffer[ 0 ] = 0;
|
buffer[0] = 0;
|
||||||
bPtr = buffer;
|
bPtr = buffer;
|
||||||
for( i = 0; i < vecSize; i++ )
|
for (i = 0; i < vecSize; i++)
|
||||||
{
|
{
|
||||||
if( i > 0 )
|
if (i > 0)
|
||||||
{
|
{
|
||||||
bPtr[ 0 ] = ' ';
|
bPtr[0] = ' ';
|
||||||
bPtr++;
|
bPtr++;
|
||||||
}
|
}
|
||||||
for( j = 0; j < typeSize; j++ )
|
for (j = 0; j < typeSize; j++)
|
||||||
{
|
{
|
||||||
sprintf( bPtr, "%02x", (unsigned int)p[ typeSize - j - 1 ] );
|
sprintf(bPtr, "%02x", (unsigned int)p[typeSize - j - 1]);
|
||||||
bPtr += 2;
|
bPtr += 2;
|
||||||
}
|
}
|
||||||
p += typeSize;
|
p += typeSize;
|
||||||
}
|
}
|
||||||
bPtr[ 0 ] = 0;
|
bPtr[0] = 0;
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
@@ -298,31 +300,35 @@ const char *GetQueuePropertyName(cl_command_queue_properties property)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifndef MAX
|
#ifndef MAX
|
||||||
#define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
|
#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined( _MSC_VER )
|
#if defined(_MSC_VER)
|
||||||
#define scalbnf(_a, _i ) ldexpf( _a, _i )
|
#define scalbnf(_a, _i) ldexpf(_a, _i)
|
||||||
#define scalbn(_a, _i ) ldexp( _a, _i )
|
#define scalbn(_a, _i) ldexp(_a, _i)
|
||||||
#define scalbnl(_a, _i ) ldexpl( _a, _i )
|
#define scalbnl(_a, _i) ldexpl(_a, _i)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// taken from math tests
|
// taken from math tests
|
||||||
#define HALF_MIN_EXP -13
|
#define HALF_MIN_EXP -13
|
||||||
#define HALF_MANT_DIG 11
|
#define HALF_MANT_DIG 11
|
||||||
static float Ulp_Error_Half_Float( float test, double reference )
|
static float Ulp_Error_Half_Float(float test, double reference)
|
||||||
{
|
{
|
||||||
union{ double d; uint64_t u; }u; u.d = reference;
|
union {
|
||||||
|
double d;
|
||||||
|
uint64_t u;
|
||||||
|
} u;
|
||||||
|
u.d = reference;
|
||||||
|
|
||||||
// Note: This function presumes that someone has already tested whether the result is correctly,
|
// Note: This function presumes that someone has already tested whether the
|
||||||
// rounded before calling this function. That test:
|
// result is correctly, rounded before calling this function. That test:
|
||||||
//
|
//
|
||||||
// if( (float) reference == test )
|
// if( (float) reference == test )
|
||||||
// return 0.0f;
|
// return 0.0f;
|
||||||
//
|
//
|
||||||
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out before we get here.
|
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out
|
||||||
// Otherwise, we'll return inf ulp error here, for what are otherwise correctly rounded
|
// before we get here. Otherwise, we'll return inf ulp error here, for what
|
||||||
// results.
|
// are otherwise correctly rounded results.
|
||||||
|
|
||||||
double testVal = test;
|
double testVal = test;
|
||||||
|
|
||||||
@@ -342,23 +348,25 @@ static float Ulp_Error_Half_Float( float test, double reference )
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if( u.u & 0x000fffffffffffffULL )
|
if (u.u & 0x000fffffffffffffULL)
|
||||||
{ // Non-power of two and NaN
|
{ // Non-power of two and NaN
|
||||||
if( isnan( reference ) && isnan( test ) )
|
if (isnan(reference) && isnan(test))
|
||||||
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
return 0.0f; // if we are expecting a NaN, any NaN is fine
|
||||||
|
|
||||||
// The unbiased exponent of the ulp unit place
|
// The unbiased exponent of the ulp unit place
|
||||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference), HALF_MIN_EXP-1 );
|
int ulp_exp =
|
||||||
|
HALF_MANT_DIG - 1 - MAX(ilogb(reference), HALF_MIN_EXP - 1);
|
||||||
|
|
||||||
// Scale the exponent of the error
|
// Scale the exponent of the error
|
||||||
return (float) scalbn( testVal - reference, ulp_exp );
|
return (float)scalbn(testVal - reference, ulp_exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
// reference is a normal power of two or a zero
|
// reference is a normal power of two or a zero
|
||||||
int ulp_exp = HALF_MANT_DIG - 1 - MAX( ilogb( reference) - 1, HALF_MIN_EXP-1 );
|
int ulp_exp =
|
||||||
|
HALF_MANT_DIG - 1 - MAX(ilogb(reference) - 1, HALF_MIN_EXP - 1);
|
||||||
|
|
||||||
// Scale the exponent of the error
|
// Scale the exponent of the error
|
||||||
return (float) scalbn( testVal - reference, ulp_exp );
|
return (float)scalbn(testVal - reference, ulp_exp);
|
||||||
}
|
}
|
||||||
|
|
||||||
float Ulp_Error_Half(cl_half test, float reference)
|
float Ulp_Error_Half(cl_half test, float reference)
|
||||||
@@ -367,331 +375,309 @@ float Ulp_Error_Half(cl_half test, float reference)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Returns the signed error of `test`, in units of single-precision ulps,
// relative to the higher-precision `reference`.
//
// Note: This function presumes that someone has already tested whether the
// result is correctly rounded before calling this function. That test:
//
//    if( (float) reference == test )
//        return 0.0f;
//
// would ensure that cases like fabs(reference) > FLT_MAX are weeded out
// before we get here. Otherwise, we'll return inf ulp error here, for what
// are otherwise correctly rounded results.
float Ulp_Error(float test, double reference)
{
    union {
        double d;
        uint64_t u;
    } bits;
    bits.d = reference;
    double testVal = test;

    if (isinf(reference))
    {
        if (testVal == reference) return 0.0f;

        return (float)(testVal - reference);
    }

    if (isinf(testVal))
    {
        // Infinite test value, but finite (though possibly overflowing in
        // float) reference.
        //
        // The function probably overflowed prematurely here. Formally, the
        // spec says this is an infinite ulp error and should not be
        // tolerated. Unfortunately, this would mean that the internal
        // precision of some half_pow implementations would have to be 29+
        // bits at half_powr( 0x1.fffffep+31, 4) to correctly determine that
        // 4*log2( 0x1.fffffep+31 ) is not exactly 128.0. You might represent
        // this for example as 4*(32 - ~2**-24), which after rounding to
        // single is 4*32 = 128, which will ultimately result in premature
        // overflow, even though a good faith representation would be correct
        // to within 2**-29 internally.
        //
        // In the interest of not requiring the implementation go to
        // extraordinary lengths to deliver a half precision function, we
        // allow premature overflow within the limit of the allowed ulp error.
        // Towards that end, we "pretend" the test value is actually 2**128,
        // the next value that would appear in the number line if float had
        // sufficient range.
        testVal = copysign(scalbn(1.0, 128), testVal);

        // Note that the same hack may not work in long double, which is not
        // guaranteed to have more range than double. It is not clear that
        // premature overflow should be tolerated for double.
    }

    // If we are expecting a NaN, any NaN is fine.
    if (isnan(reference) && isnan(test)) return 0.0f;

    // All mantissa bits clear: reference is a normal power of two or a zero
    // (infinities were handled above).
    const bool mantissaIsZero = (bits.u & 0x000fffffffffffffULL) == 0;

    // Unbiased exponent that determines the size of one ulp, never smaller
    // than the exponent of the smallest normal float.
    int ref_exp = FLT_MIN_EXP - 1;
    if (reference != 0.0)
    {
        // Zero is excluded above because ilogb(0) is FP_ILOGB0, which may be
        // INT_MIN; subtracting one from it would overflow.
        ref_exp = ilogb(reference);

        // Just below a power of two the ulp is half as large, so measure
        // against the finer spacing.
        if (mantissaIsZero) ref_exp -= 1;

        if (ref_exp < FLT_MIN_EXP - 1) ref_exp = FLT_MIN_EXP - 1;
    }

    // The unbiased exponent of the ulp unit place
    int ulp_exp = FLT_MANT_DIG - 1 - ref_exp;

    // Scale the exponent of the error
    return (float)scalbn(testVal - reference, ulp_exp);
}
|
||||||
|
|
||||||
// Returns the signed error of `test`, in units of double-precision ulps,
// relative to the long double `reference`.
//
// Deal with long double = double
// On most systems long double is a higher precision type than double. They
// provide either a 80-bit or greater floating point type, or they provide a
// head-tail double double format. That is sufficient to represent the
// accuracy of a floating point result to many more bits than double and we
// can calculate sub-ulp errors. This is the standard system for which this
// test suite is designed.
//
// On some systems double and long double are the same thing. Then we run
// into a problem, because our representation of the infinitely precise
// result (passed in as reference above) can be off by as much as a half
// double precision ulp itself. In this case, we inflate the reported error
// by half an ulp to take this into account. A more correct and permanent
// fix would be to undertake refactoring the reference code to return
// results in this format:
//
//    typedef struct DoubleReference
//    {
//        // true value = correctlyRoundedResult + ulps *
//        // ulp(correctlyRoundedResult)        (infinitely precise)
//        // as best we can:
//        double correctlyRoundedResult;
//        // plus a fractional amount to account for the difference
//        // between infinitely precise result and correctlyRoundedResult,
//        // in units of ulps:
//        double ulps;
//    } DoubleReference;
//
// This would provide a useful higher-than-double precision format for
// everyone that we can use, and would solve a few problems with
// representing absolute errors below DBL_MIN and over DBL_MAX for systems
// that use a head to tail double double for long double.
//
// Note: This function presumes that someone has already tested whether the
// result is correctly rounded before calling this function. That test:
//
//    if( (double) reference == test )
//        return 0.0f;
//
// would ensure that cases like fabs(reference) > DBL_MAX are weeded out
// before we get here. Otherwise, we'll return inf ulp error here, for what
// are otherwise correctly rounded results.
float Ulp_Error_Double(double test, long double reference)
{
    long double testVal = test;

    if (isinf(reference))
    {
        if (testVal == reference) return 0.0f;

        return (float)(testVal - reference);
    }

    // If we are expecting a NaN, any NaN is fine.
    if (isnan(reference) && isnan(test)) return 0.0f;

    // A power of two has a fraction of exactly +/-0.5. The sign is ignored so
    // that negative references are scored the same way as positive ones.
    int unusedExponent;
    long double fraction = frexpl(reference, &unusedExponent);
    if (fraction < 0.0L) fraction = -fraction;
    const bool isPowerOfTwo = (fraction == 0.5L);

    // Unbiased exponent that determines the size of one ulp. Just below a
    // power of two the ulp is half as large, so measure against the finer
    // spacing. Zero is never a power of two here, so the (possibly INT_MIN)
    // result of ilogbl(0) is only ever clamped, never decremented.
    int ref_exp = ilogbl(reference);
    if (isPowerOfTwo) ref_exp -= 1;
    if (ref_exp < DBL_MIN_EXP - 1) ref_exp = DBL_MIN_EXP - 1;

    // The unbiased exponent of the ulp unit place
    int ulp_exp = DBL_MANT_DIG - 1 - ref_exp;

    // Scale the exponent of the error
    float result = (float)scalbnl(testVal - reference, ulp_exp);

    // account for rounding error in reference result on systems that do not
    // have a higher precision floating point type (see above)
    if (sizeof(long double) == sizeof(double))
        result += copysignf(0.5f, result);

    return result;
}
|
||||||
|
|
||||||
// Logs the build status and build log of `program` for every device in
// `device_list` whose build did not succeed. Returns CL_SUCCESS, or the first
// error reported while querying the build information.
static cl_int OutputBuildLogsForDevices(cl_program program,
                                        cl_uint num_devices,
                                        const cl_device_id *device_list)
{
    for (cl_uint i = 0; i < num_devices; i++)
    {
        // Get the build status
        cl_build_status build_status;
        size_t size_ret;
        cl_int error = clGetProgramBuildInfo(
            program, device_list[i], CL_PROGRAM_BUILD_STATUS,
            sizeof(build_status), &build_status, &size_ret);
        test_error(error, "Unable to query build status");

        if (build_status == CL_BUILD_SUCCESS) continue;

        // The build failed: log the status, then fetch and log the build log
        log_error("ERROR: CL_PROGRAM_BUILD_STATUS=%d\n", (int)build_status);
        error = clGetProgramBuildInfo(program, device_list[i],
                                      CL_PROGRAM_BUILD_LOG, 0, NULL,
                                      &size_ret);
        test_error(error, "Unable to query build log size");

        // One extra byte so the text is NUL-terminated even if the reported
        // size is zero or the implementation omits the terminator.
        char *build_log = (char *)malloc(size_ret + 1);
        if (build_log == NULL)
        {
            print_error(CL_OUT_OF_HOST_MEMORY, "malloc failed");
            return CL_OUT_OF_HOST_MEMORY;
        }

        error = clGetProgramBuildInfo(program, device_list[i],
                                      CL_PROGRAM_BUILD_LOG, size_ret,
                                      build_log, NULL);
        if (error != CL_SUCCESS)
        {
            // test_error returns from this function, so release the log
            // first.
            free(build_log);
            test_error(error, "Unable to query build log");
        }
        build_log[size_ret] = '\0';

        log_error("ERROR: CL_PROGRAM_BUILD_LOG:\n%s\n", build_log);
        free(build_log);
    }

    return CL_SUCCESS;
}

// Logs build failures of `program`.
//
//   program     - program to inspect; NULL is accepted and nothing is logged
//   num_devices - number of entries in device_list, or 0 to inspect every
//                 device of the program's context
//   device_list - devices to inspect; ignored when num_devices is 0
//
// Returns CL_SUCCESS, CL_OUT_OF_HOST_MEMORY if an allocation fails, or the
// error code of the first OpenCL query that fails.
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
                       cl_device_id *device_list)
{
    // Does the program object exist?
    if (program == NULL) return CL_SUCCESS;

    // Device list allocated here (only when the caller gave none). Tracked
    // separately because num_devices is overwritten below and so cannot be
    // used afterwards to decide whether the list must be freed.
    cl_device_id *owned_list = NULL;

    if (num_devices == 0)
    {
        // If zero devices were specified then allocate and query the device
        // list from the context
        cl_context context;
        size_t size_ret;
        cl_int error = clGetProgramInfo(program, CL_PROGRAM_CONTEXT,
                                        sizeof(context), &context, NULL);
        test_error(error, "Unable to query program's context");
        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
                                 &size_ret);
        test_error(error, "Unable to query context's device size");

        num_devices = (cl_uint)(size_ret / sizeof(cl_device_id));
        owned_list = (cl_device_id *)malloc(size_ret);
        if (owned_list == NULL)
        {
            print_error(CL_OUT_OF_HOST_MEMORY, "malloc failed");
            return CL_OUT_OF_HOST_MEMORY;
        }

        error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size_ret,
                                 owned_list, NULL);
        if (error != CL_SUCCESS)
        {
            // test_error returns from this function, so release the list
            // first.
            free(owned_list);
            test_error(error, "Unable to query context's devices");
        }
        device_list = owned_list;
    }

    cl_int result = OutputBuildLogsForDevices(program, num_devices, device_list);

    // free(NULL) is a no-op, so this is safe when the caller owns the list.
    free(owned_list);

    return result;
}
|
||||||
|
|
||||||
// Names of the subtests that need at least OpenCL 1.2. The order and the
// contents (including the repeated "execute_after_two_file_link" entry) are
// kept exactly as they were.
const char *subtests_requiring_opencl_1_2[] = {
    // device partitioning
    "device_partition_equally",
    "device_partition_by_counts",
    "device_partition_by_affinity_domain_numa",
    "device_partition_by_affinity_domain_l4_cache",
    "device_partition_by_affinity_domain_l3_cache",
    "device_partition_by_affinity_domain_l2_cache",
    "device_partition_by_affinity_domain_l1_cache",
    "device_partition_by_affinity_domain_next_partitionable",
    "device_partition_all",

    // buffer fill
    "buffer_fill_int",
    "buffer_fill_uint",
    "buffer_fill_short",
    "buffer_fill_ushort",
    "buffer_fill_char",
    "buffer_fill_uchar",
    "buffer_fill_long",
    "buffer_fill_ulong",
    "buffer_fill_float",
    "buffer_fill_struct",

    // host access flags
    "test_mem_host_write_only_buffer",
    "test_mem_host_write_only_subbuffer",
    "test_mem_host_no_access_buffer",
    "test_mem_host_no_access_subbuffer",
    "test_mem_host_read_only_image",
    "test_mem_host_write_only_image",
    "test_mem_host_no_access_image",

    // CL_MEM_HOST_{READ|WRITE}_ONLY api/
    "get_buffer_info",
    "get_image1d_info",
    "get_image1d_array_info",
    "get_image2d_array_info",

    // gl/
    "images_read_1D",
    "images_write_1D",
    "images_1D_getinfo",
    "images_read_1Darray",
    "images_write_1Darray",
    "images_1Darray_getinfo",
    "images_read_2Darray",
    "images_write_2Darray",
    "images_2Darray_getinfo",
    "buffer_migrate",
    "image_migrate",

    // compiler/
    "load_program_source",
    "load_multistring_source",
    "load_two_kernel_source",
    "load_null_terminated_source",
    "load_null_terminated_multi_line_source",
    "load_null_terminated_partial_multi_line_source",
    "load_discreet_length_source",
    "get_program_source",
    "get_program_build_info",
    "get_program_info",
    "large_compile",
    "async_build",
    "options_build_optimizations",
    "options_build_macro",
    "options_build_macro_existence",
    "options_include_directory",
    "options_denorm_cache",
    "preprocessor_define_udef",
    "preprocessor_include",
    "preprocessor_line_error",
    "preprocessor_pragma",
    "compiler_defines_for_extensions",
    "image_macro",
    "simple_compile_only",
    "simple_static_compile_only",
    "simple_extern_compile_only",
    "simple_compile_with_callback",
    "simple_embedded_header_compile",
    "simple_link_only",
    "two_file_regular_variable_access",
    "two_file_regular_struct_access",
    "two_file_regular_function_access",
    "simple_link_with_callback",
    "simple_embedded_header_link",
    "execute_after_simple_compile_and_link",
    "execute_after_simple_compile_and_link_no_device_info",
    "execute_after_simple_compile_and_link_with_defines",
    "execute_after_simple_compile_and_link_with_callbacks",
    "execute_after_simple_library_with_link",
    "execute_after_two_file_link",
    "execute_after_two_file_link",
    "execute_after_embedded_header_link",
    "execute_after_included_header_link",
    "execute_after_serialize_reload_object",
    "execute_after_serialize_reload_library",
    "simple_library_only",
    "simple_library_with_callback",
    "simple_library_with_link",
    "two_file_link",
    "multi_file_libraries",
    "multiple_files",
    "multiple_libraries",
    "multiple_files_multiple_libraries",
    "multiple_embedded_headers",
    "program_binary_type",
    "compile_and_link_status_options_log",

    // CL_PROGRAM_NUM_KERNELS, in api/
    "get_kernel_arg_info",
    "create_kernels_in_program",

    // clEnqueue..WithWaitList, in events/
    "event_enqueue_marker_with_event_list",
    "event_enqueue_barrier_with_event_list",
    "popcount"
};
|
||||||
|
|
||||||
const char *subtests_to_skip_with_offline_compiler[] = {
|
const char *subtests_to_skip_with_offline_compiler[] = {
|
||||||
@@ -754,14 +740,18 @@ const char *subtests_to_skip_with_offline_compiler[] = {
|
|||||||
"async_build",
|
"async_build",
|
||||||
};
|
};
|
||||||
|
|
||||||
int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device)
|
int check_functions_for_offline_compiler(const char *subtestname,
|
||||||
|
cl_device_id device)
|
||||||
{
|
{
|
||||||
if (gCompilationMode != kOnline)
|
if (gCompilationMode != kOnline)
|
||||||
{
|
{
|
||||||
int nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler)/sizeof(char *);
|
int nNotRequiredWithOfflineCompiler =
|
||||||
|
sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *);
|
||||||
size_t i;
|
size_t i;
|
||||||
for(i=0; i < nNotRequiredWithOfflineCompiler; ++i) {
|
for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i)
|
||||||
if(!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) {
|
{
|
||||||
|
if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i]))
|
||||||
|
{
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,10 +32,14 @@
|
|||||||
#define log_info printf
|
#define log_info printf
|
||||||
#define log_error printf
|
#define log_error printf
|
||||||
#define log_missing_feature printf
|
#define log_missing_feature printf
|
||||||
// Prints one performance measurement to stdout via printf.
//
//   _number       - measured value; converted to double before printing so
//                   that integer arguments are formatted correctly by "%g"
//   _higherBetter - non-zero/true selects "higher is better", otherwise
//                   "lower is better" is printed
//   _numType      - C string naming the unit of the value
//   _format, ...  - printf-style format (a string literal, since it is
//                   concatenated into the message) and its arguments,
//                   describing what was measured
//
// The expression evaluates to printf's return value.
#define log_perf(_number, _higherBetter, _numType, _format, ...)               \
    printf("Performance Number " _format " (in %s, %s): %g\n", ##__VA_ARGS__,  \
           (_numType),                                                         \
           (_higherBetter) ? "higher is better" : "lower is better",           \
           (double)(_number))

// Same output as log_perf; see above for the meaning of the arguments.
#define vlog_perf(_number, _higherBetter, _numType, _format, ...)              \
    printf("Performance Number " _format " (in %s, %s): %g\n", ##__VA_ARGS__,  \
           (_numType),                                                         \
           (_higherBetter) ? "higher is better" : "lower is better",           \
           (double)(_number))
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#ifdef __MINGW32__
|
#ifdef __MINGW32__
|
||||||
// Use __mingw_printf since it supports "%a" format specifier
|
// Use __mingw_printf since it supports "%a" format specifier
|
||||||
@@ -54,14 +58,15 @@ static int vlog_win32(const char *format, ...);
|
|||||||
|
|
||||||
#define ct_assert(b) ct_assert_i(b, __LINE__)
|
#define ct_assert(b) ct_assert_i(b, __LINE__)
|
||||||
#define ct_assert_i(b, line) ct_assert_ii(b, line)
|
#define ct_assert_i(b, line) ct_assert_ii(b, line)
|
||||||
#define ct_assert_ii(b, line) int _compile_time_assertion_on_line_##line[b ? 1 : -1];
|
#define ct_assert_ii(b, line) \
|
||||||
|
int _compile_time_assertion_on_line_##line[b ? 1 : -1];
|
||||||
|
|
||||||
#define test_fail(msg, ...) \
|
#define test_fail(msg, ...) \
|
||||||
{ \
|
{ \
|
||||||
log_error(msg, ##__VA_ARGS__); \
|
log_error(msg, ##__VA_ARGS__); \
|
||||||
return TEST_FAIL; \
|
return TEST_FAIL; \
|
||||||
}
|
}
|
||||||
#define test_error(errCode,msg) test_error_ret(errCode,msg,errCode)
|
#define test_error(errCode, msg) test_error_ret(errCode, msg, errCode)
|
||||||
#define test_error_ret(errCode, msg, retValue) \
|
#define test_error_ret(errCode, msg, retValue) \
|
||||||
{ \
|
{ \
|
||||||
auto errCodeResult = errCode; \
|
auto errCodeResult = errCode; \
|
||||||
@@ -71,26 +76,73 @@ static int vlog_win32(const char *format, ...);
|
|||||||
return retValue; \
|
return retValue; \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
#define print_error(errCode,msg) log_error( "ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString( errCode ), __FILE__, __LINE__ );
|
#define print_error(errCode, msg) \
|
||||||
|
log_error("ERROR: %s! (%s from %s:%d)\n", msg, IGetErrorString(errCode), \
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
|
||||||
#define test_missing_feature(errCode, msg) test_missing_feature_ret(errCode, msg, errCode)
|
#define test_missing_feature(errCode, msg) \
|
||||||
|
test_missing_feature_ret(errCode, msg, errCode)
|
||||||
// this macro should always return CL_SUCCESS, but print the missing feature
|
// this macro should always return CL_SUCCESS, but print the missing feature
|
||||||
// message
|
// message
|
||||||
#define test_missing_feature_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { print_missing_feature( errCode, msg ); return CL_SUCCESS ; } }
|
#define test_missing_feature_ret(errCode, msg, retValue) \
|
||||||
#define print_missing_feature(errCode, msg) log_missing_feature("ERROR: Subtest %s tests a feature not supported by the device version! (from %s:%d)\n", msg, __FILE__, __LINE__ );
|
{ \
|
||||||
|
if (errCode != CL_SUCCESS) \
|
||||||
|
{ \
|
||||||
|
print_missing_feature(errCode, msg); \
|
||||||
|
return CL_SUCCESS; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#define print_missing_feature(errCode, msg) \
|
||||||
|
log_missing_feature("ERROR: Subtest %s tests a feature not supported by " \
|
||||||
|
"the device version! (from %s:%d)\n", \
|
||||||
|
msg, __FILE__, __LINE__);
|
||||||
|
|
||||||
#define test_missing_support_offline_cmpiler(errCode, msg) test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
|
#define test_missing_support_offline_cmpiler(errCode, msg) \
|
||||||
|
test_missing_support_offline_cmpiler_ret(errCode, msg, errCode)
|
||||||
// this macro should always return CL_SUCCESS, but print the skip message on
|
// this macro should always return CL_SUCCESS, but print the skip message on
|
||||||
// test not supported with offline compiler
|
// test not supported with offline compiler
|
||||||
#define test_missing_support_offline_cmpiler_ret(errCode,msg,retValue) { if( errCode != CL_SUCCESS ) { log_info( "INFO: Subtest %s tests is not supported in offline compiler execution path! (from %s:%d)\n", msg, __FILE__, __LINE__ ); return TEST_SKIP ; } }
|
#define test_missing_support_offline_cmpiler_ret(errCode, msg, retValue) \
|
||||||
|
{ \
|
||||||
|
if (errCode != CL_SUCCESS) \
|
||||||
|
{ \
|
||||||
|
log_info("INFO: Subtest %s tests is not supported in offline " \
|
||||||
|
"compiler execution path! (from %s:%d)\n", \
|
||||||
|
msg, __FILE__, __LINE__); \
|
||||||
|
return TEST_SKIP; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
// expected error code vs. what we got
|
// expected error code vs. what we got
|
||||||
#define test_failure_error(errCode, expectedErrCode, msg) test_failure_error_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
#define test_failure_error(errCode, expectedErrCode, msg) \
|
||||||
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_error( errCode, expectedErrCode, msg ); return retValue ; } }
|
test_failure_error_ret(errCode, expectedErrCode, msg, \
|
||||||
#define print_failure_error(errCode, expectedErrCode, msg) log_error( "ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
errCode != expectedErrCode)
|
||||||
#define test_failure_warning(errCode, expectedErrCode, msg) test_failure_warning_ret(errCode, expectedErrCode, msg, errCode != expectedErrCode)
|
#define test_failure_error_ret(errCode, expectedErrCode, msg, retValue) \
|
||||||
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) { if( errCode != expectedErrCode ) { print_failure_warning( errCode, expectedErrCode, msg ); warnings++ ; } }
|
{ \
|
||||||
#define print_failure_warning(errCode, expectedErrCode, msg) log_error( "WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, IGetErrorString( errCode ), IGetErrorString( expectedErrCode ), __FILE__, __LINE__ );
|
if (errCode != expectedErrCode) \
|
||||||
|
{ \
|
||||||
|
print_failure_error(errCode, expectedErrCode, msg); \
|
||||||
|
return retValue; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#define print_failure_error(errCode, expectedErrCode, msg) \
|
||||||
|
log_error("ERROR: %s! (Got %s, expected %s from %s:%d)\n", msg, \
|
||||||
|
IGetErrorString(errCode), IGetErrorString(expectedErrCode), \
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
#define test_failure_warning(errCode, expectedErrCode, msg) \
|
||||||
|
test_failure_warning_ret(errCode, expectedErrCode, msg, \
|
||||||
|
errCode != expectedErrCode)
|
||||||
|
#define test_failure_warning_ret(errCode, expectedErrCode, msg, retValue) \
|
||||||
|
{ \
|
||||||
|
if (errCode != expectedErrCode) \
|
||||||
|
{ \
|
||||||
|
print_failure_warning(errCode, expectedErrCode, msg); \
|
||||||
|
warnings++; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#define print_failure_warning(errCode, expectedErrCode, msg) \
|
||||||
|
log_error("WARNING: %s! (Got %s, expected %s from %s:%d)\n", msg, \
|
||||||
|
IGetErrorString(errCode), IGetErrorString(expectedErrCode), \
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
|
||||||
// generate an error when an assertion is false (not error code related)
|
// generate an error when an assertion is false (not error code related)
|
||||||
#define test_assert_error(condition, msg) \
|
#define test_assert_error(condition, msg) \
|
||||||
@@ -120,27 +172,31 @@ static int vlog_win32(const char *format, ...);
|
|||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
extern const char *IGetErrorString( int clErrorCode );
|
extern const char *IGetErrorString(int clErrorCode);
|
||||||
|
|
||||||
extern float Ulp_Error_Half(cl_half test, float reference);
|
extern float Ulp_Error_Half(cl_half test, float reference);
|
||||||
extern float Ulp_Error(float test, double reference);
|
extern float Ulp_Error(float test, double reference);
|
||||||
extern float Ulp_Error_Double(double test, long double reference);
|
extern float Ulp_Error_Double(double test, long double reference);
|
||||||
|
|
||||||
extern const char *GetChannelTypeName( cl_channel_type type );
|
extern const char *GetChannelTypeName(cl_channel_type type);
|
||||||
extern int IsChannelTypeSupported( cl_channel_type type );
|
extern int IsChannelTypeSupported(cl_channel_type type);
|
||||||
extern const char *GetChannelOrderName( cl_channel_order order );
|
extern const char *GetChannelOrderName(cl_channel_order order);
|
||||||
extern int IsChannelOrderSupported( cl_channel_order order );
|
extern int IsChannelOrderSupported(cl_channel_order order);
|
||||||
extern const char *GetAddressModeName( cl_addressing_mode mode );
|
extern const char *GetAddressModeName(cl_addressing_mode mode);
|
||||||
extern const char *GetQueuePropertyName(cl_command_queue_properties properties);
|
extern const char *GetQueuePropertyName(cl_command_queue_properties properties);
|
||||||
|
|
||||||
extern const char *GetDeviceTypeName( cl_device_type type );
|
extern const char *GetDeviceTypeName(cl_device_type type);
|
||||||
int check_functions_for_offline_compiler(const char *subtestname, cl_device_id device);
|
int check_functions_for_offline_compiler(const char *subtestname,
|
||||||
|
cl_device_id device);
|
||||||
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
|
cl_int OutputBuildLogs(cl_program program, cl_uint num_devices,
|
||||||
cl_device_id *device_list);
|
cl_device_id *device_list);
|
||||||
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static storage, but it's not reentrant then!)
|
|
||||||
extern const char *GetDataVectorString( void *dataBuffer, size_t typeSize, size_t vecSize, char *buffer );
|
|
||||||
|
|
||||||
#if defined (_WIN32) && !defined(__MINGW32__)
|
// NON-REENTRANT UNLESS YOU PROVIDE A BUFFER PTR (pass null to use static
|
||||||
|
// storage, but it's not reentrant then!)
|
||||||
|
extern const char *GetDataVectorString(void *dataBuffer, size_t typeSize,
|
||||||
|
size_t vecSize, char *buffer);
|
||||||
|
|
||||||
|
#if defined(_WIN32) && !defined(__MINGW32__)
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -148,17 +204,21 @@ static int vlog_win32(const char *format, ...)
|
|||||||
{
|
{
|
||||||
const char *new_format = format;
|
const char *new_format = format;
|
||||||
|
|
||||||
if (strstr(format, "%a")) {
|
if (strstr(format, "%a"))
|
||||||
|
{
|
||||||
char *temp;
|
char *temp;
|
||||||
if ((temp = strdup(format)) == NULL) {
|
if ((temp = strdup(format)) == NULL)
|
||||||
|
{
|
||||||
printf("vlog_win32: Failed to allocate memory for strdup\n");
|
printf("vlog_win32: Failed to allocate memory for strdup\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
new_format = temp;
|
new_format = temp;
|
||||||
while (*temp) {
|
while (*temp)
|
||||||
|
{
|
||||||
// replace %a with %f
|
// replace %a with %f
|
||||||
if ((*temp == '%') && (*(temp+1) == 'a')) {
|
if ((*temp == '%') && (*(temp + 1) == 'a'))
|
||||||
*(temp+1) = 'f';
|
{
|
||||||
|
*(temp + 1) = 'f';
|
||||||
}
|
}
|
||||||
temp++;
|
temp++;
|
||||||
}
|
}
|
||||||
@@ -169,8 +229,9 @@ static int vlog_win32(const char *format, ...)
|
|||||||
vprintf(new_format, args);
|
vprintf(new_format, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
|
|
||||||
if (new_format != format) {
|
if (new_format != format)
|
||||||
free((void*)new_format);
|
{
|
||||||
|
free((void *)new_format);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -179,5 +240,3 @@ static int vlog_win32(const char *format, ...)
|
|||||||
|
|
||||||
|
|
||||||
#endif // _errorHelpers_h
|
#endif // _errorHelpers_h
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,89 +16,99 @@
|
|||||||
#ifndef _fpcontrol_h
|
#ifndef _fpcontrol_h
|
||||||
#define _fpcontrol_h
|
#define _fpcontrol_h
|
||||||
|
|
||||||
// In order to get tests for correctly rounded operations (e.g. multiply) to work properly we need to be able to set the reference hardware
|
// In order to get tests for correctly rounded operations (e.g. multiply) to
|
||||||
// to FTZ mode if the device hardware is running in that mode. We have explored all other options short of writing correctly rounded operations
|
// work properly we need to be able to set the reference hardware to FTZ mode if
|
||||||
// in integer code, and have found this is the only way to correctly verify operation.
|
// the device hardware is running in that mode. We have explored all other
|
||||||
|
// options short of writing correctly rounded operations in integer code, and
|
||||||
|
// have found this is the only way to correctly verify operation.
|
||||||
//
|
//
|
||||||
// Non-Apple implementations will need to provide their own implentation for these features. If the reference hardware and device are both
|
// Non-Apple implementations will need to provide their own implentation for
|
||||||
// running in the same state (either FTZ or IEEE compliant modes) then these functions may be empty. If the device is running in non-default
|
// these features. If the reference hardware and device are both running in the
|
||||||
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
|
// same state (either FTZ or IEEE compliant modes) then these functions may be
|
||||||
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
|
// empty. If the device is running in non-default rounding mode (e.g. round
|
||||||
typedef int FPU_mode_type;
|
// toward zero), then these functions should also set the reference device into
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
|
// that rounding mode.
|
||||||
#include <xmmintrin.h>
|
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \
|
||||||
#elif defined( __PPC__ )
|
|| defined(__MINGW32__)
|
||||||
#include <fpu_control.h>
|
typedef int FPU_mode_type;
|
||||||
extern __thread fpu_control_t fpu_control;
|
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||||
|
|| defined(__MINGW32__)
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#elif defined(__PPC__)
|
||||||
|
#include <fpu_control.h>
|
||||||
|
extern __thread fpu_control_t fpu_control;
|
||||||
#endif
|
#endif
|
||||||
// Set the reference hardware floating point unit to FTZ mode
|
// Set the reference hardware floating point unit to FTZ mode
|
||||||
static inline void ForceFTZ( FPU_mode_type *mode )
|
static inline void ForceFTZ(FPU_mode_type *mode)
|
||||||
{
|
{
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||||
|
|| defined(__MINGW32__)
|
||||||
*mode = _mm_getcsr();
|
*mode = _mm_getcsr();
|
||||||
_mm_setcsr( *mode | 0x8040);
|
_mm_setcsr(*mode | 0x8040);
|
||||||
#elif defined( __PPC__ )
|
#elif defined(__PPC__)
|
||||||
*mode = fpu_control;
|
*mode = fpu_control;
|
||||||
fpu_control |= _FPU_MASK_NI;
|
fpu_control |= _FPU_MASK_NI;
|
||||||
#elif defined ( __arm__ )
|
#elif defined(__arm__)
|
||||||
unsigned fpscr;
|
unsigned fpscr;
|
||||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*mode = fpscr;
|
||||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
|
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
|
||||||
// Add 64 bit support
|
// Add 64 bit support
|
||||||
#elif defined (__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
unsigned fpscr;
|
unsigned fpscr;
|
||||||
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
|
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*mode = fpscr;
|
||||||
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr | (1U << 24)));
|
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
|
||||||
#else
|
#else
|
||||||
#error ForceFTZ needs an implentation
|
#error ForceFTZ needs an implentation
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disable the denorm flush to zero
|
// Disable the denorm flush to zero
|
||||||
static inline void DisableFTZ( FPU_mode_type *mode )
|
static inline void DisableFTZ(FPU_mode_type *mode)
|
||||||
{
|
{
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||||
|
|| defined(__MINGW32__)
|
||||||
*mode = _mm_getcsr();
|
*mode = _mm_getcsr();
|
||||||
_mm_setcsr( *mode & ~0x8040);
|
_mm_setcsr(*mode & ~0x8040);
|
||||||
#elif defined( __PPC__ )
|
#elif defined(__PPC__)
|
||||||
*mode = fpu_control;
|
*mode = fpu_control;
|
||||||
fpu_control &= ~_FPU_MASK_NI;
|
fpu_control &= ~_FPU_MASK_NI;
|
||||||
#elif defined ( __arm__ )
|
#elif defined(__arm__)
|
||||||
unsigned fpscr;
|
unsigned fpscr;
|
||||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
__asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*mode = fpscr;
|
||||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
|
__asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||||
// Add 64 bit support
|
// Add 64 bit support
|
||||||
#elif defined (__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
unsigned fpscr;
|
unsigned fpscr;
|
||||||
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpscr));
|
__asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*mode = fpscr;
|
||||||
__asm__ volatile ("msr fpcr, %0" :: "r"(fpscr & ~(1U << 24)));
|
__asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
|
||||||
#else
|
#else
|
||||||
#error DisableFTZ needs an implentation
|
#error DisableFTZ needs an implentation
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore the reference hardware to floating point state indicated by *mode
|
// Restore the reference hardware to floating point state indicated by *mode
|
||||||
static inline void RestoreFPState( FPU_mode_type *mode )
|
static inline void RestoreFPState(FPU_mode_type *mode)
|
||||||
{
|
{
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined (__MINGW32__)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \
|
||||||
_mm_setcsr( *mode );
|
|| defined(__MINGW32__)
|
||||||
#elif defined( __PPC__)
|
_mm_setcsr(*mode);
|
||||||
|
#elif defined(__PPC__)
|
||||||
fpu_control = *mode;
|
fpu_control = *mode;
|
||||||
#elif defined (__arm__)
|
#elif defined(__arm__)
|
||||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
|
__asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
|
||||||
// Add 64 bit support
|
// Add 64 bit support
|
||||||
#elif defined (__aarch64__)
|
#elif defined(__aarch64__)
|
||||||
__asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
|
__asm__ volatile("msr fpcr, %0" ::"r"(*mode));
|
||||||
#else
|
#else
|
||||||
#error RestoreFPState needs an implementation
|
#error RestoreFPState needs an implementation
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#error ForceFTZ and RestoreFPState need implentations
|
#error ForceFTZ and RestoreFPState need implentations
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -21,33 +21,34 @@
|
|||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void * genericThread::IStaticReflector( void * data )
|
void *genericThread::IStaticReflector(void *data)
|
||||||
{
|
{
|
||||||
genericThread *t = (genericThread *)data;
|
genericThread *t = (genericThread *)data;
|
||||||
return t->IRun();
|
return t->IRun();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool genericThread::Start( void )
|
bool genericThread::Start(void)
|
||||||
{
|
{
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
mHandle = CreateThread( NULL, 0, (LPTHREAD_START_ROUTINE) IStaticReflector, this, 0, NULL );
|
mHandle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)IStaticReflector,
|
||||||
return ( mHandle != NULL );
|
this, 0, NULL);
|
||||||
|
return (mHandle != NULL);
|
||||||
#else // !_WIN32
|
#else // !_WIN32
|
||||||
int error = pthread_create( (pthread_t*)&mHandle, NULL, IStaticReflector, (void *)this );
|
int error = pthread_create((pthread_t *)&mHandle, NULL, IStaticReflector,
|
||||||
return ( error == 0 );
|
(void *)this);
|
||||||
|
return (error == 0);
|
||||||
#endif // !_WIN32
|
#endif // !_WIN32
|
||||||
}
|
}
|
||||||
|
|
||||||
void * genericThread::Join( void )
|
void *genericThread::Join(void)
|
||||||
{
|
{
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
WaitForSingleObject( (HANDLE)mHandle, INFINITE );
|
WaitForSingleObject((HANDLE)mHandle, INFINITE);
|
||||||
return NULL;
|
return NULL;
|
||||||
#else // !_WIN32
|
#else // !_WIN32
|
||||||
void * retVal;
|
void *retVal;
|
||||||
int error = pthread_join( (pthread_t)mHandle, &retVal );
|
int error = pthread_join((pthread_t)mHandle, &retVal);
|
||||||
if( error != 0 )
|
if (error != 0) retVal = NULL;
|
||||||
retVal = NULL;
|
|
||||||
return retVal;
|
return retVal;
|
||||||
#endif // !_WIN32
|
#endif // !_WIN32
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,25 +18,20 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
class genericThread
|
class genericThread {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
|
|
||||||
virtual ~genericThread() {}
|
virtual ~genericThread() {}
|
||||||
|
|
||||||
bool Start( void );
|
bool Start(void);
|
||||||
void * Join( void );
|
void* Join(void);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
virtual void* IRun(void) = 0;
|
||||||
virtual void * IRun( void ) = 0;
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
|
private:
|
||||||
void* mHandle;
|
void* mHandle;
|
||||||
|
|
||||||
static void * IStaticReflector( void * data );
|
static void* IStaticReflector(void* data);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // _genericThread_h
|
#endif // _genericThread_h
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -46,7 +46,8 @@
|
|||||||
extern cl_device_type gDeviceType;
|
extern cl_device_type gDeviceType;
|
||||||
extern bool gTestRounding;
|
extern bool gTestRounding;
|
||||||
|
|
||||||
// Number of iterations per image format to test if not testing max images, rounding, or small images
|
// Number of iterations per image format to test if not testing max images,
|
||||||
|
// rounding, or small images
|
||||||
#define NUM_IMAGE_ITERATIONS 3
|
#define NUM_IMAGE_ITERATIONS 3
|
||||||
|
|
||||||
|
|
||||||
@@ -55,51 +56,64 @@ extern bool gTestRounding;
|
|||||||
#define MAX_lRGB_TO_sRGB_CONVERSION_ERROR 0.6
|
#define MAX_lRGB_TO_sRGB_CONVERSION_ERROR 0.6
|
||||||
|
|
||||||
// Definition for our own sampler type, to mirror the cl_sampler internals
|
// Definition for our own sampler type, to mirror the cl_sampler internals
|
||||||
typedef struct {
|
typedef struct
|
||||||
|
{
|
||||||
cl_addressing_mode addressing_mode;
|
cl_addressing_mode addressing_mode;
|
||||||
cl_filter_mode filter_mode;
|
cl_filter_mode filter_mode;
|
||||||
bool normalized_coords;
|
bool normalized_coords;
|
||||||
} image_sampler_data;
|
} image_sampler_data;
|
||||||
|
|
||||||
int round_to_even( float v );
|
int round_to_even(float v);
|
||||||
|
|
||||||
#define NORMALIZE( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : round_to_even( v * max ) ) )
|
#define NORMALIZE(v, max) (v < 0 ? 0 : (v > 1.f ? max : round_to_even(v * max)))
|
||||||
#define NORMALIZE_UNROUNDED( v, max ) ( v < 0 ? 0 : ( v > 1.f ? max : v * max ) )
|
#define NORMALIZE_UNROUNDED(v, max) (v < 0 ? 0 : (v > 1.f ? max : v * max))
|
||||||
#define NORMALIZE_SIGNED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : round_to_even( v * max ) ) )
|
#define NORMALIZE_SIGNED(v, min, max) \
|
||||||
#define NORMALIZE_SIGNED_UNROUNDED( v, min, max ) ( v < -1.0f ? min : ( v > 1.f ? max : v * max ) )
|
(v < -1.0f ? min : (v > 1.f ? max : round_to_even(v * max)))
|
||||||
#define CONVERT_INT( v, min, max, max_val) ( v < min ? min : ( v > max ? max_val : round_to_even( v ) ) )
|
#define NORMALIZE_SIGNED_UNROUNDED(v, min, max) \
|
||||||
#define CONVERT_UINT( v, max, max_val) ( v < 0 ? 0 : ( v > max ? max_val : round_to_even( v ) ) )
|
(v < -1.0f ? min : (v > 1.f ? max : v * max))
|
||||||
|
#define CONVERT_INT(v, min, max, max_val) \
|
||||||
|
(v < min ? min : (v > max ? max_val : round_to_even(v)))
|
||||||
|
#define CONVERT_UINT(v, max, max_val) \
|
||||||
|
(v < 0 ? 0 : (v > max ? max_val : round_to_even(v)))
|
||||||
|
|
||||||
extern void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err = false, int t = 0 );
|
extern void print_read_header(cl_image_format *format,
|
||||||
extern void print_write_header( cl_image_format *format, bool err);
|
image_sampler_data *sampler, bool err = false,
|
||||||
extern void print_header( cl_image_format *format, bool err );
|
int t = 0);
|
||||||
extern bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind );
|
extern void print_write_header(cl_image_format *format, bool err);
|
||||||
extern bool is_image_format_required(cl_image_format format,
|
extern void print_header(cl_image_format *format, bool err);
|
||||||
cl_mem_flags flags,
|
extern bool find_format(cl_image_format *formatList, unsigned int numFormats,
|
||||||
|
cl_image_format *formatToFind);
|
||||||
|
extern bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
|
||||||
cl_mem_object_type image_type,
|
cl_mem_object_type image_type,
|
||||||
cl_device_id device);
|
cl_device_id device);
|
||||||
extern void build_required_image_formats(cl_mem_flags flags,
|
extern void
|
||||||
cl_mem_object_type image_type,
|
build_required_image_formats(cl_mem_flags flags, cl_mem_object_type image_type,
|
||||||
cl_device_id device,
|
cl_device_id device,
|
||||||
std::vector<cl_image_format>& formatsToSupport);
|
std::vector<cl_image_format> &formatsToSupport);
|
||||||
|
|
||||||
extern uint32_t get_format_type_size(const cl_image_format *format);
|
extern uint32_t get_format_type_size(const cl_image_format *format);
|
||||||
extern uint32_t get_channel_data_type_size(cl_channel_type channelType);
|
extern uint32_t get_channel_data_type_size(cl_channel_type channelType);
|
||||||
extern uint32_t get_format_channel_count(const cl_image_format *format);
|
extern uint32_t get_format_channel_count(const cl_image_format *format);
|
||||||
extern uint32_t get_channel_order_channel_count(cl_channel_order order);
|
extern uint32_t get_channel_order_channel_count(cl_channel_order order);
|
||||||
cl_channel_type get_channel_type_from_name( const char *name );
|
cl_channel_type get_channel_type_from_name(const char *name);
|
||||||
cl_channel_order get_channel_order_from_name( const char *name );
|
cl_channel_order get_channel_order_from_name(const char *name);
|
||||||
extern int is_format_signed( const cl_image_format *format );
|
extern int is_format_signed(const cl_image_format *format);
|
||||||
extern uint32_t get_pixel_size(cl_image_format *format);
|
extern uint32_t get_pixel_size(cl_image_format *format);
|
||||||
|
|
||||||
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
|
/* Helper to get any ol image format as long as it is 8-bits-per-channel */
|
||||||
extern int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
extern int get_8_bit_image_format(cl_context context,
|
||||||
|
cl_mem_object_type objType,
|
||||||
|
cl_mem_flags flags, size_t channelCount,
|
||||||
|
cl_image_format *outFormat);
|
||||||
|
|
||||||
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
|
/* Helper to get any ol image format as long as it is 32-bits-per-channel */
|
||||||
extern int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat );
|
extern int get_32_bit_image_format(cl_context context,
|
||||||
|
cl_mem_object_type objType,
|
||||||
|
cl_mem_flags flags, size_t channelCount,
|
||||||
|
cl_image_format *outFormat);
|
||||||
|
|
||||||
int random_in_range( int minV, int maxV, MTdata d );
|
int random_in_range(int minV, int maxV, MTdata d);
|
||||||
int random_log_in_range( int minV, int maxV, MTdata d );
|
int random_log_in_range(int minV, int maxV, MTdata d);
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
@@ -118,81 +132,110 @@ typedef struct
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
float p[4];
|
float p[4];
|
||||||
}FloatPixel;
|
} FloatPixel;
|
||||||
|
|
||||||
void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
|
void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
|
||||||
size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
|
size_t sizes[][3], size_t maxWidth, size_t maxHeight,
|
||||||
const cl_ulong maxIndividualAllocSize, const cl_ulong maxTotalAllocSize, cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSize=0);
|
size_t maxDepth, size_t maxArraySize,
|
||||||
extern size_t get_format_max_int( cl_image_format *format );
|
const cl_ulong maxIndividualAllocSize,
|
||||||
|
const cl_ulong maxTotalAllocSize,
|
||||||
|
cl_mem_object_type image_type, cl_image_format *format,
|
||||||
|
int usingMaxPixelSize = 0);
|
||||||
|
extern size_t get_format_max_int(cl_image_format *format);
|
||||||
|
|
||||||
extern cl_ulong get_image_size( image_descriptor const *imageInfo );
|
extern cl_ulong get_image_size(image_descriptor const *imageInfo);
|
||||||
extern cl_ulong get_image_size_mb( image_descriptor const *imageInfo );
|
extern cl_ulong get_image_size_mb(image_descriptor const *imageInfo);
|
||||||
|
|
||||||
extern char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &Owner, MTdata d );
|
extern char *generate_random_image_data(image_descriptor *imageInfo,
|
||||||
|
BufferOwningPtr<char> &Owner, MTdata d);
|
||||||
|
|
||||||
extern int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
|
extern int debug_find_vector_in_image(void *imagePtr,
|
||||||
void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod = 0 );
|
image_descriptor *imageInfo,
|
||||||
|
void *vectorToFind, size_t vectorSize,
|
||||||
|
int *outX, int *outY, int *outZ,
|
||||||
|
size_t lod = 0);
|
||||||
|
|
||||||
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
extern int debug_find_pixel_in_image(void *imagePtr,
|
||||||
unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
|
image_descriptor *imageInfo,
|
||||||
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
unsigned int *valuesToFind, int *outX,
|
||||||
int *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
|
int *outY, int *outZ, int lod = 0);
|
||||||
extern int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
|
extern int debug_find_pixel_in_image(void *imagePtr,
|
||||||
float *valuesToFind, int *outX, int *outY, int *outZ, int lod = 0 );
|
image_descriptor *imageInfo,
|
||||||
|
int *valuesToFind, int *outX, int *outY,
|
||||||
|
int *outZ, int lod = 0);
|
||||||
|
extern int debug_find_pixel_in_image(void *imagePtr,
|
||||||
|
image_descriptor *imageInfo,
|
||||||
|
float *valuesToFind, int *outX, int *outY,
|
||||||
|
int *outZ, int lod = 0);
|
||||||
|
|
||||||
extern void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
|
extern void copy_image_data(image_descriptor *srcImageInfo,
|
||||||
const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] );
|
image_descriptor *dstImageInfo, void *imageValues,
|
||||||
|
void *destImageValues, const size_t sourcePos[],
|
||||||
|
const size_t destPos[], const size_t regionSize[]);
|
||||||
|
|
||||||
int has_alpha(cl_image_format *format);
|
int has_alpha(cl_image_format *format);
|
||||||
|
|
||||||
extern bool is_sRGBA_order(cl_channel_order image_channel_order);
|
extern bool is_sRGBA_order(cl_channel_order image_channel_order);
|
||||||
|
|
||||||
inline float calculate_array_index( float coord, float extent );
|
inline float calculate_array_index(float coord, float extent);
|
||||||
|
|
||||||
cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth);
|
cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth);
|
||||||
cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo);
|
cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo);
|
||||||
size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod);
|
size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod);
|
||||||
|
|
||||||
template <class T> void read_image_pixel( void *imageData, image_descriptor *imageInfo,
|
template <class T>
|
||||||
int x, int y, int z, T *outData, int lod )
|
void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
|
||||||
|
int y, int z, T *outData, int lod)
|
||||||
{
|
{
|
||||||
size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth, slice_pitch_lod = 0/*imageInfo->slicePitch*/ , row_pitch_lod = 0/*imageInfo->rowPitch*/;
|
size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
|
||||||
width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod):1;
|
depth_lod = imageInfo->depth,
|
||||||
|
slice_pitch_lod = 0 /*imageInfo->slicePitch*/,
|
||||||
|
row_pitch_lod = 0 /*imageInfo->rowPitch*/;
|
||||||
|
width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
|
||||||
|
|
||||||
if ( imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY && imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY
|
||||||
height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod):1;
|
&& imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
|
||||||
|
height_lod =
|
||||||
|
(imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
|
||||||
|
|
||||||
if(imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
|
if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D)
|
||||||
depth_lod = ( imageInfo->depth >> lod) ? ( imageInfo->depth >> lod) : 1;
|
depth_lod = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
|
||||||
row_pitch_lod = (imageInfo->num_mip_levels > 0)? (width_lod * get_pixel_size( imageInfo->format )): imageInfo->rowPitch;
|
row_pitch_lod = (imageInfo->num_mip_levels > 0)
|
||||||
slice_pitch_lod = (imageInfo->num_mip_levels > 0)? (row_pitch_lod * height_lod): imageInfo->slicePitch;
|
? (width_lod * get_pixel_size(imageInfo->format))
|
||||||
|
: imageInfo->rowPitch;
|
||||||
|
slice_pitch_lod = (imageInfo->num_mip_levels > 0)
|
||||||
|
? (row_pitch_lod * height_lod)
|
||||||
|
: imageInfo->slicePitch;
|
||||||
|
|
||||||
// correct depth_lod and height_lod for array image types in order to avoid
|
// correct depth_lod and height_lod for array image types in order to avoid
|
||||||
// return
|
// return
|
||||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1 && depth_lod == 1) {
|
if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY && height_lod == 1
|
||||||
|
&& depth_lod == 1)
|
||||||
|
{
|
||||||
depth_lod = 0;
|
depth_lod = 0;
|
||||||
height_lod = 0;
|
height_lod = 0;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1) {
|
if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY && depth_lod == 1)
|
||||||
|
{
|
||||||
depth_lod = 0;
|
depth_lod = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( x < 0 || x >= (int)width_lod
|
if (x < 0 || x >= (int)width_lod
|
||||||
|| ( height_lod != 0 && ( y < 0 || y >= (int)height_lod ) )
|
|| (height_lod != 0 && (y < 0 || y >= (int)height_lod))
|
||||||
|| ( depth_lod != 0 && ( z < 0 || z >= (int)depth_lod ) )
|
|| (depth_lod != 0 && (z < 0 || z >= (int)depth_lod))
|
||||||
|| ( imageInfo->arraySize != 0 && ( z < 0 || z >= (int)imageInfo->arraySize ) ) )
|
|| (imageInfo->arraySize != 0
|
||||||
|
&& (z < 0 || z >= (int)imageInfo->arraySize)))
|
||||||
{
|
{
|
||||||
// Border color
|
// Border color
|
||||||
if (imageInfo->format->image_channel_order == CL_DEPTH)
|
if (imageInfo->format->image_channel_order == CL_DEPTH)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = 1;
|
outData[0] = 1;
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
|
{
|
||||||
if (!has_alpha(imageInfo->format))
|
outData[0] = outData[1] = outData[2] = outData[3] = 0;
|
||||||
outData[3] = 1;
|
if (!has_alpha(imageInfo->format)) outData[3] = 1;
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -200,78 +243,70 @@ template <class T> void read_image_pixel( void *imageData, image_descriptor *ima
|
|||||||
cl_image_format *format = imageInfo->format;
|
cl_image_format *format = imageInfo->format;
|
||||||
|
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
T tempData[ 4 ];
|
T tempData[4];
|
||||||
|
|
||||||
// Advance to the right spot
|
// Advance to the right spot
|
||||||
char *ptr = (char *)imageData;
|
char *ptr = (char *)imageData;
|
||||||
size_t pixelSize = get_pixel_size( format );
|
size_t pixelSize = get_pixel_size(format);
|
||||||
|
|
||||||
ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
|
ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
|
||||||
|
|
||||||
// OpenCL only supports reading floats from certain formats
|
// OpenCL only supports reading floats from certain formats
|
||||||
switch( format->image_channel_data_type )
|
switch (format->image_channel_data_type)
|
||||||
{
|
|
||||||
case CL_SNORM_INT8:
|
|
||||||
{
|
{
|
||||||
|
case CL_SNORM_INT8: {
|
||||||
cl_char *dPtr = (cl_char *)ptr;
|
cl_char *dPtr = (cl_char *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNORM_INT8:
|
case CL_UNORM_INT8: {
|
||||||
{
|
|
||||||
cl_uchar *dPtr = (cl_uchar *)ptr;
|
cl_uchar *dPtr = (cl_uchar *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_SIGNED_INT8:
|
case CL_SIGNED_INT8: {
|
||||||
{
|
|
||||||
cl_char *dPtr = (cl_char *)ptr;
|
cl_char *dPtr = (cl_char *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNSIGNED_INT8:
|
case CL_UNSIGNED_INT8: {
|
||||||
{
|
cl_uchar *dPtr = (cl_uchar *)ptr;
|
||||||
cl_uchar *dPtr = (cl_uchar*)ptr;
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
tempData[i] = (T)dPtr[i];
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_SNORM_INT16:
|
case CL_SNORM_INT16: {
|
||||||
{
|
|
||||||
cl_short *dPtr = (cl_short *)ptr;
|
cl_short *dPtr = (cl_short *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNORM_INT16:
|
case CL_UNORM_INT16: {
|
||||||
{
|
|
||||||
cl_ushort *dPtr = (cl_ushort *)ptr;
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_SIGNED_INT16:
|
case CL_SIGNED_INT16: {
|
||||||
{
|
|
||||||
cl_short *dPtr = (cl_short *)ptr;
|
cl_short *dPtr = (cl_short *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNSIGNED_INT16:
|
case CL_UNSIGNED_INT16: {
|
||||||
{
|
|
||||||
cl_ushort *dPtr = (cl_ushort *)ptr;
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -282,210 +317,202 @@ template <class T> void read_image_pixel( void *imageData, image_descriptor *ima
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_SIGNED_INT32:
|
case CL_SIGNED_INT32: {
|
||||||
{
|
|
||||||
cl_int *dPtr = (cl_int *)ptr;
|
cl_int *dPtr = (cl_int *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNSIGNED_INT32:
|
case CL_UNSIGNED_INT32: {
|
||||||
{
|
|
||||||
cl_uint *dPtr = (cl_uint *)ptr;
|
cl_uint *dPtr = (cl_uint *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNORM_SHORT_565:
|
case CL_UNORM_SHORT_565: {
|
||||||
{
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||||
cl_ushort *dPtr = (cl_ushort*)ptr;
|
tempData[0] = (T)(dPtr[0] >> 11);
|
||||||
tempData[ 0 ] = (T)( dPtr[ 0 ] >> 11 );
|
tempData[1] = (T)((dPtr[0] >> 5) & 63);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
|
tempData[2] = (T)(dPtr[0] & 31);
|
||||||
tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef OBSOLETE_FORMAT
|
#ifdef OBSOLETE_FORMAT
|
||||||
case CL_UNORM_SHORT_565_REV:
|
case CL_UNORM_SHORT_565_REV: {
|
||||||
{
|
|
||||||
unsigned short *dPtr = (unsigned short *)ptr;
|
unsigned short *dPtr = (unsigned short *)ptr;
|
||||||
tempData[ 2 ] = (T)( dPtr[ 0 ] >> 11 );
|
tempData[2] = (T)(dPtr[0] >> 11);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 63 );
|
tempData[1] = (T)((dPtr[0] >> 5) & 63);
|
||||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
|
tempData[0] = (T)(dPtr[0] & 31);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNORM_SHORT_555_REV:
|
case CL_UNORM_SHORT_555_REV: {
|
||||||
{
|
|
||||||
unsigned short *dPtr = (unsigned short *)ptr;
|
unsigned short *dPtr = (unsigned short *)ptr;
|
||||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
|
tempData[2] = (T)((dPtr[0] >> 10) & 31);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
|
tempData[1] = (T)((dPtr[0] >> 5) & 31);
|
||||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 31 );
|
tempData[0] = (T)(dPtr[0] & 31);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNORM_INT_8888:
|
case CL_UNORM_INT_8888: {
|
||||||
{
|
|
||||||
unsigned int *dPtr = (unsigned int *)ptr;
|
unsigned int *dPtr = (unsigned int *)ptr;
|
||||||
tempData[ 3 ] = (T)( dPtr[ 0 ] >> 24 );
|
tempData[3] = (T)(dPtr[0] >> 24);
|
||||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
|
tempData[2] = (T)((dPtr[0] >> 16) & 0xff);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
|
tempData[1] = (T)((dPtr[0] >> 8) & 0xff);
|
||||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 0xff );
|
tempData[0] = (T)(dPtr[0] & 0xff);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case CL_UNORM_INT_8888_REV:
|
case CL_UNORM_INT_8888_REV: {
|
||||||
{
|
|
||||||
unsigned int *dPtr = (unsigned int *)ptr;
|
unsigned int *dPtr = (unsigned int *)ptr;
|
||||||
tempData[ 0 ] = (T)( dPtr[ 0 ] >> 24 );
|
tempData[0] = (T)(dPtr[0] >> 24);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 16 ) & 0xff );
|
tempData[1] = (T)((dPtr[0] >> 16) & 0xff);
|
||||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 8 ) & 0xff );
|
tempData[2] = (T)((dPtr[0] >> 8) & 0xff);
|
||||||
tempData[ 3 ] = (T)( dPtr[ 0 ] & 0xff );
|
tempData[3] = (T)(dPtr[0] & 0xff);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNORM_INT_101010_REV:
|
case CL_UNORM_INT_101010_REV: {
|
||||||
{
|
|
||||||
unsigned int *dPtr = (unsigned int *)ptr;
|
unsigned int *dPtr = (unsigned int *)ptr;
|
||||||
tempData[ 2 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
|
tempData[2] = (T)((dPtr[0] >> 20) & 0x3ff);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
|
tempData[1] = (T)((dPtr[0] >> 10) & 0x3ff);
|
||||||
tempData[ 0 ] = (T)( dPtr[ 0 ] & 0x3ff );
|
tempData[0] = (T)(dPtr[0] & 0x3ff);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
case CL_UNORM_SHORT_555:
|
case CL_UNORM_SHORT_555: {
|
||||||
{
|
|
||||||
cl_ushort *dPtr = (cl_ushort *)ptr;
|
cl_ushort *dPtr = (cl_ushort *)ptr;
|
||||||
tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 31 );
|
tempData[0] = (T)((dPtr[0] >> 10) & 31);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 5 ) & 31 );
|
tempData[1] = (T)((dPtr[0] >> 5) & 31);
|
||||||
tempData[ 2 ] = (T)( dPtr[ 0 ] & 31 );
|
tempData[2] = (T)(dPtr[0] & 31);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_UNORM_INT_101010:
|
case CL_UNORM_INT_101010: {
|
||||||
{
|
|
||||||
cl_uint *dPtr = (cl_uint *)ptr;
|
cl_uint *dPtr = (cl_uint *)ptr;
|
||||||
tempData[ 0 ] = (T)( ( dPtr[ 0 ] >> 20 ) & 0x3ff );
|
tempData[0] = (T)((dPtr[0] >> 20) & 0x3ff);
|
||||||
tempData[ 1 ] = (T)( ( dPtr[ 0 ] >> 10 ) & 0x3ff );
|
tempData[1] = (T)((dPtr[0] >> 10) & 0x3ff);
|
||||||
tempData[ 2 ] = (T)( dPtr[ 0 ] & 0x3ff );
|
tempData[2] = (T)(dPtr[0] & 0x3ff);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case CL_FLOAT:
|
case CL_FLOAT: {
|
||||||
{
|
|
||||||
cl_float *dPtr = (cl_float *)ptr;
|
cl_float *dPtr = (cl_float *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ];
|
tempData[i] = (T)dPtr[i];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#ifdef CL_SFIXED14_APPLE
|
#ifdef CL_SFIXED14_APPLE
|
||||||
case CL_SFIXED14_APPLE:
|
case CL_SFIXED14_APPLE: {
|
||||||
{
|
|
||||||
cl_float *dPtr = (cl_float *)ptr;
|
cl_float *dPtr = (cl_float *)ptr;
|
||||||
for( i = 0; i < get_format_channel_count( format ); i++ )
|
for (i = 0; i < get_format_channel_count(format); i++)
|
||||||
tempData[ i ] = (T)dPtr[ i ] + 0x4000;
|
tempData[i] = (T)dPtr[i] + 0x4000;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
|
outData[0] = outData[1] = outData[2] = 0;
|
||||||
outData[ 3 ] = 1;
|
outData[3] = 1;
|
||||||
|
|
||||||
if( format->image_channel_order == CL_A )
|
if (format->image_channel_order == CL_A)
|
||||||
{
|
{
|
||||||
outData[ 3 ] = tempData[ 0 ];
|
outData[3] = tempData[0];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_R )
|
else if (format->image_channel_order == CL_R)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_Rx )
|
else if (format->image_channel_order == CL_Rx)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_RA )
|
else if (format->image_channel_order == CL_RA)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
outData[ 3 ] = tempData[ 1 ];
|
outData[3] = tempData[1];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_RG )
|
else if (format->image_channel_order == CL_RG)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
outData[ 1 ] = tempData[ 1 ];
|
outData[1] = tempData[1];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_RGx )
|
else if (format->image_channel_order == CL_RGx)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
outData[ 1 ] = tempData[ 1 ];
|
outData[1] = tempData[1];
|
||||||
}
|
}
|
||||||
else if(( format->image_channel_order == CL_RGB ) || ( format->image_channel_order == CL_sRGB ))
|
else if ((format->image_channel_order == CL_RGB)
|
||||||
|
|| (format->image_channel_order == CL_sRGB))
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
outData[ 1 ] = tempData[ 1 ];
|
outData[1] = tempData[1];
|
||||||
outData[ 2 ] = tempData[ 2 ];
|
outData[2] = tempData[2];
|
||||||
}
|
}
|
||||||
else if(( format->image_channel_order == CL_RGBx ) || ( format->image_channel_order == CL_sRGBx ))
|
else if ((format->image_channel_order == CL_RGBx)
|
||||||
|
|| (format->image_channel_order == CL_sRGBx))
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
outData[ 1 ] = tempData[ 1 ];
|
outData[1] = tempData[1];
|
||||||
outData[ 2 ] = tempData[ 2 ];
|
outData[2] = tempData[2];
|
||||||
outData[ 3 ] = 0;
|
outData[3] = 0;
|
||||||
}
|
}
|
||||||
else if(( format->image_channel_order == CL_RGBA ) || ( format->image_channel_order == CL_sRGBA ))
|
else if ((format->image_channel_order == CL_RGBA)
|
||||||
|
|| (format->image_channel_order == CL_sRGBA))
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
outData[ 1 ] = tempData[ 1 ];
|
outData[1] = tempData[1];
|
||||||
outData[ 2 ] = tempData[ 2 ];
|
outData[2] = tempData[2];
|
||||||
outData[ 3 ] = tempData[ 3 ];
|
outData[3] = tempData[3];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_ARGB )
|
else if (format->image_channel_order == CL_ARGB)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 1 ];
|
outData[0] = tempData[1];
|
||||||
outData[ 1 ] = tempData[ 2 ];
|
outData[1] = tempData[2];
|
||||||
outData[ 2 ] = tempData[ 3 ];
|
outData[2] = tempData[3];
|
||||||
outData[ 3 ] = tempData[ 0 ];
|
outData[3] = tempData[0];
|
||||||
}
|
}
|
||||||
else if(( format->image_channel_order == CL_BGRA ) || ( format->image_channel_order == CL_sBGRA ))
|
else if ((format->image_channel_order == CL_BGRA)
|
||||||
|
|| (format->image_channel_order == CL_sBGRA))
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 2 ];
|
outData[0] = tempData[2];
|
||||||
outData[ 1 ] = tempData[ 1 ];
|
outData[1] = tempData[1];
|
||||||
outData[ 2 ] = tempData[ 0 ];
|
outData[2] = tempData[0];
|
||||||
outData[ 3 ] = tempData[ 3 ];
|
outData[3] = tempData[3];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_INTENSITY )
|
else if (format->image_channel_order == CL_INTENSITY)
|
||||||
{
|
{
|
||||||
outData[ 1 ] = tempData[ 0 ];
|
outData[1] = tempData[0];
|
||||||
outData[ 2 ] = tempData[ 0 ];
|
outData[2] = tempData[0];
|
||||||
outData[ 3 ] = tempData[ 0 ];
|
outData[3] = tempData[0];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_LUMINANCE )
|
else if (format->image_channel_order == CL_LUMINANCE)
|
||||||
{
|
{
|
||||||
outData[ 1 ] = tempData[ 0 ];
|
outData[1] = tempData[0];
|
||||||
outData[ 2 ] = tempData[ 0 ];
|
outData[2] = tempData[0];
|
||||||
}
|
}
|
||||||
else if( format->image_channel_order == CL_DEPTH )
|
else if (format->image_channel_order == CL_DEPTH)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 0 ];
|
outData[0] = tempData[0];
|
||||||
}
|
}
|
||||||
#ifdef CL_1RGB_APPLE
|
#ifdef CL_1RGB_APPLE
|
||||||
else if( format->image_channel_order == CL_1RGB_APPLE )
|
else if (format->image_channel_order == CL_1RGB_APPLE)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 1 ];
|
outData[0] = tempData[1];
|
||||||
outData[ 1 ] = tempData[ 2 ];
|
outData[1] = tempData[2];
|
||||||
outData[ 2 ] = tempData[ 3 ];
|
outData[2] = tempData[3];
|
||||||
outData[ 3 ] = 0xff;
|
outData[3] = 0xff;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#ifdef CL_BGR1_APPLE
|
#ifdef CL_BGR1_APPLE
|
||||||
else if( format->image_channel_order == CL_BGR1_APPLE )
|
else if (format->image_channel_order == CL_BGR1_APPLE)
|
||||||
{
|
{
|
||||||
outData[ 0 ] = tempData[ 2 ];
|
outData[0] = tempData[2];
|
||||||
outData[ 1 ] = tempData[ 1 ];
|
outData[1] = tempData[1];
|
||||||
outData[ 2 ] = tempData[ 0 ];
|
outData[2] = tempData[0];
|
||||||
outData[ 3 ] = 0xff;
|
outData[3] = 0xff;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
@@ -495,27 +522,32 @@ template <class T> void read_image_pixel( void *imageData, image_descriptor *ima
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Overload of read_image_pixel without a lod argument: forwards every
// argument unchanged to the lod-taking overload and supplies a lod of 0.
template <class T>
void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
                      int y, int z, T *outData)
{
    const int baseLod = 0;
    read_image_pixel<T>(imageData, imageInfo, x, y, z, outData, baseLod);
}
|
||||||
|
|
||||||
// Stupid template rules
|
// Stupid template rules
|
||||||
bool get_integer_coords( float x, float y, float z,
|
bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
|
||||||
|
size_t depth, image_sampler_data *imageSampler,
|
||||||
|
image_descriptor *imageInfo, int &outX, int &outY,
|
||||||
|
int &outZ);
|
||||||
|
bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
|
||||||
|
float yAddressOffset, float zAddressOffset,
|
||||||
size_t width, size_t height, size_t depth,
|
size_t width, size_t height, size_t depth,
|
||||||
image_sampler_data *imageSampler, image_descriptor *imageInfo,
|
image_sampler_data *imageSampler,
|
||||||
int &outX, int &outY, int &outZ );
|
image_descriptor *imageInfo, int &outX,
|
||||||
bool get_integer_coords_offset( float x, float y, float z,
|
int &outY, int &outZ);
|
||||||
float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
|
||||||
size_t width, size_t height, size_t depth,
|
|
||||||
image_sampler_data *imageSampler, image_descriptor *imageInfo,
|
|
||||||
int &outX, int &outY, int &outZ );
|
|
||||||
|
|
||||||
|
|
||||||
template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
|
template <class T>
|
||||||
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
void sample_image_pixel_offset(void *imageData, image_descriptor *imageInfo,
|
||||||
image_sampler_data *imageSampler, T *outData, int lod )
|
float x, float y, float z, float xAddressOffset,
|
||||||
|
float yAddressOffset, float zAddressOffset,
|
||||||
|
image_sampler_data *imageSampler, T *outData,
|
||||||
|
int lod)
|
||||||
{
|
{
|
||||||
int iX = 0, iY = 0, iZ = 0;
|
int iX = 0, iY = 0, iZ = 0;
|
||||||
|
|
||||||
@@ -523,7 +555,8 @@ template <class T> void sample_image_pixel_offset( void *imageData, image_descri
|
|||||||
float max_h;
|
float max_h;
|
||||||
float max_d;
|
float max_d;
|
||||||
|
|
||||||
switch (imageInfo->type) {
|
switch (imageInfo->type)
|
||||||
|
{
|
||||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||||
max_h = imageInfo->arraySize;
|
max_h = imageInfo->arraySize;
|
||||||
max_d = 0;
|
max_d = 0;
|
||||||
@@ -538,103 +571,136 @@ template <class T> void sample_image_pixel_offset( void *imageData, image_descri
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
|
if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
|
||||||
|
{
|
||||||
|
switch (imageInfo->type)
|
||||||
{
|
{
|
||||||
switch (imageInfo->type) {
|
|
||||||
case CL_MEM_OBJECT_IMAGE3D:
|
case CL_MEM_OBJECT_IMAGE3D:
|
||||||
max_d = (float)((imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1);
|
max_d = (float)((imageInfo->depth >> lod)
|
||||||
|
? (imageInfo->depth >> lod)
|
||||||
|
: 1);
|
||||||
case CL_MEM_OBJECT_IMAGE2D:
|
case CL_MEM_OBJECT_IMAGE2D:
|
||||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||||
max_h = (float)((imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1);
|
max_h = (float)((imageInfo->height >> lod)
|
||||||
|
? (imageInfo->height >> lod)
|
||||||
|
: 1);
|
||||||
break;
|
break;
|
||||||
default:
|
default:;
|
||||||
;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
max_w = (float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
|
max_w =
|
||||||
|
(float)((imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1);
|
||||||
}
|
}
|
||||||
get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, max_w, max_h, max_d, imageSampler, imageInfo, iX, iY, iZ );
|
get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
|
||||||
|
zAddressOffset, max_w, max_h, max_d, imageSampler,
|
||||||
|
imageInfo, iX, iY, iZ);
|
||||||
|
|
||||||
read_image_pixel<T>( imageData, imageInfo, iX, iY, iZ, outData, lod );
|
read_image_pixel<T>(imageData, imageInfo, iX, iY, iZ, outData, lod);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T> void sample_image_pixel_offset( void *imageData, image_descriptor *imageInfo,
|
template <class T>
|
||||||
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
void sample_image_pixel_offset(void *imageData, image_descriptor *imageInfo,
|
||||||
|
float x, float y, float z, float xAddressOffset,
|
||||||
|
float yAddressOffset, float zAddressOffset,
|
||||||
image_sampler_data *imageSampler, T *outData)
|
image_sampler_data *imageSampler, T *outData)
|
||||||
{
|
{
|
||||||
sample_image_pixel_offset<T>( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
|
sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, xAddressOffset,
|
||||||
imageSampler, outData, 0);
|
yAddressOffset, zAddressOffset, imageSampler,
|
||||||
|
outData, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class T> void sample_image_pixel( void *imageData, image_descriptor *imageInfo,
|
template <class T>
|
||||||
float x, float y, float z, image_sampler_data *imageSampler, T *outData )
|
void sample_image_pixel(void *imageData, image_descriptor *imageInfo, float x,
|
||||||
|
float y, float z, image_sampler_data *imageSampler,
|
||||||
|
T *outData)
|
||||||
{
|
{
|
||||||
return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData);
|
return sample_image_pixel_offset<T>(imageData, imageInfo, x, y, z, 0.0f,
|
||||||
|
0.0f, 0.0f, imageSampler, outData);
|
||||||
}
|
}
|
||||||
|
|
||||||
FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
|
FloatPixel
|
||||||
float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
|
sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
|
||||||
|
float y, float z, image_sampler_data *imageSampler,
|
||||||
|
float *outData, int verbose, int *containsDenorms);
|
||||||
|
|
||||||
FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
|
FloatPixel sample_image_pixel_float(void *imageData,
|
||||||
float x, float y, float z, image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
|
image_descriptor *imageInfo, float x,
|
||||||
|
float y, float z,
|
||||||
|
image_sampler_data *imageSampler,
|
||||||
|
float *outData, int verbose,
|
||||||
|
int *containsDenorms, int lod);
|
||||||
|
|
||||||
FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
|
FloatPixel sample_image_pixel_float_offset(
|
||||||
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
void *imageData, image_descriptor *imageInfo, float x, float y, float z,
|
||||||
image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms );
|
float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
||||||
FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
|
image_sampler_data *imageSampler, float *outData, int verbose,
|
||||||
float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
int *containsDenorms);
|
||||||
image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms, int lod );
|
FloatPixel sample_image_pixel_float_offset(
|
||||||
|
void *imageData, image_descriptor *imageInfo, float x, float y, float z,
|
||||||
|
float xAddressOffset, float yAddressOffset, float zAddressOffset,
|
||||||
|
image_sampler_data *imageSampler, float *outData, int verbose,
|
||||||
|
int *containsDenorms, int lod);
|
||||||
|
|
||||||
|
|
||||||
extern void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData );
|
extern void pack_image_pixel(unsigned int *srcVector,
|
||||||
extern void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData );
|
const cl_image_format *imageFormat, void *outData);
|
||||||
extern void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData );
|
extern void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
|
||||||
extern void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors );
|
void *outData);
|
||||||
|
extern void pack_image_pixel(float *srcVector,
|
||||||
|
const cl_image_format *imageFormat, void *outData);
|
||||||
|
extern void pack_image_pixel_error(const float *srcVector,
|
||||||
|
const cl_image_format *imageFormat,
|
||||||
|
const void *results, float *errors);
|
||||||
|
|
||||||
extern char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer = false );
|
extern char *create_random_image_data(ExplicitType dataType,
|
||||||
|
image_descriptor *imageInfo,
|
||||||
|
BufferOwningPtr<char> &P, MTdata d,
|
||||||
|
bool image2DFromBuffer = false);
|
||||||
|
|
||||||
// deprecated
|
// deprecated
|
||||||
//extern bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue );
|
// extern bool clamp_image_coord( image_sampler_data *imageSampler, float value,
|
||||||
|
// size_t max, int &outValue );
|
||||||
|
|
||||||
extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
|
extern void get_sampler_kernel_code(image_sampler_data *imageSampler,
|
||||||
extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
|
char *outLine);
|
||||||
extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
|
extern float get_max_absolute_error(cl_image_format *format,
|
||||||
|
image_sampler_data *sampler);
|
||||||
|
extern float get_max_relative_error(cl_image_format *format,
|
||||||
|
image_sampler_data *sampler, int is3D,
|
||||||
|
int isLinearFilter);
|
||||||
|
|
||||||
|
|
||||||
#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
|
#define errMax(_x, _y) ((_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y))
|
||||||
|
|
||||||
static inline cl_uint abs_diff_uint( cl_uint x, cl_uint y )
|
static inline cl_uint abs_diff_uint(cl_uint x, cl_uint y)
|
||||||
{
|
{
|
||||||
return y > x ? y - x : x - y;
|
return y > x ? y - x : x - y;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline cl_uint abs_diff_int( cl_int x, cl_int y )
|
static inline cl_uint abs_diff_int(cl_int x, cl_int y)
|
||||||
{
|
{
|
||||||
return (cl_uint) (y > x ? y - x : x - y);
|
return (cl_uint)(y > x ? y - x : x - y);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline cl_float relative_error( float test, float expected )
|
static inline cl_float relative_error(float test, float expected)
|
||||||
{
|
{
|
||||||
// 0-0/0 is 0 in this case, not NaN
|
// 0-0/0 is 0 in this case, not NaN
|
||||||
if( test == 0.0f && expected == 0.0f )
|
if (test == 0.0f && expected == 0.0f) return 0.0f;
|
||||||
return 0.0f;
|
|
||||||
|
|
||||||
return (test - expected) / expected;
|
return (test - expected) / expected;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern float random_float(float low, float high);
|
extern float random_float(float low, float high);
|
||||||
|
|
||||||
class CoordWalker
|
class CoordWalker {
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
CoordWalker( void * coords, bool useFloats, size_t vecSize );
|
CoordWalker(void *coords, bool useFloats, size_t vecSize);
|
||||||
~CoordWalker();
|
~CoordWalker();
|
||||||
|
|
||||||
cl_float Get( size_t idx, size_t el );
|
cl_float Get(size_t idx, size_t el);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
cl_float * mFloatCoords;
|
cl_float *mFloatCoords;
|
||||||
cl_int * mIntCoords;
|
cl_int *mIntCoords;
|
||||||
size_t mVecSize;
|
size_t mVecSize;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -25,16 +25,16 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#if defined (__MINGW32__)
|
#if defined(__MINGW32__)
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#include <OpenCL/opencl.h>
|
#include <OpenCL/opencl.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/opencl.h>
|
#include <CL/opencl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "deviceInfo.h"
|
#include "deviceInfo.h"
|
||||||
@@ -43,7 +43,8 @@
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The below code is intended to be used at the top of kernels that appear inline in files to set line and file info for the kernel:
|
* The below code is intended to be used at the top of kernels that appear
|
||||||
|
* inline in files to set line and file info for the kernel:
|
||||||
*
|
*
|
||||||
* const char *source = {
|
* const char *source = {
|
||||||
* INIT_OPENCL_DEBUG_INFO
|
* INIT_OPENCL_DEBUG_INFO
|
||||||
@@ -53,104 +54,115 @@
|
|||||||
* "}\n"
|
* "}\n"
|
||||||
* };
|
* };
|
||||||
*/
|
*/
|
||||||
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO( __LINE__, __FILE__ )
|
#define INIT_OPENCL_DEBUG_INFO SET_OPENCL_LINE_INFO(__LINE__, __FILE__)
|
||||||
#define SET_OPENCL_LINE_INFO(_line, _file) "#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
|
#define SET_OPENCL_LINE_INFO(_line, _file) \
|
||||||
|
"#line " STRINGIFY(_line) " " STRINGIFY(_file) "\n"
|
||||||
#ifndef STRINGIFY_VALUE
|
#ifndef STRINGIFY_VALUE
|
||||||
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
|
#define STRINGIFY_VALUE(_x) STRINGIFY(_x)
|
||||||
#endif
|
#endif
|
||||||
#ifndef STRINGIFY
|
#ifndef STRINGIFY
|
||||||
#define STRINGIFY(_x) #_x
|
#define STRINGIFY(_x) #_x
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const int MAX_LEN_FOR_KERNEL_LIST = 20;
|
const int MAX_LEN_FOR_KERNEL_LIST = 20;
|
||||||
|
|
||||||
/* Helper that creates a single program and kernel from a single-kernel program source */
|
/* Helper that creates a single program and kernel from a single-kernel program
|
||||||
extern int create_single_kernel_helper(cl_context context,
|
* source */
|
||||||
cl_program *outProgram,
|
extern int
|
||||||
cl_kernel *outKernel,
|
create_single_kernel_helper(cl_context context, cl_program *outProgram,
|
||||||
unsigned int numKernelLines,
|
cl_kernel *outKernel, unsigned int numKernelLines,
|
||||||
const char **kernelProgram,
|
const char **kernelProgram, const char *kernelName,
|
||||||
const char *kernelName,
|
|
||||||
const char *buildOptions = NULL,
|
const char *buildOptions = NULL,
|
||||||
const bool openclCXX = false);
|
const bool openclCXX = false);
|
||||||
|
|
||||||
extern int create_single_kernel_helper_with_build_options(cl_context context,
|
extern int create_single_kernel_helper_with_build_options(
|
||||||
cl_program *outProgram,
|
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
||||||
cl_kernel *outKernel,
|
unsigned int numKernelLines, const char **kernelProgram,
|
||||||
unsigned int numKernelLines,
|
const char *kernelName, const char *buildOptions,
|
||||||
const char **kernelProgram,
|
|
||||||
const char *kernelName,
|
|
||||||
const char *buildOptions,
|
|
||||||
const bool openclCXX = false);
|
const bool openclCXX = false);
|
||||||
|
|
||||||
extern int create_single_kernel_helper_create_program(cl_context context,
|
extern int create_single_kernel_helper_create_program(
|
||||||
cl_program *outProgram,
|
cl_context context, cl_program *outProgram, unsigned int numKernelLines,
|
||||||
unsigned int numKernelLines,
|
const char **kernelProgram, const char *buildOptions = NULL,
|
||||||
const char **kernelProgram,
|
|
||||||
const char *buildOptions = NULL,
|
|
||||||
const bool openclCXX = false);
|
const bool openclCXX = false);
|
||||||
|
|
||||||
extern int create_single_kernel_helper_create_program_for_device(cl_context context,
|
extern int create_single_kernel_helper_create_program_for_device(
|
||||||
cl_device_id device,
|
cl_context context, cl_device_id device, cl_program *outProgram,
|
||||||
cl_program *outProgram,
|
unsigned int numKernelLines, const char **kernelProgram,
|
||||||
unsigned int numKernelLines,
|
const char *buildOptions = NULL, const bool openclCXX = false);
|
||||||
const char **kernelProgram,
|
|
||||||
const char *buildOptions = NULL,
|
|
||||||
const bool openclCXX = false);
|
|
||||||
|
|
||||||
/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++ program. */
|
/* Creates OpenCL C++ program. This one must be used for creating OpenCL C++
|
||||||
extern int create_openclcpp_program(cl_context context,
|
* program. */
|
||||||
cl_program *outProgram,
|
extern int create_openclcpp_program(cl_context context, cl_program *outProgram,
|
||||||
unsigned int numKernelLines,
|
unsigned int numKernelLines,
|
||||||
const char **kernelProgram,
|
const char **kernelProgram,
|
||||||
const char *buildOptions = NULL);
|
const char *buildOptions = NULL);
|
||||||
|
|
||||||
/* Builds program (outProgram) and creates one kernel */
|
/* Builds program (outProgram) and creates one kernel */
|
||||||
int build_program_create_kernel_helper(cl_context context,
|
int build_program_create_kernel_helper(
|
||||||
cl_program *outProgram,
|
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
||||||
cl_kernel *outKernel,
|
unsigned int numKernelLines, const char **kernelProgram,
|
||||||
unsigned int numKernelLines,
|
const char *kernelName, const char *buildOptions = NULL);
|
||||||
const char **kernelProgram,
|
|
||||||
const char *kernelName,
|
|
||||||
const char *buildOptions = NULL);
|
|
||||||
|
|
||||||
/* Helper to obtain the biggest fit work group size for all the devices in a given group and for the given global thread size */
|
/* Helper to obtain the biggest fit work group size for all the devices in a
|
||||||
extern int get_max_common_work_group_size( cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outSize );
|
* given group and for the given global thread size */
|
||||||
|
extern int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
|
||||||
|
size_t globalThreadSize,
|
||||||
|
size_t *outSize);
|
||||||
|
|
||||||
/* Helper to obtain the biggest fit 2D work group size for all devices in a given group and for the given global thread size */
|
/* Helper to obtain the biggest fit 2D work group size for all devices in a
|
||||||
extern int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
* given group and for the given global thread size */
|
||||||
|
extern int get_max_common_2D_work_group_size(cl_context context,
|
||||||
|
cl_kernel kernel,
|
||||||
|
size_t *globalThreadSize,
|
||||||
|
size_t *outSizes);
|
||||||
|
|
||||||
/* Helper to obtain the biggest fit 3D work group size for all devices in a given group and for the given global thread size */
|
/* Helper to obtain the biggest fit 3D work group size for all devices in a
|
||||||
extern int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel, size_t *globalThreadSize, size_t *outSizes );
|
* given group and for the given global thread size */
|
||||||
|
extern int get_max_common_3D_work_group_size(cl_context context,
|
||||||
|
cl_kernel kernel,
|
||||||
|
size_t *globalThreadSize,
|
||||||
|
size_t *outSizes);
|
||||||
|
|
||||||
/* Helper to obtain the biggest allowed work group size for all the devices in a given group */
|
/* Helper to obtain the biggest allowed work group size for all the devices in a
|
||||||
extern int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outSize, size_t *outLimits );
|
* given group */
|
||||||
|
extern int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
|
||||||
|
size_t *outSize, size_t *outLimits);
|
||||||
|
|
||||||
/* Helper to obtain the biggest allowed 1D work group size on a given device */
|
/* Helper to obtain the biggest allowed 1D work group size on a given device */
|
||||||
extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize );
|
extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
|
||||||
|
cl_kernel kernel,
|
||||||
|
size_t *outSize);
|
||||||
|
|
||||||
/* Helper to determine if a device supports an image format */
|
/* Helper to determine if a device supports an image format */
|
||||||
extern int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt );
|
extern int is_image_format_supported(cl_context context, cl_mem_flags flags,
|
||||||
|
cl_mem_object_type image_type,
|
||||||
|
const cl_image_format *fmt);
|
||||||
|
|
||||||
/* Helper to get pixel size for a pixel format */
|
/* Helper to get pixel size for a pixel format */
|
||||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
size_t get_pixel_bytes(const cl_image_format *fmt);
|
||||||
|
|
||||||
/* Verify the given device supports images. */
|
/* Verify the given device supports images. */
|
||||||
extern test_status verifyImageSupport( cl_device_id device );
|
extern test_status verifyImageSupport(cl_device_id device);
|
||||||
|
|
||||||
/* Checks that the given device supports images. Same as verify, but doesn't print an error */
|
/* Checks that the given device supports images. Same as verify, but doesn't
|
||||||
extern int checkForImageSupport( cl_device_id device );
|
* print an error */
|
||||||
extern int checkFor3DImageSupport( cl_device_id device );
|
extern int checkForImageSupport(cl_device_id device);
|
||||||
|
extern int checkFor3DImageSupport(cl_device_id device);
|
||||||
extern int checkForReadWriteImageSupport(cl_device_id device);
|
extern int checkForReadWriteImageSupport(cl_device_id device);
|
||||||
|
|
||||||
/* Checks that a given queue property is supported on the specified device. Returns 1 if supported, 0 if not or an error. */
|
/* Checks that a given queue property is supported on the specified device.
|
||||||
extern int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop );
|
* Returns 1 if supported, 0 if not or an error. */
|
||||||
|
extern int checkDeviceForQueueSupport(cl_device_id device,
|
||||||
|
cl_command_queue_properties prop);
|
||||||
|
|
||||||
/* Helper to obtain the min alignment for a given context, i.e. the max of all min alignments for devices attached to the context */
|
/* Helper to obtain the min alignment for a given context, i.e. the max of all
|
||||||
|
* min alignments for devices attached to the context */
|
||||||
size_t get_min_alignment(cl_context context);
|
size_t get_min_alignment(cl_context context);
|
||||||
|
|
||||||
/* Helper to obtain the default rounding mode for single precision computation. (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
|
/* Helper to obtain the default rounding mode for single precision computation.
|
||||||
cl_device_fp_config get_default_rounding_mode( cl_device_id device );
|
* (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
|
||||||
|
cl_device_fp_config get_default_rounding_mode(cl_device_id device);
|
||||||
|
|
||||||
#define PASSIVE_REQUIRE_IMAGE_SUPPORT(device) \
|
#define PASSIVE_REQUIRE_IMAGE_SUPPORT(device) \
|
||||||
if (checkForImageSupport(device)) \
|
if (checkForImageSupport(device)) \
|
||||||
@@ -176,8 +188,9 @@ cl_device_fp_config get_default_rounding_mode( cl_device_id device );
|
|||||||
return TEST_SKIPPED_ITSELF; \
|
return TEST_SKIPPED_ITSELF; \
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Prints out the standard device header for all tests given the device to print for */
|
/* Prints out the standard device header for all tests given the device to print
|
||||||
extern int printDeviceHeader( cl_device_id device );
|
* for */
|
||||||
|
extern int printDeviceHeader(cl_device_id device);
|
||||||
|
|
||||||
// Execute the CL_DEVICE_OPENCL_C_VERSION query and return the OpenCL C version
|
// Execute the CL_DEVICE_OPENCL_C_VERSION query and return the OpenCL C version
|
||||||
// that is supported by the device.
|
// that is supported by the device.
|
||||||
|
|||||||
@@ -19,33 +19,36 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
//This function is unavailable on various mingw compilers,
|
// This function is unavailable on various mingw compilers,
|
||||||
//especially 64 bit so implementing it here
|
// especially 64 bit so implementing it here
|
||||||
const char *basename_dot=".";
|
const char *basename_dot = ".";
|
||||||
char*
|
char *basename(char *path)
|
||||||
basename(char *path)
|
|
||||||
{
|
{
|
||||||
char *p = path, *b = NULL;
|
char *p = path, *b = NULL;
|
||||||
int len = strlen(path);
|
int len = strlen(path);
|
||||||
|
|
||||||
if (path == NULL) {
|
if (path == NULL)
|
||||||
return (char*)basename_dot;
|
{
|
||||||
|
return (char *)basename_dot;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not absolute path on windows
|
// Not absolute path on windows
|
||||||
if (path[1] != ':') {
|
if (path[1] != ':')
|
||||||
|
{
|
||||||
return path;
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trim trailing path separators
|
// Trim trailing path separators
|
||||||
if (path[len - 1] == '\\' ||
|
if (path[len - 1] == '\\' || path[len - 1] == '/')
|
||||||
path[len - 1] == '/' ) {
|
{
|
||||||
len--;
|
len--;
|
||||||
path[len] = '\0';
|
path[len] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
while (len) {
|
while (len)
|
||||||
while((*p != '\\' || *p != '/') && len) {
|
{
|
||||||
|
while ((*p != '\\' || *p != '/') && len)
|
||||||
|
{
|
||||||
p++;
|
p++;
|
||||||
len--;
|
len--;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ char *basename(char *path);
|
|||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
|
||||||
#if defined(__MINGW64__)
|
#if defined(__MINGW64__)
|
||||||
//mingw-w64 does not have __mingw_aligned_malloc, instead it has _aligned_malloc
|
// mingw-w64 does not have __mingw_aligned_malloc, instead it has _aligned_malloc
|
||||||
#define __mingw_aligned_malloc _aligned_malloc
|
#define __mingw_aligned_malloc _aligned_malloc
|
||||||
#define __mingw_aligned_free _aligned_free
|
#define __mingw_aligned_free _aligned_free
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
//
|
//
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
|
|
||||||
#if defined ( _MSC_VER )
|
#if defined(_MSC_VER)
|
||||||
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
|
||||||
#if _MSC_VER < 1900 && ! defined( __INTEL_COMPILER )
|
#if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
@@ -32,9 +32,12 @@
|
|||||||
//
|
//
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
float copysignf( float x, float y )
|
float copysignf(float x, float y)
|
||||||
{
|
{
|
||||||
union{ cl_uint u; float f; }ux, uy;
|
union {
|
||||||
|
cl_uint u;
|
||||||
|
float f;
|
||||||
|
} ux, uy;
|
||||||
|
|
||||||
ux.f = x;
|
ux.f = x;
|
||||||
uy.f = y;
|
uy.f = y;
|
||||||
@@ -44,9 +47,12 @@ float copysignf( float x, float y )
|
|||||||
return ux.f;
|
return ux.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
double copysign( double x, double y )
|
double copysign(double x, double y)
|
||||||
{
|
{
|
||||||
union{ cl_ulong u; double f; }ux, uy;
|
union {
|
||||||
|
cl_ulong u;
|
||||||
|
double f;
|
||||||
|
} ux, uy;
|
||||||
|
|
||||||
ux.f = x;
|
ux.f = x;
|
||||||
uy.f = y;
|
uy.f = y;
|
||||||
@@ -56,13 +62,16 @@ double copysign( double x, double y )
|
|||||||
return ux.f;
|
return ux.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
long double copysignl( long double x, long double y )
|
long double copysignl(long double x, long double y)
|
||||||
{
|
{
|
||||||
union
|
union {
|
||||||
{
|
|
||||||
long double f;
|
long double f;
|
||||||
struct{ cl_ulong m; cl_ushort sexp; }u;
|
struct
|
||||||
}ux, uy;
|
{
|
||||||
|
cl_ulong m;
|
||||||
|
cl_ushort sexp;
|
||||||
|
} u;
|
||||||
|
} ux, uy;
|
||||||
|
|
||||||
ux.f = x;
|
ux.f = x;
|
||||||
uy.f = y;
|
uy.f = y;
|
||||||
@@ -76,12 +85,12 @@ float rintf(float x)
|
|||||||
{
|
{
|
||||||
float absx = fabsf(x);
|
float absx = fabsf(x);
|
||||||
|
|
||||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
if (absx < 8388608.0f /* 0x1.0p23f */)
|
||||||
{
|
{
|
||||||
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
|
float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
|
||||||
float rounded = x + magic;
|
float rounded = x + magic;
|
||||||
rounded -= magic;
|
rounded -= magic;
|
||||||
x = copysignf( rounded, x );
|
x = copysignf(rounded, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
@@ -91,12 +100,12 @@ double rint(double x)
|
|||||||
{
|
{
|
||||||
double absx = fabs(x);
|
double absx = fabs(x);
|
||||||
|
|
||||||
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
|
if (absx < 4503599627370496.0 /* 0x1.0p52f */)
|
||||||
{
|
{
|
||||||
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
|
double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
|
||||||
double rounded = x + magic;
|
double rounded = x + magic;
|
||||||
rounded -= magic;
|
rounded -= magic;
|
||||||
x = copysign( rounded, x );
|
x = copysign(rounded, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
@@ -106,12 +115,13 @@ long double rintl(long double x)
|
|||||||
{
|
{
|
||||||
double absx = fabs(x);
|
double absx = fabs(x);
|
||||||
|
|
||||||
if( absx < 9223372036854775808.0L /* 0x1.0p63L */ )
|
if (absx < 9223372036854775808.0L /* 0x1.0p63L */)
|
||||||
{
|
{
|
||||||
long double magic = copysignl( 9223372036854775808.0L /* 0x1.0p63L */, x );
|
long double magic =
|
||||||
|
copysignl(9223372036854775808.0L /* 0x1.0p63L */, x);
|
||||||
long double rounded = x + magic;
|
long double rounded = x + magic;
|
||||||
rounded -= magic;
|
rounded -= magic;
|
||||||
x = copysignl( rounded, x );
|
x = copysignl(rounded, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
@@ -125,30 +135,31 @@ long double rintl(long double x)
|
|||||||
//
|
//
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
#ifndef FP_ILOGB0
|
#ifndef FP_ILOGB0
|
||||||
#define FP_ILOGB0 INT_MIN
|
#define FP_ILOGB0 INT_MIN
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef FP_ILOGBNAN
|
#ifndef FP_ILOGBNAN
|
||||||
#define FP_ILOGBNAN INT_MIN
|
#define FP_ILOGBNAN INT_MIN
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int ilogb (double x)
|
int ilogb(double x)
|
||||||
{
|
{
|
||||||
union{ double f; cl_ulong u;} u;
|
union {
|
||||||
|
double f;
|
||||||
|
cl_ulong u;
|
||||||
|
} u;
|
||||||
u.f = x;
|
u.f = x;
|
||||||
|
|
||||||
cl_ulong absx = u.u & CL_LONG_MAX;
|
cl_ulong absx = u.u & CL_LONG_MAX;
|
||||||
if( absx - 0x0001000000000000ULL >= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
|
if (absx - 0x0001000000000000ULL
|
||||||
|
>= 0x7ff0000000000000ULL - 0x0001000000000000ULL)
|
||||||
{
|
{
|
||||||
switch( absx )
|
switch (absx)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0: return FP_ILOGB0;
|
||||||
return FP_ILOGB0;
|
case 0x7ff0000000000000ULL: return INT_MAX;
|
||||||
case 0x7ff0000000000000ULL:
|
|
||||||
return INT_MAX;
|
|
||||||
default:
|
default:
|
||||||
if( absx > 0x7ff0000000000000ULL )
|
if (absx > 0x7ff0000000000000ULL) return FP_ILOGBNAN;
|
||||||
return FP_ILOGBNAN;
|
|
||||||
|
|
||||||
// subnormal
|
// subnormal
|
||||||
u.u = absx | 0x3ff0000000000000ULL;
|
u.u = absx | 0x3ff0000000000000ULL;
|
||||||
@@ -161,23 +172,23 @@ int ilogb (double x)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int ilogbf (float x)
|
int ilogbf(float x)
|
||||||
{
|
{
|
||||||
union{ float f; cl_uint u;} u;
|
union {
|
||||||
|
float f;
|
||||||
|
cl_uint u;
|
||||||
|
} u;
|
||||||
u.f = x;
|
u.f = x;
|
||||||
|
|
||||||
cl_uint absx = u.u & 0x7fffffff;
|
cl_uint absx = u.u & 0x7fffffff;
|
||||||
if( absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
|
if (absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
|
||||||
{
|
{
|
||||||
switch( absx )
|
switch (absx)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0: return FP_ILOGB0;
|
||||||
return FP_ILOGB0;
|
case 0x7f800000U: return INT_MAX;
|
||||||
case 0x7f800000U:
|
|
||||||
return INT_MAX;
|
|
||||||
default:
|
default:
|
||||||
if( absx > 0x7f800000 )
|
if (absx > 0x7f800000) return FP_ILOGBNAN;
|
||||||
return FP_ILOGBNAN;
|
|
||||||
|
|
||||||
// subnormal
|
// subnormal
|
||||||
u.u = absx | 0x3f800000U;
|
u.u = absx | 0x3f800000U;
|
||||||
@@ -189,32 +200,33 @@ int ilogbf (float x)
|
|||||||
return (absx >> 23) - 127;
|
return (absx >> 23) - 127;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ilogbl (long double x)
|
int ilogbl(long double x)
|
||||||
{
|
{
|
||||||
union
|
union {
|
||||||
{
|
|
||||||
long double f;
|
long double f;
|
||||||
struct{ cl_ulong m; cl_ushort sexp; }u;
|
struct
|
||||||
|
{
|
||||||
|
cl_ulong m;
|
||||||
|
cl_ushort sexp;
|
||||||
|
} u;
|
||||||
} u;
|
} u;
|
||||||
u.f = x;
|
u.f = x;
|
||||||
|
|
||||||
int exp = u.u.sexp & 0x7fff;
|
int exp = u.u.sexp & 0x7fff;
|
||||||
if( 0 == exp )
|
if (0 == exp)
|
||||||
{
|
{
|
||||||
if( 0 == u.u.m )
|
if (0 == u.u.m) return FP_ILOGB0;
|
||||||
return FP_ILOGB0;
|
|
||||||
|
|
||||||
//subnormal
|
// subnormal
|
||||||
u.u.sexp = 0x3fff;
|
u.u.sexp = 0x3fff;
|
||||||
u.f -= 1.0f;
|
u.f -= 1.0f;
|
||||||
exp = u.u.sexp & 0x7fff;
|
exp = u.u.sexp & 0x7fff;
|
||||||
|
|
||||||
return exp - (0x3fff + 0x3ffe);
|
return exp - (0x3fff + 0x3ffe);
|
||||||
}
|
}
|
||||||
else if( 0x7fff == exp )
|
else if (0x7fff == exp)
|
||||||
{
|
{
|
||||||
if( u.u.m & CL_LONG_MAX )
|
if (u.u.m & CL_LONG_MAX) return FP_ILOGBNAN;
|
||||||
return FP_ILOGBNAN;
|
|
||||||
|
|
||||||
return INT_MAX;
|
return INT_MAX;
|
||||||
}
|
}
|
||||||
@@ -232,7 +244,10 @@ int ilogbl (long double x)
|
|||||||
|
|
||||||
static void GET_BITS_SP32(float fx, unsigned int* ux)
|
static void GET_BITS_SP32(float fx, unsigned int* ux)
|
||||||
{
|
{
|
||||||
volatile union {float f; unsigned int u;} _bitsy;
|
volatile union {
|
||||||
|
float f;
|
||||||
|
unsigned int u;
|
||||||
|
} _bitsy;
|
||||||
_bitsy.f = (fx);
|
_bitsy.f = (fx);
|
||||||
*ux = _bitsy.u;
|
*ux = _bitsy.u;
|
||||||
}
|
}
|
||||||
@@ -244,7 +259,10 @@ static void GET_BITS_SP32(float fx, unsigned int* ux)
|
|||||||
/* } */
|
/* } */
|
||||||
static void PUT_BITS_SP32(unsigned int ux, float* fx)
|
static void PUT_BITS_SP32(unsigned int ux, float* fx)
|
||||||
{
|
{
|
||||||
volatile union {float f; unsigned int u;} _bitsy;
|
volatile union {
|
||||||
|
float f;
|
||||||
|
unsigned int u;
|
||||||
|
} _bitsy;
|
||||||
_bitsy.u = (ux);
|
_bitsy.u = (ux);
|
||||||
*fx = _bitsy.f;
|
*fx = _bitsy.f;
|
||||||
}
|
}
|
||||||
@@ -256,13 +274,19 @@ static void PUT_BITS_SP32(unsigned int ux, float* fx)
|
|||||||
/* } */
|
/* } */
|
||||||
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
|
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
|
||||||
{
|
{
|
||||||
volatile union {double d; unsigned __int64 l;} _bitsy;
|
volatile union {
|
||||||
|
double d;
|
||||||
|
unsigned __int64 l;
|
||||||
|
} _bitsy;
|
||||||
_bitsy.d = (dx);
|
_bitsy.d = (dx);
|
||||||
*lx = _bitsy.l;
|
*lx = _bitsy.l;
|
||||||
}
|
}
|
||||||
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
|
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
|
||||||
{
|
{
|
||||||
volatile union {double d; unsigned __int64 l;} _bitsy;
|
volatile union {
|
||||||
|
double d;
|
||||||
|
unsigned __int64 l;
|
||||||
|
} _bitsy;
|
||||||
_bitsy.l = (lx);
|
_bitsy.l = (lx);
|
||||||
*dx = _bitsy.d;
|
*dx = _bitsy.d;
|
||||||
}
|
}
|
||||||
@@ -287,8 +311,7 @@ int SIGNBIT_DP64(double x )
|
|||||||
that x is NaN; gcc does. */
|
that x is NaN; gcc does. */
|
||||||
double fmax(double x, double y)
|
double fmax(double x, double y)
|
||||||
{
|
{
|
||||||
if( isnan(y) )
|
if (isnan(y)) return x;
|
||||||
return x;
|
|
||||||
|
|
||||||
return x >= y ? x : y;
|
return x >= y ? x : y;
|
||||||
}
|
}
|
||||||
@@ -301,17 +324,15 @@ double fmax(double x, double y)
|
|||||||
|
|
||||||
double fmin(double x, double y)
|
double fmin(double x, double y)
|
||||||
{
|
{
|
||||||
if( isnan(y) )
|
if (isnan(y)) return x;
|
||||||
return x;
|
|
||||||
|
|
||||||
return x <= y ? x : y;
|
return x <= y ? x : y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
float fmaxf( float x, float y )
|
float fmaxf(float x, float y)
|
||||||
{
|
{
|
||||||
if( isnan(y) )
|
if (isnan(y)) return x;
|
||||||
return x;
|
|
||||||
|
|
||||||
return x >= y ? x : y;
|
return x >= y ? x : y;
|
||||||
}
|
}
|
||||||
@@ -323,31 +344,31 @@ float fmaxf( float x, float y )
|
|||||||
|
|
||||||
float fminf(float x, float y)
|
float fminf(float x, float y)
|
||||||
{
|
{
|
||||||
if( isnan(y) )
|
if (isnan(y)) return x;
|
||||||
return x;
|
|
||||||
|
|
||||||
return x <= y ? x : y;
|
return x <= y ? x : y;
|
||||||
}
|
}
|
||||||
|
|
||||||
long double scalblnl(long double x, long n)
|
long double scalblnl(long double x, long n)
|
||||||
{
|
{
|
||||||
union
|
union {
|
||||||
{
|
|
||||||
long double d;
|
long double d;
|
||||||
struct{ cl_ulong m; cl_ushort sexp;}u;
|
struct
|
||||||
}u;
|
{
|
||||||
|
cl_ulong m;
|
||||||
|
cl_ushort sexp;
|
||||||
|
} u;
|
||||||
|
} u;
|
||||||
u.u.m = CL_LONG_MIN;
|
u.u.m = CL_LONG_MIN;
|
||||||
|
|
||||||
if( x == 0.0L || n < -2200)
|
if (x == 0.0L || n < -2200) return copysignl(0.0L, x);
|
||||||
return copysignl( 0.0L, x );
|
|
||||||
|
|
||||||
if( n > 2200 )
|
if (n > 2200) return INFINITY;
|
||||||
return INFINITY;
|
|
||||||
|
|
||||||
if( n < 0 )
|
if (n < 0)
|
||||||
{
|
{
|
||||||
u.u.sexp = 0x3fff - 1022;
|
u.u.sexp = 0x3fff - 1022;
|
||||||
while( n <= -1022 )
|
while (n <= -1022)
|
||||||
{
|
{
|
||||||
x *= u.d;
|
x *= u.d;
|
||||||
n += 1022;
|
n += 1022;
|
||||||
@@ -357,10 +378,10 @@ long double scalblnl(long double x, long n)
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( n > 0 )
|
if (n > 0)
|
||||||
{
|
{
|
||||||
u.u.sexp = 0x3fff + 1023;
|
u.u.sexp = 0x3fff + 1023;
|
||||||
while( n >= 1023 )
|
while (n >= 1023)
|
||||||
{
|
{
|
||||||
x *= u.d;
|
x *= u.d;
|
||||||
n -= 1023;
|
n -= 1023;
|
||||||
@@ -381,12 +402,9 @@ long double scalblnl(long double x, long n)
|
|||||||
const static cl_double log_e_base2 = 1.4426950408889634074;
|
const static cl_double log_e_base2 = 1.4426950408889634074;
|
||||||
const static cl_double log_10_base2 = 3.3219280948873623478;
|
const static cl_double log_10_base2 = 3.3219280948873623478;
|
||||||
|
|
||||||
//double log10(double x);
|
// double log10(double x);
|
||||||
|
|
||||||
double log2(double x)
|
double log2(double x) { return 1.44269504088896340735992468100189214 * log(x); }
|
||||||
{
|
|
||||||
return 1.44269504088896340735992468100189214 * log(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
long double log2l(long double x)
|
long double log2l(long double x)
|
||||||
{
|
{
|
||||||
@@ -397,10 +415,10 @@ double trunc(double x)
|
|||||||
{
|
{
|
||||||
double absx = fabs(x);
|
double absx = fabs(x);
|
||||||
|
|
||||||
if( absx < 4503599627370496.0 /* 0x1.0p52f */ )
|
if (absx < 4503599627370496.0 /* 0x1.0p52f */)
|
||||||
{
|
{
|
||||||
cl_long rounded = x;
|
cl_long rounded = x;
|
||||||
x = copysign( (double) rounded, x );
|
x = copysign((double)rounded, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
@@ -410,10 +428,10 @@ float truncf(float x)
|
|||||||
{
|
{
|
||||||
float absx = fabsf(x);
|
float absx = fabsf(x);
|
||||||
|
|
||||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
if (absx < 8388608.0f /* 0x1.0p23f */)
|
||||||
{
|
{
|
||||||
cl_int rounded = x;
|
cl_int rounded = x;
|
||||||
x = copysignf( (float) rounded, x );
|
x = copysignf((float)rounded, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
@@ -423,57 +441,52 @@ long lround(double x)
|
|||||||
{
|
{
|
||||||
double absx = fabs(x);
|
double absx = fabs(x);
|
||||||
|
|
||||||
if( absx < 0.5 )
|
if (absx < 0.5) return 0;
|
||||||
return 0;
|
|
||||||
|
|
||||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
|
if (absx < 4503599627370496.0 /* 0x1.0p52 */)
|
||||||
{
|
{
|
||||||
absx += 0.5;
|
absx += 0.5;
|
||||||
cl_long rounded = absx;
|
cl_long rounded = absx;
|
||||||
absx = rounded;
|
absx = rounded;
|
||||||
x = copysign( absx, x );
|
x = copysign(absx, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( x >= (double) LONG_MAX )
|
if (x >= (double)LONG_MAX) return LONG_MAX;
|
||||||
return LONG_MAX;
|
|
||||||
|
|
||||||
return (long) x;
|
return (long)x;
|
||||||
}
|
}
|
||||||
|
|
||||||
long lroundf(float x)
|
long lroundf(float x)
|
||||||
{
|
{
|
||||||
float absx = fabsf(x);
|
float absx = fabsf(x);
|
||||||
|
|
||||||
if( absx < 0.5f )
|
if (absx < 0.5f) return 0;
|
||||||
return 0;
|
|
||||||
|
|
||||||
if( absx < 8388608.0f )
|
if (absx < 8388608.0f)
|
||||||
{
|
{
|
||||||
absx += 0.5f;
|
absx += 0.5f;
|
||||||
cl_int rounded = absx;
|
cl_int rounded = absx;
|
||||||
absx = rounded;
|
absx = rounded;
|
||||||
x = copysignf( absx, x );
|
x = copysignf(absx, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
if( x >= (float) LONG_MAX )
|
if (x >= (float)LONG_MAX) return LONG_MAX;
|
||||||
return LONG_MAX;
|
|
||||||
|
|
||||||
return (long) x;
|
return (long)x;
|
||||||
}
|
}
|
||||||
|
|
||||||
double round(double x)
|
double round(double x)
|
||||||
{
|
{
|
||||||
double absx = fabs(x);
|
double absx = fabs(x);
|
||||||
|
|
||||||
if( absx < 0.5 )
|
if (absx < 0.5) return copysign(0.0, x);
|
||||||
return copysign( 0.0, x);
|
|
||||||
|
|
||||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */)
|
if (absx < 4503599627370496.0 /* 0x1.0p52 */)
|
||||||
{
|
{
|
||||||
absx += 0.5;
|
absx += 0.5;
|
||||||
cl_long rounded = absx;
|
cl_long rounded = absx;
|
||||||
absx = rounded;
|
absx = rounded;
|
||||||
x = copysign( absx, x );
|
x = copysign(absx, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
@@ -483,15 +496,14 @@ float roundf(float x)
|
|||||||
{
|
{
|
||||||
float absx = fabsf(x);
|
float absx = fabsf(x);
|
||||||
|
|
||||||
if( absx < 0.5f )
|
if (absx < 0.5f) return copysignf(0.0f, x);
|
||||||
return copysignf( 0.0f, x);
|
|
||||||
|
|
||||||
if( absx < 8388608.0f )
|
if (absx < 8388608.0f)
|
||||||
{
|
{
|
||||||
absx += 0.5f;
|
absx += 0.5f;
|
||||||
cl_int rounded = absx;
|
cl_int rounded = absx;
|
||||||
absx = rounded;
|
absx = rounded;
|
||||||
x = copysignf( absx, x );
|
x = copysignf(absx, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
@@ -501,65 +513,59 @@ long double roundl(long double x)
|
|||||||
{
|
{
|
||||||
long double absx = fabsl(x);
|
long double absx = fabsl(x);
|
||||||
|
|
||||||
if( absx < 0.5L )
|
if (absx < 0.5L) return copysignl(0.0L, x);
|
||||||
return copysignl( 0.0L, x);
|
|
||||||
|
|
||||||
if( absx < 9223372036854775808.0L /*0x1.0p63L*/ )
|
if (absx < 9223372036854775808.0L /*0x1.0p63L*/)
|
||||||
{
|
{
|
||||||
absx += 0.5L;
|
absx += 0.5L;
|
||||||
cl_ulong rounded = absx;
|
cl_ulong rounded = absx;
|
||||||
absx = rounded;
|
absx = rounded;
|
||||||
x = copysignl( absx, x );
|
x = copysignl(absx, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
float cbrtf( float x )
|
float cbrtf(float x)
|
||||||
{
|
{
|
||||||
float z = pow( fabs((double) x), 1.0 / 3.0 );
|
float z = pow(fabs((double)x), 1.0 / 3.0);
|
||||||
return copysignf( z, x );
|
return copysignf(z, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
double cbrt( double x )
|
double cbrt(double x) { return copysign(pow(fabs(x), 1.0 / 3.0), x); }
|
||||||
{
|
|
||||||
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
|
|
||||||
}
|
|
||||||
|
|
||||||
long int lrint (double x)
|
long int lrint(double x)
|
||||||
{
|
{
|
||||||
double absx = fabs(x);
|
double absx = fabs(x);
|
||||||
|
|
||||||
if( x >= (double) LONG_MAX )
|
if (x >= (double)LONG_MAX) return LONG_MAX;
|
||||||
return LONG_MAX;
|
|
||||||
|
|
||||||
if( absx < 4503599627370496.0 /* 0x1.0p52 */ )
|
if (absx < 4503599627370496.0 /* 0x1.0p52 */)
|
||||||
{
|
{
|
||||||
double magic = copysign( 4503599627370496.0 /* 0x1.0p52 */, x );
|
double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
|
||||||
double rounded = x + magic;
|
double rounded = x + magic;
|
||||||
rounded -= magic;
|
rounded -= magic;
|
||||||
return (long int) rounded;
|
return (long int)rounded;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (long int) x;
|
return (long int)x;
|
||||||
}
|
}
|
||||||
|
|
||||||
long int lrintf (float x)
|
long int lrintf(float x)
|
||||||
{
|
{
|
||||||
float absx = fabsf(x);
|
float absx = fabsf(x);
|
||||||
|
|
||||||
if( x >= (float) LONG_MAX )
|
if (x >= (float)LONG_MAX) return LONG_MAX;
|
||||||
return LONG_MAX;
|
|
||||||
|
|
||||||
if( absx < 8388608.0f /* 0x1.0p23f */ )
|
if (absx < 8388608.0f /* 0x1.0p23f */)
|
||||||
{
|
{
|
||||||
float magic = copysignf( 8388608.0f /* 0x1.0p23f */, x );
|
float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
|
||||||
float rounded = x + magic;
|
float rounded = x + magic;
|
||||||
rounded -= magic;
|
rounded -= magic;
|
||||||
return (long int) rounded;
|
return (long int)rounded;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (long int) x;
|
return (long int)x;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // _MSC_VER < 1900
|
#endif // _MSC_VER < 1900
|
||||||
@@ -574,13 +580,12 @@ long int lrintf (float x)
|
|||||||
int fetestexcept(int excepts)
|
int fetestexcept(int excepts)
|
||||||
{
|
{
|
||||||
unsigned int status = _statusfp();
|
unsigned int status = _statusfp();
|
||||||
return excepts & (
|
return excepts
|
||||||
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
& (((status & _SW_INEXACT) ? FE_INEXACT : 0)
|
||||||
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
| ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)
|
||||||
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
| ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)
|
||||||
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
| ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0)
|
||||||
((status & _SW_INVALID) ? FE_INVALID : 0)
|
| ((status & _SW_INVALID) ? FE_INVALID : 0));
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int feclearexcept(int excepts)
|
int feclearexcept(int excepts)
|
||||||
@@ -592,33 +597,36 @@ int feclearexcept(int excepts)
|
|||||||
|
|
||||||
#endif // __INTEL_COMPILER
|
#endif // __INTEL_COMPILER
|
||||||
|
|
||||||
#if _MSC_VER < 1900 && ( ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300 )
|
#if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
|
||||||
|
|
||||||
float nanf( const char* str)
|
float nanf(const char* str)
|
||||||
{
|
{
|
||||||
cl_uint u = atoi( str );
|
cl_uint u = atoi(str);
|
||||||
u |= 0x7fc00000U;
|
u |= 0x7fc00000U;
|
||||||
return *( float*)(&u);
|
return *(float*)(&u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double nan( const char* str)
|
double nan(const char* str)
|
||||||
{
|
{
|
||||||
cl_ulong u = atoi( str );
|
cl_ulong u = atoi(str);
|
||||||
u |= 0x7ff8000000000000ULL;
|
u |= 0x7ff8000000000000ULL;
|
||||||
return *( double*)(&u);
|
return *(double*)(&u);
|
||||||
}
|
}
|
||||||
|
|
||||||
// double check this implementation
|
// double check this implementation
|
||||||
long double nanl( const char* str)
|
long double nanl(const char* str)
|
||||||
{
|
{
|
||||||
union
|
union {
|
||||||
{
|
|
||||||
long double f;
|
long double f;
|
||||||
struct { cl_ulong m; cl_ushort sexp; }u;
|
struct
|
||||||
}u;
|
{
|
||||||
|
cl_ulong m;
|
||||||
|
cl_ushort sexp;
|
||||||
|
} u;
|
||||||
|
} u;
|
||||||
u.u.sexp = 0x7fff;
|
u.u.sexp = 0x7fff;
|
||||||
u.u.m = 0x8000000000000000ULL | atoi( str );
|
u.u.m = 0x8000000000000000ULL | atoi(str);
|
||||||
|
|
||||||
return u.f;
|
return u.f;
|
||||||
}
|
}
|
||||||
@@ -632,32 +640,35 @@ long double nanl( const char* str)
|
|||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// This function is commented out because the Windows implementation should never call munmap.
|
// This function is commented out because the Windows implementation should
|
||||||
|
never call munmap.
|
||||||
// If it is calling it, we have a bug. Please file a bugzilla.
|
// If it is calling it, we have a bug. Please file a bugzilla.
|
||||||
int munmap(void *addr, size_t len)
|
int munmap(void *addr, size_t len)
|
||||||
{
|
{
|
||||||
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
// FIXME: this is not correct. munmap is like free()
|
||||||
|
// http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
||||||
|
|
||||||
return (int)VirtualAlloc( (LPVOID)addr, len,
|
return (int)VirtualAlloc( (LPVOID)addr, len,
|
||||||
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
uint64_t ReadTime( void )
|
uint64_t ReadTime(void)
|
||||||
{
|
{
|
||||||
LARGE_INTEGER current;
|
LARGE_INTEGER current;
|
||||||
QueryPerformanceCounter(&current);
|
QueryPerformanceCounter(&current);
|
||||||
return (uint64_t)current.QuadPart;
|
return (uint64_t)current.QuadPart;
|
||||||
}
|
}
|
||||||
|
|
||||||
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
double SubtractTime(uint64_t endTime, uint64_t startTime)
|
||||||
{
|
{
|
||||||
static double PerformanceFrequency = 0.0;
|
static double PerformanceFrequency = 0.0;
|
||||||
|
|
||||||
if (PerformanceFrequency == 0.0) {
|
if (PerformanceFrequency == 0.0)
|
||||||
|
{
|
||||||
LARGE_INTEGER frequency;
|
LARGE_INTEGER frequency;
|
||||||
QueryPerformanceFrequency(&frequency);
|
QueryPerformanceFrequency(&frequency);
|
||||||
PerformanceFrequency = (double) frequency.QuadPart;
|
PerformanceFrequency = (double)frequency.QuadPart;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
||||||
@@ -665,27 +676,25 @@ double SubtractTime( uint64_t endTime, uint64_t startTime )
|
|||||||
|
|
||||||
int cf_signbit(double x)
|
int cf_signbit(double x)
|
||||||
{
|
{
|
||||||
union
|
union {
|
||||||
{
|
|
||||||
double f;
|
double f;
|
||||||
cl_ulong u;
|
cl_ulong u;
|
||||||
}u;
|
} u;
|
||||||
u.f = x;
|
u.f = x;
|
||||||
return u.u >> 63;
|
return u.u >> 63;
|
||||||
}
|
}
|
||||||
|
|
||||||
int cf_signbitf(float x)
|
int cf_signbitf(float x)
|
||||||
{
|
{
|
||||||
union
|
union {
|
||||||
{
|
|
||||||
float f;
|
float f;
|
||||||
cl_uint u;
|
cl_uint u;
|
||||||
}u;
|
} u;
|
||||||
u.f = x;
|
u.f = x;
|
||||||
return u.u >> 31;
|
return u.u >> 31;
|
||||||
}
|
}
|
||||||
|
|
||||||
float int2float (int32_t ix)
|
float int2float(int32_t ix)
|
||||||
{
|
{
|
||||||
union {
|
union {
|
||||||
float f;
|
float f;
|
||||||
@@ -695,7 +704,7 @@ float int2float (int32_t ix)
|
|||||||
return u.f;
|
return u.f;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t float2int (float fx)
|
int32_t float2int(float fx)
|
||||||
{
|
{
|
||||||
union {
|
union {
|
||||||
float f;
|
float f;
|
||||||
@@ -722,26 +731,49 @@ int __builtin_clz(unsigned int pattern)
|
|||||||
return 31 - res;
|
return 31 - res;
|
||||||
#endif
|
#endif
|
||||||
unsigned long index;
|
unsigned long index;
|
||||||
unsigned char res = _BitScanReverse( &index, pattern);
|
unsigned char res = _BitScanReverse(&index, pattern);
|
||||||
if (res) {
|
if (res)
|
||||||
return 8*sizeof(int) - 1 - index;
|
{
|
||||||
} else {
|
return 8 * sizeof(int) - 1 - index;
|
||||||
return 8*sizeof(int);
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return 8 * sizeof(int);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
int __builtin_clz(unsigned int pattern)
|
int __builtin_clz(unsigned int pattern)
|
||||||
{
|
{
|
||||||
int count;
|
int count;
|
||||||
if (pattern == 0u) {
|
if (pattern == 0u)
|
||||||
|
{
|
||||||
return 32;
|
return 32;
|
||||||
}
|
}
|
||||||
count = 31;
|
count = 31;
|
||||||
if (pattern >= 1u<<16) { pattern >>= 16; count -= 16; }
|
if (pattern >= 1u << 16)
|
||||||
if (pattern >= 1u<<8) { pattern >>= 8; count -= 8; }
|
{
|
||||||
if (pattern >= 1u<<4) { pattern >>= 4; count -= 4; }
|
pattern >>= 16;
|
||||||
if (pattern >= 1u<<2) { pattern >>= 2; count -= 2; }
|
count -= 16;
|
||||||
if (pattern >= 1u<<1) { count -= 1; }
|
}
|
||||||
|
if (pattern >= 1u << 8)
|
||||||
|
{
|
||||||
|
pattern >>= 8;
|
||||||
|
count -= 8;
|
||||||
|
}
|
||||||
|
if (pattern >= 1u << 4)
|
||||||
|
{
|
||||||
|
pattern >>= 4;
|
||||||
|
count -= 4;
|
||||||
|
}
|
||||||
|
if (pattern >= 1u << 2)
|
||||||
|
{
|
||||||
|
pattern >>= 2;
|
||||||
|
count -= 2;
|
||||||
|
}
|
||||||
|
if (pattern >= 1u << 1)
|
||||||
|
{
|
||||||
|
count -= 1;
|
||||||
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -756,9 +788,9 @@ int usleep(int usec)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int sleep( unsigned int sec )
|
unsigned int sleep(unsigned int sec)
|
||||||
{
|
{
|
||||||
Sleep( sec * 1000 );
|
Sleep(sec * 1000);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,8 +26,8 @@
|
|||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
@@ -51,15 +51,15 @@
|
|||||||
#include "harness/alloc.h"
|
#include "harness/alloc.h"
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Period parameters */
|
/* Period parameters */
|
||||||
#define N 624 /* vector code requires multiple of 4 here */
|
#define N 624 /* vector code requires multiple of 4 here */
|
||||||
#define M 397
|
#define M 397
|
||||||
#define MATRIX_A (cl_uint) 0x9908b0dfUL /* constant vector a */
|
#define MATRIX_A (cl_uint)0x9908b0dfUL /* constant vector a */
|
||||||
#define UPPER_MASK (cl_uint) 0x80000000UL /* most significant w-r bits */
|
#define UPPER_MASK (cl_uint)0x80000000UL /* most significant w-r bits */
|
||||||
#define LOWER_MASK (cl_uint) 0x7fffffffUL /* least significant r bits */
|
#define LOWER_MASK (cl_uint)0x7fffffffUL /* least significant r bits */
|
||||||
|
|
||||||
typedef struct _MTdata
|
typedef struct _MTdata
|
||||||
{
|
{
|
||||||
@@ -68,20 +68,21 @@ typedef struct _MTdata
|
|||||||
cl_uint cache[N];
|
cl_uint cache[N];
|
||||||
#endif
|
#endif
|
||||||
cl_int mti;
|
cl_int mti;
|
||||||
}_MTdata;
|
} _MTdata;
|
||||||
|
|
||||||
/* initializes mt[N] with a seed */
|
/* initializes mt[N] with a seed */
|
||||||
MTdata init_genrand(cl_uint s)
|
MTdata init_genrand(cl_uint s)
|
||||||
{
|
{
|
||||||
MTdata r = (MTdata) align_malloc( sizeof( _MTdata ), 16 );
|
MTdata r = (MTdata)align_malloc(sizeof(_MTdata), 16);
|
||||||
if( NULL != r )
|
if (NULL != r)
|
||||||
{
|
{
|
||||||
cl_uint *mt = r->mt;
|
cl_uint *mt = r->mt;
|
||||||
int mti = 0;
|
int mti = 0;
|
||||||
mt[0]= s; // & 0xffffffffUL;
|
mt[0] = s; // & 0xffffffffUL;
|
||||||
for (mti=1; mti<N; mti++) {
|
for (mti = 1; mti < N; mti++)
|
||||||
mt[mti] = (cl_uint)
|
{
|
||||||
(1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
|
mt[mti] = (cl_uint)(
|
||||||
|
1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti);
|
||||||
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
|
/* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
|
||||||
/* In the previous versions, MSBs of the seed affect */
|
/* In the previous versions, MSBs of the seed affect */
|
||||||
/* only MSBs of the array mt[]. */
|
/* only MSBs of the array mt[]. */
|
||||||
@@ -95,20 +96,22 @@ MTdata init_genrand(cl_uint s)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
void free_mtdata( MTdata d )
|
void free_mtdata(MTdata d)
|
||||||
{
|
{
|
||||||
if(d)
|
if (d) align_free(d);
|
||||||
align_free(d);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generates a random number on [0,0xffffffff]-interval */
|
/* generates a random number on [0,0xffffffff]-interval */
|
||||||
cl_uint genrand_int32( MTdata d)
|
cl_uint genrand_int32(MTdata d)
|
||||||
{
|
{
|
||||||
/* mag01[x] = x * MATRIX_A for x=0,1 */
|
/* mag01[x] = x * MATRIX_A for x=0,1 */
|
||||||
static const cl_uint mag01[2]={0x0UL, MATRIX_A};
|
static const cl_uint mag01[2] = { 0x0UL, MATRIX_A };
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
static volatile int init = 0;
|
static volatile int init = 0;
|
||||||
static union{ __m128i v; cl_uint s[4]; } upper_mask, lower_mask, one, matrix_a, c0, c1;
|
static union {
|
||||||
|
__m128i v;
|
||||||
|
cl_uint s[4];
|
||||||
|
} upper_mask, lower_mask, one, matrix_a, c0, c1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@@ -120,14 +123,17 @@ cl_uint genrand_int32( MTdata d)
|
|||||||
int kk;
|
int kk;
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
if( 0 == init )
|
if (0 == init)
|
||||||
{
|
{
|
||||||
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK;
|
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] =
|
||||||
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = lower_mask.s[3] = LOWER_MASK;
|
upper_mask.s[3] = UPPER_MASK;
|
||||||
|
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] =
|
||||||
|
lower_mask.s[3] = LOWER_MASK;
|
||||||
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
|
one.s[0] = one.s[1] = one.s[2] = one.s[3] = 1;
|
||||||
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] = MATRIX_A;
|
matrix_a.s[0] = matrix_a.s[1] = matrix_a.s[2] = matrix_a.s[3] =
|
||||||
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint) 0x9d2c5680UL;
|
MATRIX_A;
|
||||||
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint) 0xefc60000UL;
|
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL;
|
||||||
|
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL;
|
||||||
init = 1;
|
init = 1;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -135,61 +141,89 @@ cl_uint genrand_int32( MTdata d)
|
|||||||
kk = 0;
|
kk = 0;
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
// vector loop
|
// vector loop
|
||||||
for( ; kk + 4 <= N-M; kk += 4 )
|
for (; kk + 4 <= N - M; kk += 4)
|
||||||
{
|
{
|
||||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
// ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
__m128i vy = _mm_or_si128(
|
||||||
|
_mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
|
||||||
|
upper_mask.v),
|
||||||
|
_mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
|
||||||
|
lower_mask.v));
|
||||||
|
|
||||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
// y & 1 ? -1 : 0
|
||||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
__m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
|
||||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M)), (__m128i) _mm_srli_epi32( vy, 1 ) ); // mt[kk+M] ^ (y >> 1)
|
// y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
__m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
|
||||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
// mt[kk+M] ^ (y >> 1)
|
||||||
|
__m128i vr =
|
||||||
|
_mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M)),
|
||||||
|
(__m128i)_mm_srli_epi32(vy, 1));
|
||||||
|
// mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||||
|
vr = _mm_xor_si128(vr, vmag01);
|
||||||
|
_mm_store_si128((__m128i *)(mt + kk), vr);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
for ( ;kk<N-M;kk++) {
|
for (; kk < N - M; kk++)
|
||||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
{
|
||||||
mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
|
||||||
|
mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
// advance to next aligned location
|
// advance to next aligned location
|
||||||
for (;kk<N-1 && (kk & 3);kk++) {
|
for (; kk < N - 1 && (kk & 3); kk++)
|
||||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
{
|
||||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
|
||||||
|
mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||||
}
|
}
|
||||||
|
|
||||||
// vector loop
|
// vector loop
|
||||||
for( ; kk + 4 <= N-1; kk += 4 )
|
for (; kk + 4 <= N - 1; kk += 4)
|
||||||
{
|
{
|
||||||
__m128i vy = _mm_or_si128( _mm_and_si128( _mm_load_si128( (__m128i*)(mt + kk) ), upper_mask.v ),
|
__m128i vy = _mm_or_si128(
|
||||||
_mm_and_si128( _mm_loadu_si128( (__m128i*)(mt + kk + 1) ), lower_mask.v )); // ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
_mm_and_si128(_mm_load_si128((__m128i *)(mt + kk)),
|
||||||
|
upper_mask.v),
|
||||||
|
// ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK))
|
||||||
|
_mm_and_si128(_mm_loadu_si128((__m128i *)(mt + kk + 1)),
|
||||||
|
lower_mask.v));
|
||||||
|
|
||||||
__m128i mask = _mm_cmpeq_epi32( _mm_and_si128( vy, one.v), one.v ); // y & 1 ? -1 : 0
|
// y & 1 ? -1 : 0
|
||||||
__m128i vmag01 = _mm_and_si128( mask, matrix_a.v ); // y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
__m128i mask = _mm_cmpeq_epi32(_mm_and_si128(vy, one.v), one.v);
|
||||||
__m128i vr = _mm_xor_si128( _mm_loadu_si128( (__m128i*)(mt + kk + M - N)), _mm_srli_epi32( vy, 1 ) ); // mt[kk+M-N] ^ (y >> 1)
|
// y & 1 ? MATRIX_A, 0 = mag01[y & (cl_uint) 0x1UL]
|
||||||
vr = _mm_xor_si128( vr, vmag01 ); // mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
__m128i vmag01 = _mm_and_si128(mask, matrix_a.v);
|
||||||
_mm_store_si128( (__m128i*) (mt + kk ), vr );
|
// mt[kk+M-N] ^ (y >> 1)
|
||||||
|
__m128i vr =
|
||||||
|
_mm_xor_si128(_mm_loadu_si128((__m128i *)(mt + kk + M - N)),
|
||||||
|
_mm_srli_epi32(vy, 1));
|
||||||
|
// mt[kk+M] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL]
|
||||||
|
vr = _mm_xor_si128(vr, vmag01);
|
||||||
|
_mm_store_si128((__m128i *)(mt + kk), vr);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (;kk<N-1;kk++) {
|
for (; kk < N - 1; kk++)
|
||||||
y = (cl_uint) ((mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK));
|
{
|
||||||
mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
y = (cl_uint)((mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK));
|
||||||
|
mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||||
}
|
}
|
||||||
y = (cl_uint)((mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK));
|
y = (cl_uint)((mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK));
|
||||||
mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & (cl_uint) 0x1UL];
|
mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & (cl_uint)0x1UL];
|
||||||
|
|
||||||
#ifdef __SSE2__
|
#ifdef __SSE2__
|
||||||
// Do the tempering ahead of time in vector code
|
// Do the tempering ahead of time in vector code
|
||||||
for( kk = 0; kk + 4 <= N; kk += 4 )
|
for (kk = 0; kk + 4 <= N; kk += 4)
|
||||||
{
|
{
|
||||||
__m128i vy = _mm_load_si128( (__m128i*)(mt + kk ) ); // y = mt[k];
|
// y = mt[k];
|
||||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 11 ) ); // y ^= (y >> 11);
|
__m128i vy = _mm_load_si128((__m128i *)(mt + kk));
|
||||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 7 ), c0.v) ); // y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
// y ^= (y >> 11);
|
||||||
vy = _mm_xor_si128( vy, _mm_and_si128( _mm_slli_epi32( vy, 15 ), c1.v) ); // y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 11));
|
||||||
vy = _mm_xor_si128( vy, _mm_srli_epi32( vy, 18 ) ); // y ^= (y >> 18);
|
// y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
||||||
_mm_store_si128( (__m128i*)(d->cache+kk), vy );
|
vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 7), c0.v));
|
||||||
|
// y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
||||||
|
vy = _mm_xor_si128(vy, _mm_and_si128(_mm_slli_epi32(vy, 15), c1.v));
|
||||||
|
// y ^= (y >> 18);
|
||||||
|
vy = _mm_xor_si128(vy, _mm_srli_epi32(vy, 18));
|
||||||
|
_mm_store_si128((__m128i *)(d->cache + kk), vy);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -202,8 +236,8 @@ cl_uint genrand_int32( MTdata d)
|
|||||||
|
|
||||||
/* Tempering */
|
/* Tempering */
|
||||||
y ^= (y >> 11);
|
y ^= (y >> 11);
|
||||||
y ^= (y << 7) & (cl_uint) 0x9d2c5680UL;
|
y ^= (y << 7) & (cl_uint)0x9d2c5680UL;
|
||||||
y ^= (y << 15) & (cl_uint) 0xefc60000UL;
|
y ^= (y << 15) & (cl_uint)0xefc60000UL;
|
||||||
y ^= (y >> 18);
|
y ^= (y >> 18);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -211,35 +245,35 @@ cl_uint genrand_int32( MTdata d)
|
|||||||
return y;
|
return y;
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_ulong genrand_int64( MTdata d)
|
cl_ulong genrand_int64(MTdata d)
|
||||||
{
|
{
|
||||||
return ((cl_ulong) genrand_int32(d) << 32) | (cl_uint) genrand_int32(d);
|
return ((cl_ulong)genrand_int32(d) << 32) | (cl_uint)genrand_int32(d);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generates a random number on [0,1]-real-interval */
|
/* generates a random number on [0,1]-real-interval */
|
||||||
double genrand_real1(MTdata d)
|
double genrand_real1(MTdata d)
|
||||||
{
|
{
|
||||||
return genrand_int32(d)*(1.0/4294967295.0);
|
return genrand_int32(d) * (1.0 / 4294967295.0);
|
||||||
/* divided by 2^32-1 */
|
/* divided by 2^32-1 */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generates a random number on [0,1)-real-interval */
|
/* generates a random number on [0,1)-real-interval */
|
||||||
double genrand_real2(MTdata d)
|
double genrand_real2(MTdata d)
|
||||||
{
|
{
|
||||||
return genrand_int32(d)*(1.0/4294967296.0);
|
return genrand_int32(d) * (1.0 / 4294967296.0);
|
||||||
/* divided by 2^32 */
|
/* divided by 2^32 */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generates a random number on (0,1)-real-interval */
|
/* generates a random number on (0,1)-real-interval */
|
||||||
double genrand_real3(MTdata d)
|
double genrand_real3(MTdata d)
|
||||||
{
|
{
|
||||||
return (((double)genrand_int32(d)) + 0.5)*(1.0/4294967296.0);
|
return (((double)genrand_int32(d)) + 0.5) * (1.0 / 4294967296.0);
|
||||||
/* divided by 2^32 */
|
/* divided by 2^32 */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||||
double genrand_res53(MTdata d)
|
double genrand_res53(MTdata d)
|
||||||
{
|
{
|
||||||
unsigned long a=genrand_int32(d)>>5, b=genrand_int32(d)>>6;
|
unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6;
|
||||||
return(a*67108864.0+b)*(1.0/9007199254740992.0);
|
return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,8 +31,8 @@
|
|||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
@@ -49,10 +49,10 @@
|
|||||||
#ifndef MT19937_H
|
#ifndef MT19937_H
|
||||||
#define MT19937_H 1
|
#define MT19937_H 1
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
#include <OpenCL/cl_platform.h>
|
#include <OpenCL/cl_platform.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/cl_platform.h>
|
#include <CL/cl_platform.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -64,49 +64,47 @@
|
|||||||
typedef struct _MTdata *MTdata;
|
typedef struct _MTdata *MTdata;
|
||||||
|
|
||||||
/* Create the random number generator with seed */
|
/* Create the random number generator with seed */
|
||||||
MTdata init_genrand( cl_uint /*seed*/ );
|
MTdata init_genrand(cl_uint /*seed*/);
|
||||||
|
|
||||||
/* release memory used by a MTdata private data */
|
/* release memory used by a MTdata private data */
|
||||||
void free_mtdata( MTdata /*data*/ );
|
void free_mtdata(MTdata /*data*/);
|
||||||
|
|
||||||
/* generates a random number on [0,0xffffffff]-interval */
|
/* generates a random number on [0,0xffffffff]-interval */
|
||||||
cl_uint genrand_int32( MTdata /*data*/);
|
cl_uint genrand_int32(MTdata /*data*/);
|
||||||
|
|
||||||
/* generates a random number on [0,0xffffffffffffffffULL]-interval */
|
/* generates a random number on [0,0xffffffffffffffffULL]-interval */
|
||||||
cl_ulong genrand_int64( MTdata /*data*/);
|
cl_ulong genrand_int64(MTdata /*data*/);
|
||||||
|
|
||||||
/* generates a random number on [0,1]-real-interval */
|
/* generates a random number on [0,1]-real-interval */
|
||||||
double genrand_real1( MTdata /*data*/);
|
double genrand_real1(MTdata /*data*/);
|
||||||
|
|
||||||
/* generates a random number on [0,1)-real-interval */
|
/* generates a random number on [0,1)-real-interval */
|
||||||
double genrand_real2( MTdata /*data*/);
|
double genrand_real2(MTdata /*data*/);
|
||||||
|
|
||||||
/* generates a random number on (0,1)-real-interval */
|
/* generates a random number on (0,1)-real-interval */
|
||||||
double genrand_real3( MTdata /*data*/);
|
double genrand_real3(MTdata /*data*/);
|
||||||
|
|
||||||
/* generates a random number on [0,1) with 53-bit resolution*/
|
/* generates a random number on [0,1) with 53-bit resolution*/
|
||||||
double genrand_res53( MTdata /*data*/ );
|
double genrand_res53(MTdata /*data*/);
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
struct MTdataHolder {
|
struct MTdataHolder
|
||||||
MTdataHolder(cl_uint seed) {
|
{
|
||||||
|
MTdataHolder(cl_uint seed)
|
||||||
|
{
|
||||||
m_mtdata = init_genrand(seed);
|
m_mtdata = init_genrand(seed);
|
||||||
assert(m_mtdata != nullptr);
|
assert(m_mtdata != nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
MTdataHolder(MTdata mtdata) : m_mtdata(mtdata) {}
|
MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {}
|
||||||
|
|
||||||
~MTdataHolder() {
|
~MTdataHolder() { free_mtdata(m_mtdata); }
|
||||||
free_mtdata(m_mtdata);
|
|
||||||
}
|
|
||||||
|
|
||||||
operator MTdata () const {
|
operator MTdata() const { return m_mtdata; }
|
||||||
return m_mtdata;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MTdata m_mtdata;
|
MTdata m_mtdata;
|
||||||
|
|||||||
@@ -32,15 +32,16 @@
|
|||||||
#include <android/api-level.h>
|
#include <android/api-level.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CHECK_PTR( ptr ) \
|
#define CHECK_PTR(ptr) \
|
||||||
if ( (ptr) == NULL ) { \
|
if ((ptr) == NULL) \
|
||||||
|
{ \
|
||||||
abort(); \
|
abort(); \
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef std::vector< char > buffer_t;
|
typedef std::vector<char> buffer_t;
|
||||||
|
|
||||||
#if ! defined( PATH_MAX )
|
#if !defined(PATH_MAX)
|
||||||
#define PATH_MAX 1000
|
#define PATH_MAX 1000
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int const _size = PATH_MAX + 1; // Initial buffer size for path.
|
int const _size = PATH_MAX + 1; // Initial buffer size for path.
|
||||||
@@ -50,19 +51,18 @@ int const _count = 8; // How many times we will try to double buff
|
|||||||
// MacOS X
|
// MacOS X
|
||||||
// -------------------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
|
|
||||||
|
|
||||||
#include <mach-o/dyld.h> // _NSGetExecutablePath
|
#include <mach-o/dyld.h> // _NSGetExecutablePath
|
||||||
#include <libgen.h> // dirname
|
#include <libgen.h> // dirname
|
||||||
|
|
||||||
|
|
||||||
static
|
static std::string
|
||||||
std::string
|
_err_msg(int err, // Error number (e. g. errno).
|
||||||
_err_msg(
|
|
||||||
int err, // Error number (e. g. errno).
|
|
||||||
int level // Nesting level, for avoiding infinite recursion.
|
int level // Nesting level, for avoiding infinite recursion.
|
||||||
) {
|
)
|
||||||
|
{
|
||||||
|
|
||||||
/*
|
/*
|
||||||
There are 3 incompatible versions of strerror_r:
|
There are 3 incompatible versions of strerror_r:
|
||||||
@@ -71,92 +71,102 @@ int const _count = 8; // How many times we will try to double buff
|
|||||||
int strerror_r( int, char *, size_t ); // BSD version
|
int strerror_r( int, char *, size_t ); // BSD version
|
||||||
int strerror_r( int, char *, size_t ); // XSI version
|
int strerror_r( int, char *, size_t ); // XSI version
|
||||||
|
|
||||||
BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
|
BSD version returns error code, while XSI version returns 0 or -1 and
|
||||||
|
sets errno.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// BSD version of strerror_r.
|
// BSD version of strerror_r.
|
||||||
buffer_t buffer( 100 );
|
buffer_t buffer(100);
|
||||||
int count = _count;
|
int count = _count;
|
||||||
for ( ; ; ) {
|
for (;;)
|
||||||
int rc = strerror_r( err, & buffer.front(), buffer.size() );
|
{
|
||||||
if ( rc == EINVAL ) {
|
int rc = strerror_r(err, &buffer.front(), buffer.size());
|
||||||
|
if (rc == EINVAL)
|
||||||
|
{
|
||||||
// Error code is not recognized, but anyway we got the message.
|
// Error code is not recognized, but anyway we got the message.
|
||||||
return & buffer.front();
|
return &buffer.front();
|
||||||
} else if ( rc == ERANGE ) {
|
}
|
||||||
|
else if (rc == ERANGE)
|
||||||
|
{
|
||||||
// Buffer is not enough.
|
// Buffer is not enough.
|
||||||
if ( count > 0 ) {
|
if (count > 0)
|
||||||
|
{
|
||||||
// Enlarge the buffer.
|
// Enlarge the buffer.
|
||||||
-- count;
|
--count;
|
||||||
buffer.resize( buffer.size() * 2 );
|
buffer.resize(buffer.size() * 2);
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
std::stringstream ostr;
|
std::stringstream ostr;
|
||||||
ostr
|
ostr << "Error " << err << " "
|
||||||
<< "Error " << err << " "
|
|
||||||
<< "(Getting error message failed: "
|
<< "(Getting error message failed: "
|
||||||
<< "Buffer of " << buffer.size() << " bytes is still too small"
|
<< "Buffer of " << buffer.size()
|
||||||
|
<< " bytes is still too small"
|
||||||
<< ")";
|
<< ")";
|
||||||
return ostr.str();
|
return ostr.str();
|
||||||
}; // if
|
}; // if
|
||||||
} else if ( rc == 0 ) {
|
}
|
||||||
|
else if (rc == 0)
|
||||||
|
{
|
||||||
// We got the message.
|
// We got the message.
|
||||||
return & buffer.front();
|
return &buffer.front();
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
std::stringstream ostr;
|
std::stringstream ostr;
|
||||||
ostr
|
ostr << "Error " << err << " "
|
||||||
<< "Error " << err << " "
|
|
||||||
<< "(Getting error message failed: "
|
<< "(Getting error message failed: "
|
||||||
<< ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
|
<< (level < 2 ? _err_msg(rc, level + 1) : "Oops") << ")";
|
||||||
<< ")";
|
|
||||||
return ostr.str();
|
return ostr.str();
|
||||||
}; // if
|
}; // if
|
||||||
}; // forever
|
}; // forever
|
||||||
|
|
||||||
} // _err_msg
|
} // _err_msg
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string dir_sep() { return "/"; } // dir_sep
|
||||||
dir_sep(
|
|
||||||
) {
|
|
||||||
return "/";
|
|
||||||
} // dir_sep
|
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string exe_path()
|
||||||
exe_path(
|
{
|
||||||
) {
|
buffer_t path(_size);
|
||||||
buffer_t path( _size );
|
|
||||||
int count = _count;
|
int count = _count;
|
||||||
for ( ; ; ) {
|
for (;;)
|
||||||
|
{
|
||||||
uint32_t size = path.size();
|
uint32_t size = path.size();
|
||||||
int rc = _NSGetExecutablePath( & path.front(), & size );
|
int rc = _NSGetExecutablePath(&path.front(), &size);
|
||||||
if ( rc == 0 ) {
|
if (rc == 0)
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}; // if
|
}; // if
|
||||||
if ( count > 0 ) {
|
if (count > 0)
|
||||||
-- count;
|
{
|
||||||
path.resize( size );
|
--count;
|
||||||
} else {
|
path.resize(size);
|
||||||
log_error(
|
}
|
||||||
"ERROR: Getting executable path failed: "
|
else
|
||||||
"_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
|
{
|
||||||
(unsigned long) path.size()
|
log_error("ERROR: Getting executable path failed: "
|
||||||
);
|
"_NSGetExecutablePath failed: Buffer of %lu bytes is "
|
||||||
exit( 2 );
|
"still too small\n",
|
||||||
|
(unsigned long)path.size());
|
||||||
|
exit(2);
|
||||||
}; // if
|
}; // if
|
||||||
}; // forever
|
}; // forever
|
||||||
return & path.front();
|
return &path.front();
|
||||||
} // exe_path
|
} // exe_path
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string exe_dir()
|
||||||
exe_dir(
|
{
|
||||||
) {
|
|
||||||
std::string path = exe_path();
|
std::string path = exe_path();
|
||||||
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its
|
||||||
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
// argument.
|
||||||
return dirname( & buffer.front() );
|
buffer_t buffer(path.c_str(),
|
||||||
} // exe_dir
|
path.c_str() + path.size() + 1); // Copy with trailing zero.
|
||||||
|
return dirname(&buffer.front());
|
||||||
|
} // exe_dir
|
||||||
|
|
||||||
|
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
||||||
@@ -165,20 +175,16 @@ int const _count = 8; // How many times we will try to double buff
|
|||||||
// Linux
|
// Linux
|
||||||
// -------------------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
#if defined( __linux__ )
|
#if defined(__linux__)
|
||||||
|
|
||||||
|
|
||||||
#include <cerrno> // errno
|
#include <cerrno> // errno
|
||||||
#include <libgen.h> // dirname
|
#include <libgen.h> // dirname
|
||||||
#include <unistd.h> // readlink
|
#include <unistd.h> // readlink
|
||||||
|
|
||||||
|
|
||||||
static
|
static std::string _err_msg(int err, int level)
|
||||||
std::string
|
{
|
||||||
_err_msg(
|
|
||||||
int err,
|
|
||||||
int level
|
|
||||||
) {
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
There are 3 incompatible versions of strerror_r:
|
There are 3 incompatible versions of strerror_r:
|
||||||
@@ -187,127 +193,135 @@ int const _count = 8; // How many times we will try to double buff
|
|||||||
int strerror_r( int, char *, size_t ); // BSD version
|
int strerror_r( int, char *, size_t ); // BSD version
|
||||||
int strerror_r( int, char *, size_t ); // XSI version
|
int strerror_r( int, char *, size_t ); // XSI version
|
||||||
|
|
||||||
BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
|
BSD version returns error code, while XSI version returns 0 or -1 and
|
||||||
|
sets errno.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#if (defined(__ANDROID__) && __ANDROID_API__ < 23) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )
|
#if (defined(__ANDROID__) && __ANDROID_API__ < 23) \
|
||||||
|
|| ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE)
|
||||||
|
|
||||||
// XSI version of strerror_r.
|
// XSI version of strerror_r.
|
||||||
#warning Not tested!
|
#warning Not tested!
|
||||||
buffer_t buffer( 200 );
|
buffer_t buffer(200);
|
||||||
int count = _count;
|
int count = _count;
|
||||||
for ( ; ; ) {
|
for (;;)
|
||||||
int rc = strerror_r( err, & buffer.front(), buffer.size() );
|
{
|
||||||
if ( rc == -1 ) {
|
int rc = strerror_r(err, &buffer.front(), buffer.size());
|
||||||
|
if (rc == -1)
|
||||||
|
{
|
||||||
int _err = errno;
|
int _err = errno;
|
||||||
if ( _err == ERANGE ) {
|
if (_err == ERANGE)
|
||||||
if ( count > 0 ) {
|
{
|
||||||
|
if (count > 0)
|
||||||
|
{
|
||||||
// Enlarge the buffer.
|
// Enlarge the buffer.
|
||||||
-- count;
|
--count;
|
||||||
buffer.resize( buffer.size() * 2 );
|
buffer.resize(buffer.size() * 2);
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
std::stringstream ostr;
|
std::stringstream ostr;
|
||||||
ostr
|
ostr << "Error " << err << " "
|
||||||
<< "Error " << err << " "
|
|
||||||
<< "(Getting error message failed: "
|
<< "(Getting error message failed: "
|
||||||
<< "Buffer of " << buffer.size() << " bytes is still too small"
|
<< "Buffer of " << buffer.size()
|
||||||
|
<< " bytes is still too small"
|
||||||
<< ")";
|
<< ")";
|
||||||
return ostr.str();
|
return ostr.str();
|
||||||
}; // if
|
}; // if
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
std::stringstream ostr;
|
std::stringstream ostr;
|
||||||
ostr
|
ostr << "Error " << err << " "
|
||||||
<< "Error " << err << " "
|
|
||||||
<< "(Getting error message failed: "
|
<< "(Getting error message failed: "
|
||||||
<< ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
|
<< (level < 2 ? _err_msg(_err, level + 1) : "Oops") << ")";
|
||||||
<< ")";
|
|
||||||
return ostr.str();
|
return ostr.str();
|
||||||
}; // if
|
}; // if
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
// We got the message.
|
// We got the message.
|
||||||
return & buffer.front();
|
return &buffer.front();
|
||||||
}; // if
|
}; // if
|
||||||
}; // forever
|
}; // forever
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
// GNU version of strerror_r.
|
// GNU version of strerror_r.
|
||||||
char buffer[ 2000 ];
|
char buffer[2000];
|
||||||
return strerror_r( err, buffer, sizeof( buffer ) );
|
return strerror_r(err, buffer, sizeof(buffer));
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
} // _err_msg
|
} // _err_msg
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string dir_sep() { return "/"; } // dir_sep
|
||||||
dir_sep(
|
|
||||||
) {
|
|
||||||
return "/";
|
|
||||||
} // dir_sep
|
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string exe_path()
|
||||||
exe_path(
|
{
|
||||||
) {
|
|
||||||
|
|
||||||
static std::string const exe = "/proc/self/exe";
|
static std::string const exe = "/proc/self/exe";
|
||||||
|
|
||||||
buffer_t path( _size );
|
buffer_t path(_size);
|
||||||
int count = _count; // Max number of iterations.
|
int count = _count; // Max number of iterations.
|
||||||
|
|
||||||
for ( ; ; ) {
|
for (;;)
|
||||||
|
{
|
||||||
|
|
||||||
ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
|
ssize_t len = readlink(exe.c_str(), &path.front(), path.size());
|
||||||
|
|
||||||
if ( len < 0 ) {
|
if (len < 0)
|
||||||
|
{
|
||||||
// Oops.
|
// Oops.
|
||||||
int err = errno;
|
int err = errno;
|
||||||
log_error(
|
log_error("ERROR: Getting executable path failed: "
|
||||||
"ERROR: Getting executable path failed: "
|
|
||||||
"Reading symlink `%s' failed: %s\n",
|
"Reading symlink `%s' failed: %s\n",
|
||||||
exe.c_str(), err_msg( err ).c_str()
|
exe.c_str(), err_msg(err).c_str());
|
||||||
);
|
exit(2);
|
||||||
exit( 2 );
|
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
if ( len < path.size() ) {
|
if (len < path.size())
|
||||||
|
{
|
||||||
// We got the path.
|
// We got the path.
|
||||||
path.resize( len );
|
path.resize(len);
|
||||||
break;
|
break;
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
// Oops, buffer is too small.
|
// Oops, buffer is too small.
|
||||||
if ( count > 0 ) {
|
if (count > 0)
|
||||||
-- count;
|
{
|
||||||
|
--count;
|
||||||
// Enlarge the buffer.
|
// Enlarge the buffer.
|
||||||
path.resize( path.size() * 2 );
|
path.resize(path.size() * 2);
|
||||||
} else {
|
}
|
||||||
log_error(
|
else
|
||||||
"ERROR: Getting executable path failed: "
|
{
|
||||||
"Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
|
log_error("ERROR: Getting executable path failed: "
|
||||||
exe.c_str(),
|
"Reading symlink `%s' failed: Buffer of %lu bytes is "
|
||||||
(unsigned long) path.size()
|
"still too small\n",
|
||||||
);
|
exe.c_str(), (unsigned long)path.size());
|
||||||
exit( 2 );
|
exit(2);
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
}; // forever
|
}; // forever
|
||||||
|
|
||||||
return std::string( & path.front(), path.size() );
|
return std::string(&path.front(), path.size());
|
||||||
|
|
||||||
} // exe_path
|
} // exe_path
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string exe_dir()
|
||||||
exe_dir(
|
{
|
||||||
) {
|
|
||||||
std::string path = exe_path();
|
std::string path = exe_path();
|
||||||
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its
|
||||||
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
// argument.
|
||||||
return dirname( & buffer.front() );
|
buffer_t buffer(path.c_str(),
|
||||||
} // exe_dir
|
path.c_str() + path.size() + 1); // Copy with trailing zero.
|
||||||
|
return dirname(&buffer.front());
|
||||||
|
} // exe_dir
|
||||||
|
|
||||||
#endif // __linux__
|
#endif // __linux__
|
||||||
|
|
||||||
@@ -315,212 +329,196 @@ int const _count = 8; // How many times we will try to double buff
|
|||||||
// MS Windows
|
// MS Windows
|
||||||
// -------------------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
#if defined( _WIN32 )
|
#if defined(_WIN32)
|
||||||
|
|
||||||
|
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#if defined( max )
|
#if defined(max)
|
||||||
#undef max
|
#undef max
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <cctype>
|
#include <cctype>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
|
||||||
static
|
static std::string _err_msg(int err, int level)
|
||||||
std::string
|
{
|
||||||
_err_msg(
|
|
||||||
int err,
|
|
||||||
int level
|
|
||||||
) {
|
|
||||||
|
|
||||||
std::string msg;
|
std::string msg;
|
||||||
|
|
||||||
LPSTR buffer = NULL;
|
LPSTR buffer = NULL;
|
||||||
DWORD flags =
|
DWORD flags = FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM
|
||||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
| FORMAT_MESSAGE_IGNORE_INSERTS;
|
||||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
|
||||||
FORMAT_MESSAGE_IGNORE_INSERTS;
|
|
||||||
|
|
||||||
DWORD len =
|
DWORD len = FormatMessageA(flags, NULL, err, LANG_USER_DEFAULT,
|
||||||
FormatMessageA(
|
reinterpret_cast<LPSTR>(&buffer), 0, NULL);
|
||||||
flags,
|
|
||||||
NULL,
|
|
||||||
err,
|
|
||||||
LANG_USER_DEFAULT,
|
|
||||||
reinterpret_cast< LPSTR >( & buffer ),
|
|
||||||
0,
|
|
||||||
NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
if ( buffer == NULL || len == 0 ) {
|
if (buffer == NULL || len == 0)
|
||||||
|
{
|
||||||
|
|
||||||
int _err = GetLastError();
|
int _err = GetLastError();
|
||||||
char str[1024] = { 0 };
|
char str[1024] = { 0 };
|
||||||
snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
|
snprintf(str, sizeof(str),
|
||||||
|
"Error 0x%08x (Getting error message failed: %s )", err,
|
||||||
|
(level < 2 ? _err_msg(_err, level + 1).c_str() : "Oops"));
|
||||||
msg = std::string(str);
|
msg = std::string(str);
|
||||||
|
}
|
||||||
} else {
|
else
|
||||||
|
{
|
||||||
|
|
||||||
// Trim trailing whitespace (including `\r' and `\n').
|
// Trim trailing whitespace (including `\r' and `\n').
|
||||||
while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
|
while (len > 0 && isspace(buffer[len - 1]))
|
||||||
-- len;
|
{
|
||||||
|
--len;
|
||||||
}; // while
|
}; // while
|
||||||
|
|
||||||
// Drop trailing full stop.
|
// Drop trailing full stop.
|
||||||
if ( len > 0 && buffer[ len - 1 ] == '.' ) {
|
if (len > 0 && buffer[len - 1] == '.')
|
||||||
-- len;
|
{
|
||||||
|
--len;
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
msg.assign( buffer, len );
|
msg.assign(buffer, len);
|
||||||
|
|
||||||
}; //if
|
}; // if
|
||||||
|
|
||||||
if ( buffer != NULL ) {
|
if (buffer != NULL)
|
||||||
LocalFree( buffer );
|
{
|
||||||
|
LocalFree(buffer);
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
return msg;
|
return msg;
|
||||||
|
|
||||||
} // _get_err_msg
|
} // _get_err_msg
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string dir_sep() { return "\\"; } // dir_sep
|
||||||
dir_sep(
|
|
||||||
) {
|
|
||||||
return "\\";
|
|
||||||
} // dir_sep
|
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string exe_path()
|
||||||
exe_path(
|
{
|
||||||
) {
|
|
||||||
|
|
||||||
buffer_t path( _size );
|
buffer_t path(_size);
|
||||||
int count = _count;
|
int count = _count;
|
||||||
|
|
||||||
for ( ; ; ) {
|
for (;;)
|
||||||
|
{
|
||||||
|
|
||||||
DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
|
DWORD len = GetModuleFileNameA(NULL, &path.front(), path.size());
|
||||||
|
|
||||||
if ( len == 0 ) {
|
if (len == 0)
|
||||||
|
{
|
||||||
int err = GetLastError();
|
int err = GetLastError();
|
||||||
log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
|
log_error("ERROR: Getting executable path failed: %s\n",
|
||||||
exit( 2 );
|
err_msg(err).c_str());
|
||||||
|
exit(2);
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
if ( len < path.size() ) {
|
if (len < path.size())
|
||||||
path.resize( len );
|
{
|
||||||
|
path.resize(len);
|
||||||
break;
|
break;
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
// Buffer too small.
|
// Buffer too small.
|
||||||
if ( count > 0 ) {
|
if (count > 0)
|
||||||
-- count;
|
{
|
||||||
path.resize( path.size() * 2 );
|
--count;
|
||||||
} else {
|
path.resize(path.size() * 2);
|
||||||
log_error(
|
}
|
||||||
"ERROR: Getting executable path failed: "
|
else
|
||||||
|
{
|
||||||
|
log_error("ERROR: Getting executable path failed: "
|
||||||
"Buffer of %lu bytes is still too small\n",
|
"Buffer of %lu bytes is still too small\n",
|
||||||
(unsigned long) path.size()
|
(unsigned long)path.size());
|
||||||
);
|
exit(2);
|
||||||
exit( 2 );
|
|
||||||
}; // if
|
}; // if
|
||||||
|
|
||||||
}; // forever
|
}; // forever
|
||||||
|
|
||||||
return std::string( & path.front(), path.size() );
|
return std::string(&path.front(), path.size());
|
||||||
|
|
||||||
} // exe_path
|
} // exe_path
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string exe_dir()
|
||||||
exe_dir(
|
{
|
||||||
) {
|
|
||||||
|
|
||||||
std::string exe = exe_path();
|
std::string exe = exe_path();
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
// Splitting path into components.
|
// Splitting path into components.
|
||||||
buffer_t drv( _MAX_DRIVE );
|
buffer_t drv(_MAX_DRIVE);
|
||||||
buffer_t dir( _MAX_DIR );
|
buffer_t dir(_MAX_DIR);
|
||||||
count = _count;
|
count = _count;
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
for ( ; ; ) {
|
for (;;)
|
||||||
|
{
|
||||||
int rc =
|
int rc =
|
||||||
_splitpath_s(
|
_splitpath_s(exe.c_str(), &drv.front(), drv.size(), &dir.front(),
|
||||||
exe.c_str(),
|
dir.size(), NULL, 0, // We need neither name
|
||||||
& drv.front(), drv.size(),
|
|
||||||
& dir.front(), dir.size(),
|
|
||||||
NULL, 0, // We need neither name
|
|
||||||
NULL, 0 // nor extension
|
NULL, 0 // nor extension
|
||||||
);
|
);
|
||||||
if ( rc == 0 ) {
|
if (rc == 0)
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
} else if ( rc == ERANGE ) {
|
}
|
||||||
if ( count > 0 ) {
|
else if (rc == ERANGE)
|
||||||
-- count;
|
{
|
||||||
|
if (count > 0)
|
||||||
|
{
|
||||||
|
--count;
|
||||||
// Buffer is too small, but it is not clear which one.
|
// Buffer is too small, but it is not clear which one.
|
||||||
// So we have to enlarge all.
|
// So we have to enlarge all.
|
||||||
drv.resize( drv.size() * 2 );
|
drv.resize(drv.size() * 2);
|
||||||
dir.resize( dir.size() * 2 );
|
dir.resize(dir.size() * 2);
|
||||||
} else {
|
}
|
||||||
log_error(
|
else
|
||||||
"ERROR: Getting executable path failed: "
|
{
|
||||||
|
log_error("ERROR: Getting executable path failed: "
|
||||||
"Splitting path `%s' to components failed: "
|
"Splitting path `%s' to components failed: "
|
||||||
"Buffers of %lu and %lu bytes are still too small\n",
|
"Buffers of %lu and %lu bytes are still too small\n",
|
||||||
exe.c_str(),
|
exe.c_str(), (unsigned long)drv.size(),
|
||||||
(unsigned long) drv.size(),
|
(unsigned long)dir.size());
|
||||||
(unsigned long) dir.size()
|
exit(2);
|
||||||
);
|
|
||||||
exit( 2 );
|
|
||||||
}; // if
|
}; // if
|
||||||
} else {
|
}
|
||||||
log_error(
|
else
|
||||||
"ERROR: Getting executable path failed: "
|
{
|
||||||
|
log_error("ERROR: Getting executable path failed: "
|
||||||
"Splitting path `%s' to components failed: %s\n",
|
"Splitting path `%s' to components failed: %s\n",
|
||||||
exe.c_str(),
|
exe.c_str(), err_msg(rc).c_str());
|
||||||
err_msg( rc ).c_str()
|
exit(2);
|
||||||
);
|
|
||||||
exit( 2 );
|
|
||||||
}; // if
|
}; // if
|
||||||
}; // forever
|
}; // forever
|
||||||
|
|
||||||
#else // __MINGW32__
|
#else // __MINGW32__
|
||||||
|
|
||||||
// MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
|
// MinGW does not have the "secure" _splitpath_s, use the insecure version
|
||||||
_splitpath(
|
// instead.
|
||||||
exe.c_str(),
|
_splitpath(exe.c_str(), &drv.front(), &dir.front(),
|
||||||
& drv.front(),
|
|
||||||
& dir.front(),
|
|
||||||
NULL, // We need neither name
|
NULL, // We need neither name
|
||||||
NULL // nor extension
|
NULL // nor extension
|
||||||
);
|
);
|
||||||
#endif // __MINGW32__
|
#endif // __MINGW32__
|
||||||
|
|
||||||
// Combining components back to path.
|
// Combining components back to path.
|
||||||
// I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
|
// I failed with "secure" `_makepath_s'. If buffer is too small, instead of
|
||||||
// ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
|
// returning ERANGE, `_makepath_s' pops up dialog box and offers to debug
|
||||||
// So let us try to guess the size of result and go with insecure `_makepath'.
|
// the program. D'oh! So let us try to guess the size of result and go with
|
||||||
buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
|
// insecure `_makepath'.
|
||||||
_makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
|
buffer_t path(std::max(drv.size() + dir.size(), size_t(_MAX_PATH)) + 10);
|
||||||
|
_makepath(&path.front(), &drv.front(), &dir.front(), NULL, NULL);
|
||||||
|
|
||||||
return & path.front();
|
return &path.front();
|
||||||
|
|
||||||
} // exe_dir
|
} // exe_dir
|
||||||
|
|
||||||
|
|
||||||
#endif // _WIN32
|
#endif // _WIN32
|
||||||
|
|
||||||
|
|
||||||
std::string
|
std::string err_msg(int err) { return _err_msg(err, 0); } // err_msg
|
||||||
err_msg(
|
|
||||||
int err
|
|
||||||
) {
|
|
||||||
|
|
||||||
return _err_msg( err, 0 );
|
|
||||||
|
|
||||||
} // err_msg
|
|
||||||
|
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
@@ -528,39 +526,34 @@ err_msg(
|
|||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
|
||||||
char *
|
char* get_err_msg(int err)
|
||||||
get_err_msg(
|
{
|
||||||
int err
|
char* msg = strdup(err_msg(err).c_str());
|
||||||
) {
|
CHECK_PTR(msg);
|
||||||
char * msg = strdup( err_msg( err ).c_str() );
|
|
||||||
CHECK_PTR( msg );
|
|
||||||
return msg;
|
return msg;
|
||||||
} // get_err_msg
|
} // get_err_msg
|
||||||
|
|
||||||
|
|
||||||
char *
|
char* get_dir_sep()
|
||||||
get_dir_sep(
|
{
|
||||||
) {
|
char* sep = strdup(dir_sep().c_str());
|
||||||
char * sep = strdup( dir_sep().c_str() );
|
CHECK_PTR(sep);
|
||||||
CHECK_PTR( sep );
|
|
||||||
return sep;
|
return sep;
|
||||||
} // get_dir_sep
|
} // get_dir_sep
|
||||||
|
|
||||||
|
|
||||||
char *
|
char* get_exe_path()
|
||||||
get_exe_path(
|
{
|
||||||
) {
|
char* path = strdup(exe_path().c_str());
|
||||||
char * path = strdup( exe_path().c_str() );
|
CHECK_PTR(path);
|
||||||
CHECK_PTR( path );
|
|
||||||
return path;
|
return path;
|
||||||
} // get_exe_path
|
} // get_exe_path
|
||||||
|
|
||||||
|
|
||||||
char *
|
char* get_exe_dir()
|
||||||
get_exe_dir(
|
{
|
||||||
) {
|
char* dir = strdup(exe_dir().c_str());
|
||||||
char * dir = strdup( exe_dir().c_str() );
|
CHECK_PTR(dir);
|
||||||
CHECK_PTR( dir );
|
|
||||||
return dir;
|
return dir;
|
||||||
} // get_exe_dir
|
} // get_exe_dir
|
||||||
|
|
||||||
|
|||||||
@@ -24,12 +24,12 @@
|
|||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
std::string err_msg( int err );
|
std::string err_msg(int err);
|
||||||
std::string dir_sep();
|
std::string dir_sep();
|
||||||
std::string exe_path();
|
std::string exe_path();
|
||||||
std::string exe_dir();
|
std::string exe_dir();
|
||||||
|
|
||||||
#endif // __cplusplus
|
#endif // __cplusplus
|
||||||
|
|
||||||
@@ -37,9 +37,9 @@
|
|||||||
// C interface.
|
// C interface.
|
||||||
// -------------------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
char * get_err_msg( int err ); // Returns system error message. Subject to free.
|
char* get_err_msg(int err); // Returns system error message. Subject to free.
|
||||||
char * get_dir_sep(); // Returns dir separator. Subject to free.
|
char* get_dir_sep(); // Returns dir separator. Subject to free.
|
||||||
char * get_exe_path(); // Returns path of current executable. Subject to free.
|
char* get_exe_path(); // Returns path of current executable. Subject to free.
|
||||||
char * get_exe_dir(); // Returns dir of current executable. Subject to free.
|
char* get_exe_dir(); // Returns dir of current executable. Subject to free.
|
||||||
|
|
||||||
#endif // __os_helpers_h__
|
#endif // __os_helpers_h__
|
||||||
|
|||||||
@@ -34,42 +34,53 @@ CompilationCacheMode gCompilationCacheMode = kCacheModeCompileIfAbsent;
|
|||||||
std::string gCompilationCachePath = ".";
|
std::string gCompilationCachePath = ".";
|
||||||
std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
|
std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
|
||||||
|
|
||||||
void helpInfo ()
|
void helpInfo()
|
||||||
{
|
{
|
||||||
log_info("Common options:\n"
|
log_info(
|
||||||
" -h, --help This help\n"
|
R"(Common options:
|
||||||
" --compilation-mode <mode> Specify a compilation mode. Mode can be:\n"
|
-h, --help
|
||||||
" online Use online compilation (default)\n"
|
This help
|
||||||
" binary Use binary offline compilation\n"
|
--compilation-mode <mode>
|
||||||
" spir-v Use SPIR-V offline compilation\n"
|
Specify a compilation mode. Mode can be:
|
||||||
"\n"
|
online Use online compilation (default)
|
||||||
" For offline compilation (binary and spir-v modes) only:\n"
|
binary Use binary offline compilation
|
||||||
" --compilation-cache-mode <cache-mode> Specify a compilation caching mode:\n"
|
spir-v Use SPIR-V offline compilation
|
||||||
" compile-if-absent Read from cache if already populated, or\n"
|
|
||||||
" else perform offline compilation (default)\n"
|
For offline compilation (binary and spir-v modes) only:
|
||||||
" force-read Force reading from the cache\n"
|
--compilation-cache-mode <cache-mode>
|
||||||
" overwrite Disable reading from the cache\n"
|
Specify a compilation caching mode:
|
||||||
" dump-cl-files Dumps the .cl and build .options files used by the test suite\n"
|
compile-if-absent
|
||||||
" --compilation-cache-path <path> Path for offline compiler output and CL source\n"
|
Read from cache if already populated, or else perform
|
||||||
" --compilation-program <prog> Program to use for offline compilation,\n"
|
offline compilation (default)
|
||||||
" defaults to " DEFAULT_COMPILATION_PROGRAM "\n"
|
force-read
|
||||||
"\n");
|
Force reading from the cache
|
||||||
|
overwrite
|
||||||
|
Disable reading from the cache
|
||||||
|
dump-cl-files
|
||||||
|
Dumps the .cl and build .options files used by the test suite
|
||||||
|
--compilation-cache-path <path>
|
||||||
|
Path for offline compiler output and CL source
|
||||||
|
--compilation-program <prog>
|
||||||
|
Program to use for offline compilation, defaults to:
|
||||||
|
)" DEFAULT_COMPILATION_PROGRAM "\n\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
int parseCustomParam (int argc, const char *argv[], const char *ignore)
|
int parseCustomParam(int argc, const char *argv[], const char *ignore)
|
||||||
{
|
{
|
||||||
int delArg = 0;
|
int delArg = 0;
|
||||||
|
|
||||||
for (int i=1; i<argc; i++)
|
for (int i = 1; i < argc; i++)
|
||||||
{
|
{
|
||||||
if(ignore != 0)
|
if (ignore != 0)
|
||||||
{
|
{
|
||||||
// skip parameters that require special/different treatment in application
|
// skip parameters that require special/different treatment in
|
||||||
// (generic interpretation and parameter removal will not be performed)
|
// application (generic interpretation and parameter removal will
|
||||||
const char * ptr = strstr(ignore, argv[i]);
|
// not be performed)
|
||||||
if(ptr != 0 &&
|
const char *ptr = strstr(ignore, argv[i]);
|
||||||
(ptr == ignore || ptr[-1] == ' ') && //first on list or ' ' before
|
if (ptr != 0 && (ptr == ignore || ptr[-1] == ' ')
|
||||||
(ptr[strlen(argv[i])] == 0 || ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
|
&& // first on list or ' ' before
|
||||||
|
(ptr[strlen(argv[i])] == 0
|
||||||
|
|| ptr[strlen(argv[i])] == ' ')) // last on list or ' ' after
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,7 +91,7 @@ int parseCustomParam (int argc, const char *argv[], const char *ignore)
|
|||||||
// Note: we don't increment delArg to delete this argument,
|
// Note: we don't increment delArg to delete this argument,
|
||||||
// to allow the caller's argument parsing routine to see the
|
// to allow the caller's argument parsing routine to see the
|
||||||
// option and print its own help.
|
// option and print its own help.
|
||||||
helpInfo ();
|
helpInfo();
|
||||||
}
|
}
|
||||||
else if (!strcmp(argv[i], "--compilation-mode"))
|
else if (!strcmp(argv[i], "--compilation-mode"))
|
||||||
{
|
{
|
||||||
@@ -142,15 +153,18 @@ int parseCustomParam (int argc, const char *argv[], const char *ignore)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_error("Compilation cache mode not recognized: %s\n", mode);
|
log_error("Compilation cache mode not recognized: %s\n",
|
||||||
|
mode);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
log_info("Compilation cache mode specified: %s\n", mode);
|
log_info("Compilation cache mode specified: %s\n", mode);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_error("Compilation cache mode parameters are incorrect. Usage:\n"
|
log_error(
|
||||||
" --compilation-cache-mode <compile-if-absent|force-read|overwrite>\n");
|
"Compilation cache mode parameters are incorrect. Usage:\n"
|
||||||
|
" --compilation-cache-mode "
|
||||||
|
"<compile-if-absent|force-read|overwrite>\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -164,7 +178,8 @@ int parseCustomParam (int argc, const char *argv[], const char *ignore)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_error("Path argument for --compilation-cache-path was not specified.\n");
|
log_error("Path argument for --compilation-cache-path was not "
|
||||||
|
"specified.\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -178,34 +193,34 @@ int parseCustomParam (int argc, const char *argv[], const char *ignore)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_error("Program argument for --compilation-program was not specified.\n");
|
log_error("Program argument for --compilation-program was not "
|
||||||
|
"specified.\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//cleaning parameters from argv tab
|
// cleaning parameters from argv tab
|
||||||
for (int j = i; j < argc - delArg; j++)
|
for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg];
|
||||||
argv[j] = argv[j + delArg];
|
|
||||||
argc -= delArg;
|
argc -= delArg;
|
||||||
i -= delArg;
|
i -= delArg;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((gCompilationCacheMode == kCacheModeForceRead || gCompilationCacheMode == kCacheModeOverwrite)
|
if ((gCompilationCacheMode == kCacheModeForceRead
|
||||||
|
|| gCompilationCacheMode == kCacheModeOverwrite)
|
||||||
&& gCompilationMode == kOnline)
|
&& gCompilationMode == kOnline)
|
||||||
{
|
{
|
||||||
log_error("Compilation cache mode can only be specified when using an offline compilation mode.\n");
|
log_error("Compilation cache mode can only be specified when using an "
|
||||||
|
"offline compilation mode.\n");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
return argc;
|
return argc;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_power_of_two(int number)
|
bool is_power_of_two(int number) { return number && !(number & (number - 1)); }
|
||||||
{
|
|
||||||
return number && !(number & (number - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
|
extern void parseWimpyReductionFactor(const char *&arg,
|
||||||
|
int &wimpyReductionFactor)
|
||||||
{
|
{
|
||||||
const char *arg_temp = strchr(&arg[1], ']');
|
const char *arg_temp = strchr(&arg[1], ']');
|
||||||
if (arg_temp != 0)
|
if (arg_temp != 0)
|
||||||
@@ -214,12 +229,15 @@ extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFacto
|
|||||||
arg = arg_temp; // Advance until ']'
|
arg = arg_temp; // Advance until ']'
|
||||||
if (is_power_of_two(new_factor))
|
if (is_power_of_two(new_factor))
|
||||||
{
|
{
|
||||||
log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor);
|
log_info("\n Wimpy reduction factor changed from %d to %d \n",
|
||||||
|
wimpyReductionFactor, new_factor);
|
||||||
wimpyReductionFactor = new_factor;
|
wimpyReductionFactor = new_factor;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor);
|
log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be "
|
||||||
|
"power of 2. The default value will be used.\n",
|
||||||
|
new_factor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,8 +39,10 @@ extern CompilationCacheMode gCompilationCacheMode;
|
|||||||
extern std::string gCompilationCachePath;
|
extern std::string gCompilationCachePath;
|
||||||
extern std::string gCompilationProgram;
|
extern std::string gCompilationProgram;
|
||||||
|
|
||||||
extern int parseCustomParam (int argc, const char *argv[], const char *ignore = 0 );
|
extern int parseCustomParam(int argc, const char *argv[],
|
||||||
|
const char *ignore = 0);
|
||||||
|
|
||||||
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
|
extern void parseWimpyReductionFactor(const char *&arg,
|
||||||
|
int &wimpyReductionFactor);
|
||||||
|
|
||||||
#endif // _parseParameters_h
|
#endif // _parseParameters_h
|
||||||
|
|||||||
@@ -16,34 +16,39 @@
|
|||||||
#ifndef _ref_counting_h
|
#ifndef _ref_counting_h
|
||||||
#define _ref_counting_h
|
#define _ref_counting_h
|
||||||
|
|
||||||
#define MARK_REF_COUNT_BASE( c, type, bigType ) \
|
#define MARK_REF_COUNT_BASE(c, type, bigType) \
|
||||||
cl_uint c##_refCount; \
|
cl_uint c##_refCount; \
|
||||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount ), &c##_refCount, NULL ); \
|
error = clGet##type##Info(c, CL_##bigType##_REFERENCE_COUNT, \
|
||||||
test_error( error, "Unable to check reference count for " #type );
|
sizeof(c##_refCount), &c##_refCount, NULL); \
|
||||||
|
test_error(error, "Unable to check reference count for " #type);
|
||||||
|
|
||||||
#define TEST_REF_COUNT_BASE( c, type, bigType ) \
|
#define TEST_REF_COUNT_BASE(c, type, bigType) \
|
||||||
cl_uint c##_refCount_new; \
|
cl_uint c##_refCount_new; \
|
||||||
error = clGet##type##Info( c, CL_##bigType##_REFERENCE_COUNT, sizeof( c##_refCount_new ), &c##_refCount_new, NULL ); \
|
error = \
|
||||||
test_error( error, "Unable to check reference count for " #type ); \
|
clGet##type##Info(c, CL_##bigType##_REFERENCE_COUNT, \
|
||||||
if( c##_refCount != c##_refCount_new ) \
|
sizeof(c##_refCount_new), &c##_refCount_new, NULL); \
|
||||||
|
test_error(error, "Unable to check reference count for " #type); \
|
||||||
|
if (c##_refCount != c##_refCount_new) \
|
||||||
{ \
|
{ \
|
||||||
log_error( "ERROR: Reference count for " #type " changed! (was %d, now %d)\n", c##_refCount, c##_refCount_new ); \
|
log_error("ERROR: Reference count for " #type \
|
||||||
|
" changed! (was %d, now %d)\n", \
|
||||||
|
c##_refCount, c##_refCount_new); \
|
||||||
return -1; \
|
return -1; \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MARK_REF_COUNT_CONTEXT( c ) MARK_REF_COUNT_BASE( c, Context, CONTEXT )
|
#define MARK_REF_COUNT_CONTEXT(c) MARK_REF_COUNT_BASE(c, Context, CONTEXT)
|
||||||
#define TEST_REF_COUNT_CONTEXT( c ) TEST_REF_COUNT_BASE( c, Context, CONTEXT )
|
#define TEST_REF_COUNT_CONTEXT(c) TEST_REF_COUNT_BASE(c, Context, CONTEXT)
|
||||||
|
|
||||||
#define MARK_REF_COUNT_DEVICE( c ) MARK_REF_COUNT_BASE( c, Device, DEVICE )
|
#define MARK_REF_COUNT_DEVICE(c) MARK_REF_COUNT_BASE(c, Device, DEVICE)
|
||||||
#define TEST_REF_COUNT_DEVICE( c ) TEST_REF_COUNT_BASE( c, Device, DEVICE )
|
#define TEST_REF_COUNT_DEVICE(c) TEST_REF_COUNT_BASE(c, Device, DEVICE)
|
||||||
|
|
||||||
#define MARK_REF_COUNT_QUEUE( c ) MARK_REF_COUNT_BASE( c, CommandQueue, QUEUE )
|
#define MARK_REF_COUNT_QUEUE(c) MARK_REF_COUNT_BASE(c, CommandQueue, QUEUE)
|
||||||
#define TEST_REF_COUNT_QUEUE( c ) TEST_REF_COUNT_BASE( c, CommandQueue, QUEUE )
|
#define TEST_REF_COUNT_QUEUE(c) TEST_REF_COUNT_BASE(c, CommandQueue, QUEUE)
|
||||||
|
|
||||||
#define MARK_REF_COUNT_PROGRAM( c ) MARK_REF_COUNT_BASE( c, Program, PROGRAM )
|
#define MARK_REF_COUNT_PROGRAM(c) MARK_REF_COUNT_BASE(c, Program, PROGRAM)
|
||||||
#define TEST_REF_COUNT_PROGRAM( c ) TEST_REF_COUNT_BASE( c, Program, PROGRAM )
|
#define TEST_REF_COUNT_PROGRAM(c) TEST_REF_COUNT_BASE(c, Program, PROGRAM)
|
||||||
|
|
||||||
#define MARK_REF_COUNT_MEM( c ) MARK_REF_COUNT_BASE( c, MemObject, MEM )
|
#define MARK_REF_COUNT_MEM(c) MARK_REF_COUNT_BASE(c, MemObject, MEM)
|
||||||
#define TEST_REF_COUNT_MEM( c ) TEST_REF_COUNT_BASE( c, MemObject, MEM )
|
#define TEST_REF_COUNT_MEM(c) TEST_REF_COUNT_BASE(c, MemObject, MEM)
|
||||||
|
|
||||||
#endif // _ref_counting_h
|
#endif // _ref_counting_h
|
||||||
|
|||||||
@@ -15,46 +15,49 @@
|
|||||||
//
|
//
|
||||||
#include "rounding_mode.h"
|
#include "rounding_mode.h"
|
||||||
|
|
||||||
#if (defined( __arm__ ) || defined(__aarch64__))
|
#if (defined(__arm__) || defined(__aarch64__))
|
||||||
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
|
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
|
||||||
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
|
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
|
||||||
|
|
||||||
#define _ARM_FE_FTZ 0x1000000
|
#define _ARM_FE_FTZ 0x1000000
|
||||||
#define _ARM_FE_NFTZ 0x0
|
#define _ARM_FE_NFTZ 0x0
|
||||||
#if defined(__aarch64__)
|
#if defined(__aarch64__)
|
||||||
#define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
|
#define _FPU_GETCW(cw) __asm__("MRS %0,FPCR" : "=r"(cw))
|
||||||
#define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
|
#define _FPU_SETCW(cw) __asm__("MSR FPCR,%0" : : "ri"(cw))
|
||||||
#else
|
#else
|
||||||
#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
|
#define _FPU_GETCW(cw) __asm__("VMRS %0,FPSCR" : "=r"(cw))
|
||||||
#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
|
#define _FPU_SETCW(cw) __asm__("VMSR FPSCR,%0" : : "ri"(cw))
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
|
#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
|
||||||
#define _ARM_FE_TONEAREST 0x0
|
#define _ARM_FE_TONEAREST 0x0
|
||||||
#define _ARM_FE_UPWARD 0x400000
|
#define _ARM_FE_UPWARD 0x400000
|
||||||
#define _ARM_FE_DOWNWARD 0x800000
|
#define _ARM_FE_DOWNWARD 0x800000
|
||||||
#define _ARM_FE_TOWARDZERO 0xc00000
|
#define _ARM_FE_TOWARDZERO 0xc00000
|
||||||
RoundingMode set_round( RoundingMode r, Type outType )
|
RoundingMode set_round(RoundingMode r, Type outType)
|
||||||
{
|
{
|
||||||
static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
|
static const int flt_rounds[kRoundingModeCount] = {
|
||||||
_ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
|
_ARM_FE_TONEAREST, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
|
||||||
static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
|
_ARM_FE_TOWARDZERO
|
||||||
_ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
|
};
|
||||||
|
static const int int_rounds[kRoundingModeCount] = {
|
||||||
|
_ARM_FE_TOWARDZERO, _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD,
|
||||||
|
_ARM_FE_TOWARDZERO
|
||||||
|
};
|
||||||
const int *p = int_rounds;
|
const int *p = int_rounds;
|
||||||
if( outType == kfloat || outType == kdouble )
|
if (outType == kfloat || outType == kdouble) p = flt_rounds;
|
||||||
p = flt_rounds;
|
|
||||||
|
|
||||||
int fpscr = 0;
|
int fpscr = 0;
|
||||||
RoundingMode oldRound = get_round();
|
RoundingMode oldRound = get_round();
|
||||||
|
|
||||||
_FPU_GETCW(fpscr);
|
_FPU_GETCW(fpscr);
|
||||||
_FPU_SETCW( p[r] | (fpscr & ~FPSCR_ROUND_MASK));
|
_FPU_SETCW(p[r] | (fpscr & ~FPSCR_ROUND_MASK));
|
||||||
|
|
||||||
return oldRound;
|
return oldRound;
|
||||||
}
|
}
|
||||||
|
|
||||||
RoundingMode get_round( void )
|
RoundingMode get_round(void)
|
||||||
{
|
{
|
||||||
int fpscr;
|
int fpscr;
|
||||||
int oldRound;
|
int oldRound;
|
||||||
@@ -62,180 +65,192 @@ RoundingMode get_round( void )
|
|||||||
_FPU_GETCW(fpscr);
|
_FPU_GETCW(fpscr);
|
||||||
oldRound = (fpscr & FPSCR_ROUND_MASK);
|
oldRound = (fpscr & FPSCR_ROUND_MASK);
|
||||||
|
|
||||||
switch( oldRound )
|
switch (oldRound)
|
||||||
{
|
{
|
||||||
case _ARM_FE_TONEAREST:
|
case _ARM_FE_TONEAREST: return kRoundToNearestEven;
|
||||||
return kRoundToNearestEven;
|
case _ARM_FE_UPWARD: return kRoundUp;
|
||||||
case _ARM_FE_UPWARD:
|
case _ARM_FE_DOWNWARD: return kRoundDown;
|
||||||
return kRoundUp;
|
case _ARM_FE_TOWARDZERO: return kRoundTowardZero;
|
||||||
case _ARM_FE_DOWNWARD:
|
|
||||||
return kRoundDown;
|
|
||||||
case _ARM_FE_TOWARDZERO:
|
|
||||||
return kRoundTowardZero;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return kDefaultRoundingMode;
|
return kDefaultRoundingMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif !(defined(_WIN32) && defined(_MSC_VER))
|
#elif !(defined(_WIN32) && defined(_MSC_VER))
|
||||||
RoundingMode set_round( RoundingMode r, Type outType )
|
RoundingMode set_round(RoundingMode r, Type outType)
|
||||||
{
|
{
|
||||||
static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
|
static const int flt_rounds[kRoundingModeCount] = {
|
||||||
static const int int_rounds[ kRoundingModeCount ] = { FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
|
FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
|
||||||
|
};
|
||||||
|
static const int int_rounds[kRoundingModeCount] = {
|
||||||
|
FE_TOWARDZERO, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO
|
||||||
|
};
|
||||||
const int *p = int_rounds;
|
const int *p = int_rounds;
|
||||||
if( outType == kfloat || outType == kdouble )
|
if (outType == kfloat || outType == kdouble) p = flt_rounds;
|
||||||
p = flt_rounds;
|
|
||||||
int oldRound = fegetround();
|
int oldRound = fegetround();
|
||||||
fesetround( p[r] );
|
fesetround(p[r]);
|
||||||
|
|
||||||
switch( oldRound )
|
switch (oldRound)
|
||||||
{
|
{
|
||||||
case FE_TONEAREST:
|
case FE_TONEAREST: return kRoundToNearestEven;
|
||||||
return kRoundToNearestEven;
|
case FE_UPWARD: return kRoundUp;
|
||||||
case FE_UPWARD:
|
case FE_DOWNWARD: return kRoundDown;
|
||||||
return kRoundUp;
|
case FE_TOWARDZERO: return kRoundTowardZero;
|
||||||
case FE_DOWNWARD:
|
default: abort(); // ??!
|
||||||
return kRoundDown;
|
|
||||||
case FE_TOWARDZERO:
|
|
||||||
return kRoundTowardZero;
|
|
||||||
default:
|
|
||||||
abort(); // ??!
|
|
||||||
}
|
}
|
||||||
return kDefaultRoundingMode; //never happens
|
return kDefaultRoundingMode; // never happens
|
||||||
}
|
}
|
||||||
|
|
||||||
RoundingMode get_round( void )
|
RoundingMode get_round(void)
|
||||||
{
|
{
|
||||||
int oldRound = fegetround();
|
int oldRound = fegetround();
|
||||||
|
|
||||||
switch( oldRound )
|
switch (oldRound)
|
||||||
{
|
{
|
||||||
case FE_TONEAREST:
|
case FE_TONEAREST: return kRoundToNearestEven;
|
||||||
return kRoundToNearestEven;
|
case FE_UPWARD: return kRoundUp;
|
||||||
case FE_UPWARD:
|
case FE_DOWNWARD: return kRoundDown;
|
||||||
return kRoundUp;
|
case FE_TOWARDZERO: return kRoundTowardZero;
|
||||||
case FE_DOWNWARD:
|
|
||||||
return kRoundDown;
|
|
||||||
case FE_TOWARDZERO:
|
|
||||||
return kRoundTowardZero;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return kDefaultRoundingMode;
|
return kDefaultRoundingMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
RoundingMode set_round( RoundingMode r, Type outType )
|
RoundingMode set_round(RoundingMode r, Type outType)
|
||||||
{
|
{
|
||||||
static const int flt_rounds[ kRoundingModeCount ] = { _RC_NEAR, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
|
static const int flt_rounds[kRoundingModeCount] = { _RC_NEAR, _RC_NEAR,
|
||||||
static const int int_rounds[ kRoundingModeCount ] = { _RC_CHOP, _RC_NEAR, _RC_UP, _RC_DOWN, _RC_CHOP };
|
_RC_UP, _RC_DOWN,
|
||||||
const int *p = ( outType == kfloat || outType == kdouble )? flt_rounds : int_rounds;
|
_RC_CHOP };
|
||||||
|
static const int int_rounds[kRoundingModeCount] = { _RC_CHOP, _RC_NEAR,
|
||||||
|
_RC_UP, _RC_DOWN,
|
||||||
|
_RC_CHOP };
|
||||||
|
const int *p =
|
||||||
|
(outType == kfloat || outType == kdouble) ? flt_rounds : int_rounds;
|
||||||
unsigned int oldRound;
|
unsigned int oldRound;
|
||||||
|
|
||||||
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
|
int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
|
||||||
if (err) {
|
if (err)
|
||||||
vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n", __FILE__, __LINE__);
|
{
|
||||||
return kDefaultRoundingMode; //what else never happens
|
vlog_error("\t\tERROR: -- cannot get rounding mode in %s:%d\n",
|
||||||
|
__FILE__, __LINE__);
|
||||||
|
return kDefaultRoundingMode; // what else never happens
|
||||||
}
|
}
|
||||||
|
|
||||||
oldRound &= _MCW_RC;
|
oldRound &= _MCW_RC;
|
||||||
|
|
||||||
RoundingMode old =
|
RoundingMode old = (oldRound == _RC_NEAR)
|
||||||
(oldRound == _RC_NEAR)? kRoundToNearestEven :
|
? kRoundToNearestEven
|
||||||
(oldRound == _RC_UP)? kRoundUp :
|
: (oldRound == _RC_UP) ? kRoundUp
|
||||||
(oldRound == _RC_DOWN)? kRoundDown :
|
: (oldRound == _RC_DOWN)
|
||||||
(oldRound == _RC_CHOP)? kRoundTowardZero:
|
? kRoundDown
|
||||||
kDefaultRoundingMode;
|
: (oldRound == _RC_CHOP) ? kRoundTowardZero
|
||||||
|
: kDefaultRoundingMode;
|
||||||
|
|
||||||
_controlfp_s(&oldRound, p[r], _MCW_RC); //setting new rounding mode
|
_controlfp_s(&oldRound, p[r], _MCW_RC); // setting new rounding mode
|
||||||
return old; //returning old rounding mode
|
return old; // returning old rounding mode
|
||||||
}
|
}
|
||||||
|
|
||||||
RoundingMode get_round( void )
|
RoundingMode get_round(void)
|
||||||
{
|
{
|
||||||
unsigned int oldRound;
|
unsigned int oldRound;
|
||||||
|
|
||||||
int err = _controlfp_s(&oldRound, 0, 0); //get rounding mode into oldRound
|
int err = _controlfp_s(&oldRound, 0, 0); // get rounding mode into oldRound
|
||||||
oldRound &= _MCW_RC;
|
oldRound &= _MCW_RC;
|
||||||
return
|
return (oldRound == _RC_NEAR)
|
||||||
(oldRound == _RC_NEAR)? kRoundToNearestEven :
|
? kRoundToNearestEven
|
||||||
(oldRound == _RC_UP)? kRoundUp :
|
: (oldRound == _RC_UP) ? kRoundUp
|
||||||
(oldRound == _RC_DOWN)? kRoundDown :
|
: (oldRound == _RC_DOWN)
|
||||||
(oldRound == _RC_CHOP)? kRoundTowardZero:
|
? kRoundDown
|
||||||
kDefaultRoundingMode;
|
: (oldRound == _RC_CHOP) ? kRoundTowardZero
|
||||||
|
: kDefaultRoundingMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//
|
//
|
||||||
// FlushToZero() sets the host processor into ftz mode. It is intended to have a remote effect on the behavior of the code in
|
// FlushToZero() sets the host processor into ftz mode. It is intended to have
|
||||||
// basic_test_conversions.c. Some host processors may not support this mode, in which case you'll need to do some clamping in
|
// a remote effect on the behavior of the code in basic_test_conversions.c. Some
|
||||||
// software by testing against FLT_MIN or DBL_MIN in that file.
|
// host processors may not support this mode, in which case you'll need to do some
|
||||||
|
// clamping in software by testing against FLT_MIN or DBL_MIN in that file.
|
||||||
//
|
//
|
||||||
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT* have the behavior in section 7.5.3 of
|
// Note: IEEE-754 says conversions are basic operations. As such they do *NOT*
|
||||||
// the OpenCL spec. They *ALWAYS* flush to zero for subnormal inputs or outputs when FTZ mode is on like other basic
|
// have the behavior in section 7.5.3 of the OpenCL spec. They *ALWAYS* flush to
|
||||||
|
// zero for subnormal inputs or outputs when FTZ mode is on like other basic
|
||||||
// operators do (e.g. add, subtract, multiply, divide, etc.)
|
// operators do (e.g. add, subtract, multiply, divide, etc.)
|
||||||
//
|
//
|
||||||
// Configuring hardware to FTZ mode varies by platform.
|
// Configuring hardware to FTZ mode varies by platform.
|
||||||
// CAUTION: Some C implementations may also fail to behave properly in this mode.
|
// CAUTION: Some C implementations may also fail to behave properly in this
|
||||||
|
// mode.
|
||||||
//
|
//
|
||||||
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
|
// On PowerPC, it is done by setting the FPSCR into non-IEEE mode.
|
||||||
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR -- provided that SSE/SSE2
|
// On Intel, you can do this by turning on the FZ and DAZ bits in the MXCSR --
|
||||||
// is used for floating point computation! If your OS uses x87, you'll need to figure out how
|
// provided that SSE/SSE2
|
||||||
// to turn that off for the conversions code in basic_test_conversions.c so that they flush to
|
// is used for floating point computation! If your OS uses x87, you'll
|
||||||
// zero properly. Otherwise, you'll need to add appropriate software clamping to basic_test_conversions.c
|
// need to figure out how to turn that off for the conversions code in
|
||||||
// in which case, these functions are at liberty to do nothing.
|
// basic_test_conversions.c so that they flush to zero properly.
|
||||||
|
// Otherwise, you'll need to add appropriate software clamping to
|
||||||
|
// basic_test_conversions.c in which case, these functions are at
|
||||||
|
// liberty to do nothing.
|
||||||
//
|
//
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined (_WIN32)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_WIN32)
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#elif defined( __PPC__ )
|
#elif defined(__PPC__)
|
||||||
#include <fpu_control.h>
|
#include <fpu_control.h>
|
||||||
#endif
|
#endif
|
||||||
void *FlushToZero( void )
|
void *FlushToZero(void)
|
||||||
{
|
{
|
||||||
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
|
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
||||||
union{ int i; void *p; }u = { _mm_getcsr() };
|
union {
|
||||||
_mm_setcsr( u.i | 0x8040 );
|
int i;
|
||||||
|
void *p;
|
||||||
|
} u = { _mm_getcsr() };
|
||||||
|
_mm_setcsr(u.i | 0x8040);
|
||||||
return u.p;
|
return u.p;
|
||||||
#elif defined( __arm__ ) || defined(__aarch64__)
|
#elif defined(__arm__) || defined(__aarch64__)
|
||||||
int fpscr;
|
int fpscr;
|
||||||
_FPU_GETCW(fpscr);
|
_FPU_GETCW(fpscr);
|
||||||
_FPU_SETCW(fpscr | FPSCR_FZ);
|
_FPU_SETCW(fpscr | FPSCR_FZ);
|
||||||
return NULL;
|
return NULL;
|
||||||
#elif defined( __PPC__ )
|
#elif defined(__PPC__)
|
||||||
fpu_control_t flags = 0;
|
fpu_control_t flags = 0;
|
||||||
_FPU_GETCW(flags);
|
_FPU_GETCW(flags);
|
||||||
flags |= _FPU_MASK_NI;
|
flags |= _FPU_MASK_NI;
|
||||||
_FPU_SETCW(flags);
|
_FPU_SETCW(flags);
|
||||||
return NULL;
|
return NULL;
|
||||||
#else
|
|
||||||
#error Unknown arch
|
|
||||||
#endif
|
|
||||||
#else
|
#else
|
||||||
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
|
#error Unknown arch
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Undo the effects of FlushToZero above, restoring the host to default behavior, using the information passed in p.
|
// Undo the effects of FlushToZero above, restoring the host to default
|
||||||
void UnFlushToZero( void *p)
|
// behavior, using the information passed in p.
|
||||||
|
void UnFlushToZero(void *p)
|
||||||
{
|
{
|
||||||
#if defined( __APPLE__ ) || defined(__linux__) || defined (_WIN32)
|
#if defined(__APPLE__) || defined(__linux__) || defined(_WIN32)
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
|
#if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)
|
||||||
union{ void *p; int i; }u = { p };
|
union {
|
||||||
_mm_setcsr( u.i );
|
void *p;
|
||||||
#elif defined( __arm__ ) || defined(__aarch64__)
|
int i;
|
||||||
|
} u = { p };
|
||||||
|
_mm_setcsr(u.i);
|
||||||
|
#elif defined(__arm__) || defined(__aarch64__)
|
||||||
int fpscr;
|
int fpscr;
|
||||||
_FPU_GETCW(fpscr);
|
_FPU_GETCW(fpscr);
|
||||||
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
||||||
#elif defined( __PPC__)
|
#elif defined(__PPC__)
|
||||||
fpu_control_t flags = 0;
|
fpu_control_t flags = 0;
|
||||||
_FPU_GETCW(flags);
|
_FPU_GETCW(flags);
|
||||||
flags &= ~_FPU_MASK_NI;
|
flags &= ~_FPU_MASK_NI;
|
||||||
_FPU_SETCW(flags);
|
_FPU_SETCW(flags);
|
||||||
#else
|
|
||||||
#error Unknown arch
|
|
||||||
#endif
|
|
||||||
#else
|
#else
|
||||||
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
|
#error Unknown arch
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#error Please configure FlushToZero and UnFlushToZero to behave properly on this operating system.
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,7 +20,7 @@
|
|||||||
|
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
|
|
||||||
#if (defined(_WIN32) && defined (_MSC_VER))
|
#if (defined(_WIN32) && defined(_MSC_VER))
|
||||||
#include "errorHelpers.h"
|
#include "errorHelpers.h"
|
||||||
#include "testHarness.h"
|
#include "testHarness.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -34,7 +34,7 @@ typedef enum
|
|||||||
kRoundTowardZero,
|
kRoundTowardZero,
|
||||||
|
|
||||||
kRoundingModeCount
|
kRoundingModeCount
|
||||||
}RoundingMode;
|
} RoundingMode;
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
{
|
{
|
||||||
@@ -49,15 +49,14 @@ typedef enum
|
|||||||
kulong = 8,
|
kulong = 8,
|
||||||
klong = 9,
|
klong = 9,
|
||||||
|
|
||||||
//This goes last
|
// This goes last
|
||||||
kTypeCount
|
kTypeCount
|
||||||
}Type;
|
} Type;
|
||||||
|
|
||||||
extern RoundingMode set_round( RoundingMode r, Type outType );
|
|
||||||
extern RoundingMode get_round( void );
|
|
||||||
extern void *FlushToZero( void );
|
|
||||||
extern void UnFlushToZero( void *p);
|
|
||||||
|
|
||||||
|
extern RoundingMode set_round(RoundingMode r, Type outType);
|
||||||
|
extern RoundingMode get_round(void);
|
||||||
|
extern void *FlushToZero(void);
|
||||||
|
extern void UnFlushToZero(void *p);
|
||||||
|
|
||||||
|
|
||||||
#endif /* __ROUNDING_MODE_H__ */
|
#endif /* __ROUNDING_MODE_H__ */
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -23,16 +23,24 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
class Version
|
class Version {
|
||||||
{
|
|
||||||
public:
|
public:
|
||||||
Version() : m_major(0), m_minor(0) {}
|
Version(): m_major(0), m_minor(0) {}
|
||||||
Version(int major, int minor) : m_major(major), m_minor(minor) {}
|
Version(int major, int minor): m_major(major), m_minor(minor) {}
|
||||||
bool operator>(const Version& rhs) const { return to_int() > rhs.to_int(); }
|
bool operator>(const Version &rhs) const { return to_int() > rhs.to_int(); }
|
||||||
bool operator<(const Version& rhs) const { return to_int() < rhs.to_int(); }
|
bool operator<(const Version &rhs) const { return to_int() < rhs.to_int(); }
|
||||||
bool operator<=(const Version& rhs) const { return to_int() <= rhs.to_int(); }
|
bool operator<=(const Version &rhs) const
|
||||||
bool operator>=(const Version& rhs) const { return to_int() >= rhs.to_int(); }
|
{
|
||||||
bool operator==(const Version& rhs) const { return to_int() == rhs.to_int(); }
|
return to_int() <= rhs.to_int();
|
||||||
|
}
|
||||||
|
bool operator>=(const Version &rhs) const
|
||||||
|
{
|
||||||
|
return to_int() >= rhs.to_int();
|
||||||
|
}
|
||||||
|
bool operator==(const Version &rhs) const
|
||||||
|
{
|
||||||
|
return to_int() == rhs.to_int();
|
||||||
|
}
|
||||||
int to_int() const { return m_major * 10 + m_minor; }
|
int to_int() const { return m_major * 10 + m_minor; }
|
||||||
std::string to_string() const
|
std::string to_string() const
|
||||||
{
|
{
|
||||||
@@ -66,7 +74,7 @@ Version get_device_cl_version(cl_device_id device);
|
|||||||
typedef struct test_definition
|
typedef struct test_definition
|
||||||
{
|
{
|
||||||
basefn func;
|
basefn func;
|
||||||
const char* name;
|
const char *name;
|
||||||
Version min_version;
|
Version min_version;
|
||||||
} test_definition;
|
} test_definition;
|
||||||
|
|
||||||
@@ -83,57 +91,78 @@ extern int gTestCount;
|
|||||||
extern cl_uint gReSeed;
|
extern cl_uint gReSeed;
|
||||||
extern cl_uint gRandomSeed;
|
extern cl_uint gRandomSeed;
|
||||||
|
|
||||||
// Supply a list of functions to test here. This will allocate a CL device, create a context, all that
|
// Supply a list of functions to test here. This will allocate a CL device,
|
||||||
// setup work, and then call each function in turn as dictated by the passed arguments.
|
// create a context, all that setup work, and then call each function in turn as
|
||||||
// Returns EXIT_SUCCESS iff all tests succeeded or the tests were listed,
|
// dictated by the passed arguments. Returns EXIT_SUCCESS iff all tests
|
||||||
// otherwise return EXIT_FAILURE.
|
// succeeded or the tests were listed, otherwise return EXIT_FAILURE.
|
||||||
extern int runTestHarness( int argc, const char *argv[], int testNum, test_definition testList[],
|
extern int runTestHarness(int argc, const char *argv[], int testNum,
|
||||||
int imageSupportRequired, int forceNoContextCreation, cl_command_queue_properties queueProps );
|
test_definition testList[], int imageSupportRequired,
|
||||||
|
int forceNoContextCreation,
|
||||||
|
cl_command_queue_properties queueProps);
|
||||||
|
|
||||||
// Device checking function. See runTestHarnessWithCheck. If this function returns anything other than TEST_PASS, the harness exits.
|
// Device checking function. See runTestHarnessWithCheck. If this function
|
||||||
typedef test_status (*DeviceCheckFn)( cl_device_id device );
|
// returns anything other than TEST_PASS, the harness exits.
|
||||||
|
typedef test_status (*DeviceCheckFn)(cl_device_id device);
|
||||||
|
|
||||||
// Same as runTestHarness, but also supplies a function that checks the created device for required functionality.
|
// Same as runTestHarness, but also supplies a function that checks the created
|
||||||
// Returns EXIT_SUCCESS iff all tests succeeded or the tests were listed,
|
// device for required functionality. Returns EXIT_SUCCESS iff all tests
|
||||||
// otherwise return EXIT_FAILURE.
|
// succeeded or the tests were listed, otherwise return EXIT_FAILURE.
|
||||||
extern int runTestHarnessWithCheck( int argc, const char *argv[], int testNum, test_definition testList[],
|
extern int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
|
||||||
int forceNoContextCreation, cl_command_queue_properties queueProps,
|
test_definition testList[],
|
||||||
DeviceCheckFn deviceCheckFn );
|
int forceNoContextCreation,
|
||||||
|
cl_command_queue_properties queueProps,
|
||||||
|
DeviceCheckFn deviceCheckFn);
|
||||||
|
|
||||||
// The command line parser used by runTestHarness to break up parameters into calls to callTestFunctions
|
// The command line parser used by runTestHarness to break up parameters into
|
||||||
extern int parseAndCallCommandLineTests( int argc, const char *argv[], cl_device_id device, int testNum,
|
// calls to callTestFunctions
|
||||||
test_definition testList[], int forceNoContextCreation,
|
extern int parseAndCallCommandLineTests(int argc, const char *argv[],
|
||||||
cl_command_queue_properties queueProps, int num_elements );
|
cl_device_id device, int testNum,
|
||||||
|
test_definition testList[],
|
||||||
|
int forceNoContextCreation,
|
||||||
|
cl_command_queue_properties queueProps,
|
||||||
|
int num_elements);
|
||||||
|
|
||||||
// Call this function if you need to do all the setup work yourself, and just need the function list called/
|
// Call this function if you need to do all the setup work yourself, and just
|
||||||
// managed.
|
// need the function list called/managed.
|
||||||
// testList is the data structure that contains test functions and their names
|
// testList is the data structure that contains test functions and their names
|
||||||
// selectedTestList is an array of integers (treated as bools) which tell which function is to be called,
|
// selectedTestList is an array of integers (treated as bools) which tell
|
||||||
// each element at index i, corresponds to the element in testList at index i
|
// which function is to be called,
|
||||||
// resultTestList is an array of statuses which contain the result of each selected test
|
// each element at index i, corresponds to the element in testList at
|
||||||
// testNum is the number of tests in testList, selectedTestList and resultTestList
|
// index i
|
||||||
// contextProps are used to create a testing context for each test
|
// resultTestList is an array of statuses which contain the result of each
|
||||||
// deviceToUse and numElementsToUse are all just passed to each test function
|
// selected test. testNum is the number of tests in testList, selectedTestList
|
||||||
extern void callTestFunctions( test_definition testList[], unsigned char selectedTestList[], test_status resultTestList[],
|
// and resultTestList. contextProps are used to create a testing context for
|
||||||
int testNum, cl_device_id deviceToUse, int forceNoContextCreation, int numElementsToUse,
|
// each test. deviceToUse and numElementsToUse are all just passed to each
|
||||||
cl_command_queue_properties queueProps );
|
// test function
|
||||||
|
extern void callTestFunctions(test_definition testList[],
|
||||||
|
unsigned char selectedTestList[],
|
||||||
|
test_status resultTestList[], int testNum,
|
||||||
|
cl_device_id deviceToUse,
|
||||||
|
int forceNoContextCreation, int numElementsToUse,
|
||||||
|
cl_command_queue_properties queueProps);
|
||||||
|
|
||||||
// This function is called by callTestFunctions, once per function, to do setup, call, logging and cleanup
|
// This function is called by callTestFunctions, once per function, to do setup,
|
||||||
extern test_status callSingleTestFunction( test_definition test, cl_device_id deviceToUse, int forceNoContextCreation,
|
// call, logging and cleanup
|
||||||
int numElementsToUse, cl_command_queue_properties queueProps );
|
extern test_status
|
||||||
|
callSingleTestFunction(test_definition test, cl_device_id deviceToUse,
|
||||||
|
int forceNoContextCreation, int numElementsToUse,
|
||||||
|
cl_command_queue_properties queueProps);
|
||||||
|
|
||||||
///// Miscellaneous steps
|
///// Miscellaneous steps
|
||||||
|
|
||||||
// standard callback function for context pfn_notify
|
// standard callback function for context pfn_notify
|
||||||
extern void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data);
|
extern void CL_CALLBACK notify_callback(const char *errinfo,
|
||||||
|
const void *private_info, size_t cb,
|
||||||
|
void *user_data);
|
||||||
|
|
||||||
extern cl_device_type GetDeviceType( cl_device_id );
|
extern cl_device_type GetDeviceType(cl_device_id);
|
||||||
|
|
||||||
// Given a device (most likely passed in by the harness, but not required), will attempt to find
|
// Given a device (most likely passed in by the harness, but not required), will
|
||||||
// a DIFFERENT device and return it. Useful for finding another device to run multi-device tests against.
|
// attempt to find a DIFFERENT device and return it. Useful for finding another
|
||||||
// Note that returning NULL means an error was hit, but if no error was hit and the device passed in
|
// device to run multi-device tests against. Note that returning NULL means an
|
||||||
// is the only device available, the SAME device is returned, so check!
|
// error was hit, but if no error was hit and the device passed in is the only
|
||||||
extern cl_device_id GetOpposingDevice( cl_device_id device );
|
// device available, the SAME device is returned, so check!
|
||||||
|
extern cl_device_id GetOpposingDevice(cl_device_id device);
|
||||||
|
|
||||||
Version get_device_spirv_il_version(cl_device_id device);
|
Version get_device_spirv_il_version(cl_device_id device);
|
||||||
bool check_device_spirv_il_support(cl_device_id device);
|
bool check_device_spirv_il_support(cl_device_id device);
|
||||||
@@ -143,19 +172,20 @@ void version_expected_info(const char *test_name, const char *api_name,
|
|||||||
test_status check_spirv_compilation_readiness(cl_device_id device);
|
test_status check_spirv_compilation_readiness(cl_device_id device);
|
||||||
|
|
||||||
|
|
||||||
extern int gFlushDenormsToZero; // This is set to 1 if the device does not support denorms (CL_FP_DENORM)
|
extern int gFlushDenormsToZero; // This is set to 1 if the device does not
|
||||||
extern int gInfNanSupport; // This is set to 1 if the device supports infinities and NaNs
|
// support denorms (CL_FP_DENORM)
|
||||||
|
extern int gInfNanSupport; // This is set to 1 if the device supports infinities
|
||||||
|
// and NaNs
|
||||||
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
|
extern int gIsEmbedded; // This is set to 1 if the device is an embedded device
|
||||||
extern int gHasLong; // This is set to 1 if the device supports long and ulong types in OpenCL C.
|
extern int gHasLong; // This is set to 1 if the device supports long and ulong
|
||||||
|
// types in OpenCL C.
|
||||||
extern bool gCoreILProgram;
|
extern bool gCoreILProgram;
|
||||||
|
|
||||||
#if ! defined( __APPLE__ )
|
#if !defined(__APPLE__)
|
||||||
void memset_pattern4(void *, const void *, size_t);
|
void memset_pattern4(void *, const void *, size_t);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern void PrintArch(void);
|
extern void PrintArch(void);
|
||||||
|
|
||||||
|
|
||||||
#endif // _testHarness_h
|
#endif // _testHarness_h
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,24 +16,26 @@
|
|||||||
#include "mt19937.h"
|
#include "mt19937.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
int main( void )
|
int main(void)
|
||||||
{
|
{
|
||||||
MTdata d = init_genrand(42);
|
MTdata d = init_genrand(42);
|
||||||
int i;
|
int i;
|
||||||
const cl_uint reference[16] = { 0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4,
|
const cl_uint reference[16] = {
|
||||||
0x55c31f24, 0x8bcd21ab, 0x14d5fef5, 0x9416d2b6,
|
0x5fe1dc66, 0x8b255210, 0x0380b0c8, 0xc87d2ce4, 0x55c31f24, 0x8bcd21ab,
|
||||||
0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
|
0x14d5fef5, 0x9416d2b6, 0xdf875de9, 0x00517d76, 0xd861c944, 0xa7676404,
|
||||||
0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92 };
|
0x5491aff4, 0x67616209, 0xc368b3fb, 0x929dfc92
|
||||||
|
};
|
||||||
int errcount = 0;
|
int errcount = 0;
|
||||||
|
|
||||||
for( i = 0; i < 65536; i++ )
|
for (i = 0; i < 65536; i++)
|
||||||
{
|
{
|
||||||
cl_uint u = genrand_int32( d );
|
cl_uint u = genrand_int32(d);
|
||||||
if( 0 == (i & 4095) )
|
if (0 == (i & 4095))
|
||||||
{
|
{
|
||||||
if( u != reference[i>>12] )
|
if (u != reference[i >> 12])
|
||||||
{
|
{
|
||||||
printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n", reference[i>>12], i, u );
|
printf("ERROR: expected *0x%8.8x at %d. Got 0x%8.8x\n",
|
||||||
|
reference[i >> 12], i, u);
|
||||||
errcount++;
|
errcount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -41,7 +43,7 @@ int main( void )
|
|||||||
|
|
||||||
free_mtdata(d);
|
free_mtdata(d);
|
||||||
|
|
||||||
if( errcount )
|
if (errcount)
|
||||||
printf("mt19937 test failed.\n");
|
printf("mt19937 test failed.\n");
|
||||||
else
|
else
|
||||||
printf("mt19937 test passed.\n");
|
printf("mt19937 test passed.\n");
|
||||||
|
|||||||
@@ -96,5 +96,3 @@ int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context con
|
|||||||
return (int)((intptr_t)retVal);
|
return (int)((intptr_t)retVal);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -17,17 +17,18 @@
|
|||||||
#define _threadTesting_h
|
#define _threadTesting_h
|
||||||
|
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
#include <OpenCL/opencl.h>
|
#include <OpenCL/opencl.h>
|
||||||
#else
|
#else
|
||||||
#include <CL/opencl.h>
|
#include <CL/opencl.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TEST_NOT_IMPLEMENTED -99
|
#define TEST_NOT_IMPLEMENTED -99
|
||||||
#define TEST_SKIPPED_ITSELF -100
|
#define TEST_SKIPPED_ITSELF -100
|
||||||
|
|
||||||
typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
|
typedef int (*basefn)(cl_device_id deviceID, cl_context context,
|
||||||
extern int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
|
cl_command_queue queue, int num_elements);
|
||||||
|
extern int test_threaded_function(basefn fnToTest, cl_device_id device,
|
||||||
|
cl_context context, cl_command_queue queue,
|
||||||
|
int numElements);
|
||||||
|
|
||||||
#endif // _threadTesting_h
|
#endif // _threadTesting_h
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -19,256 +19,310 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "clImageHelper.h"
|
#include "clImageHelper.h"
|
||||||
|
|
||||||
#define ROUND_SIZE_UP( _size, _align ) (((size_t)(_size) + (size_t)(_align) - 1) & -((size_t)(_align)))
|
#define ROUND_SIZE_UP(_size, _align) \
|
||||||
|
(((size_t)(_size) + (size_t)(_align)-1) & -((size_t)(_align)))
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
#define kPageSize 4096
|
#define kPageSize 4096
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#elif defined(__linux__)
|
#elif defined(__linux__)
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#define kPageSize (getpagesize())
|
#define kPageSize (getpagesize())
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret )
|
clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
cl_int *errcode_ret)
|
||||||
{
|
{
|
||||||
cl_int err = Create( context, mem_flags, fmt, width );
|
cl_int err = Create(context, mem_flags, fmt, width);
|
||||||
if( errcode_ret != NULL )
|
if (errcode_ret != NULL) *errcode_ret = err;
|
||||||
*errcode_ret = err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width )
|
cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width)
|
||||||
{
|
{
|
||||||
cl_int error;
|
cl_int error;
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
int protect_pages = 1;
|
int protect_pages = 1;
|
||||||
cl_device_id devices[16];
|
cl_device_id devices[16];
|
||||||
size_t number_of_devices;
|
size_t number_of_devices;
|
||||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
|
||||||
|
devices, &number_of_devices);
|
||||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||||
|
|
||||||
number_of_devices /= sizeof(cl_device_id);
|
number_of_devices /= sizeof(cl_device_id);
|
||||||
for (int i=0; i<(int)number_of_devices; i++) {
|
for (int i = 0; i < (int)number_of_devices; i++)
|
||||||
|
{
|
||||||
cl_device_type type;
|
cl_device_type type;
|
||||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
|
||||||
|
NULL);
|
||||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||||
if (type == CL_DEVICE_TYPE_GPU) {
|
if (type == CL_DEVICE_TYPE_GPU)
|
||||||
|
{
|
||||||
protect_pages = 0;
|
protect_pages = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protect_pages) {
|
if (protect_pages)
|
||||||
|
{
|
||||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
|
||||||
size_t rowStride = rowBytes + kPageSize;
|
size_t rowStride = rowBytes + kPageSize;
|
||||||
|
|
||||||
// create backing store
|
// create backing store
|
||||||
backingStoreSize = rowStride + 8 * rowStride;
|
backingStoreSize = rowStride + 8 * rowStride;
|
||||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||||
|
|
||||||
// add guard pages
|
// add guard pages
|
||||||
size_t row;
|
size_t row;
|
||||||
char *p = (char*) backingStore;
|
char *p = (char *)backingStore;
|
||||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
char *imagePtr = (char *)backingStore + 4 * rowStride;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
p += rowBytes;
|
p += rowBytes;
|
||||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
mprotect(p, kPageSize, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
p -= rowBytes;
|
p -= rowBytes;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
if (getenv("CL_ALIGN_RIGHT"))
|
||||||
{
|
{
|
||||||
static int spewEnv = 1;
|
static int spewEnv = 1;
|
||||||
if(spewEnv)
|
if (spewEnv)
|
||||||
{
|
{
|
||||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
|
||||||
|
"edge of page\n");
|
||||||
spewEnv = 0;
|
spewEnv = 0;
|
||||||
}
|
}
|
||||||
imagePtr += rowBytes - pixelBytes * width;
|
imagePtr += rowBytes - pixelBytes * width;
|
||||||
}
|
}
|
||||||
|
|
||||||
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
|
image = create_image_1d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
|
||||||
} else {
|
width, rowStride, imagePtr, NULL, &error);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
backingStore = NULL;
|
backingStore = NULL;
|
||||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
image = create_image_1d(context, mem_flags, fmt, width, 0, NULL, NULL,
|
||||||
|
&error);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
backingStore = NULL;
|
backingStore = NULL;
|
||||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
image =
|
||||||
|
create_image_1d(context, mem_flags, fmt, width, 0, NULL, NULL, &error);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret )
|
clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
size_t height, cl_int *errcode_ret)
|
||||||
{
|
{
|
||||||
cl_int err = Create( context, mem_flags, fmt, width, height );
|
cl_int err = Create(context, mem_flags, fmt, width, height);
|
||||||
if( errcode_ret != NULL )
|
if (errcode_ret != NULL) *errcode_ret = err;
|
||||||
*errcode_ret = err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height )
|
cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
size_t height)
|
||||||
{
|
{
|
||||||
cl_int error;
|
cl_int error;
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
int protect_pages = 1;
|
int protect_pages = 1;
|
||||||
cl_device_id devices[16];
|
cl_device_id devices[16];
|
||||||
size_t number_of_devices;
|
size_t number_of_devices;
|
||||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
|
||||||
|
devices, &number_of_devices);
|
||||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||||
|
|
||||||
number_of_devices /= sizeof(cl_device_id);
|
number_of_devices /= sizeof(cl_device_id);
|
||||||
for (int i=0; i<(int)number_of_devices; i++) {
|
for (int i = 0; i < (int)number_of_devices; i++)
|
||||||
|
{
|
||||||
cl_device_type type;
|
cl_device_type type;
|
||||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
|
||||||
|
NULL);
|
||||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||||
if (type == CL_DEVICE_TYPE_GPU) {
|
if (type == CL_DEVICE_TYPE_GPU)
|
||||||
|
{
|
||||||
protect_pages = 0;
|
protect_pages = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protect_pages) {
|
if (protect_pages)
|
||||||
|
{
|
||||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
|
||||||
size_t rowStride = rowBytes + kPageSize;
|
size_t rowStride = rowBytes + kPageSize;
|
||||||
|
|
||||||
// create backing store
|
// create backing store
|
||||||
backingStoreSize = height * rowStride + 8 * rowStride;
|
backingStoreSize = height * rowStride + 8 * rowStride;
|
||||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||||
|
|
||||||
// add guard pages
|
// add guard pages
|
||||||
size_t row;
|
size_t row;
|
||||||
char *p = (char*) backingStore;
|
char *p = (char *)backingStore;
|
||||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
char *imagePtr = (char *)backingStore + 4 * rowStride;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
p += rowBytes;
|
p += rowBytes;
|
||||||
for( row = 0; row < height; row++ )
|
for (row = 0; row < height; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
mprotect(p, kPageSize, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
p -= rowBytes;
|
p -= rowBytes;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
if (getenv("CL_ALIGN_RIGHT"))
|
||||||
{
|
{
|
||||||
static int spewEnv = 1;
|
static int spewEnv = 1;
|
||||||
if(spewEnv)
|
if (spewEnv)
|
||||||
{
|
{
|
||||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
|
||||||
|
"edge of page\n");
|
||||||
spewEnv = 0;
|
spewEnv = 0;
|
||||||
}
|
}
|
||||||
imagePtr += rowBytes - pixelBytes * width;
|
imagePtr += rowBytes - pixelBytes * width;
|
||||||
}
|
}
|
||||||
|
|
||||||
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
|
image = create_image_2d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
|
||||||
} else {
|
width, height, rowStride, imagePtr, &error);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
backingStore = NULL;
|
backingStore = NULL;
|
||||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
image = create_image_2d(context, mem_flags, fmt, width, height, 0, NULL,
|
||||||
|
&error);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
backingStore = NULL;
|
backingStore = NULL;
|
||||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
image = create_image_2d(context, mem_flags, fmt, width, height, 0, NULL,
|
||||||
|
&error);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret )
|
clProtectedImage::clProtectedImage(cl_context context, cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
size_t height, size_t depth,
|
||||||
|
cl_int *errcode_ret)
|
||||||
{
|
{
|
||||||
cl_int err = Create( context, mem_flags, fmt, width, height, depth );
|
cl_int err = Create(context, mem_flags, fmt, width, height, depth);
|
||||||
if( errcode_ret != NULL )
|
if (errcode_ret != NULL) *errcode_ret = err;
|
||||||
*errcode_ret = err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth )
|
cl_int clProtectedImage::Create(cl_context context, cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
size_t height, size_t depth)
|
||||||
{
|
{
|
||||||
cl_int error;
|
cl_int error;
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
int protect_pages = 1;
|
int protect_pages = 1;
|
||||||
cl_device_id devices[16];
|
cl_device_id devices[16];
|
||||||
size_t number_of_devices;
|
size_t number_of_devices;
|
||||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
|
||||||
|
devices, &number_of_devices);
|
||||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||||
|
|
||||||
number_of_devices /= sizeof(cl_device_id);
|
number_of_devices /= sizeof(cl_device_id);
|
||||||
for (int i=0; i<(int)number_of_devices; i++) {
|
for (int i = 0; i < (int)number_of_devices; i++)
|
||||||
|
{
|
||||||
cl_device_type type;
|
cl_device_type type;
|
||||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
|
||||||
|
NULL);
|
||||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||||
if (type == CL_DEVICE_TYPE_GPU) {
|
if (type == CL_DEVICE_TYPE_GPU)
|
||||||
|
{
|
||||||
protect_pages = 0;
|
protect_pages = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protect_pages) {
|
if (protect_pages)
|
||||||
|
{
|
||||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
|
||||||
size_t rowStride = rowBytes + kPageSize;
|
size_t rowStride = rowBytes + kPageSize;
|
||||||
|
|
||||||
// create backing store
|
// create backing store
|
||||||
backingStoreSize = height * depth * rowStride + 8 * rowStride;
|
backingStoreSize = height * depth * rowStride + 8 * rowStride;
|
||||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||||
|
|
||||||
// add guard pages
|
// add guard pages
|
||||||
size_t row;
|
size_t row;
|
||||||
char *p = (char*) backingStore;
|
char *p = (char *)backingStore;
|
||||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
char *imagePtr = (char *)backingStore + 4 * rowStride;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
p += rowBytes;
|
p += rowBytes;
|
||||||
for( row = 0; row < height*depth; row++ )
|
for (row = 0; row < height * depth; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
mprotect(p, kPageSize, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
p -= rowBytes;
|
p -= rowBytes;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
if (getenv("CL_ALIGN_RIGHT"))
|
||||||
{
|
{
|
||||||
static int spewEnv = 1;
|
static int spewEnv = 1;
|
||||||
if(spewEnv)
|
if (spewEnv)
|
||||||
{
|
{
|
||||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
|
||||||
|
"edge of page\n");
|
||||||
spewEnv = 0;
|
spewEnv = 0;
|
||||||
}
|
}
|
||||||
imagePtr += rowBytes - pixelBytes * width;
|
imagePtr += rowBytes - pixelBytes * width;
|
||||||
}
|
}
|
||||||
|
|
||||||
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
|
image = create_image_3d(context, mem_flags | CL_MEM_USE_HOST_PTR, fmt,
|
||||||
} else {
|
width, height, depth, rowStride,
|
||||||
|
height * rowStride, imagePtr, &error);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
backingStore = NULL;
|
backingStore = NULL;
|
||||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
|
image = create_image_3d(context, mem_flags, fmt, width, height, depth,
|
||||||
|
0, 0, NULL, &error);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
backingStore = NULL;
|
backingStore = NULL;
|
||||||
image = create_image_3d( context, mem_flags, fmt, width, height, depth, 0, 0, NULL, &error );
|
image = create_image_3d(context, mem_flags, fmt, width, height, depth, 0, 0,
|
||||||
|
NULL, &error);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -276,37 +330,51 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_flags mem_flags, con
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
clProtectedImage::clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret )
|
clProtectedImage::clProtectedImage(cl_context context,
|
||||||
|
cl_mem_object_type imageType,
|
||||||
|
cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
size_t height, size_t depth,
|
||||||
|
size_t arraySize, cl_int *errcode_ret)
|
||||||
{
|
{
|
||||||
cl_int err = Create( context, imageType, mem_flags, fmt, width, height, depth, arraySize );
|
cl_int err = Create(context, imageType, mem_flags, fmt, width, height,
|
||||||
if( errcode_ret != NULL )
|
depth, arraySize);
|
||||||
*errcode_ret = err;
|
if (errcode_ret != NULL) *errcode_ret = err;
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags mem_flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize )
|
cl_int clProtectedImage::Create(cl_context context,
|
||||||
|
cl_mem_object_type imageType,
|
||||||
|
cl_mem_flags mem_flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
size_t height, size_t depth, size_t arraySize)
|
||||||
{
|
{
|
||||||
cl_int error;
|
cl_int error;
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
int protect_pages = 1;
|
int protect_pages = 1;
|
||||||
cl_device_id devices[16];
|
cl_device_id devices[16];
|
||||||
size_t number_of_devices;
|
size_t number_of_devices;
|
||||||
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &number_of_devices);
|
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices),
|
||||||
|
devices, &number_of_devices);
|
||||||
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
|
||||||
|
|
||||||
number_of_devices /= sizeof(cl_device_id);
|
number_of_devices /= sizeof(cl_device_id);
|
||||||
for (int i=0; i<(int)number_of_devices; i++) {
|
for (int i = 0; i < (int)number_of_devices; i++)
|
||||||
|
{
|
||||||
cl_device_type type;
|
cl_device_type type;
|
||||||
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
|
error = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(type), &type,
|
||||||
|
NULL);
|
||||||
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
test_error(error, "clGetDeviceInfo for CL_DEVICE_TYPE failed");
|
||||||
if (type == CL_DEVICE_TYPE_GPU) {
|
if (type == CL_DEVICE_TYPE_GPU)
|
||||||
|
{
|
||||||
protect_pages = 0;
|
protect_pages = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (protect_pages) {
|
if (protect_pages)
|
||||||
|
{
|
||||||
size_t pixelBytes = get_pixel_bytes(fmt);
|
size_t pixelBytes = get_pixel_bytes(fmt);
|
||||||
size_t rowBytes = ROUND_SIZE_UP( width * pixelBytes, kPageSize );
|
size_t rowBytes = ROUND_SIZE_UP(width * pixelBytes, kPageSize);
|
||||||
size_t rowStride = rowBytes + kPageSize;
|
size_t rowStride = rowBytes + kPageSize;
|
||||||
|
|
||||||
// create backing store
|
// create backing store
|
||||||
@@ -325,37 +393,44 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageTyp
|
|||||||
backingStoreSize = arraySize * rowStride + 8 * rowStride;
|
backingStoreSize = arraySize * rowStride + 8 * rowStride;
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||||
backingStoreSize = height * arraySize * rowStride + 8 * rowStride;
|
backingStoreSize =
|
||||||
|
height * arraySize * rowStride + 8 * rowStride;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
backingStore = mmap(0, backingStoreSize, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||||
|
|
||||||
// add guard pages
|
// add guard pages
|
||||||
size_t row;
|
size_t row;
|
||||||
char *p = (char*) backingStore;
|
char *p = (char *)backingStore;
|
||||||
char *imagePtr = (char*) backingStore + 4 * rowStride;
|
char *imagePtr = (char *)backingStore + 4 * rowStride;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
p += rowBytes;
|
p += rowBytes;
|
||||||
size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1) * (arraySize > 0 ? arraySize : 1);
|
size_t sz = (height > 0 ? height : 1) * (depth > 0 ? depth : 1)
|
||||||
for( row = 0; row < sz; row++ )
|
* (arraySize > 0 ? arraySize : 1);
|
||||||
|
for (row = 0; row < sz; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, kPageSize, PROT_NONE ); p += rowStride;
|
mprotect(p, kPageSize, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
p -= rowBytes;
|
p -= rowBytes;
|
||||||
for( row = 0; row < 4; row++ )
|
for (row = 0; row < 4; row++)
|
||||||
{
|
{
|
||||||
mprotect( p, rowStride, PROT_NONE ); p += rowStride;
|
mprotect(p, rowStride, PROT_NONE);
|
||||||
|
p += rowStride;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( getenv( "CL_ALIGN_RIGHT" ) )
|
if (getenv("CL_ALIGN_RIGHT"))
|
||||||
{
|
{
|
||||||
static int spewEnv = 1;
|
static int spewEnv = 1;
|
||||||
if(spewEnv)
|
if (spewEnv)
|
||||||
{
|
{
|
||||||
log_info( "***CL_ALIGN_RIGHT is set. Aligning images at right edge of page\n" );
|
log_info("***CL_ALIGN_RIGHT is set. Aligning images at right "
|
||||||
|
"edge of page\n");
|
||||||
spewEnv = 0;
|
spewEnv = 0;
|
||||||
}
|
}
|
||||||
imagePtr += rowBytes - pixelBytes * width;
|
imagePtr += rowBytes - pixelBytes * width;
|
||||||
@@ -364,43 +439,61 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageTyp
|
|||||||
switch (imageType)
|
switch (imageType)
|
||||||
{
|
{
|
||||||
case CL_MEM_OBJECT_IMAGE1D:
|
case CL_MEM_OBJECT_IMAGE1D:
|
||||||
image = create_image_1d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, rowStride, imagePtr, NULL, &error );
|
image = create_image_1d(
|
||||||
|
context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
|
||||||
|
rowStride, imagePtr, NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE2D:
|
case CL_MEM_OBJECT_IMAGE2D:
|
||||||
image = create_image_2d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, rowStride, imagePtr, &error );
|
image = create_image_2d(
|
||||||
|
context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
|
||||||
|
height, rowStride, imagePtr, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE3D:
|
case CL_MEM_OBJECT_IMAGE3D:
|
||||||
image = create_image_3d( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, depth, rowStride, height*rowStride, imagePtr, &error );
|
image =
|
||||||
|
create_image_3d(context, mem_flags | CL_MEM_USE_HOST_PTR,
|
||||||
|
fmt, width, height, depth, rowStride,
|
||||||
|
height * rowStride, imagePtr, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||||
image = create_image_1d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, arraySize, rowStride, rowStride, imagePtr, &error );
|
image = create_image_1d_array(
|
||||||
|
context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
|
||||||
|
arraySize, rowStride, rowStride, imagePtr, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||||
image = create_image_2d_array( context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width, height, arraySize, rowStride, height*rowStride, imagePtr, &error );
|
image = create_image_2d_array(
|
||||||
|
context, mem_flags | CL_MEM_USE_HOST_PTR, fmt, width,
|
||||||
|
height, arraySize, rowStride, height * rowStride, imagePtr,
|
||||||
|
&error);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
backingStore = NULL;
|
backingStore = NULL;
|
||||||
switch (imageType)
|
switch (imageType)
|
||||||
{
|
{
|
||||||
case CL_MEM_OBJECT_IMAGE1D:
|
case CL_MEM_OBJECT_IMAGE1D:
|
||||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
image = create_image_1d(context, mem_flags, fmt, width, 0, NULL,
|
||||||
|
NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE2D:
|
case CL_MEM_OBJECT_IMAGE2D:
|
||||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
image = create_image_2d(context, mem_flags, fmt, width, height,
|
||||||
|
0, NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE3D:
|
case CL_MEM_OBJECT_IMAGE3D:
|
||||||
image = create_image_3d(context, mem_flags, fmt, width, height,
|
image = create_image_3d(context, mem_flags, fmt, width, height,
|
||||||
depth, 0, 0, NULL, &error);
|
depth, 0, 0, NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||||
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
|
image = create_image_1d_array(context, mem_flags, fmt, width,
|
||||||
|
arraySize, 0, 0, NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||||
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
|
image = create_image_2d_array(context, mem_flags, fmt, width,
|
||||||
|
height, arraySize, 0, 0, NULL,
|
||||||
|
&error);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@@ -408,20 +501,25 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageTyp
|
|||||||
switch (imageType)
|
switch (imageType)
|
||||||
{
|
{
|
||||||
case CL_MEM_OBJECT_IMAGE1D:
|
case CL_MEM_OBJECT_IMAGE1D:
|
||||||
image = create_image_1d( context, mem_flags, fmt, width, 0, NULL, NULL, &error );
|
image = create_image_1d(context, mem_flags, fmt, width, 0, NULL,
|
||||||
|
NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE2D:
|
case CL_MEM_OBJECT_IMAGE2D:
|
||||||
image = create_image_2d( context, mem_flags, fmt, width, height, 0, NULL, &error );
|
image = create_image_2d(context, mem_flags, fmt, width, height, 0,
|
||||||
|
NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE3D:
|
case CL_MEM_OBJECT_IMAGE3D:
|
||||||
image = create_image_3d(context, mem_flags, fmt, width, height,
|
image = create_image_3d(context, mem_flags, fmt, width, height,
|
||||||
depth, 0, 0, NULL, &error);
|
depth, 0, 0, NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||||
image = create_image_1d_array( context, mem_flags, fmt, width, arraySize, 0, 0, NULL, &error );
|
image = create_image_1d_array(context, mem_flags, fmt, width,
|
||||||
|
arraySize, 0, 0, NULL, &error);
|
||||||
break;
|
break;
|
||||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||||
image = create_image_2d_array( context, mem_flags, fmt, width, height, arraySize, 0, 0, NULL, &error );
|
image =
|
||||||
|
create_image_2d_array(context, mem_flags, fmt, width, height,
|
||||||
|
arraySize, 0, 0, NULL, &error);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@@ -429,55 +527,52 @@ cl_int clProtectedImage::Create( cl_context context, cl_mem_object_type imageTyp
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*******
|
/*******
|
||||||
* clProtectedArray implementation
|
* clProtectedArray implementation
|
||||||
*******/
|
*******/
|
||||||
clProtectedArray::clProtectedArray()
|
clProtectedArray::clProtectedArray() { mBuffer = mValidBuffer = NULL; }
|
||||||
{
|
|
||||||
mBuffer = mValidBuffer = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
clProtectedArray::clProtectedArray( size_t sizeInBytes )
|
clProtectedArray::clProtectedArray(size_t sizeInBytes)
|
||||||
{
|
{
|
||||||
mBuffer = mValidBuffer = NULL;
|
mBuffer = mValidBuffer = NULL;
|
||||||
Allocate( sizeInBytes );
|
Allocate(sizeInBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
clProtectedArray::~clProtectedArray()
|
clProtectedArray::~clProtectedArray()
|
||||||
{
|
{
|
||||||
if( mBuffer != NULL ) {
|
if (mBuffer != NULL)
|
||||||
#if defined( __APPLE__ )
|
{
|
||||||
int error = munmap( mBuffer, mRealSize );
|
#if defined(__APPLE__)
|
||||||
|
int error = munmap(mBuffer, mRealSize);
|
||||||
if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
|
if (error) log_error("WARNING: munmap failed in clProtectedArray.\n");
|
||||||
#else
|
#else
|
||||||
free( mBuffer );
|
free(mBuffer);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void clProtectedArray::Allocate( size_t sizeInBytes )
|
void clProtectedArray::Allocate(size_t sizeInBytes)
|
||||||
{
|
{
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
|
|
||||||
// Allocate enough space to: round up our actual allocation to an even number of pages
|
// Allocate enough space to: round up our actual allocation to an even
|
||||||
// and allocate two pages on either side
|
// number of pages and allocate two pages on either side
|
||||||
mRoundedSize = ROUND_SIZE_UP( sizeInBytes, kPageSize );
|
mRoundedSize = ROUND_SIZE_UP(sizeInBytes, kPageSize);
|
||||||
mRealSize = mRoundedSize + kPageSize * 2;
|
mRealSize = mRoundedSize + kPageSize * 2;
|
||||||
|
|
||||||
// Use mmap here to ensure we start on a page boundary, so the mprotect calls will work OK
|
// Use mmap here to ensure we start on a page boundary, so the mprotect
|
||||||
mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
|
// calls will work OK
|
||||||
|
mBuffer = (char *)mmap(0, mRealSize, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_ANON | MAP_PRIVATE, 0, 0);
|
||||||
|
|
||||||
mValidBuffer = mBuffer + kPageSize;
|
mValidBuffer = mBuffer + kPageSize;
|
||||||
|
|
||||||
// Protect guard area from access
|
// Protect guard area from access
|
||||||
mprotect( mValidBuffer - kPageSize, kPageSize, PROT_NONE );
|
mprotect(mValidBuffer - kPageSize, kPageSize, PROT_NONE);
|
||||||
mprotect( mValidBuffer + mRoundedSize, kPageSize, PROT_NONE );
|
mprotect(mValidBuffer + mRoundedSize, kPageSize, PROT_NONE);
|
||||||
#else
|
#else
|
||||||
mRoundedSize = mRealSize = sizeInBytes;
|
mRoundedSize = mRealSize = sizeInBytes;
|
||||||
mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
|
mBuffer = mValidBuffer = (char *)calloc(1, mRealSize);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -31,254 +31,323 @@
|
|||||||
|
|
||||||
/* cl_context wrapper */
|
/* cl_context wrapper */
|
||||||
|
|
||||||
class clContextWrapper
|
class clContextWrapper {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clContextWrapper() { mContext = NULL; }
|
clContextWrapper() { mContext = NULL; }
|
||||||
clContextWrapper( cl_context program ) { mContext = program; }
|
clContextWrapper(cl_context program) { mContext = program; }
|
||||||
~clContextWrapper() { if( mContext != NULL ) clReleaseContext( mContext ); }
|
~clContextWrapper()
|
||||||
|
{
|
||||||
|
if (mContext != NULL) clReleaseContext(mContext);
|
||||||
|
}
|
||||||
|
|
||||||
clContextWrapper & operator=( const cl_context &rhs ) { mContext = rhs; return *this; }
|
clContextWrapper &operator=(const cl_context &rhs)
|
||||||
|
{
|
||||||
|
mContext = rhs;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_context() const { return mContext; }
|
operator cl_context() const { return mContext; }
|
||||||
|
|
||||||
cl_context * operator&() { return &mContext; }
|
cl_context *operator&() { return &mContext; }
|
||||||
|
|
||||||
bool operator==( const cl_context &rhs ) { return mContext == rhs; }
|
bool operator==(const cl_context &rhs) { return mContext == rhs; }
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
protected:
|
||||||
cl_context mContext;
|
cl_context mContext;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cl_program wrapper */
|
/* cl_program wrapper */
|
||||||
|
|
||||||
class clProgramWrapper
|
class clProgramWrapper {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clProgramWrapper() { mProgram = NULL; }
|
clProgramWrapper() { mProgram = NULL; }
|
||||||
clProgramWrapper( cl_program program ) { mProgram = program; }
|
clProgramWrapper(cl_program program) { mProgram = program; }
|
||||||
~clProgramWrapper() { if( mProgram != NULL ) clReleaseProgram( mProgram ); }
|
~clProgramWrapper()
|
||||||
|
{
|
||||||
|
if (mProgram != NULL) clReleaseProgram(mProgram);
|
||||||
|
}
|
||||||
|
|
||||||
clProgramWrapper & operator=( const cl_program &rhs ) { mProgram = rhs; return *this; }
|
clProgramWrapper &operator=(const cl_program &rhs)
|
||||||
|
{
|
||||||
|
mProgram = rhs;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_program() const { return mProgram; }
|
operator cl_program() const { return mProgram; }
|
||||||
|
|
||||||
cl_program * operator&() { return &mProgram; }
|
cl_program *operator&() { return &mProgram; }
|
||||||
|
|
||||||
bool operator==( const cl_program &rhs ) { return mProgram == rhs; }
|
bool operator==(const cl_program &rhs) { return mProgram == rhs; }
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
protected:
|
||||||
cl_program mProgram;
|
cl_program mProgram;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cl_kernel wrapper */
|
/* cl_kernel wrapper */
|
||||||
|
|
||||||
class clKernelWrapper
|
class clKernelWrapper {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clKernelWrapper() { mKernel = NULL; }
|
clKernelWrapper() { mKernel = NULL; }
|
||||||
clKernelWrapper( cl_kernel kernel ) { mKernel = kernel; }
|
clKernelWrapper(cl_kernel kernel) { mKernel = kernel; }
|
||||||
~clKernelWrapper() { if( mKernel != NULL ) clReleaseKernel( mKernel ); }
|
~clKernelWrapper()
|
||||||
|
{
|
||||||
|
if (mKernel != NULL) clReleaseKernel(mKernel);
|
||||||
|
}
|
||||||
|
|
||||||
clKernelWrapper & operator=( const cl_kernel &rhs ) { mKernel = rhs; return *this; }
|
clKernelWrapper &operator=(const cl_kernel &rhs)
|
||||||
|
{
|
||||||
|
mKernel = rhs;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_kernel() const { return mKernel; }
|
operator cl_kernel() const { return mKernel; }
|
||||||
|
|
||||||
cl_kernel * operator&() { return &mKernel; }
|
cl_kernel *operator&() { return &mKernel; }
|
||||||
|
|
||||||
bool operator==( const cl_kernel &rhs ) { return mKernel == rhs; }
|
bool operator==(const cl_kernel &rhs) { return mKernel == rhs; }
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
protected:
|
||||||
cl_kernel mKernel;
|
cl_kernel mKernel;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cl_mem (stream) wrapper */
|
/* cl_mem (stream) wrapper */
|
||||||
|
|
||||||
class clMemWrapper
|
class clMemWrapper {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clMemWrapper() { mMem = NULL; }
|
clMemWrapper() { mMem = NULL; }
|
||||||
clMemWrapper( cl_mem mem ) { mMem = mem; }
|
clMemWrapper(cl_mem mem) { mMem = mem; }
|
||||||
~clMemWrapper() { if( mMem != NULL ) clReleaseMemObject( mMem ); }
|
~clMemWrapper()
|
||||||
|
{
|
||||||
|
if (mMem != NULL) clReleaseMemObject(mMem);
|
||||||
|
}
|
||||||
|
|
||||||
clMemWrapper & operator=( const cl_mem &rhs ) { mMem = rhs; return *this; }
|
clMemWrapper &operator=(const cl_mem &rhs)
|
||||||
|
{
|
||||||
|
mMem = rhs;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_mem() const { return mMem; }
|
operator cl_mem() const { return mMem; }
|
||||||
|
|
||||||
cl_mem * operator&() { return &mMem; }
|
cl_mem *operator&() { return &mMem; }
|
||||||
|
|
||||||
bool operator==( const cl_mem &rhs ) { return mMem == rhs; }
|
bool operator==(const cl_mem &rhs) { return mMem == rhs; }
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
protected:
|
||||||
cl_mem mMem;
|
cl_mem mMem;
|
||||||
};
|
};
|
||||||
|
|
||||||
class clProtectedImage
|
class clProtectedImage {
|
||||||
{
|
public:
|
||||||
public:
|
clProtectedImage()
|
||||||
clProtectedImage() { image = NULL; backingStore = NULL; }
|
{
|
||||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, cl_int *errcode_ret );
|
image = NULL;
|
||||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, cl_int *errcode_ret );
|
backingStore = NULL;
|
||||||
clProtectedImage( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, cl_int *errcode_ret );
|
}
|
||||||
clProtectedImage( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize, cl_int *errcode_ret );
|
clProtectedImage(cl_context context, cl_mem_flags flags,
|
||||||
|
const cl_image_format *fmt, size_t width,
|
||||||
|
cl_int *errcode_ret);
|
||||||
|
clProtectedImage(cl_context context, cl_mem_flags flags,
|
||||||
|
const cl_image_format *fmt, size_t width, size_t height,
|
||||||
|
cl_int *errcode_ret);
|
||||||
|
clProtectedImage(cl_context context, cl_mem_flags flags,
|
||||||
|
const cl_image_format *fmt, size_t width, size_t height,
|
||||||
|
size_t depth, cl_int *errcode_ret);
|
||||||
|
clProtectedImage(cl_context context, cl_mem_object_type imageType,
|
||||||
|
cl_mem_flags flags, const cl_image_format *fmt,
|
||||||
|
size_t width, size_t height, size_t depth,
|
||||||
|
size_t arraySize, cl_int *errcode_ret);
|
||||||
~clProtectedImage()
|
~clProtectedImage()
|
||||||
{
|
{
|
||||||
if( image != NULL )
|
if (image != NULL) clReleaseMemObject(image);
|
||||||
clReleaseMemObject( image );
|
|
||||||
|
|
||||||
#if defined( __APPLE__ )
|
#if defined(__APPLE__)
|
||||||
if(backingStore)
|
if (backingStore) munmap(backingStore, backingStoreSize);
|
||||||
munmap(backingStore, backingStoreSize);
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width );
|
cl_int Create(cl_context context, cl_mem_flags flags,
|
||||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height );
|
const cl_image_format *fmt, size_t width);
|
||||||
cl_int Create( cl_context context, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth );
|
cl_int Create(cl_context context, cl_mem_flags flags,
|
||||||
cl_int Create( cl_context context, cl_mem_object_type imageType, cl_mem_flags flags, const cl_image_format *fmt, size_t width, size_t height, size_t depth, size_t arraySize );
|
const cl_image_format *fmt, size_t width, size_t height);
|
||||||
|
cl_int Create(cl_context context, cl_mem_flags flags,
|
||||||
|
const cl_image_format *fmt, size_t width, size_t height,
|
||||||
|
size_t depth);
|
||||||
|
cl_int Create(cl_context context, cl_mem_object_type imageType,
|
||||||
|
cl_mem_flags flags, const cl_image_format *fmt, size_t width,
|
||||||
|
size_t height, size_t depth, size_t arraySize);
|
||||||
|
|
||||||
clProtectedImage & operator=( const cl_mem &rhs ) { image = rhs; backingStore = NULL; return *this; }
|
clProtectedImage &operator=(const cl_mem &rhs)
|
||||||
|
{
|
||||||
|
image = rhs;
|
||||||
|
backingStore = NULL;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_mem() { return image; }
|
operator cl_mem() { return image; }
|
||||||
|
|
||||||
cl_mem * operator&() { return &image; }
|
cl_mem *operator&() { return &image; }
|
||||||
|
|
||||||
bool operator==( const cl_mem &rhs ) { return image == rhs; }
|
bool operator==(const cl_mem &rhs) { return image == rhs; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void *backingStore;
|
void *backingStore;
|
||||||
size_t backingStoreSize;
|
size_t backingStoreSize;
|
||||||
cl_mem image;
|
cl_mem image;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cl_command_queue wrapper */
|
/* cl_command_queue wrapper */
|
||||||
class clCommandQueueWrapper
|
class clCommandQueueWrapper {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clCommandQueueWrapper() { mMem = NULL; }
|
clCommandQueueWrapper() { mMem = NULL; }
|
||||||
clCommandQueueWrapper( cl_command_queue mem ) { mMem = mem; }
|
clCommandQueueWrapper(cl_command_queue mem) { mMem = mem; }
|
||||||
~clCommandQueueWrapper() { if( mMem != NULL ) { clReleaseCommandQueue( mMem ); } }
|
~clCommandQueueWrapper()
|
||||||
|
{
|
||||||
|
if (mMem != NULL)
|
||||||
|
{
|
||||||
|
clReleaseCommandQueue(mMem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
clCommandQueueWrapper & operator=( const cl_command_queue &rhs ) { mMem = rhs; return *this; }
|
clCommandQueueWrapper &operator=(const cl_command_queue &rhs)
|
||||||
|
{
|
||||||
|
mMem = rhs;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_command_queue() const { return mMem; }
|
operator cl_command_queue() const { return mMem; }
|
||||||
|
|
||||||
cl_command_queue * operator&() { return &mMem; }
|
cl_command_queue *operator&() { return &mMem; }
|
||||||
|
|
||||||
bool operator==( const cl_command_queue &rhs ) { return mMem == rhs; }
|
bool operator==(const cl_command_queue &rhs) { return mMem == rhs; }
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
protected:
|
||||||
cl_command_queue mMem;
|
cl_command_queue mMem;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cl_sampler wrapper */
|
/* cl_sampler wrapper */
|
||||||
class clSamplerWrapper
|
class clSamplerWrapper {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clSamplerWrapper() { mMem = NULL; }
|
clSamplerWrapper() { mMem = NULL; }
|
||||||
clSamplerWrapper( cl_sampler mem ) { mMem = mem; }
|
clSamplerWrapper(cl_sampler mem) { mMem = mem; }
|
||||||
~clSamplerWrapper() { if( mMem != NULL ) clReleaseSampler( mMem ); }
|
~clSamplerWrapper()
|
||||||
|
{
|
||||||
|
if (mMem != NULL) clReleaseSampler(mMem);
|
||||||
|
}
|
||||||
|
|
||||||
clSamplerWrapper & operator=( const cl_sampler &rhs ) { mMem = rhs; return *this; }
|
clSamplerWrapper &operator=(const cl_sampler &rhs)
|
||||||
|
{
|
||||||
|
mMem = rhs;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_sampler() const { return mMem; }
|
operator cl_sampler() const { return mMem; }
|
||||||
|
|
||||||
cl_sampler * operator&() { return &mMem; }
|
cl_sampler *operator&() { return &mMem; }
|
||||||
|
|
||||||
bool operator==( const cl_sampler &rhs ) { return mMem == rhs; }
|
bool operator==(const cl_sampler &rhs) { return mMem == rhs; }
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
protected:
|
||||||
cl_sampler mMem;
|
cl_sampler mMem;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* cl_event wrapper */
|
/* cl_event wrapper */
|
||||||
class clEventWrapper
|
class clEventWrapper {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clEventWrapper() { mMem = NULL; }
|
clEventWrapper() { mMem = NULL; }
|
||||||
clEventWrapper( cl_event mem ) { mMem = mem; }
|
clEventWrapper(cl_event mem) { mMem = mem; }
|
||||||
~clEventWrapper() { if( mMem != NULL ) clReleaseEvent( mMem ); }
|
~clEventWrapper()
|
||||||
|
{
|
||||||
|
if (mMem != NULL) clReleaseEvent(mMem);
|
||||||
|
}
|
||||||
|
|
||||||
clEventWrapper & operator=( const cl_event &rhs ) { mMem = rhs; return *this; }
|
clEventWrapper &operator=(const cl_event &rhs)
|
||||||
|
{
|
||||||
|
mMem = rhs;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
operator cl_event() const { return mMem; }
|
operator cl_event() const { return mMem; }
|
||||||
|
|
||||||
cl_event * operator&() { return &mMem; }
|
cl_event *operator&() { return &mMem; }
|
||||||
|
|
||||||
bool operator==( const cl_event &rhs ) { return mMem == rhs; }
|
bool operator==(const cl_event &rhs) { return mMem == rhs; }
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
|
protected:
|
||||||
cl_event mMem;
|
cl_event mMem;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Generic protected memory buffer, for verifying access within bounds */
|
/* Generic protected memory buffer, for verifying access within bounds */
|
||||||
class clProtectedArray
|
class clProtectedArray {
|
||||||
{
|
public:
|
||||||
public:
|
|
||||||
clProtectedArray();
|
clProtectedArray();
|
||||||
clProtectedArray( size_t sizeInBytes );
|
clProtectedArray(size_t sizeInBytes);
|
||||||
virtual ~clProtectedArray();
|
virtual ~clProtectedArray();
|
||||||
|
|
||||||
void Allocate( size_t sizeInBytes );
|
void Allocate(size_t sizeInBytes);
|
||||||
|
|
||||||
operator void *() { return (void *)mValidBuffer; }
|
operator void *() { return (void *)mValidBuffer; }
|
||||||
operator const void *() const { return (const void *)mValidBuffer; }
|
operator const void *() const { return (const void *)mValidBuffer; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
char *mBuffer;
|
||||||
char * mBuffer;
|
char *mValidBuffer;
|
||||||
char * mValidBuffer;
|
|
||||||
size_t mRealSize, mRoundedSize;
|
size_t mRealSize, mRoundedSize;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RandomSeed
|
class RandomSeed {
|
||||||
{
|
public:
|
||||||
public:
|
RandomSeed(cl_uint seed)
|
||||||
RandomSeed( cl_uint seed ){ if(seed) log_info( "(seed = %10.10u) ", seed ); mtData = init_genrand(seed); }
|
{
|
||||||
|
if (seed) log_info("(seed = %10.10u) ", seed);
|
||||||
|
mtData = init_genrand(seed);
|
||||||
|
}
|
||||||
~RandomSeed()
|
~RandomSeed()
|
||||||
{
|
{
|
||||||
if( gReSeed )
|
if (gReSeed) gRandomSeed = genrand_int32(mtData);
|
||||||
gRandomSeed = genrand_int32( mtData );
|
|
||||||
free_mtdata(mtData);
|
free_mtdata(mtData);
|
||||||
}
|
}
|
||||||
|
|
||||||
operator MTdata () {return mtData;}
|
operator MTdata() { return mtData; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
MTdata mtData;
|
MTdata mtData;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename T> class BufferOwningPtr
|
template <typename T> class BufferOwningPtr {
|
||||||
{
|
|
||||||
BufferOwningPtr(BufferOwningPtr const &); // do not implement
|
BufferOwningPtr(BufferOwningPtr const &); // do not implement
|
||||||
void operator=(BufferOwningPtr const &); // do not implement
|
void operator=(BufferOwningPtr const &); // do not implement
|
||||||
|
|
||||||
void *ptr;
|
void *ptr;
|
||||||
void *map;
|
void *map;
|
||||||
size_t mapsize; // Bytes allocated total, pointed to by map.
|
// Bytes allocated total, pointed to by map:
|
||||||
size_t allocsize; // Bytes allocated in unprotected pages, pointed to by ptr.
|
size_t mapsize;
|
||||||
|
// Bytes allocated in unprotected pages, pointed to by ptr:
|
||||||
|
size_t allocsize;
|
||||||
bool aligned;
|
bool aligned;
|
||||||
public:
|
|
||||||
explicit BufferOwningPtr(void *p = 0) : ptr(p), map(0), mapsize(0), allocsize(0), aligned(false) {}
|
public:
|
||||||
|
explicit BufferOwningPtr(void *p = 0)
|
||||||
|
: ptr(p), map(0), mapsize(0), allocsize(0), aligned(false)
|
||||||
|
{}
|
||||||
explicit BufferOwningPtr(void *p, void *m, size_t s)
|
explicit BufferOwningPtr(void *p, void *m, size_t s)
|
||||||
: ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
|
: ptr(p), map(m), mapsize(s), allocsize(0), aligned(false)
|
||||||
{
|
{
|
||||||
#if ! defined( __APPLE__ )
|
#if !defined(__APPLE__)
|
||||||
if(m)
|
if (m)
|
||||||
{
|
{
|
||||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
log_error("ERROR: unhandled code path. BufferOwningPtr allocated "
|
||||||
|
"with mapped buffer!");
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
~BufferOwningPtr() {
|
~BufferOwningPtr()
|
||||||
if (map) {
|
{
|
||||||
#if defined( __APPLE__ )
|
if (map)
|
||||||
|
{
|
||||||
|
#if defined(__APPLE__)
|
||||||
int error = munmap(map, mapsize);
|
int error = munmap(map, mapsize);
|
||||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
if (error)
|
||||||
|
log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||||
#endif
|
#endif
|
||||||
} else {
|
}
|
||||||
if ( aligned )
|
else
|
||||||
|
{
|
||||||
|
if (aligned)
|
||||||
{
|
{
|
||||||
align_free(ptr);
|
align_free(ptr);
|
||||||
}
|
}
|
||||||
@@ -288,17 +357,24 @@ template <typename T> class BufferOwningPtr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0, bool aligned_ = false) {
|
void reset(void *p, void *m = 0, size_t mapsize_ = 0, size_t allocsize_ = 0,
|
||||||
if (map){
|
bool aligned_ = false)
|
||||||
#if defined( __APPLE__ )
|
{
|
||||||
|
if (map)
|
||||||
|
{
|
||||||
|
#if defined(__APPLE__)
|
||||||
int error = munmap(map, mapsize);
|
int error = munmap(map, mapsize);
|
||||||
if (error) log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
if (error)
|
||||||
|
log_error("WARNING: munmap failed in BufferOwningPtr.\n");
|
||||||
#else
|
#else
|
||||||
log_error( "ERROR: unhandled code path. BufferOwningPtr reset with mapped buffer!" );
|
log_error("ERROR: unhandled code path. BufferOwningPtr reset with "
|
||||||
|
"mapped buffer!");
|
||||||
abort();
|
abort();
|
||||||
#endif
|
#endif
|
||||||
} else {
|
}
|
||||||
if ( aligned )
|
else
|
||||||
|
{
|
||||||
|
if (aligned)
|
||||||
{
|
{
|
||||||
align_free(ptr);
|
align_free(ptr);
|
||||||
}
|
}
|
||||||
@@ -310,20 +386,21 @@ template <typename T> class BufferOwningPtr
|
|||||||
ptr = p;
|
ptr = p;
|
||||||
map = m;
|
map = m;
|
||||||
mapsize = mapsize_;
|
mapsize = mapsize_;
|
||||||
allocsize = (ptr != NULL) ? allocsize_ : 0; // Force allocsize to zero if ptr is NULL.
|
// Force allocsize to zero if ptr is NULL:
|
||||||
|
allocsize = (ptr != NULL) ? allocsize_ : 0;
|
||||||
aligned = aligned_;
|
aligned = aligned_;
|
||||||
#if ! defined( __APPLE__ )
|
#if !defined(__APPLE__)
|
||||||
if(m)
|
if (m)
|
||||||
{
|
{
|
||||||
log_error( "ERROR: unhandled code path. BufferOwningPtr allocated with mapped buffer!" );
|
log_error("ERROR: unhandled code path. BufferOwningPtr allocated "
|
||||||
|
"with mapped buffer!");
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
operator T*() { return (T*)ptr; }
|
operator T *() { return (T *)ptr; }
|
||||||
|
|
||||||
size_t getSize() const { return allocsize; };
|
size_t getSize() const { return allocsize; };
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // _typeWrappers_h
|
#endif // _typeWrappers_h
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user