mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-25 16:29:03 +00:00
Synchronise with Khronos-private Gitlab branch
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
This commit is contained in:
104
clean_tests.py
Executable file
104
clean_tests.py
Executable file
@@ -0,0 +1,104 @@
|
|||||||
|
#!/usr/bin/python
|
||||||
|
|
||||||
|
import sys, os, re
|
||||||
|
from subprocess import Popen, PIPE
|
||||||
|
from optparse import OptionParser
|
||||||
|
|
||||||
|
# trail_spaces: This method removes the trailing whitespaces and trailing tabs
|
||||||
|
def trail_spaces(line):
    """Strip trailing whitespace from *line* and return the result.

    The whole run of whitespace at the end of the line (spaces, tabs and the
    line terminator itself) is replaced by a single line ending: "\\r\\n" when
    the input contains a DOS line ending, "\\n" otherwise.  A line that has
    trailing whitespace but no terminator therefore gains a "\\n".  Lines
    without any trailing whitespace are returned unchanged.
    """
    # Keep a DOS ending if the line came in with one.
    ending = "\r\n" if "\r\n" in line else "\n"

    # re.sub returns its input untouched when the pattern does not match,
    # so no separate search is required before substituting.
    cleaned = re.sub(r"\s+$", ending, line)

    # Second pass kept from the original: drop any tabs left at the end.
    return re.sub(r"\t+$", "", cleaned)
|
||||||
|
|
||||||
|
#convert_tabs: This method converts tabs to 4 spaces
|
||||||
|
def convert_tabs(line):
    """Return *line* with every tab character replaced by four spaces.

    Lines that contain no tab are returned unchanged.
    """
    # The replacement must be exactly four spaces, as promised by the
    # "--notabs" help text ("converting tabs to 4 spaces").
    return line.replace("\t", "    ")
|
||||||
|
|
||||||
|
#convert_lineends: This method converts lineendings from DOS to Unix
|
||||||
|
def convert_lineends(line):
    """Return *line* with every DOS line ending ("\\r\\n") turned into "\\n".

    A lone "\\r" that is not followed by "\\n" is left in place.
    """
    return line.replace("\r\n", "\n")
|
||||||
|
|
||||||
|
#processfile: This method processes each file passed to it depending
|
||||||
|
# on the flags passed
|
||||||
|
|
||||||
|
def processfile(file, tabs, lineends, trails, verbose):
    """Clean up one file in place.

    file     -- path of the file to rewrite.
    tabs     -- when true, convert tabs with convert_tabs().
    lineends -- when true, convert DOS line endings with convert_lineends().
    trails   -- when true, strip trailing whitespace with trail_spaces().
    verbose  -- when true, print the name of the file being processed.

    The file is read completely, every line is run through the enabled
    passes, and the result is written back over the same path.
    """
    if verbose:
        print("processing file: " + file)

    # Select the enabled passes once.  The order is significant and matches
    # the original: tabs first, then line endings, then trailing whitespace.
    passes = []
    if tabs:
        passes.append(convert_tabs)
    if lineends:
        passes.append(convert_lineends)
    if trails:
        passes.append(trail_spaces)

    with open(file, 'r') as source:
        original_lines = source.readlines()

    cleaned_lines = []
    for text in original_lines:
        for clean in passes:
            text = clean(text)
        cleaned_lines.append(text)

    with open(file, 'w') as sink:
        sink.writelines(cleaned_lines)
|
||||||
|
|
||||||
|
#findfiles: This method finds all the code files present in current
|
||||||
|
# directory and subdirectories.
|
||||||
|
|
||||||
|
def findfiles(tabs, lineends, trails, verbose):
    """Clean every C/C++ source file below the current directory.

    Walks "./" recursively, collects all files ending in .c, .cpp, .h or
    .hpp, and then passes each of them to processfile() together with the
    given flags.
    """
    code_suffixes = ('.c', '.cpp', '.h', '.hpp')

    # Gather the complete list first; files are only rewritten after the
    # directory walk has finished.
    matches = []
    for folder, _subdirs, names in os.walk("./"):
        matches.extend(
            os.path.join(folder, name)
            for name in names
            if name.endswith(code_suffixes)
        )

    for path in matches:
        processfile(path, tabs, lineends, trails, verbose)
|
||||||
|
|
||||||
|
# Main function
|
||||||
|
|
||||||
|
def main():
    """Parse the command line and clean the selected files.

    With "--git <SHA1>" only the files named by that commit are processed;
    otherwise every code file below the current directory is processed via
    findfiles().  The --notabs / --notrails / --nolineends switches disable
    the individual clean-up passes and --verbose prints each file name.
    """
    parser = OptionParser()
    parser.add_option("--notabs", dest="tabs", action="store_false", default=True,
                      help="Disable converting tabs to 4 spaces.")
    parser.add_option("--notrails", dest="trails", action="store_false", default=True,
                      help="Disable removing trailing whitespaces and trailing tabs.")
    parser.add_option("--nolineends", dest="lineends", action="store_false", default=True,
                      help=" Disable converting line endings to Unix from DOS.")
    parser.add_option("--verbose", dest="verbose", action="store_true", default=False,
                      help="Prints out the files being processed.")
    parser.add_option("--git", dest="SHA1", default="",
                      help="Processes only the files present in the particular <SHA1> commit.")
    parser.add_option('-o', action="store", default=True,
                      help="Default: All the code files (.c,.cpp,.h,.hpp) in the current directory and subdirectories will be processed")

    options, _ = parser.parse_args()

    if not options.SHA1:
        findfiles(options.tabs, options.lineends, options.trails, options.verbose)
        return

    # Ask git for the list of paths touched by the commit.  Text mode keeps
    # the output a str so it can be split on "\n".
    pl = Popen(["git", "show", "--pretty=format:", "--name-only", options.SHA1],
               stdout=PIPE, universal_newlines=True)
    cmdout = pl.communicate()[0]

    for file in cmdout.split("\n"):
        print(file)
        # The listing also names files the commit deleted (and contains empty
        # entries); skip anything that is not an existing file instead of
        # failing when it is opened.
        if file and os.path.isfile(file):
            processfile(file, options.tabs, options.lineends, options.trails, options.verbose)
|
||||||
|
|
||||||
|
# start the process by calling main
|
||||||
|
# NOTE: there is no __name__ guard, so main() also runs when this file is imported.
main()
|
||||||
@@ -32,6 +32,9 @@
|
|||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/errno.h>
|
#include <sys/errno.h>
|
||||||
|
#ifdef __linux__
|
||||||
|
#include <sched.h>
|
||||||
|
#endif
|
||||||
#endif // !_WIN32
|
#endif // !_WIN32
|
||||||
|
|
||||||
// declarations
|
// declarations
|
||||||
@@ -251,7 +254,6 @@ void *ThreadPool_WorkerFunc( void *p )
|
|||||||
{
|
{
|
||||||
cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
|
cl_uint threadID = ThreadPool_AtomicAdd( (volatile cl_int *) p, 1 );
|
||||||
cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
cl_int item = ThreadPool_AtomicAdd( &gRunCount, -1 );
|
||||||
ThreadPool_AtomicAdd( &gRunning, 1 );
|
|
||||||
// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
|
// log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning );
|
||||||
|
|
||||||
while( MAX_COUNT > item )
|
while( MAX_COUNT > item )
|
||||||
@@ -444,7 +446,6 @@ void ThreadPool_Init(void)
|
|||||||
// Check for manual override of multithreading code. We add this for better debuggability.
|
// Check for manual override of multithreading code. We add this for better debuggability.
|
||||||
if( getenv( "CL_TEST_SINGLE_THREADED" ) )
|
if( getenv( "CL_TEST_SINGLE_THREADED" ) )
|
||||||
{
|
{
|
||||||
log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n*** TEST IS INVALID! ***\n");
|
|
||||||
gThreadCount = 1;
|
gThreadCount = 1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -458,7 +459,9 @@ void ThreadPool_Init(void)
|
|||||||
|
|
||||||
GetLogicalProcessorInformation( NULL, &length );
|
GetLogicalProcessorInformation( NULL, &length );
|
||||||
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
|
buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) malloc( length );
|
||||||
if( buffer != NULL && GetLogicalProcessorInformation( buffer, &length ) == TRUE )
|
if( buffer != NULL )
|
||||||
|
{
|
||||||
|
if ( GetLogicalProcessorInformation( buffer, &length ) == TRUE )
|
||||||
{
|
{
|
||||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
|
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer;
|
||||||
while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
|
while( ptr < &buffer[ length / sizeof( SYSTEM_LOGICAL_PROCESSOR_INFORMATION ) ] )
|
||||||
@@ -475,6 +478,7 @@ void ThreadPool_Init(void)
|
|||||||
}
|
}
|
||||||
++ptr;
|
++ptr;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
free(buffer);
|
free(buffer);
|
||||||
}
|
}
|
||||||
#elif defined (__MINGW32__)
|
#elif defined (__MINGW32__)
|
||||||
@@ -484,6 +488,20 @@ void ThreadPool_Init(void)
|
|||||||
GetSystemInfo( &sysinfo );
|
GetSystemInfo( &sysinfo );
|
||||||
gThreadCount = sysinfo.dwNumberOfProcessors;
|
gThreadCount = sysinfo.dwNumberOfProcessors;
|
||||||
}
|
}
|
||||||
|
#elif defined (__linux__) && !defined(__ANDROID__)
|
||||||
|
cpu_set_t affinity;
|
||||||
|
if ( 0 == sched_getaffinity(0, sizeof(cpu_set_t), &affinity) )
|
||||||
|
{
|
||||||
|
#if !(defined(CPU_COUNT))
|
||||||
|
gThreadCount = 1;
|
||||||
|
#else
|
||||||
|
gThreadCount = CPU_COUNT(&affinity);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||||
|
}
|
||||||
#else // !_WIN32
|
#else // !_WIN32
|
||||||
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
gThreadCount = (cl_int) sysconf(_SC_NPROCESSORS_CONF); // Hopefully your system returns logical cpus here, as does MacOS X
|
||||||
#endif // !_WIN32
|
#endif // !_WIN32
|
||||||
@@ -493,6 +511,18 @@ void ThreadPool_Init(void)
|
|||||||
gThreadCount = 2;
|
gThreadCount = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// When working in 32 bit limit the thread number to 12
|
||||||
|
// This fix was made due to memory issues in integer_ops test
|
||||||
|
// When running integer_ops, the test opens as many threads as the
|
||||||
|
// machine has and each thread allocates a fixed amount of memory
|
||||||
|
// When running this test on dual socket machine in 32-bit, the
|
||||||
|
// process memory is not sufficient and the test fails
|
||||||
|
#if defined(_WIN32) && !defined(_M_X64)
|
||||||
|
if (gThreadCount > 12) {
|
||||||
|
gThreadCount = 12;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
//Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
|
//Allow the app to set thread count to <0 for debugging purposes. This will cause the test to run single threaded.
|
||||||
if( gThreadCount < 2 )
|
if( gThreadCount < 2 )
|
||||||
{
|
{
|
||||||
@@ -532,6 +562,7 @@ void ThreadPool_Init(void)
|
|||||||
}
|
}
|
||||||
#endif // !_WIN32
|
#endif // !_WIN32
|
||||||
|
|
||||||
|
gRunning = gThreadCount;
|
||||||
// init threads
|
// init threads
|
||||||
for( i = 0; i < gThreadCount; i++ )
|
for( i = 0; i < gThreadCount; i++ )
|
||||||
{
|
{
|
||||||
@@ -745,6 +776,7 @@ cl_int ThreadPool_Do( TPFuncPtr func_ptr,
|
|||||||
gUserInfo = userInfo;
|
gUserInfo = userInfo;
|
||||||
|
|
||||||
#if defined( _WIN32 )
|
#if defined( _WIN32 )
|
||||||
|
ResetEvent(caller_event);
|
||||||
_WakeAllConditionVariable( cond_var );
|
_WakeAllConditionVariable( cond_var );
|
||||||
LeaveCriticalSection( cond_lock );
|
LeaveCriticalSection( cond_lock );
|
||||||
#else // !_WIN32
|
#else // !_WIN32
|
||||||
|
|||||||
@@ -17,27 +17,51 @@
|
|||||||
#define _COMPAT_H_
|
#define _COMPAT_H_
|
||||||
|
|
||||||
#if defined(_WIN32) && defined (_MSC_VER)
|
#if defined(_WIN32) && defined (_MSC_VER)
|
||||||
|
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
#include <Winbase.h>
|
|
||||||
#include <CL/cl.h>
|
|
||||||
#include <float.h>
|
|
||||||
#include <xmmintrin.h>
|
|
||||||
|
|
||||||
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
|
||||||
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
|
||||||
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
|
||||||
|
|
||||||
#define isfinite(x) _finite(x)
|
|
||||||
|
|
||||||
#if !defined(__cplusplus)
|
|
||||||
typedef char bool;
|
|
||||||
#define inline
|
|
||||||
|
|
||||||
#else
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#define EXTERN_C extern "C"
|
||||||
|
#else
|
||||||
|
#define EXTERN_C
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// stdlib.h
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <stdlib.h> // On Windows, _MAX_PATH defined there.
|
||||||
|
|
||||||
|
// llabs appeared in MS C v16 (VS 10/2010).
|
||||||
|
#if defined( _MSC_VER ) && _MSC_VER <= 1500
|
||||||
|
EXTERN_C inline long long llabs(long long __x) { return __x >= 0 ? __x : -__x; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// stdbool.h
|
||||||
|
//
|
||||||
|
|
||||||
|
// stdbool.h appeared in MS C v18 (VS 12/2013).
|
||||||
|
// Compilers up to and including MS C v17 (_MSC_VER 1700) lack <stdbool.h>,
// so C translation units get a minimal replacement there.  The version test
// must use _MSC_VER: a misspelled macro is undefined and evaluates to 0 in
// #if, which would select the fallback on every MSVC version.
#if defined( _MSC_VER ) && _MSC_VER <= 1700
    #if !defined(__cplusplus)
        typedef char bool;
        #define true 1
        #define false 0
    #endif
#else
    #include <stdbool.h>
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// stdint.h
|
||||||
|
//
|
||||||
|
|
||||||
|
// stdint.h appeared in MS C v16 (VS 10/2010) and Intel C v12.
|
||||||
|
#if defined( _MSC_VER ) && ( ! defined( __INTEL_COMPILER ) && _MSC_VER <= 1500 || defined( __INTEL_COMPILER ) && __INTEL_COMPILER < 1200 )
|
||||||
typedef unsigned char uint8_t;
|
typedef unsigned char uint8_t;
|
||||||
typedef char int8_t;
|
typedef char int8_t;
|
||||||
typedef unsigned short uint16_t;
|
typedef unsigned short uint16_t;
|
||||||
@@ -46,25 +70,83 @@ typedef unsigned int uint32_t;
|
|||||||
typedef int int32_t;
|
typedef int int32_t;
|
||||||
typedef unsigned long long uint64_t;
|
typedef unsigned long long uint64_t;
|
||||||
typedef long long int64_t;
|
typedef long long int64_t;
|
||||||
|
#else
|
||||||
#define MAXPATHLEN MAX_PATH
|
#ifndef __STDC_LIMIT_MACROS
|
||||||
|
#define __STDC_LIMIT_MACROS
|
||||||
typedef unsigned short ushort;
|
#endif
|
||||||
typedef unsigned int uint;
|
#include <stdint.h>
|
||||||
typedef unsigned long ulong;
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define INFINITY (FLT_MAX + FLT_MAX)
|
|
||||||
//#define NAN (INFINITY | 1)
|
//
|
||||||
//const static int PINFBITPATT_SP32 = INFINITY;
|
// float.h
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <float.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// fenv.h
|
||||||
|
//
|
||||||
|
|
||||||
|
// fenv.h appeared in MS C v18 (VS 12/2013).
|
||||||
|
#if defined( _MSC_VER ) && _MSC_VER <= 1700 && ! defined( __INTEL_COMPILER )
|
||||||
|
// reimplement fenv.h because windows doesn't have it
|
||||||
|
#define FE_INEXACT 0x0020
|
||||||
|
#define FE_UNDERFLOW 0x0010
|
||||||
|
#define FE_OVERFLOW 0x0008
|
||||||
|
#define FE_DIVBYZERO 0x0004
|
||||||
|
#define FE_INVALID 0x0001
|
||||||
|
#define FE_ALL_EXCEPT 0x003D
|
||||||
|
int fetestexcept(int excepts);
|
||||||
|
int feclearexcept(int excepts);
|
||||||
|
#else
|
||||||
|
#include <fenv.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// math.h
|
||||||
|
//
|
||||||
|
|
||||||
|
#if defined( __INTEL_COMPILER )
|
||||||
|
#include <mathimf.h>
|
||||||
|
#else
|
||||||
|
#include <math.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( _MSC_VER )
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef M_PI
|
#ifndef M_PI
|
||||||
#define M_PI 3.14159265358979323846264338327950288
|
#define M_PI 3.14159265358979323846264338327950288
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ! defined( __INTEL_COMPILER )
|
||||||
|
|
||||||
|
#ifndef NAN
|
||||||
|
#define NAN (INFINITY - INFINITY)
|
||||||
|
#endif
|
||||||
|
#ifndef HUGE_VALF
|
||||||
|
#define HUGE_VALF (float)HUGE_VAL
|
||||||
|
#endif
|
||||||
|
#ifndef INFINITY
|
||||||
|
#define INFINITY (FLT_MAX + FLT_MAX)
|
||||||
|
#endif
|
||||||
|
#ifndef isfinite
|
||||||
|
#define isfinite(x) _finite(x)
|
||||||
|
#endif
|
||||||
|
#ifndef isnan
|
||||||
#define isnan( x ) ((x) != (x))
|
#define isnan( x ) ((x) != (x))
|
||||||
|
#endif
|
||||||
|
#ifndef isinf
|
||||||
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
|
#define isinf( _x) ((_x) == INFINITY || (_x) == -INFINITY)
|
||||||
|
#endif
|
||||||
|
|
||||||
double rint( double x);
|
double rint( double x);
|
||||||
float rintf( float x);
|
float rintf( float x);
|
||||||
@@ -98,27 +180,6 @@ long double remquol( long double x, long double y, int *quo);
|
|||||||
|
|
||||||
long double scalblnl(long double x, long n);
|
long double scalblnl(long double x, long n);
|
||||||
|
|
||||||
inline long long
|
|
||||||
llabs(long long __x) { return __x >= 0 ? __x : -__x; }
|
|
||||||
|
|
||||||
|
|
||||||
// end of math functions
|
|
||||||
|
|
||||||
uint64_t ReadTime( void );
|
|
||||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
|
||||||
|
|
||||||
#define sleep(X) Sleep(1000*X)
|
|
||||||
#define snprintf sprintf_s
|
|
||||||
//#define hypotl _hypot
|
|
||||||
|
|
||||||
float make_nan();
|
|
||||||
float nanf( const char* str);
|
|
||||||
double nan( const char* str);
|
|
||||||
long double nanl( const char* str);
|
|
||||||
|
|
||||||
//#if defined USE_BOOST
|
|
||||||
//#include <boost/math/tr1.hpp>
|
|
||||||
//double hypot(double x, double y);
|
|
||||||
float hypotf(float x, float y);
|
float hypotf(float x, float y);
|
||||||
long double hypotl(long double x, long double y) ;
|
long double hypotl(long double x, long double y) ;
|
||||||
double lgamma(double x);
|
double lgamma(double x);
|
||||||
@@ -143,58 +204,190 @@ double round(double x);
|
|||||||
float roundf(float x);
|
float roundf(float x);
|
||||||
long double roundl(long double x);
|
long double roundl(long double x);
|
||||||
|
|
||||||
int signbit(double x);
|
int cf_signbit(double x);
|
||||||
int signbitf(float x);
|
int cf_signbitf(float x);
|
||||||
|
|
||||||
//bool signbitl(long double x) { return boost::math::tr1::signbit<long double>(x); }
|
// Added in _MSC_VER == 1800 (Visual Studio 2013)
|
||||||
//#endif // USE_BOOST
|
#if _MSC_VER < 1800
|
||||||
|
static int signbit(double x) { return cf_signbit(x); }
|
||||||
|
#endif
|
||||||
|
static int signbitf(float x) { return cf_signbitf(x); }
|
||||||
|
|
||||||
long int lrint (double flt);
|
long int lrint (double flt);
|
||||||
long int lrintf (float flt);
|
long int lrintf (float flt);
|
||||||
|
|
||||||
|
|
||||||
float int2float (int32_t ix);
|
float int2float (int32_t ix);
|
||||||
int32_t float2int (float fx);
|
int32_t float2int (float fx);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ! defined( __INTEL_COMPILER ) || __INTEL_COMPILER < 1300
|
||||||
|
// These functions appeared in Intel C v13.
|
||||||
|
float nanf( const char* str);
|
||||||
|
double nan( const char* str);
|
||||||
|
long double nanl( const char* str);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined( __ANDROID__ )
|
||||||
|
#define log2(X) (log(X)/log(2))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// stdio.h
|
||||||
|
//
|
||||||
|
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
// snprintf added in _MSC_VER == 1900 (Visual Studio 2015)
|
||||||
|
#if _MSC_VER < 1900
|
||||||
|
#define snprintf sprintf_s
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// unistd.h
|
||||||
|
//
|
||||||
|
|
||||||
|
#if defined( _MSC_VER )
|
||||||
|
EXTERN_C unsigned int sleep( unsigned int sec );
|
||||||
|
EXTERN_C int usleep( int usec );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// syscall.h
|
||||||
|
//
|
||||||
|
|
||||||
|
#if defined( __ANDROID__ )
|
||||||
|
// Android bionic's isn't providing SYS_sysctl wrappers.
|
||||||
|
#define SYS__sysctl __NR__sysctl
|
||||||
|
#elif defined( __aarch64__ )
|
||||||
|
// Enable deprecated syscalls on arm 64-bit.
|
||||||
|
#define __ARCH_WANT_SYSCALL_DEPRECATED
|
||||||
|
// And use the NR variant of syscall too.
|
||||||
|
#define SYS__sysctl __NR__sysctl
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Some tests use _malloca, which is defined in malloc.h.
|
||||||
|
#if !defined (__APPLE__)
|
||||||
|
#include <malloc.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
//
|
||||||
|
// ???
|
||||||
|
//
|
||||||
|
|
||||||
|
#if defined( _MSC_VER )
|
||||||
|
|
||||||
|
#define MAXPATHLEN _MAX_PATH
|
||||||
|
|
||||||
|
EXTERN_C uint64_t ReadTime( void );
|
||||||
|
EXTERN_C double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||||
|
|
||||||
/** Returns the number of leading 0-bits in x,
|
/** Returns the number of leading 0-bits in x,
|
||||||
starting at the most significant bit position.
|
starting at the most significant bit position.
|
||||||
If x is 0, the result is undefined.
|
If x is 0, the result is undefined.
|
||||||
*/
|
*/
|
||||||
int __builtin_clz(unsigned int pattern);
|
EXTERN_C int __builtin_clz(unsigned int pattern);
|
||||||
|
|
||||||
|
|
||||||
static const double zero= 0.00000000000000000000e+00;
|
|
||||||
#define NAN (INFINITY - INFINITY)
|
|
||||||
#define HUGE_VALF (float)HUGE_VAL
|
|
||||||
|
|
||||||
int usleep(int usec);
|
|
||||||
|
|
||||||
// reimplement fenv.h because windows doesn't have it
|
|
||||||
#define FE_INEXACT 0x0020
|
|
||||||
#define FE_UNDERFLOW 0x0010
|
|
||||||
#define FE_OVERFLOW 0x0008
|
|
||||||
#define FE_DIVBYZERO 0x0004
|
|
||||||
#define FE_INVALID 0x0001
|
|
||||||
#define FE_ALL_EXCEPT 0x003D
|
|
||||||
|
|
||||||
int fetestexcept(int excepts);
|
|
||||||
int feclearexcept(int excepts);
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else // !((defined(_WIN32) && defined(_MSC_VER)
|
|
||||||
#if defined(__MINGW32__)
|
|
||||||
#include <windows.h>
|
|
||||||
#define sleep(X) Sleep(1000*X)
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef MIN
|
||||||
|
#define MIN(x,y) (((x)<(y))?(x):(y))
|
||||||
|
#endif
|
||||||
|
#ifndef MAX
|
||||||
|
#define MAX(x,y) (((x)>(y))?(x):(y))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
------------------------------------------------------------------------------------------------
|
||||||
|
WARNING: DO NOT USE THESE MACROS: MAKE_HEX_FLOAT, MAKE_HEX_DOUBLE, MAKE_HEX_LONG.
|
||||||
|
|
||||||
|
This is a typical usage of the macros:
|
||||||
|
|
||||||
|
double yhi = MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-2);
|
||||||
|
|
||||||
|
(taken from math_brute_force/reference_math.c). There are two problems:
|
||||||
|
|
||||||
|
1. There is an error here. On Windows it will produce an incorrect result
|
||||||
|
`0x1.5555555555555p+50'. To have a correct result it should be written as
|
||||||
|
`MAKE_HEX_DOUBLE(0x1.5555555555555p-2,0x15555555555555LL,-54)'. A proper value of the
|
||||||
|
third argument is not obvious -- sometimes it should be the same as exponent of the
|
||||||
|
first argument, but sometimes not.
|
||||||
|
|
||||||
|
2. Information is duplicated. It is easy to make a mistake.
|
||||||
|
|
||||||
|
Use HEX_FLT, HEX_DBL, HEX_LDBL macros instead (see them in the bottom of the file).
|
||||||
|
------------------------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
||||||
|
|
||||||
|
#define MAKE_HEX_FLOAT(x,y,z) ((float)ldexp( (float)(y), z))
|
||||||
|
#define MAKE_HEX_DOUBLE(x,y,z) ldexp( (double)(y), z)
|
||||||
|
#define MAKE_HEX_LONG(x,y,z) ((long double) ldexp( (long double)(y), z))
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Do not use these macros in new code, use HEX_FLT, HEX_DBL, HEX_LDBL instead.
|
||||||
#define MAKE_HEX_FLOAT(x,y,z) x
|
#define MAKE_HEX_FLOAT(x,y,z) x
|
||||||
#define MAKE_HEX_DOUBLE(x,y,z) x
|
#define MAKE_HEX_DOUBLE(x,y,z) x
|
||||||
#define MAKE_HEX_LONG(x,y,z) x
|
#define MAKE_HEX_LONG(x,y,z) x
|
||||||
|
|
||||||
#endif // !((defined(_WIN32) && defined(_MSC_VER)
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
------------------------------------------------------------------------------------------------
|
||||||
|
HEX_FLT, HEX_DBL, HEX_LDBL -- Create hex floating point literal of type float, double, long
|
||||||
|
double respectively. Arguments:
|
||||||
|
|
||||||
|
sm -- sign of number,
|
||||||
|
int -- integer part of mantissa (without `0x' prefix),
|
||||||
|
fract -- fractional part of mantissa (without decimal point and `L' or `LL' suffixes),
|
||||||
|
se -- sign of exponent,
|
||||||
|
exp -- absolute value of (binary) exponent.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
double yhi = HEX_DBL( +, 1, 5555555555555, -, 2 ); // == 0x1.5555555555555p-2
|
||||||
|
|
||||||
|
Note:
|
||||||
|
|
||||||
|
We have to pass signs as separate arguments because gcc passes negative integer values
|
||||||
|
(e. g. `-2') into a macro as two separate tokens, so `HEX_FLT( 1, 0, -2 )' produces result
|
||||||
|
`0x1.0p- 2' (note a space between minus and two) which is not a correct floating point
|
||||||
|
literal.
|
||||||
|
------------------------------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#if defined ( _MSC_VER ) && ! defined( __INTEL_COMPILER )
|
||||||
|
// If compiler does not support hex floating point literals:
|
||||||
|
#define HEX_FLT( sm, int, fract, se, exp ) sm ldexpf( (float)( 0x ## int ## fract ## UL ), se exp + ilogbf( (float) 0x ## int ) - ilogbf( ( float )( 0x ## int ## fract ## UL ) ) )
|
||||||
|
#define HEX_DBL( sm, int, fract, se, exp ) sm ldexp( (double)( 0x ## int ## fract ## ULL ), se exp + ilogb( (double) 0x ## int ) - ilogb( ( double )( 0x ## int ## fract ## ULL ) ) )
|
||||||
|
#define HEX_LDBL( sm, int, fract, se, exp ) sm ldexpl( (long double)( 0x ## int ## fract ## ULL ), se exp + ilogbl( (long double) 0x ## int ) - ilogbl( ( long double )( 0x ## int ## fract ## ULL ) ) )
|
||||||
|
#else
|
||||||
|
// If compiler supports hex floating point literals: just concatenate all the parts into a literal.
|
||||||
|
#define HEX_FLT( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## F
|
||||||
|
#define HEX_DBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp
|
||||||
|
#define HEX_LDBL( sm, int, fract, se, exp ) sm 0x ## int ## . ## fract ## p ## se ## exp ## L
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__MINGW32__)
|
||||||
|
#include <Windows.h>
|
||||||
|
#define sleep(sec) Sleep((sec) * 1000)
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif // _COMPAT_H_
|
#endif // _COMPAT_H_
|
||||||
|
|||||||
@@ -16,15 +16,14 @@
|
|||||||
#ifndef _conversions_h
|
#ifndef _conversions_h
|
||||||
#define _conversions_h
|
#define _conversions_h
|
||||||
|
|
||||||
|
#include "compat.h"
|
||||||
|
|
||||||
#include "errorHelpers.h"
|
#include "errorHelpers.h"
|
||||||
#include "mt19937.h"
|
#include "mt19937.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <math.h>
|
|
||||||
#include <float.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include "compat.h"
|
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|||||||
@@ -25,7 +25,7 @@
|
|||||||
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
|
// rounding mode (e.g. round toward zero), then these functions should also set the reference device into that rounding mode.
|
||||||
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
|
#if defined( __APPLE__ ) || defined( _MSC_VER ) || defined( __linux__ ) || defined (__MINGW32__)
|
||||||
typedef int FPU_mode_type;
|
typedef int FPU_mode_type;
|
||||||
#if defined( __i386__ ) || defined( __x86_64__ )
|
#if defined( __i386__ ) || defined( __x86_64__ ) || defined( _MSC_VER ) || defined( __MINGW32__ )
|
||||||
#include <xmmintrin.h>
|
#include <xmmintrin.h>
|
||||||
#elif defined( __PPC__ )
|
#elif defined( __PPC__ )
|
||||||
#include <fpu_control.h>
|
#include <fpu_control.h>
|
||||||
@@ -45,6 +45,12 @@
|
|||||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*mode = fpscr;
|
||||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
|
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr | (1U << 24)));
|
||||||
|
// Add 64 bit support
|
||||||
|
#elif defined (__aarch64__)
|
||||||
|
unsigned fpcr;
|
||||||
|
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
|
||||||
|
*mode = fpcr;
|
||||||
|
__asm__ volatile ("msr fpcr, %0" :: "r"(fpcr | (1U << 24)));
|
||||||
#else
|
#else
|
||||||
#error ForceFTZ needs an implentation
|
#error ForceFTZ needs an implentation
|
||||||
#endif
|
#endif
|
||||||
@@ -64,6 +70,12 @@
|
|||||||
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
__asm__ volatile ("fmrx %0, fpscr" : "=r"(fpscr));
|
||||||
*mode = fpscr;
|
*mode = fpscr;
|
||||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
|
__asm__ volatile ("fmxr fpscr, %0" :: "r"(fpscr & ~(1U << 24)));
|
||||||
|
// Add 64 bit support
|
||||||
|
#elif defined (__aarch64__)
|
||||||
|
unsigned fpcr;
|
||||||
|
__asm__ volatile ("mrs %0, fpcr" : "=r"(fpcr));
|
||||||
|
*mode = fpcr;
|
||||||
|
__asm__ volatile ("msr fpcr, %0" :: "r"(fpcr & ~(1U << 24)));
|
||||||
#else
|
#else
|
||||||
#error DisableFTZ needs an implentation
|
#error DisableFTZ needs an implentation
|
||||||
#endif
|
#endif
|
||||||
@@ -78,6 +90,9 @@
|
|||||||
fpu_control = *mode;
|
fpu_control = *mode;
|
||||||
#elif defined (__arm__)
|
#elif defined (__arm__)
|
||||||
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
|
__asm__ volatile ("fmxr fpscr, %0" :: "r"(*mode));
|
||||||
|
// Add 64 bit support
|
||||||
|
#elif defined (__aarch64__)
|
||||||
|
__asm__ volatile ("msr fpcr, %0" :: "r"(*mode));
|
||||||
#else
|
#else
|
||||||
#error RestoreFPState needs an implementation
|
#error RestoreFPState needs an implementation
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -395,7 +395,7 @@ int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_ob
|
|||||||
}
|
}
|
||||||
|
|
||||||
free( list );
|
free( list );
|
||||||
return ( i < count ) ? true : false;
|
return ( i < count ) ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t get_pixel_bytes( const cl_image_format *fmt );
|
size_t get_pixel_bytes( const cl_image_format *fmt );
|
||||||
@@ -545,8 +545,19 @@ void * align_malloc(size_t size, size_t alignment)
|
|||||||
return _aligned_malloc(size, alignment);
|
return _aligned_malloc(size, alignment);
|
||||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||||
void * ptr = NULL;
|
void * ptr = NULL;
|
||||||
|
// alignment must be a power of two and a multiple of sizeof(void *).
|
||||||
|
if ( alignment < sizeof( void * ) )
|
||||||
|
{
|
||||||
|
alignment = sizeof( void * );
|
||||||
|
}
|
||||||
|
#if defined(__ANDROID__)
|
||||||
|
ptr = memalign(alignment, size);
|
||||||
|
if ( ptr )
|
||||||
|
return ptr;
|
||||||
|
#else
|
||||||
if (0 == posix_memalign(&ptr, alignment, size))
|
if (0 == posix_memalign(&ptr, alignment, size))
|
||||||
return ptr;
|
return ptr;
|
||||||
|
#endif
|
||||||
return NULL;
|
return NULL;
|
||||||
#elif defined(__MINGW32__)
|
#elif defined(__MINGW32__)
|
||||||
return __mingw_aligned_malloc(size, alignment);
|
return __mingw_aligned_malloc(size, alignment);
|
||||||
@@ -555,6 +566,7 @@ void * align_malloc(size_t size, size_t alignment)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void align_free(void * ptr)
|
void align_free(void * ptr)
|
||||||
{
|
{
|
||||||
#if defined(_WIN32) && defined(_MSC_VER)
|
#if defined(_WIN32) && defined(_MSC_VER)
|
||||||
|
|||||||
@@ -13,15 +13,18 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
//
|
//
|
||||||
#if defined(_WIN32) && defined (_MSC_VER)
|
|
||||||
|
|
||||||
#include "compat.h"
|
#include "compat.h"
|
||||||
#include <math.h>
|
|
||||||
#include <float.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include <CL/cl_platform.h>
|
|
||||||
|
|
||||||
|
#if defined ( _MSC_VER )
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include <CL/cl.h>
|
||||||
|
|
||||||
|
#include <windows.h>
|
||||||
|
|
||||||
|
#if ! defined( __INTEL_COMPILER )
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
@@ -387,86 +390,6 @@ long double log2l(long double x)
|
|||||||
return 1.44269504088896340735992468100189214L * log(x);
|
return 1.44269504088896340735992468100189214L * log(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// misc functions
|
|
||||||
//
|
|
||||||
///////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
/*
|
|
||||||
// This function is commented out because the Windows implementation should never call munmap.
|
|
||||||
// If it is calling it, we have a bug. Please file a bugzilla.
|
|
||||||
int munmap(void *addr, size_t len)
|
|
||||||
{
|
|
||||||
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
|
||||||
|
|
||||||
return (int)VirtualAlloc( (LPVOID)addr, len,
|
|
||||||
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
uint64_t ReadTime( void )
|
|
||||||
{
|
|
||||||
LARGE_INTEGER current;
|
|
||||||
QueryPerformanceCounter(¤t);
|
|
||||||
return (uint64_t)current.QuadPart;
|
|
||||||
}
|
|
||||||
|
|
||||||
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
|
||||||
{
|
|
||||||
static double PerformanceFrequency = 0.0;
|
|
||||||
|
|
||||||
if (PerformanceFrequency == 0.0) {
|
|
||||||
LARGE_INTEGER frequency;
|
|
||||||
QueryPerformanceFrequency(&frequency);
|
|
||||||
PerformanceFrequency = (double) frequency.QuadPart;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Returns a single-precision quiet NaN built from the bit pattern 0x7fc00000.
 *
 * IEEE 754 single-precision layout, least significant field first:
 *     mantissa:  22 bits
 *     quiet_nan:  1 bit
 *     exponent:   8 bits
 *     negative:   1 bit
 */
float make_nan()
{
    union
    {
        int32_t bits;
        float   value;
    } pun;

    /* Exponent all ones plus the quiet bit set, sign clear. */
    pun.bits = 0x7fc00000;
    return pun.value;
}
|
|
||||||
|
|
||||||
float nanf( const char* str)
|
|
||||||
{
|
|
||||||
cl_uint u = atoi( str );
|
|
||||||
u |= 0x7fc00000U;
|
|
||||||
return *( float*)(&u);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
double nan( const char* str)
|
|
||||||
{
|
|
||||||
cl_ulong u = atoi( str );
|
|
||||||
u |= 0x7ff8000000000000ULL;
|
|
||||||
return *( double*)(&u);
|
|
||||||
}
|
|
||||||
|
|
||||||
// double check this implementation
|
|
||||||
long double nanl( const char* str)
|
|
||||||
{
|
|
||||||
union
|
|
||||||
{
|
|
||||||
long double f;
|
|
||||||
struct { cl_ulong m; cl_ushort sexp; }u;
|
|
||||||
}u;
|
|
||||||
u.u.sexp = 0x7fff;
|
|
||||||
u.u.m = 0x8000000000000000ULL | atoi( str );
|
|
||||||
|
|
||||||
return u.f;
|
|
||||||
}
|
|
||||||
|
|
||||||
double trunc(double x)
|
double trunc(double x)
|
||||||
{
|
{
|
||||||
double absx = fabs(x);
|
double absx = fabs(x);
|
||||||
@@ -589,7 +512,167 @@ long double roundl(long double x)
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
int signbit(double x)
|
float cbrtf( float x )
|
||||||
|
{
|
||||||
|
float z = pow( fabs((double) x), 1.0 / 3.0 );
|
||||||
|
return copysignf( z, x );
|
||||||
|
}
|
||||||
|
|
||||||
|
double cbrt( double x )
|
||||||
|
{
|
||||||
|
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Rounds x to an integral value and returns it as a long.
 * Values at or beyond the range of long are clamped to LONG_MAX / LONG_MIN
 * instead of being passed to an out-of-range conversion.
 */
long int lrint (double x)
{
    const double two52 = 4503599627370496.0; /* 0x1.0p52 */
    double absx = fabs(x);

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the upper clamp: converting an out-of-range double to long is undefined. */
    if( x <= (double) LONG_MIN )
        return LONG_MIN;

    if( absx < two52 )
    {
        /* Adding and subtracting 2^52 (with the sign of x) pushes the fraction
           bits out of the significand, so the FPU does the rounding. */
        double magic = copysign( two52, x );
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* |x| >= 2^52: x has no fractional part left. */
    return (long int) x;
}
|
||||||
|
|
||||||
|
/*
 * Rounds x to an integral value and returns it as a long.
 * Values at or beyond the range of long are clamped to LONG_MAX / LONG_MIN
 * instead of being passed to an out-of-range conversion.
 */
long int lrintf (float x)
{
    const float two23 = 8388608.0f; /* 0x1.0p23f */
    float absx = fabsf(x);

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the upper clamp: converting an out-of-range float to long is undefined. */
    if( x <= (float) LONG_MIN )
        return LONG_MIN;

    if( absx < two23 )
    {
        /* Adding and subtracting 2^23 (with the sign of x) pushes the fraction
           bits out of the significand, so the FPU does the rounding. */
        float magic = copysignf( two23, x );
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* |x| >= 2^23: x has no fractional part left. */
    return (long int) x;
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// fenv functions
|
||||||
|
//
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
#if _MSC_VER < 1900
|
||||||
|
int fetestexcept(int excepts)
|
||||||
|
{
|
||||||
|
unsigned int status = _statusfp();
|
||||||
|
return excepts & (
|
||||||
|
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
||||||
|
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
||||||
|
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
||||||
|
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
||||||
|
((status & _SW_INVALID) ? FE_INVALID : 0)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Clears the floating-point status word via _clearfp() and returns 0.
 * The `excepts` mask is not consulted: every flag is cleared.
 */
int feclearexcept(int excepts)
{
    (void) excepts;
    _clearfp();
    return 0;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // __INTEL_COMPILER
|
||||||
|
|
||||||
|
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER < 1300)
|
||||||
|
|
||||||
|
/*
 * Returns a single-precision quiet NaN built from the bit pattern 0x7fc00000.
 *
 * IEEE 754 single-precision layout, least significant field first:
 *     mantissa:  22 bits
 *     quiet_nan:  1 bit
 *     exponent:   8 bits
 *     negative:   1 bit
 */
float make_nan()
{
    union
    {
        int32_t bits;
        float   value;
    } pun;

    /* Exponent all ones plus the quiet bit set, sign clear. */
    pun.bits = 0x7fc00000;
    return pun.value;
}
|
||||||
|
|
||||||
|
float nanf( const char* str)
|
||||||
|
{
|
||||||
|
cl_uint u = atoi( str );
|
||||||
|
u |= 0x7fc00000U;
|
||||||
|
return *( float*)(&u);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double nan( const char* str)
|
||||||
|
{
|
||||||
|
cl_ulong u = atoi( str );
|
||||||
|
u |= 0x7ff8000000000000ULL;
|
||||||
|
return *( double*)(&u);
|
||||||
|
}
|
||||||
|
|
||||||
|
// double check this implementation
|
||||||
|
long double nanl( const char* str)
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
long double f;
|
||||||
|
struct { cl_ulong m; cl_ushort sexp; }u;
|
||||||
|
}u;
|
||||||
|
u.u.sexp = 0x7fff;
|
||||||
|
u.u.m = 0x8000000000000000ULL | atoi( str );
|
||||||
|
|
||||||
|
return u.f;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// misc functions
|
||||||
|
//
|
||||||
|
///////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/*
|
||||||
|
// This function is commented out because the Windows implementation should never call munmap.
|
||||||
|
// If it is calling it, we have a bug. Please file a bugzilla.
|
||||||
|
int munmap(void *addr, size_t len)
|
||||||
|
{
|
||||||
|
// FIXME: this is not correct. munmap is like free() http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
|
||||||
|
|
||||||
|
return (int)VirtualAlloc( (LPVOID)addr, len,
|
||||||
|
MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
uint64_t ReadTime( void )
|
||||||
|
{
|
||||||
|
LARGE_INTEGER current;
|
||||||
|
QueryPerformanceCounter(¤t);
|
||||||
|
return (uint64_t)current.QuadPart;
|
||||||
|
}
|
||||||
|
|
||||||
|
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
||||||
|
{
|
||||||
|
static double PerformanceFrequency = 0.0;
|
||||||
|
|
||||||
|
if (PerformanceFrequency == 0.0) {
|
||||||
|
LARGE_INTEGER frequency;
|
||||||
|
QueryPerformanceFrequency(&frequency);
|
||||||
|
PerformanceFrequency = (double) frequency.QuadPart;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (double)(endTime - startTime) / PerformanceFrequency * 1e9;
|
||||||
|
}
|
||||||
|
|
||||||
|
int cf_signbit(double x)
|
||||||
{
|
{
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
@@ -600,7 +683,7 @@ int signbit(double x)
|
|||||||
return u.u >> 63;
|
return u.u >> 63;
|
||||||
}
|
}
|
||||||
|
|
||||||
int signbitf(float x)
|
int cf_signbitf(float x)
|
||||||
{
|
{
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
@@ -611,17 +694,6 @@ int signbitf(float x)
|
|||||||
return u.u >> 31;
|
return u.u >> 31;
|
||||||
}
|
}
|
||||||
|
|
||||||
float cbrtf( float x )
|
|
||||||
{
|
|
||||||
float z = pow( fabs((double) x), 1.0 / 3.0 );
|
|
||||||
return copysignf( z, x );
|
|
||||||
}
|
|
||||||
|
|
||||||
double cbrt( double x )
|
|
||||||
{
|
|
||||||
return copysign( pow( fabs( x ), 1.0 / 3.0 ), x );
|
|
||||||
}
|
|
||||||
|
|
||||||
float int2float (int32_t ix)
|
float int2float (int32_t ix)
|
||||||
{
|
{
|
||||||
union {
|
union {
|
||||||
@@ -642,7 +714,7 @@ int32_t float2int (float fx)
|
|||||||
return u.i;
|
return u.i;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(_WIN64)
|
#if !defined(_WIN64)
|
||||||
/** Returns the number of leading 0-bits in x,
|
/** Returns the number of leading 0-bits in x,
|
||||||
starting at the most significant bit position.
|
starting at the most significant bit position.
|
||||||
If x is 0, the result is undefined.
|
If x is 0, the result is undefined.
|
||||||
@@ -682,45 +754,10 @@ int __builtin_clz(unsigned int pattern)
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //defined(_MSC_VER) && !defined(_WIN64)
|
#endif // !defined(_WIN64)
|
||||||
|
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
#include <emmintrin.h>
|
#include <emmintrin.h>
|
||||||
/*
 * Rounds x to an integral value and returns it as a long.
 * Values at or beyond the range of long are clamped to LONG_MAX / LONG_MIN
 * instead of being passed to an out-of-range conversion.
 */
long int lrint (double x)
{
    const double two52 = 4503599627370496.0; /* 0x1.0p52 */
    double absx = fabs(x);

    if( x >= (double) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the upper clamp: converting an out-of-range double to long is undefined. */
    if( x <= (double) LONG_MIN )
        return LONG_MIN;

    if( absx < two52 )
    {
        /* Adding and subtracting 2^52 (with the sign of x) pushes the fraction
           bits out of the significand, so the FPU does the rounding. */
        double magic = copysign( two52, x );
        double rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* |x| >= 2^52: x has no fractional part left. */
    return (long int) x;
}
|
|
||||||
|
|
||||||
/*
 * Rounds x to an integral value and returns it as a long.
 * Values at or beyond the range of long are clamped to LONG_MAX / LONG_MIN
 * instead of being passed to an out-of-range conversion.
 */
long int lrintf (float x)
{
    const float two23 = 8388608.0f; /* 0x1.0p23f */
    float absx = fabsf(x);

    if( x >= (float) LONG_MAX )
        return LONG_MAX;

    /* Mirror of the upper clamp: converting an out-of-range float to long is undefined. */
    if( x <= (float) LONG_MIN )
        return LONG_MIN;

    if( absx < two23 )
    {
        /* Adding and subtracting 2^23 (with the sign of x) pushes the fraction
           bits out of the significand, so the FPU does the rounding. */
        float magic = copysignf( two23, x );
        float rounded = x + magic;
        rounded -= magic;
        return (long int) rounded;
    }

    /* |x| >= 2^23: x has no fractional part left. */
    return (long int) x;
}
|
|
||||||
|
|
||||||
int usleep(int usec)
|
int usleep(int usec)
|
||||||
{
|
{
|
||||||
@@ -728,22 +765,10 @@ int usleep(int usec)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int fetestexcept(int excepts)
|
unsigned int sleep( unsigned int sec )
|
||||||
{
|
{
|
||||||
unsigned int status = _statusfp();
|
Sleep( sec * 1000 );
|
||||||
return excepts & (
|
|
||||||
((status & _SW_INEXACT) ? FE_INEXACT : 0) |
|
|
||||||
((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0) |
|
|
||||||
((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0) |
|
|
||||||
((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0) |
|
|
||||||
((status & _SW_INVALID) ? FE_INVALID : 0)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
int feclearexcept(int excepts)
|
|
||||||
{
|
|
||||||
_clearfp();
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //defined(_WIN32)
|
#endif // defined( _MSC_VER )
|
||||||
|
|||||||
@@ -59,8 +59,14 @@ static void * align_malloc(size_t size, size_t alignment)
|
|||||||
return _aligned_malloc(size, alignment);
|
return _aligned_malloc(size, alignment);
|
||||||
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
#elif defined(__linux__) || defined (linux) || defined(__APPLE__)
|
||||||
void * ptr = NULL;
|
void * ptr = NULL;
|
||||||
|
#if defined(__ANDROID__)
|
||||||
|
ptr = memalign(alignment, size);
|
||||||
|
if ( ptr )
|
||||||
|
return ptr;
|
||||||
|
#else
|
||||||
if (0 == posix_memalign(&ptr, alignment, size))
|
if (0 == posix_memalign(&ptr, alignment, size))
|
||||||
return ptr;
|
return ptr;
|
||||||
|
#endif
|
||||||
return NULL;
|
return NULL;
|
||||||
#elif defined(__MINGW32__)
|
#elif defined(__MINGW32__)
|
||||||
return __mingw_aligned_malloc(size, alignment);
|
return __mingw_aligned_malloc(size, alignment);
|
||||||
|
|||||||
564
test_common/harness/os_helpers.cpp
Normal file
564
test_common/harness/os_helpers.cpp
Normal file
@@ -0,0 +1,564 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
#include "os_helpers.h"
|
||||||
|
#include "errorHelpers.h"
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// C++ interface.
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
#include <cerrno> // errno, error constants
|
||||||
|
#include <climits> // PATH_MAX
|
||||||
|
#include <cstdlib> // abort, _splitpath, _makepath
|
||||||
|
#include <cstring> // strdup, strerror_r
|
||||||
|
#include <sstream>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// Aborts the process when `ptr' is NULL.
// Wrapped in do { ... } while ( 0 ) so that `CHECK_PTR( p );' expands to exactly one statement
// and stays safe inside an unbraced if/else.
#define CHECK_PTR( ptr )            \
    do {                            \
        if ( (ptr) == NULL ) {      \
            abort();                \
        }                           \
    } while ( 0 )
|
||||||
|
|
||||||
|
typedef std::vector< char > buffer_t;
|
||||||
|
|
||||||
|
#if ! defined( PATH_MAX )
|
||||||
|
#define PATH_MAX 1000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int const _size = PATH_MAX + 1; // Initial buffer size for path.
|
||||||
|
int const _count = 8; // How many times we will try to double buffer size.
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
// MacOS X
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined( __APPLE__ )
|
||||||
|
|
||||||
|
|
||||||
|
#include <mach-o/dyld.h> // _NSGetExecutablePath
|
||||||
|
#include <libgen.h> // dirname
|
||||||
|
|
||||||
|
|
||||||
|
static
|
||||||
|
std::string
|
||||||
|
_err_msg(
|
||||||
|
int err, // Error number (e. g. errno).
|
||||||
|
int level // Nesting level, for avoiding infinite recursion.
|
||||||
|
) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
There are 3 incompatible versions of strerror_r:
|
||||||
|
|
||||||
|
char * strerror_r( int, char *, size_t ); // GNU version
|
||||||
|
int strerror_r( int, char *, size_t ); // BSD version
|
||||||
|
int strerror_r( int, char *, size_t ); // XSI version
|
||||||
|
|
||||||
|
BSD version returns error code, while XSI version returns 0 or -1 and sets errno.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
// BSD version of strerror_r.
|
||||||
|
buffer_t buffer( 100 );
|
||||||
|
int count = _count;
|
||||||
|
for ( ; ; ) {
|
||||||
|
int rc = strerror_r( err, & buffer.front(), buffer.size() );
|
||||||
|
if ( rc == EINVAL ) {
|
||||||
|
// Error code is not recognized, but anyway we got the message.
|
||||||
|
return & buffer.front();
|
||||||
|
} else if ( rc == ERANGE ) {
|
||||||
|
// Buffer is not enough.
|
||||||
|
if ( count > 0 ) {
|
||||||
|
// Enlarge the buffer.
|
||||||
|
-- count;
|
||||||
|
buffer.resize( buffer.size() * 2 );
|
||||||
|
} else {
|
||||||
|
std::stringstream ostr;
|
||||||
|
ostr
|
||||||
|
<< "Error " << err << " "
|
||||||
|
<< "(Getting error message failed: "
|
||||||
|
<< "Buffer of " << buffer.size() << " bytes is still too small"
|
||||||
|
<< ")";
|
||||||
|
return ostr.str();
|
||||||
|
}; // if
|
||||||
|
} else if ( rc == 0 ) {
|
||||||
|
// We got the message.
|
||||||
|
return & buffer.front();
|
||||||
|
} else {
|
||||||
|
std::stringstream ostr;
|
||||||
|
ostr
|
||||||
|
<< "Error " << err << " "
|
||||||
|
<< "(Getting error message failed: "
|
||||||
|
<< ( level < 2 ? _err_msg( rc, level + 1 ) : "Oops" )
|
||||||
|
<< ")";
|
||||||
|
return ostr.str();
|
||||||
|
}; // if
|
||||||
|
}; // forever
|
||||||
|
|
||||||
|
} // _err_msg
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the directory separator used on this platform ("/").
std::string
dir_sep(
) {
    return std::string( 1, '/' );
} // dir_sep
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
exe_path(
|
||||||
|
) {
|
||||||
|
buffer_t path( _size );
|
||||||
|
int count = _count;
|
||||||
|
for ( ; ; ) {
|
||||||
|
uint32_t size = path.size();
|
||||||
|
int rc = _NSGetExecutablePath( & path.front(), & size );
|
||||||
|
if ( rc == 0 ) {
|
||||||
|
break;
|
||||||
|
}; // if
|
||||||
|
if ( count > 0 ) {
|
||||||
|
-- count;
|
||||||
|
path.resize( size );
|
||||||
|
} else {
|
||||||
|
log_error(
|
||||||
|
"ERROR: Getting executable path failed: "
|
||||||
|
"_NSGetExecutablePath failed: Buffer of %lu bytes is still too small\n",
|
||||||
|
(unsigned long) path.size()
|
||||||
|
);
|
||||||
|
exit( 2 );
|
||||||
|
}; // if
|
||||||
|
}; // forever
|
||||||
|
return & path.front();
|
||||||
|
} // exe_path
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
exe_dir(
|
||||||
|
) {
|
||||||
|
std::string path = exe_path();
|
||||||
|
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
||||||
|
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
||||||
|
return dirname( & buffer.front() );
|
||||||
|
} // exe_dir
|
||||||
|
|
||||||
|
|
||||||
|
#endif // __APPLE__
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
// Linux
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined( __linux__ )
|
||||||
|
|
||||||
|
|
||||||
|
#include <cerrno> // errno
|
||||||
|
#include <libgen.h> // dirname
|
||||||
|
#include <unistd.h> // readlink
|
||||||
|
|
||||||
|
|
||||||
|
static
std::string
_err_msg(
    int err,      // Error number (e. g. errno).
    int level     // Nesting level, for avoiding infinite recursion.
) {

    /*
        There are 3 incompatible versions of strerror_r:

            char * strerror_r( int, char *, size_t );  // GNU version
            int    strerror_r( int, char *, size_t );  // BSD version
            int    strerror_r( int, char *, size_t );  // XSI version

        BSD version returns error code. XSI version returns 0 on success; on failure it either
        returns -1 and sets errno (older C libraries) or returns the positive error number
        directly (e. g. glibc since 2.13). Both failure conventions are handled below.
    */

#if defined(__ANDROID__) || ( ( _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 ) && ! _GNU_SOURCE )

    // XSI version of strerror_r.
#warning Not tested!
    buffer_t buffer( 200 );
    int count = _count;
    for ( ; ; ) {

        int rc = strerror_r( err, & buffer.front(), buffer.size() );
        if ( rc == 0 ) {
            // We got the message.
            return & buffer.front();
        }

        int _err;
        if ( rc == -1 ) {
            // Old convention: the reason is in errno.
            _err = errno;
        } else if ( rc == EINVAL ) {
            // New convention, unknown error number. Return the buffer as-is; presumably the
            // library has stored an "unknown error" text there, otherwise it is still empty.
            return & buffer.front();
        } else {
            // New convention: the return value is the error number.
            _err = rc;
        }

        if ( _err == ERANGE ) {
            if ( count > 0 ) {
                // Enlarge the buffer.
                -- count;
                buffer.resize( buffer.size() * 2 );
            } else {
                std::stringstream ostr;
                ostr
                    << "Error " << err << " "
                    << "(Getting error message failed: "
                    << "Buffer of " << buffer.size() << " bytes is still too small"
                    << ")";
                return ostr.str();
            }
        } else {
            std::stringstream ostr;
            ostr
                << "Error " << err << " "
                << "(Getting error message failed: "
                << ( level < 2 ? _err_msg( _err, level + 1 ) : "Oops" )
                << ")";
            return ostr.str();
        }

    } // forever

#else

    // GNU version of strerror_r: it returns a pointer to the message, which may or may not be
    // `buffer'. The std::string return value copies the text before `buffer' goes out of scope.
    char buffer[ 2000 ];
    return strerror_r( err, buffer, sizeof( buffer ) );

#endif

} // _err_msg
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the directory separator used on this platform ("/").
std::string
dir_sep(
) {
    static char const separator[] = "/";
    return separator;
} // dir_sep
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
exe_path(
|
||||||
|
) {
|
||||||
|
|
||||||
|
static std::string const exe = "/proc/self/exe";
|
||||||
|
|
||||||
|
buffer_t path( _size );
|
||||||
|
int count = _count; // Max number of iterations.
|
||||||
|
|
||||||
|
for ( ; ; ) {
|
||||||
|
|
||||||
|
ssize_t len = readlink( exe.c_str(), & path.front(), path.size() );
|
||||||
|
|
||||||
|
if ( len < 0 ) {
|
||||||
|
// Oops.
|
||||||
|
int err = errno;
|
||||||
|
log_error(
|
||||||
|
"ERROR: Getting executable path failed: "
|
||||||
|
"Reading symlink `%s' failed: %s\n",
|
||||||
|
exe.c_str(), err_msg( err ).c_str()
|
||||||
|
);
|
||||||
|
exit( 2 );
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
if ( len < path.size() ) {
|
||||||
|
// We got the path.
|
||||||
|
path.resize( len );
|
||||||
|
break;
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
// Oops, buffer is too small.
|
||||||
|
if ( count > 0 ) {
|
||||||
|
-- count;
|
||||||
|
// Enlarge the buffer.
|
||||||
|
path.resize( path.size() * 2 );
|
||||||
|
} else {
|
||||||
|
log_error(
|
||||||
|
"ERROR: Getting executable path failed: "
|
||||||
|
"Reading symlink `%s' failed: Buffer of %lu bytes is still too small\n",
|
||||||
|
exe.c_str(),
|
||||||
|
(unsigned long) path.size()
|
||||||
|
);
|
||||||
|
exit( 2 );
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
}; // forever
|
||||||
|
|
||||||
|
return std::string( & path.front(), path.size() );
|
||||||
|
|
||||||
|
} // exe_path
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
exe_dir(
|
||||||
|
) {
|
||||||
|
std::string path = exe_path();
|
||||||
|
// We cannot pass path.c_str() to `dirname' bacause `dirname' modifies its argument.
|
||||||
|
buffer_t buffer( path.c_str(), path.c_str() + path.size() + 1 ); // Copy with trailing zero.
|
||||||
|
return dirname( & buffer.front() );
|
||||||
|
} // exe_dir
|
||||||
|
|
||||||
|
#endif // __linux__
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
// MS Windows
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#if defined( _WIN32 )
|
||||||
|
|
||||||
|
|
||||||
|
#include <windows.h>
|
||||||
|
#if defined( max )
|
||||||
|
#undef max
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <cctype>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
|
||||||
|
static
|
||||||
|
std::string
|
||||||
|
_err_msg(
|
||||||
|
int err,
|
||||||
|
int level
|
||||||
|
) {
|
||||||
|
|
||||||
|
std::string msg;
|
||||||
|
|
||||||
|
LPSTR buffer = NULL;
|
||||||
|
DWORD flags =
|
||||||
|
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||||
|
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||||
|
FORMAT_MESSAGE_IGNORE_INSERTS;
|
||||||
|
|
||||||
|
DWORD len =
|
||||||
|
FormatMessageA(
|
||||||
|
flags,
|
||||||
|
NULL,
|
||||||
|
err,
|
||||||
|
LANG_USER_DEFAULT,
|
||||||
|
reinterpret_cast< LPSTR >( & buffer ),
|
||||||
|
0,
|
||||||
|
NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
if ( buffer == NULL || len == 0 ) {
|
||||||
|
|
||||||
|
int _err = GetLastError();
|
||||||
|
char str[1024] = { 0 };
|
||||||
|
snprintf(str, sizeof(str), "Error 0x%08x (Getting error message failed: %s )", err, ( level < 2 ? _err_msg( _err, level + 1 ).c_str() : "Oops" ));
|
||||||
|
msg = std::string(str);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// Trim trailing whitespace (including `\r' and `\n').
|
||||||
|
while ( len > 0 && isspace( buffer[ len - 1 ] ) ) {
|
||||||
|
-- len;
|
||||||
|
}; // while
|
||||||
|
|
||||||
|
// Drop trailing full stop.
|
||||||
|
if ( len > 0 && buffer[ len - 1 ] == '.' ) {
|
||||||
|
-- len;
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
msg.assign( buffer, len );
|
||||||
|
|
||||||
|
}; //if
|
||||||
|
|
||||||
|
if ( buffer != NULL ) {
|
||||||
|
LocalFree( buffer );
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
return msg;
|
||||||
|
|
||||||
|
} // _get_err_msg
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the directory separator used on this platform (a single backslash).
std::string
dir_sep(
) {
    return std::string( 1, '\\' );
} // dir_sep
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
exe_path(
|
||||||
|
) {
|
||||||
|
|
||||||
|
buffer_t path( _size );
|
||||||
|
int count = _count;
|
||||||
|
|
||||||
|
for ( ; ; ) {
|
||||||
|
|
||||||
|
DWORD len = GetModuleFileNameA( NULL, & path.front(), path.size() );
|
||||||
|
|
||||||
|
if ( len == 0 ) {
|
||||||
|
int err = GetLastError();
|
||||||
|
log_error( "ERROR: Getting executable path failed: %s\n", err_msg( err ).c_str() );
|
||||||
|
exit( 2 );
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
if ( len < path.size() ) {
|
||||||
|
path.resize( len );
|
||||||
|
break;
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
// Buffer too small.
|
||||||
|
if ( count > 0 ) {
|
||||||
|
-- count;
|
||||||
|
path.resize( path.size() * 2 );
|
||||||
|
} else {
|
||||||
|
log_error(
|
||||||
|
"ERROR: Getting executable path failed: "
|
||||||
|
"Buffer of %lu bytes is still too small\n",
|
||||||
|
(unsigned long) path.size()
|
||||||
|
);
|
||||||
|
exit( 2 );
|
||||||
|
}; // if
|
||||||
|
|
||||||
|
}; // forever
|
||||||
|
|
||||||
|
return std::string( & path.front(), path.size() );
|
||||||
|
|
||||||
|
} // exe_path
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
exe_dir(
|
||||||
|
) {
|
||||||
|
|
||||||
|
std::string exe = exe_path();
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
|
// Splitting path into components.
|
||||||
|
buffer_t drv( _MAX_DRIVE );
|
||||||
|
buffer_t dir( _MAX_DIR );
|
||||||
|
count = _count;
|
||||||
|
#if defined(_MSC_VER)
|
||||||
|
for ( ; ; ) {
|
||||||
|
int rc =
|
||||||
|
_splitpath_s(
|
||||||
|
exe.c_str(),
|
||||||
|
& drv.front(), drv.size(),
|
||||||
|
& dir.front(), dir.size(),
|
||||||
|
NULL, 0, // We need neither name
|
||||||
|
NULL, 0 // nor extension
|
||||||
|
);
|
||||||
|
if ( rc == 0 ) {
|
||||||
|
break;
|
||||||
|
} else if ( rc == ERANGE ) {
|
||||||
|
if ( count > 0 ) {
|
||||||
|
-- count;
|
||||||
|
// Buffer is too small, but it is not clear which one.
|
||||||
|
// So we have to enlarge all.
|
||||||
|
drv.resize( drv.size() * 2 );
|
||||||
|
dir.resize( dir.size() * 2 );
|
||||||
|
} else {
|
||||||
|
log_error(
|
||||||
|
"ERROR: Getting executable path failed: "
|
||||||
|
"Splitting path `%s' to components failed: "
|
||||||
|
"Buffers of %lu and %lu bytes are still too small\n",
|
||||||
|
exe.c_str(),
|
||||||
|
(unsigned long) drv.size(),
|
||||||
|
(unsigned long) dir.size()
|
||||||
|
);
|
||||||
|
exit( 2 );
|
||||||
|
}; // if
|
||||||
|
} else {
|
||||||
|
log_error(
|
||||||
|
"ERROR: Getting executable path failed: "
|
||||||
|
"Splitting path `%s' to components failed: %s\n",
|
||||||
|
exe.c_str(),
|
||||||
|
err_msg( rc ).c_str()
|
||||||
|
);
|
||||||
|
exit( 2 );
|
||||||
|
}; // if
|
||||||
|
}; // forever
|
||||||
|
|
||||||
|
#else // __MINGW32__
|
||||||
|
|
||||||
|
// MinGW does not have the "secure" _splitpath_s, use the insecure version instead.
|
||||||
|
_splitpath(
|
||||||
|
exe.c_str(),
|
||||||
|
& drv.front(),
|
||||||
|
& dir.front(),
|
||||||
|
NULL, // We need neither name
|
||||||
|
NULL // nor extension
|
||||||
|
);
|
||||||
|
#endif // __MINGW32__
|
||||||
|
|
||||||
|
// Combining components back to path.
|
||||||
|
// I failed with "secure" `_makepath_s'. If buffer is too small, instead of returning
|
||||||
|
// ERANGE, `_makepath_s' pops up dialog box and offers to debug the program. D'oh!
|
||||||
|
// So let us try to guess the size of result and go with insecure `_makepath'.
|
||||||
|
buffer_t path( std::max( drv.size() + dir.size(), size_t( _MAX_PATH ) ) + 10 );
|
||||||
|
_makepath( & path.front(), & drv.front(), & dir.front(), NULL, NULL );
|
||||||
|
|
||||||
|
return & path.front();
|
||||||
|
|
||||||
|
} // exe_dir
|
||||||
|
|
||||||
|
|
||||||
|
#endif // _WIN32
|
||||||
|
|
||||||
|
|
||||||
|
std::string
|
||||||
|
err_msg(
|
||||||
|
int err
|
||||||
|
) {
|
||||||
|
|
||||||
|
return _err_msg( err, 0 );
|
||||||
|
|
||||||
|
} // err_msg
|
||||||
|
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// C interface.
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
|
||||||
|
char *
|
||||||
|
get_err_msg(
|
||||||
|
int err
|
||||||
|
) {
|
||||||
|
char * msg = strdup( err_msg( err ).c_str() );
|
||||||
|
CHECK_PTR( msg );
|
||||||
|
return msg;
|
||||||
|
} // get_err_msg
|
||||||
|
|
||||||
|
|
||||||
|
char *
|
||||||
|
get_dir_sep(
|
||||||
|
) {
|
||||||
|
char * sep = strdup( dir_sep().c_str() );
|
||||||
|
CHECK_PTR( sep );
|
||||||
|
return sep;
|
||||||
|
} // get_dir_sep
|
||||||
|
|
||||||
|
|
||||||
|
char *
|
||||||
|
get_exe_path(
|
||||||
|
) {
|
||||||
|
char * path = strdup( exe_path().c_str() );
|
||||||
|
CHECK_PTR( path );
|
||||||
|
return path;
|
||||||
|
} // get_exe_path
|
||||||
|
|
||||||
|
|
||||||
|
char *
|
||||||
|
get_exe_dir(
|
||||||
|
) {
|
||||||
|
char * dir = strdup( exe_dir().c_str() );
|
||||||
|
CHECK_PTR( dir );
|
||||||
|
return dir;
|
||||||
|
} // get_exe_dir
|
||||||
|
|
||||||
|
|
||||||
|
// end of file //
|
||||||
53
test_common/harness/os_helpers.h
Normal file
53
test_common/harness/os_helpers.h
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
#ifndef __os_helpers_h__
|
||||||
|
#define __os_helpers_h__
|
||||||
|
|
||||||
|
#include "compat.h"
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
// C++ interface.
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
std::string err_msg( int err );
|
||||||
|
std::string dir_sep();
|
||||||
|
std::string exe_path();
|
||||||
|
std::string exe_dir();
|
||||||
|
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
// C interface.
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
||||||
|
char * get_err_msg( int err ); // Returns system error message. Subject to free.
|
||||||
|
char * get_dir_sep(); // Returns dir separator. Subject to free.
|
||||||
|
char * get_exe_path(); // Returns path of current executable. Subject to free.
|
||||||
|
char * get_exe_dir(); // Returns dir of current executable. Subject to free.
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
#endif // __cplusplus
|
||||||
|
|
||||||
|
#endif // __os_helpers_h__
|
||||||
42
test_common/harness/parseParameters.cpp
Normal file
42
test_common/harness/parseParameters.cpp
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
#include "parseParameters.h"
|
||||||
|
#include "errorHelpers.h"
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// Returns true when `number` is a strictly positive power of two (1, 2, 4, ...).
//
// Zero and all negative values yield false. The explicit `number > 0` guard
// matters for INT_MIN: without it, `number - 1` overflows a signed int
// (undefined behaviour), and on wrapping implementations INT_MIN would be
// misreported as a power of two.
bool is_power_of_two(int number)
{
    // A positive power of two has exactly one bit set, so clearing the lowest
    // set bit via `number & (number - 1)` must leave zero.
    return number > 0 && (number & (number - 1)) == 0;
}
|
||||||
|
|
||||||
|
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor)
|
||||||
|
{
|
||||||
|
const char *arg_temp = strchr(&arg[1], ']');
|
||||||
|
if (arg_temp != 0)
|
||||||
|
{
|
||||||
|
int new_factor = atoi(&arg[1]);
|
||||||
|
arg = arg_temp; // Advance until ']'
|
||||||
|
if (is_power_of_two(new_factor))
|
||||||
|
{
|
||||||
|
log_info("\n Wimpy reduction factor changed from %d to %d \n", wimpyReductionFactor, new_factor);
|
||||||
|
wimpyReductionFactor = new_factor;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
log_info("\n WARNING: Incorrect wimpy reduction factor %d, must be power of 2. The default value will be used.\n", new_factor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
24
test_common/harness/parseParameters.h
Normal file
24
test_common/harness/parseParameters.h
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2017 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
#ifndef _parseParameters_h
|
||||||
|
#define _parseParameters_h
|
||||||
|
|
||||||
|
#include "compat.h"
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
extern void parseWimpyReductionFactor(const char *&arg, int &wimpyReductionFactor);
|
||||||
|
|
||||||
|
#endif // _parseParameters_h
|
||||||
@@ -15,7 +15,69 @@
|
|||||||
//
|
//
|
||||||
#include "rounding_mode.h"
|
#include "rounding_mode.h"
|
||||||
|
|
||||||
#if !(defined(_WIN32) && defined(_MSC_VER))
|
#if (defined( __arm__ ) || defined(__aarch64__))
|
||||||
|
#define FPSCR_FZ (1 << 24) // Flush-To-Zero mode
|
||||||
|
#define FPSCR_ROUND_MASK (3 << 22) // Rounding mode:
|
||||||
|
|
||||||
|
#define _ARM_FE_FTZ 0x1000000
|
||||||
|
#define _ARM_FE_NFTZ 0x0
|
||||||
|
#if defined(__aarch64__)
|
||||||
|
#define _FPU_GETCW(cw) __asm__ ("MRS %0,FPCR" : "=r" (cw))
|
||||||
|
#define _FPU_SETCW(cw) __asm__ ("MSR FPCR,%0" : :"ri" (cw))
|
||||||
|
#else
|
||||||
|
#define _FPU_GETCW(cw) __asm__ ("VMRS %0,FPSCR" : "=r" (cw))
|
||||||
|
#define _FPU_SETCW(cw) __asm__ ("VMSR FPSCR,%0" : :"ri" (cw))
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined( __arm__ ) || defined(__aarch64__)) && defined( __GNUC__ )
|
||||||
|
#define _ARM_FE_TONEAREST 0x0
|
||||||
|
#define _ARM_FE_UPWARD 0x400000
|
||||||
|
#define _ARM_FE_DOWNWARD 0x800000
|
||||||
|
#define _ARM_FE_TOWARDZERO 0xc00000
|
||||||
|
// ARM / AArch64 variant: programs the rounding-mode field of the floating-point
// control register and returns the rounding mode that was active beforehand.
//
// r       - requested rounding mode; used directly as an index into the
//           lookup tables below (no range check is performed here).
// outType - when kfloat or kdouble the floating-point table is used,
//           otherwise the integer table.
//
// The two tables differ only in their first entry (nearest for float output,
// toward-zero for integer output) -- presumably the slot for the default
// rounding mode; confirm against the RoundingMode enum.
RoundingMode set_round( RoundingMode r, Type outType )
{
    static const int flt_rounds[ kRoundingModeCount ] = { _ARM_FE_TONEAREST,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };
    static const int int_rounds[ kRoundingModeCount ] = { _ARM_FE_TOWARDZERO,
        _ARM_FE_TONEAREST, _ARM_FE_UPWARD, _ARM_FE_DOWNWARD, _ARM_FE_TOWARDZERO };

    const bool floatOutput = ( outType == kfloat || outType == kdouble );
    const int *table = floatOutput ? flt_rounds : int_rounds;

    // Capture the current mode before it is overwritten.
    RoundingMode previous = get_round();

    int controlWord = 0;
    _FPU_GETCW(controlWord);
    // Replace only the rounding bits; every other control bit is preserved.
    _FPU_SETCW( table[r] | (controlWord & ~FPSCR_ROUND_MASK));

    return previous;
}
|
||||||
|
|
||||||
|
// ARM / AArch64 variant: reads the floating-point control register and
// translates its rounding-mode field into the harness RoundingMode value.
RoundingMode get_round( void )
{
    int controlWord;
    _FPU_GETCW(controlWord);

    // Isolate the rounding-mode bits selected by FPSCR_ROUND_MASK.
    const int roundingBits = controlWord & FPSCR_ROUND_MASK;

    if( roundingBits == _ARM_FE_TONEAREST )
        return kRoundToNearestEven;
    if( roundingBits == _ARM_FE_UPWARD )
        return kRoundUp;
    if( roundingBits == _ARM_FE_DOWNWARD )
        return kRoundDown;
    if( roundingBits == _ARM_FE_TOWARDZERO )
        return kRoundTowardZero;

    // The four cases above cover every value the mask can produce, so this is
    // only a defensive fallback.
    return kDefaultRoundingMode;
}
|
||||||
|
|
||||||
|
#elif !(defined(_WIN32) && defined(_MSC_VER))
|
||||||
RoundingMode set_round( RoundingMode r, Type outType )
|
RoundingMode set_round( RoundingMode r, Type outType )
|
||||||
{
|
{
|
||||||
static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
|
static const int flt_rounds[ kRoundingModeCount ] = { FE_TONEAREST, FE_TONEAREST, FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO };
|
||||||
@@ -135,8 +197,10 @@ void *FlushToZero( void )
|
|||||||
union{ int i; void *p; }u = { _mm_getcsr() };
|
union{ int i; void *p; }u = { _mm_getcsr() };
|
||||||
_mm_setcsr( u.i | 0x8040 );
|
_mm_setcsr( u.i | 0x8040 );
|
||||||
return u.p;
|
return u.p;
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ ) || defined(__aarch64__)
|
||||||
// processor is already in FTZ mode -- do nothing
|
int fpscr;
|
||||||
|
_FPU_GETCW(fpscr);
|
||||||
|
_FPU_SETCW(fpscr | FPSCR_FZ);
|
||||||
return NULL;
|
return NULL;
|
||||||
#elif defined( __PPC__ )
|
#elif defined( __PPC__ )
|
||||||
fpu_control_t flags = 0;
|
fpu_control_t flags = 0;
|
||||||
@@ -159,8 +223,10 @@ void UnFlushToZero( void *p)
|
|||||||
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
|
#if defined( __i386__ ) || defined( __x86_64__ ) || defined(_MSC_VER)
|
||||||
union{ void *p; int i; }u = { p };
|
union{ void *p; int i; }u = { p };
|
||||||
_mm_setcsr( u.i );
|
_mm_setcsr( u.i );
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ ) || defined(__aarch64__)
|
||||||
// processor is already in FTZ mode -- do nothing
|
int fpscr;
|
||||||
|
_FPU_GETCW(fpscr);
|
||||||
|
_FPU_SETCW(fpscr & ~FPSCR_FZ);
|
||||||
#elif defined( __PPC__)
|
#elif defined( __PPC__)
|
||||||
fpu_control_t flags = 0;
|
fpu_control_t flags = 0;
|
||||||
_FPU_GETCW(flags);
|
_FPU_GETCW(flags);
|
||||||
|
|||||||
@@ -16,15 +16,11 @@
|
|||||||
#ifndef __ROUNDING_MODE_H__
|
#ifndef __ROUNDING_MODE_H__
|
||||||
#define __ROUNDING_MODE_H__
|
#define __ROUNDING_MODE_H__
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include "compat.h"
|
||||||
|
|
||||||
#if (defined(_WIN32) && defined (_MSC_VER))
|
#if (defined(_WIN32) && defined (_MSC_VER))
|
||||||
// need for _controlfp_s and rouinding modes in RoundingMode
|
|
||||||
#include <float.h>
|
|
||||||
#include "errorHelpers.h"
|
#include "errorHelpers.h"
|
||||||
#include "testHarness.h"
|
#include "testHarness.h"
|
||||||
#else
|
|
||||||
#include <fenv.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef enum
|
typedef enum
|
||||||
|
|||||||
@@ -13,16 +13,10 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
//
|
//
|
||||||
|
#include "compat.h"
|
||||||
#include "threadTesting.h"
|
#include "threadTesting.h"
|
||||||
#include "errorHelpers.h"
|
#include "errorHelpers.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
#if !defined(_WIN32)
|
|
||||||
#include <stdbool.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <math.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#if !defined(_WIN32)
|
#if !defined(_WIN32)
|
||||||
|
|||||||
8
test_common/miniz/CMakeLists.txt
Normal file
8
test_common/miniz/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
set(TARGET_NAME miniz)
|
||||||
|
|
||||||
|
add_library(
|
||||||
|
${TARGET_NAME}
|
||||||
|
STATIC
|
||||||
|
miniz.c
|
||||||
|
miniz.h
|
||||||
|
)
|
||||||
4153
test_common/miniz/miniz.c
Normal file
4153
test_common/miniz/miniz.c
Normal file
File diff suppressed because it is too large
Load Diff
749
test_common/miniz/miniz.h
Normal file
749
test_common/miniz/miniz.h
Normal file
@@ -0,0 +1,749 @@
|
|||||||
|
#ifndef MINIZ_HEADER_INCLUDED
|
||||||
|
#define MINIZ_HEADER_INCLUDED
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
|
||||||
|
// TODO: Work around "error: include file 'sys\utime.h'" when compiling with tcc on Linux
|
||||||
|
#define MINIZ_NO_TIME
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
|
||||||
|
#include <time.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
|
||||||
|
// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
|
||||||
|
#define MINIZ_X86_OR_X64_CPU 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (__BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU
|
||||||
|
// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
|
||||||
|
#define MINIZ_LITTLE_ENDIAN 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if MINIZ_X86_OR_X64_CPU
|
||||||
|
// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
|
||||||
|
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
|
||||||
|
// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
|
||||||
|
#define MINIZ_HAS_64BIT_REGISTERS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Return status codes. MZ_PARAM_ERROR is non-standard.
|
||||||
|
enum {
|
||||||
|
MZ_OK = 0,
|
||||||
|
MZ_STREAM_END = 1,
|
||||||
|
MZ_NEED_DICT = 2,
|
||||||
|
MZ_ERRNO = -1,
|
||||||
|
MZ_STREAM_ERROR = -2,
|
||||||
|
MZ_DATA_ERROR = -3,
|
||||||
|
MZ_MEM_ERROR = -4,
|
||||||
|
MZ_BUF_ERROR = -5,
|
||||||
|
MZ_VERSION_ERROR = -6,
|
||||||
|
MZ_PARAM_ERROR = -10000
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef unsigned long mz_ulong;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------- zlib-style API Definitions.
|
||||||
|
|
||||||
|
// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
|
||||||
|
void mz_free(void *p);
|
||||||
|
|
||||||
|
#define MZ_ADLER32_INIT (1)
|
||||||
|
// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
|
||||||
|
mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);
|
||||||
|
|
||||||
|
#define MZ_CRC32_INIT (0)
|
||||||
|
// mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL.
|
||||||
|
mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);
|
||||||
|
|
||||||
|
// Compression strategies.
|
||||||
|
enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 };
|
||||||
|
|
||||||
|
// Method
|
||||||
|
#define MZ_DEFLATED 8
|
||||||
|
|
||||||
|
#ifndef MINIZ_NO_ZLIB_APIS
|
||||||
|
|
||||||
|
// Heap allocation callbacks.
|
||||||
|
// Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long.
|
||||||
|
typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
|
||||||
|
typedef void (*mz_free_func)(void *opaque, void *address);
|
||||||
|
typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);
|
||||||
|
|
||||||
|
#define MZ_VERSION "9.1.15"
|
||||||
|
#define MZ_VERNUM 0x91F0
|
||||||
|
#define MZ_VER_MAJOR 9
|
||||||
|
#define MZ_VER_MINOR 1
|
||||||
|
#define MZ_VER_REVISION 15
|
||||||
|
#define MZ_VER_SUBREVISION 0
|
||||||
|
|
||||||
|
// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs).
|
||||||
|
enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 };
|
||||||
|
|
||||||
|
// Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL.
|
||||||
|
enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 };
|
||||||
|
|
||||||
|
// Window bits
|
||||||
|
#define MZ_DEFAULT_WINDOW_BITS 15
|
||||||
|
|
||||||
|
struct mz_internal_state;
|
||||||
|
|
||||||
|
// Compression/decompression stream struct.
|
||||||
|
typedef struct mz_stream_s
|
||||||
|
{
|
||||||
|
const unsigned char *next_in; // pointer to next byte to read
|
||||||
|
unsigned int avail_in; // number of bytes available at next_in
|
||||||
|
mz_ulong total_in; // total number of bytes consumed so far
|
||||||
|
|
||||||
|
unsigned char *next_out; // pointer to next byte to write
|
||||||
|
unsigned int avail_out; // number of bytes that can be written to next_out
|
||||||
|
mz_ulong total_out; // total number of bytes produced so far
|
||||||
|
|
||||||
|
char *msg; // error msg (unused)
|
||||||
|
struct mz_internal_state *state; // internal state, allocated by zalloc/zfree
|
||||||
|
|
||||||
|
mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc)
|
||||||
|
mz_free_func zfree; // optional heap free function (defaults to free)
|
||||||
|
void *opaque; // heap alloc function user pointer
|
||||||
|
|
||||||
|
int data_type; // data_type (unused)
|
||||||
|
mz_ulong adler; // adler32 of the source or uncompressed data
|
||||||
|
mz_ulong reserved; // not used
|
||||||
|
} mz_stream;
|
||||||
|
|
||||||
|
typedef mz_stream *mz_streamp;
|
||||||
|
|
||||||
|
// Returns the version string of miniz.c.
|
||||||
|
const char *mz_version(void);
|
||||||
|
|
||||||
|
// mz_deflateInit() initializes a compressor with default options:
|
||||||
|
// Parameters:
|
||||||
|
// pStream must point to an initialized mz_stream struct.
|
||||||
|
// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
|
||||||
|
// level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
|
||||||
|
// (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
|
||||||
|
// Return values:
|
||||||
|
// MZ_OK on success.
|
||||||
|
// MZ_STREAM_ERROR if the stream is bogus.
|
||||||
|
// MZ_PARAM_ERROR if the input parameters are bogus.
|
||||||
|
// MZ_MEM_ERROR on out of memory.
|
||||||
|
int mz_deflateInit(mz_streamp pStream, int level);
|
||||||
|
|
||||||
|
// mz_deflateInit2() is like mz_deflateInit(), except with more control:
|
||||||
|
// Additional parameters:
|
||||||
|
// method must be MZ_DEFLATED
|
||||||
|
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
|
||||||
|
// mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
|
||||||
|
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);
|
||||||
|
|
||||||
|
// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
|
||||||
|
int mz_deflateReset(mz_streamp pStream);
|
||||||
|
|
||||||
|
// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
|
||||||
|
// Parameters:
|
||||||
|
// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
|
||||||
|
// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
|
||||||
|
// Return values:
|
||||||
|
// MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
|
||||||
|
// MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
|
||||||
|
// MZ_STREAM_ERROR if the stream is bogus.
|
||||||
|
// MZ_PARAM_ERROR if one of the parameters is invalid.
|
||||||
|
// MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
|
||||||
|
int mz_deflate(mz_streamp pStream, int flush);
|
||||||
|
|
||||||
|
// mz_deflateEnd() deinitializes a compressor:
|
||||||
|
// Return values:
|
||||||
|
// MZ_OK on success.
|
||||||
|
// MZ_STREAM_ERROR if the stream is bogus.
|
||||||
|
int mz_deflateEnd(mz_streamp pStream);
|
||||||
|
|
||||||
|
// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
|
||||||
|
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);
|
||||||
|
|
||||||
|
// Single-call compression functions mz_compress() and mz_compress2():
|
||||||
|
// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
|
||||||
|
int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
|
||||||
|
int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);
|
||||||
|
|
||||||
|
// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
|
||||||
|
mz_ulong mz_compressBound(mz_ulong source_len);
|
||||||
|
|
||||||
|
// Initializes a decompressor.
|
||||||
|
int mz_inflateInit(mz_streamp pStream);
|
||||||
|
|
||||||
|
// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
|
||||||
|
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
|
||||||
|
int mz_inflateInit2(mz_streamp pStream, int window_bits);
|
||||||
|
|
||||||
|
// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
|
||||||
|
// Parameters:
|
||||||
|
// pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
|
||||||
|
// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
|
||||||
|
// On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
|
||||||
|
// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data.
|
||||||
|
// Return values:
|
||||||
|
// MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full.
|
||||||
|
// MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified.
|
||||||
|
// MZ_STREAM_ERROR if the stream is bogus.
|
||||||
|
// MZ_DATA_ERROR if the deflate stream is invalid.
|
||||||
|
// MZ_PARAM_ERROR if one of the parameters is invalid.
|
||||||
|
// MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again
|
||||||
|
// with more input data, or with more room in the output buffer (except when using single call decompression, described above).
|
||||||
|
int mz_inflate(mz_streamp pStream, int flush);
|
||||||
|
|
||||||
|
// Deinitializes a decompressor.
|
||||||
|
int mz_inflateEnd(mz_streamp pStream);
|
||||||
|
|
||||||
|
// Single-call decompression.
|
||||||
|
// Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure.
|
||||||
|
int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
|
||||||
|
|
||||||
|
// Returns a string description of the specified error code, or NULL if the error code is invalid.
|
||||||
|
const char *mz_error(int err);
|
||||||
|
|
||||||
|
// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports.
|
||||||
|
// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project.
|
||||||
|
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
|
||||||
|
typedef unsigned char Byte;
|
||||||
|
typedef unsigned int uInt;
|
||||||
|
typedef mz_ulong uLong;
|
||||||
|
typedef Byte Bytef;
|
||||||
|
typedef uInt uIntf;
|
||||||
|
typedef char charf;
|
||||||
|
typedef int intf;
|
||||||
|
typedef void *voidpf;
|
||||||
|
typedef uLong uLongf;
|
||||||
|
typedef void *voidp;
|
||||||
|
typedef void *const voidpc;
|
||||||
|
#define Z_NULL 0
|
||||||
|
#define Z_NO_FLUSH MZ_NO_FLUSH
|
||||||
|
#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
|
||||||
|
#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
|
||||||
|
#define Z_FULL_FLUSH MZ_FULL_FLUSH
|
||||||
|
#define Z_FINISH MZ_FINISH
|
||||||
|
#define Z_BLOCK MZ_BLOCK
|
||||||
|
#define Z_OK MZ_OK
|
||||||
|
#define Z_STREAM_END MZ_STREAM_END
|
||||||
|
#define Z_NEED_DICT MZ_NEED_DICT
|
||||||
|
#define Z_ERRNO MZ_ERRNO
|
||||||
|
#define Z_STREAM_ERROR MZ_STREAM_ERROR
|
||||||
|
#define Z_DATA_ERROR MZ_DATA_ERROR
|
||||||
|
#define Z_MEM_ERROR MZ_MEM_ERROR
|
||||||
|
#define Z_BUF_ERROR MZ_BUF_ERROR
|
||||||
|
#define Z_VERSION_ERROR MZ_VERSION_ERROR
|
||||||
|
#define Z_PARAM_ERROR MZ_PARAM_ERROR
|
||||||
|
#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
|
||||||
|
#define Z_BEST_SPEED MZ_BEST_SPEED
|
||||||
|
#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
|
||||||
|
#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
|
||||||
|
#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
|
||||||
|
#define Z_FILTERED MZ_FILTERED
|
||||||
|
#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
|
||||||
|
#define Z_RLE MZ_RLE
|
||||||
|
#define Z_FIXED MZ_FIXED
|
||||||
|
#define Z_DEFLATED MZ_DEFLATED
|
||||||
|
#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
|
||||||
|
#define alloc_func mz_alloc_func
|
||||||
|
#define free_func mz_free_func
|
||||||
|
#define internal_state mz_internal_state
|
||||||
|
#define z_stream mz_stream
|
||||||
|
#define deflateInit mz_deflateInit
|
||||||
|
#define deflateInit2 mz_deflateInit2
|
||||||
|
#define deflateReset mz_deflateReset
|
||||||
|
#define deflate mz_deflate
|
||||||
|
#define deflateEnd mz_deflateEnd
|
||||||
|
#define deflateBound mz_deflateBound
|
||||||
|
#define compress mz_compress
|
||||||
|
#define compress2 mz_compress2
|
||||||
|
#define compressBound mz_compressBound
|
||||||
|
#define inflateInit mz_inflateInit
|
||||||
|
#define inflateInit2 mz_inflateInit2
|
||||||
|
#define inflate mz_inflate
|
||||||
|
#define inflateEnd mz_inflateEnd
|
||||||
|
#define uncompress mz_uncompress
|
||||||
|
#define crc32 mz_crc32
|
||||||
|
#define adler32 mz_adler32
|
||||||
|
#define MAX_WBITS 15
|
||||||
|
#define MAX_MEM_LEVEL 9
|
||||||
|
#define zError mz_error
|
||||||
|
#define ZLIB_VERSION MZ_VERSION
|
||||||
|
#define ZLIB_VERNUM MZ_VERNUM
|
||||||
|
#define ZLIB_VER_MAJOR MZ_VER_MAJOR
|
||||||
|
#define ZLIB_VER_MINOR MZ_VER_MINOR
|
||||||
|
#define ZLIB_VER_REVISION MZ_VER_REVISION
|
||||||
|
#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
|
||||||
|
#define zlibVersion mz_version
|
||||||
|
#define zlib_version mz_version()
|
||||||
|
#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
|
||||||
|
|
||||||
|
#endif // MINIZ_NO_ZLIB_APIS
|
||||||
|
|
||||||
|
// ------------------- Types and macros
|
||||||
|
|
||||||
|
typedef unsigned char mz_uint8;
|
||||||
|
typedef signed short mz_int16;
|
||||||
|
typedef unsigned short mz_uint16;
|
||||||
|
typedef unsigned int mz_uint32;
|
||||||
|
typedef unsigned int mz_uint;
|
||||||
|
typedef long long mz_int64;
|
||||||
|
typedef unsigned long long mz_uint64;
|
||||||
|
typedef int mz_bool;
|
||||||
|
|
||||||
|
#define MZ_FALSE (0)
|
||||||
|
#define MZ_TRUE (1)
|
||||||
|
|
||||||
|
// An attempt to work around MSVC's spammy "warning C4127: conditional expression is constant" message.
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define MZ_MACRO_END while (0, 0)
|
||||||
|
#else
|
||||||
|
#define MZ_MACRO_END while (0)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// ------------------- ZIP archive reading/writing
|
||||||
|
|
||||||
|
#ifndef MINIZ_NO_ARCHIVE_APIS
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
MZ_ZIP_MAX_IO_BUF_SIZE = 64*1024,
|
||||||
|
MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260,
|
||||||
|
MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
mz_uint32 m_file_index;
|
||||||
|
mz_uint32 m_central_dir_ofs;
|
||||||
|
mz_uint16 m_version_made_by;
|
||||||
|
mz_uint16 m_version_needed;
|
||||||
|
mz_uint16 m_bit_flag;
|
||||||
|
mz_uint16 m_method;
|
||||||
|
#ifndef MINIZ_NO_TIME
|
||||||
|
time_t m_time;
|
||||||
|
#endif
|
||||||
|
mz_uint32 m_crc32;
|
||||||
|
mz_uint64 m_comp_size;
|
||||||
|
mz_uint64 m_uncomp_size;
|
||||||
|
mz_uint16 m_internal_attr;
|
||||||
|
mz_uint32 m_external_attr;
|
||||||
|
mz_uint64 m_local_header_ofs;
|
||||||
|
mz_uint32 m_comment_size;
|
||||||
|
char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];
|
||||||
|
char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];
|
||||||
|
} mz_zip_archive_file_stat;
|
||||||
|
|
||||||
|
typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n);
|
||||||
|
typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n);
|
||||||
|
|
||||||
|
struct mz_zip_internal_state_tag;
|
||||||
|
typedef struct mz_zip_internal_state_tag mz_zip_internal_state;
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
MZ_ZIP_MODE_INVALID = 0,
|
||||||
|
MZ_ZIP_MODE_READING = 1,
|
||||||
|
MZ_ZIP_MODE_WRITING = 2,
|
||||||
|
MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
|
||||||
|
} mz_zip_mode;
|
||||||
|
|
||||||
|
typedef struct mz_zip_archive_tag
|
||||||
|
{
|
||||||
|
mz_uint64 m_archive_size;
|
||||||
|
mz_uint64 m_central_directory_file_ofs;
|
||||||
|
mz_uint m_total_files;
|
||||||
|
mz_zip_mode m_zip_mode;
|
||||||
|
|
||||||
|
mz_uint m_file_offset_alignment;
|
||||||
|
|
||||||
|
mz_alloc_func m_pAlloc;
|
||||||
|
mz_free_func m_pFree;
|
||||||
|
mz_realloc_func m_pRealloc;
|
||||||
|
void *m_pAlloc_opaque;
|
||||||
|
|
||||||
|
mz_file_read_func m_pRead;
|
||||||
|
mz_file_write_func m_pWrite;
|
||||||
|
void *m_pIO_opaque;
|
||||||
|
|
||||||
|
mz_zip_internal_state *m_pState;
|
||||||
|
|
||||||
|
} mz_zip_archive;
|
||||||
|
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
|
||||||
|
MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
|
||||||
|
MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
|
||||||
|
MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
|
||||||
|
} mz_zip_flags;
|
||||||
|
|
||||||
|
// ZIP archive reading
|
||||||
|
|
||||||
|
// Inits a ZIP archive reader.
|
||||||
|
// These functions read and validate the archive's central directory.
|
||||||
|
mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint32 flags);
|
||||||
|
mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint32 flags);
|
||||||
|
|
||||||
|
#ifndef MINIZ_NO_STDIO
|
||||||
|
mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Returns the total number of files in the archive.
|
||||||
|
mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);
|
||||||
|
|
||||||
|
// Returns detailed information about an archive file entry.
|
||||||
|
mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat);
|
||||||
|
|
||||||
|
// Determines if an archive file entry is a directory entry.
|
||||||
|
mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index);
|
||||||
|
mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index);
|
||||||
|
|
||||||
|
// Retrieves the filename of an archive file entry.
|
||||||
|
// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename.
|
||||||
|
mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size);
|
||||||
|
|
||||||
|
// Attempts to locate a file in the archive's central directory.
|
||||||
|
// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH
|
||||||
|
// Returns -1 if the file cannot be found.
|
||||||
|
int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
|
||||||
|
|
||||||
|
// Extracts an archive file to a memory buffer using no memory allocation.
|
||||||
|
mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
|
||||||
|
mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
|
||||||
|
|
||||||
|
// Extracts an archive file to a memory buffer.
|
||||||
|
mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags);
|
||||||
|
mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags);
|
||||||
|
|
||||||
|
// Extracts an archive file to a dynamically allocated heap buffer.
|
||||||
|
void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags);
|
||||||
|
void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags);
|
||||||
|
|
||||||
|
// Extracts an archive file using a callback function to output the file's data.
|
||||||
|
mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
|
||||||
|
mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
|
||||||
|
|
||||||
|
#ifndef MINIZ_NO_STDIO
|
||||||
|
// Extracts an archive file to a disk file and sets its last accessed and modified times.
|
||||||
|
// This function only extracts files, not archive directory records.
|
||||||
|
mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags);
|
||||||
|
mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used.
|
||||||
|
mz_bool mz_zip_reader_end(mz_zip_archive *pZip);
|
||||||
|
|
||||||
|
// ZIP archive writing
|
||||||
|
|
||||||
|
#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
|
||||||
|
|
||||||
|
// Inits a ZIP archive writer.
|
||||||
|
mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
|
||||||
|
mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size);
|
||||||
|
|
||||||
|
#ifndef MINIZ_NO_STDIO
|
||||||
|
mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive.
|
||||||
|
// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called.
|
||||||
|
// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it).
|
||||||
|
// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL.
|
||||||
|
// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before
|
||||||
|
// the archive is finalized the file's central directory will be hosed.
|
||||||
|
mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename);
|
||||||
|
|
||||||
|
// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive.
|
||||||
|
// To add a directory entry, call this method with an archive name ending in a forward slash with an empty buffer.
|
||||||
|
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||||
|
mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags);
|
||||||
|
mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32);
|
||||||
|
|
||||||
|
#ifndef MINIZ_NO_STDIO
|
||||||
|
// Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive.
|
||||||
|
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||||
|
mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Adds a file to an archive by fully cloning the data from another archive.
|
||||||
|
// This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields.
|
||||||
|
mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint file_index);
|
||||||
|
|
||||||
|
// Finalizes the archive by writing the central directory records followed by the end of central directory record.
|
||||||
|
// After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end().
|
||||||
|
// An archive must be manually finalized by calling this function for it to be valid.
|
||||||
|
mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);
|
||||||
|
mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, size_t *pSize);
|
||||||
|
|
||||||
|
// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used.
|
||||||
|
// Note for the archive to be valid, it must have been finalized before ending.
|
||||||
|
mz_bool mz_zip_writer_end(mz_zip_archive *pZip);
|
||||||
|
|
||||||
|
// Misc. high-level helper functions:
|
||||||
|
|
||||||
|
// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive.
|
||||||
|
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
|
||||||
|
mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
|
||||||
|
|
||||||
|
// Reads a single file from an archive into a heap block.
|
||||||
|
// Returns NULL on failure.
|
||||||
|
void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint zip_flags);
|
||||||
|
|
||||||
|
#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
|
||||||
|
|
||||||
|
#endif // #ifndef MINIZ_NO_ARCHIVE_APIS
|
||||||
|
|
||||||
|
// ------------------- Low-level Decompression API Definitions
|
||||||
|
|
||||||
|
// Decompression flags used by tinfl_decompress().
|
||||||
|
// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
|
||||||
|
// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
|
||||||
|
// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
|
||||||
|
// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
|
||||||
|
TINFL_FLAG_HAS_MORE_INPUT = 2,
|
||||||
|
TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
|
||||||
|
TINFL_FLAG_COMPUTE_ADLER32 = 8
|
||||||
|
};
|
||||||
|
|
||||||
|
// High level decompression functions:
|
||||||
|
// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
|
||||||
|
// On entry:
|
||||||
|
// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
|
||||||
|
// On return:
|
||||||
|
// Function returns a pointer to the decompressed data, or NULL on failure.
|
||||||
|
// *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
|
||||||
|
// The caller must call mz_free() on the returned block when it's no longer needed.
|
||||||
|
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
|
||||||
|
|
||||||
|
// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
|
||||||
|
// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
|
||||||
|
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
|
||||||
|
size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
|
||||||
|
|
||||||
|
// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
|
||||||
|
// Returns 1 on success or 0 on failure.
|
||||||
|
typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
|
||||||
|
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||||
|
|
||||||
|
struct tinfl_decompressor_tag; typedef struct tinfl_decompressor_tag tinfl_decompressor;
|
||||||
|
|
||||||
|
// Max size of LZ dictionary.
|
||||||
|
#define TINFL_LZ_DICT_SIZE 32768
|
||||||
|
|
||||||
|
// Return status.
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
TINFL_STATUS_BAD_PARAM = -3,
|
||||||
|
TINFL_STATUS_ADLER32_MISMATCH = -2,
|
||||||
|
TINFL_STATUS_FAILED = -1,
|
||||||
|
TINFL_STATUS_DONE = 0,
|
||||||
|
TINFL_STATUS_NEEDS_MORE_INPUT = 1,
|
||||||
|
TINFL_STATUS_HAS_MORE_OUTPUT = 2
|
||||||
|
} tinfl_status;
|
||||||
|
|
||||||
|
// Initializes the decompressor to its initial state.
|
||||||
|
#define tinfl_init(r) do { (r)->m_state = 0; } MZ_MACRO_END
|
||||||
|
#define tinfl_get_adler32(r) (r)->m_check_adler32
|
||||||
|
|
||||||
|
// Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability.
|
||||||
|
// This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output.
|
||||||
|
tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);
|
||||||
|
|
||||||
|
// Internal/private bits follow.
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19,
|
||||||
|
TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0];
|
||||||
|
mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
|
||||||
|
} tinfl_huff_table;
|
||||||
|
|
||||||
|
#if MINIZ_HAS_64BIT_REGISTERS
|
||||||
|
#define TINFL_USE_64BIT_BITBUF 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if TINFL_USE_64BIT_BITBUF
|
||||||
|
typedef mz_uint64 tinfl_bit_buf_t;
|
||||||
|
#define TINFL_BITBUF_SIZE (64)
|
||||||
|
#else
|
||||||
|
typedef mz_uint32 tinfl_bit_buf_t;
|
||||||
|
#define TINFL_BITBUF_SIZE (32)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct tinfl_decompressor_tag
|
||||||
|
{
|
||||||
|
mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES];
|
||||||
|
tinfl_bit_buf_t m_bit_buf;
|
||||||
|
size_t m_dist_from_out_buf_start;
|
||||||
|
tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES];
|
||||||
|
mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
|
||||||
|
};
|
||||||
|
|
||||||
|
// ------------------- Low-level Compression API Definitions
|
||||||
|
|
||||||
|
// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently).
|
||||||
|
#define TDEFL_LESS_MEMORY 0
|
||||||
|
|
||||||
|
// tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search):
|
||||||
|
// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression).
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF
|
||||||
|
};
|
||||||
|
|
||||||
|
// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data.
|
||||||
|
// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers).
|
||||||
|
// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing.
|
||||||
|
// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory).
|
||||||
|
// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1)
|
||||||
|
// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled.
|
||||||
|
// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables.
|
||||||
|
// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks.
|
||||||
|
// The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK).
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
TDEFL_WRITE_ZLIB_HEADER = 0x01000,
|
||||||
|
TDEFL_COMPUTE_ADLER32 = 0x02000,
|
||||||
|
TDEFL_GREEDY_PARSING_FLAG = 0x04000,
|
||||||
|
TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000,
|
||||||
|
TDEFL_RLE_MATCHES = 0x10000,
|
||||||
|
TDEFL_FILTER_MATCHES = 0x20000,
|
||||||
|
TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000,
|
||||||
|
TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000
|
||||||
|
};
|
||||||
|
|
||||||
|
// High level compression functions:
|
||||||
|
// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc().
|
||||||
|
// On entry:
|
||||||
|
// pSrc_buf, src_buf_len: Pointer and size of source block to compress.
|
||||||
|
// flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression.
|
||||||
|
// On return:
|
||||||
|
// Function returns a pointer to the compressed data, or NULL on failure.
|
||||||
|
// *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data.
|
||||||
|
// The caller must free() the returned block when it's no longer needed.
|
||||||
|
void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);
|
||||||
|
|
||||||
|
// tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory.
|
||||||
|
// Returns 0 on failure.
|
||||||
|
size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);
|
||||||
|
|
||||||
|
// Compresses an image to a compressed PNG file in memory.
|
||||||
|
// On entry:
|
||||||
|
// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4.
|
||||||
|
// The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory.
|
||||||
|
// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL
|
||||||
|
// If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps).
|
||||||
|
// On return:
|
||||||
|
// Function returns a pointer to the compressed data, or NULL on failure.
|
||||||
|
// *pLen_out will be set to the size of the PNG image file.
|
||||||
|
// The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed.
|
||||||
|
void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
|
||||||
|
void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);
|
||||||
|
|
||||||
|
// Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time.
|
||||||
|
typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void *pUser);
|
||||||
|
|
||||||
|
// tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally.
|
||||||
|
mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||||
|
|
||||||
|
enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 };
|
||||||
|
|
||||||
|
// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes).
|
||||||
|
#if TDEFL_LESS_MEMORY
|
||||||
|
enum { TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 12, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
|
||||||
|
#else
|
||||||
|
enum { TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13 ) / 10, TDEFL_MAX_HUFF_SYMBOLS = 288, TDEFL_LZ_HASH_BITS = 15, TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
TDEFL_STATUS_BAD_PARAM = -2,
|
||||||
|
TDEFL_STATUS_PUT_BUF_FAILED = -1,
|
||||||
|
TDEFL_STATUS_OKAY = 0,
|
||||||
|
TDEFL_STATUS_DONE = 1,
|
||||||
|
} tdefl_status;
|
||||||
|
|
||||||
|
// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
|
||||||
|
typedef enum
|
||||||
|
{
|
||||||
|
TDEFL_NO_FLUSH = 0,
|
||||||
|
TDEFL_SYNC_FLUSH = 2,
|
||||||
|
TDEFL_FULL_FLUSH = 3,
|
||||||
|
TDEFL_FINISH = 4
|
||||||
|
} tdefl_flush;
|
||||||
|
|
||||||
|
// tdefl's compression state structure.
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
tdefl_put_buf_func_ptr m_pPut_buf_func;
|
||||||
|
void *m_pPut_buf_user;
|
||||||
|
mz_uint m_flags, m_max_probes[2];
|
||||||
|
int m_greedy_parsing;
|
||||||
|
mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size;
|
||||||
|
mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end;
|
||||||
|
mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer;
|
||||||
|
mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish;
|
||||||
|
tdefl_status m_prev_return_status;
|
||||||
|
const void *m_pIn_buf;
|
||||||
|
void *m_pOut_buf;
|
||||||
|
size_t *m_pIn_buf_size, *m_pOut_buf_size;
|
||||||
|
tdefl_flush m_flush;
|
||||||
|
const mz_uint8 *m_pSrc;
|
||||||
|
size_t m_src_buf_left, m_out_buf_ofs;
|
||||||
|
mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
|
||||||
|
mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||||
|
mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||||
|
mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
|
||||||
|
mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
|
||||||
|
mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
|
||||||
|
mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
|
||||||
|
mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
|
||||||
|
} tdefl_compressor;
|
||||||
|
|
||||||
|
// Initializes the compressor.
|
||||||
|
// There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory.
|
||||||
|
// pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression.
|
||||||
|
// If pPut_buf_func is NULL the user should always call the tdefl_compress() API.
|
||||||
|
// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.)
|
||||||
|
tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);
|
||||||
|
|
||||||
|
// Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible.
|
||||||
|
tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);
|
||||||
|
|
||||||
|
// tdefl_compress_buffer() is only usable when tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr.
|
||||||
|
// tdefl_compress_buffer() always consumes the entire input buffer.
|
||||||
|
tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);
|
||||||
|
|
||||||
|
tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
|
||||||
|
mz_uint32 tdefl_get_adler32(tdefl_compressor *d);
|
||||||
|
|
||||||
|
// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS is defined, because it uses some of the zlib API's macros.
|
||||||
|
#ifndef MINIZ_NO_ZLIB_APIS
|
||||||
|
// Create tdefl_compress() flags given zlib-style compression parameters.
|
||||||
|
// level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files)
|
||||||
|
// window_bits may be -15 (raw deflate) or 15 (zlib)
|
||||||
|
// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED
|
||||||
|
mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);
|
||||||
|
#endif // #ifndef MINIZ_NO_ZLIB_APIS
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // MINIZ_HEADER_INCLUDED
|
||||||
@@ -65,3 +65,6 @@ add_subdirectory(select)
|
|||||||
add_subdirectory(thread_dimensions)
|
add_subdirectory(thread_dimensions)
|
||||||
add_subdirectory(vec_align)
|
add_subdirectory(vec_align)
|
||||||
add_subdirectory(vec_step)
|
add_subdirectory(vec_step)
|
||||||
|
|
||||||
|
# Add any extension folders
|
||||||
|
add_subdirectory(spir)
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id de
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height) {
|
int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height, size_t* max_size) {
|
||||||
size_t max_width, max_height, num_pixels, found_width, found_height;
|
size_t max_width, max_height, num_pixels, found_width, found_height;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
@@ -48,34 +48,44 @@ int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t
|
|||||||
|
|
||||||
num_pixels = size_to_allocate / (sizeof(cl_uint)*4);
|
num_pixels = size_to_allocate / (sizeof(cl_uint)*4);
|
||||||
|
|
||||||
if (num_pixels > (max_width*max_height))
|
if (num_pixels > (max_width*max_height)) {
|
||||||
|
if(NULL != max_size) {
|
||||||
|
*max_size = max_width * max_height * sizeof(cl_uint) * 4;
|
||||||
|
}
|
||||||
return FAILED_TOO_BIG;
|
return FAILED_TOO_BIG;
|
||||||
|
}
|
||||||
|
|
||||||
// We want a close-to-square aspect ratio.
|
// We want a close-to-square aspect ratio.
|
||||||
// Note that this implicitly assumes that max width >= max height
|
// Note that this implicitly assumes that max width >= max height
|
||||||
found_width = (int)sqrt( (double) num_pixels );
|
found_width = (int)sqrt( (double) num_pixels );
|
||||||
if (found_width == 0)
|
|
||||||
found_width = 1;
|
|
||||||
if( found_width > max_width ) {
|
if( found_width > max_width ) {
|
||||||
found_width = max_width;
|
found_width = max_width;
|
||||||
}
|
}
|
||||||
|
if (found_width == 0)
|
||||||
|
found_width = 1;
|
||||||
|
|
||||||
found_height = (size_t)num_pixels/found_width;
|
found_height = (size_t)num_pixels/found_width;
|
||||||
if (found_height > max_height) {
|
if (found_height > max_height) {
|
||||||
found_height = max_height;
|
found_height = max_height;
|
||||||
}
|
}
|
||||||
|
if (found_height == 0)
|
||||||
|
found_height = 1;
|
||||||
|
|
||||||
*width = found_width;
|
*width = found_width;
|
||||||
*height = found_height;
|
*height = found_height;
|
||||||
|
|
||||||
|
if(NULL != max_size) {
|
||||||
|
*max_size = found_width * found_height * sizeof(cl_uint) * 4;
|
||||||
|
}
|
||||||
|
|
||||||
return SUCCEEDED;
|
return SUCCEEDED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
|
int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
|
||||||
size_t width, height;
|
size_t width, height;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
error = find_good_image_size(device_id, size_to_allocate, &width, &height);
|
error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
|
||||||
if (error != SUCCEEDED)
|
if (error != SUCCEEDED)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
@@ -91,7 +101,7 @@ int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_devic
|
|||||||
size_t width, height;
|
size_t width, height;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
error = find_good_image_size(device_id, size_to_allocate, &width, &height);
|
error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
|
||||||
if (error != SUCCEEDED)
|
if (error != SUCCEEDED)
|
||||||
return error;
|
return error;
|
||||||
|
|
||||||
@@ -121,7 +131,6 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
|||||||
int error, result;
|
int error, result;
|
||||||
size_t amount_allocated;
|
size_t amount_allocated;
|
||||||
size_t reduction_amount;
|
size_t reduction_amount;
|
||||||
size_t min_allocation_allowed;
|
|
||||||
int current_allocation;
|
int current_allocation;
|
||||||
size_t allocation_this_time, actual_allocation;
|
size_t allocation_this_time, actual_allocation;
|
||||||
|
|
||||||
@@ -129,13 +138,17 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
|||||||
*number_of_mems = 0;
|
*number_of_mems = 0;
|
||||||
|
|
||||||
error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
|
error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
|
||||||
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
|
test_error_abort(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
|
||||||
error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
|
error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
|
||||||
test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
|
test_error_abort(error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
|
||||||
|
|
||||||
// log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
|
if (global_mem_size > (cl_ulong)SIZE_MAX) {
|
||||||
// max_individual_allocation_size, toMB(max_individual_allocation_size),
|
global_mem_size = (cl_ulong)SIZE_MAX;
|
||||||
// global_mem_size, toMB(global_mem_size));
|
}
|
||||||
|
|
||||||
|
// log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
|
||||||
|
// max_individual_allocation_size, toMB(max_individual_allocation_size),
|
||||||
|
// global_mem_size, toMB(global_mem_size));
|
||||||
|
|
||||||
if (size_to_allocate > global_mem_size) {
|
if (size_to_allocate > global_mem_size) {
|
||||||
log_error("Can not allocate more than the global memory size.\n");
|
log_error("Can not allocate more than the global memory size.\n");
|
||||||
@@ -144,18 +157,32 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
|||||||
|
|
||||||
amount_allocated = 0;
|
amount_allocated = 0;
|
||||||
current_allocation = 0;
|
current_allocation = 0;
|
||||||
reduction_amount = (size_t)max_individual_allocation_size/16;
|
|
||||||
min_allocation_allowed = (size_t)max_individual_allocation_size/4;
|
// If allocating for images, reduce the maximum allocation size to the maximum image size.
|
||||||
if (min_allocation_allowed > size_to_allocate)
|
// If we don't do this, then the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 can be higher
|
||||||
min_allocation_allowed = size_to_allocate/4;
|
// than the maximum image size on systems with 16GB of RAM or more. In this case, we
|
||||||
|
// succeed in allocating an image but its size is less than CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4
|
||||||
|
// (min_allocation_allowed) and thus we fail the allocation below.
|
||||||
|
if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) {
|
||||||
|
size_t width;
|
||||||
|
size_t height;
|
||||||
|
size_t max_size;
|
||||||
|
error = find_good_image_size(device_id, size_to_allocate, &width, &height, &max_size);
|
||||||
|
if (!(error == SUCCEEDED || error == FAILED_TOO_BIG))
|
||||||
|
return error;
|
||||||
|
if (max_size < max_individual_allocation_size)
|
||||||
|
max_individual_allocation_size = max_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
reduction_amount = (size_t)max_individual_allocation_size / 16;
|
||||||
|
|
||||||
if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
|
if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
|
||||||
else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
|
else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
|
||||||
else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));
|
else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));
|
||||||
|
|
||||||
// log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n",
|
// log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n",
|
||||||
// toMB(reduction_amount), toMB(min_allocation_allowed));
|
// toMB(reduction_amount), toMB(min_allocation_allowed));
|
||||||
// if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n");
|
// if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n");
|
||||||
|
|
||||||
// If we are only doing a single allocation, only allow 1
|
// If we are only doing a single allocation, only allow 1
|
||||||
int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;
|
int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;
|
||||||
@@ -168,10 +195,10 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
|||||||
|
|
||||||
cl_uint max_image_args;
|
cl_uint max_image_args;
|
||||||
error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
|
error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
|
||||||
test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
|
test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
|
||||||
|
|
||||||
if ((int)max_image_args < max_to_allocate) {
|
if ((int)max_image_args < max_to_allocate) {
|
||||||
log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args);
|
log_info("\t\tMaximum number of images per kernel limited to %d\n", (int)max_image_args);
|
||||||
max_to_allocate = max_image_args;
|
max_to_allocate = max_image_args;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -179,24 +206,32 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
|||||||
|
|
||||||
// Try to allocate the requested amount.
|
// Try to allocate the requested amount.
|
||||||
while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {
|
while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {
|
||||||
|
|
||||||
|
// Determine how much more is needed
|
||||||
allocation_this_time = size_to_allocate - amount_allocated;
|
allocation_this_time = size_to_allocate - amount_allocated;
|
||||||
|
|
||||||
|
// Bound by the individual allocation size
|
||||||
if (allocation_this_time > max_individual_allocation_size)
|
if (allocation_this_time > max_individual_allocation_size)
|
||||||
allocation_this_time = (size_t)max_individual_allocation_size;
|
allocation_this_time = (size_t)max_individual_allocation_size;
|
||||||
|
|
||||||
// Try to allocate a chunk of memory
|
// Allocate the largest object possible
|
||||||
result = FAILED_TOO_BIG;
|
result = FAILED_TOO_BIG;
|
||||||
//log_info("\t\tTrying sub-allocation %d at size %gMB.\n", current_allocation, toMB(allocation_this_time));
|
//log_info("\t\tTrying sub-allocation %d at size %gMB.\n", current_allocation, toMB(allocation_this_time));
|
||||||
while (result == FAILED_TOO_BIG && allocation_this_time != 0) {
|
while (result == FAILED_TOO_BIG && allocation_this_time != 0) {
|
||||||
|
|
||||||
|
// Create the object
|
||||||
result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
|
result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
|
||||||
if (result == SUCCEEDED) {
|
if (result == SUCCEEDED) {
|
||||||
// Allocation succeeded, another memory object was added to the array
|
// Allocation succeeded, another memory object was added to the array
|
||||||
*number_of_mems = (current_allocation+1);
|
*number_of_mems = (current_allocation + 1);
|
||||||
|
|
||||||
// Verify the size is correct to within 1MB.
|
// Verify the size is correct to within 1MB.
|
||||||
actual_allocation = get_actual_allocation_size(mems[current_allocation]);
|
actual_allocation = get_actual_allocation_size(mems[current_allocation]);
|
||||||
if (fabs((double)(allocation_this_time - actual_allocation)) > 1024.0*1024.0) {
|
if (fabs((double)allocation_this_time - (double)actual_allocation) > 1024.0*1024.0) {
|
||||||
log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation));
|
log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB(actual_allocation));
|
||||||
return FAILED_ABORT;
|
return FAILED_ABORT;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are filling the allocation for verification do so
|
// If we are filling the allocation for verification do so
|
||||||
if (force_fill) {
|
if (force_fill) {
|
||||||
//log_info("\t\t\tWriting random values to object and calculating checksum.\n");
|
//log_info("\t\t\tWriting random values to object and calculating checksum.\n");
|
||||||
@@ -207,10 +242,15 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
|||||||
result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
|
result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If creation failed, try to create a smaller object
|
||||||
if (result == FAILED_TOO_BIG) {
|
if (result == FAILED_TOO_BIG) {
|
||||||
//log_info("\t\t\tAllocation %d failed at size %gMB. Trying smaller.\n", current_allocation, toMB(allocation_this_time));
|
//log_info("\t\t\tAllocation %d failed at size %gMB. Trying smaller.\n", current_allocation, toMB(allocation_this_time));
|
||||||
if (allocation_this_time > reduction_amount)
|
if (allocation_this_time > reduction_amount)
|
||||||
allocation_this_time -= reduction_amount;
|
allocation_this_time -= reduction_amount;
|
||||||
|
else if (reduction_amount > 1) {
|
||||||
|
reduction_amount /= 2;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
allocation_this_time = 0;
|
allocation_this_time = 0;
|
||||||
}
|
}
|
||||||
@@ -223,8 +263,8 @@ int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id devi
|
|||||||
return FAILED_ABORT;
|
return FAILED_ABORT;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allocation_this_time < min_allocation_allowed && allocation_this_time < (size_to_allocate-amount_allocated)) {
|
if (!allocation_this_time) {
|
||||||
log_info("\t\tFailed to allocate an individual allocation of more than %gMB.\n", toMB(min_allocation_allowed));
|
log_info("\t\tFailed to allocate %gMB across several objects.\n", toMB(size_to_allocate));
|
||||||
return FAILED_TOO_BIG;
|
return FAILED_TOO_BIG;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ add_executable(conformance_test_api
|
|||||||
test_platform.cpp
|
test_platform.cpp
|
||||||
test_retain.cpp
|
test_retain.cpp
|
||||||
test_device_min_data_type_align_size_alignment.cpp
|
test_device_min_data_type_align_size_alignment.cpp
|
||||||
|
test_queue_properties.cpp
|
||||||
test_mem_objects.cpp
|
test_mem_objects.cpp
|
||||||
test_bool.c
|
test_bool.c
|
||||||
test_null_buffer_arg.c
|
test_null_buffer_arg.c
|
||||||
|
|||||||
@@ -112,6 +112,7 @@ basefn basefn_list[] = {
|
|||||||
test_get_image1d_info,
|
test_get_image1d_info,
|
||||||
test_get_image1d_array_info,
|
test_get_image1d_array_info,
|
||||||
test_get_image2d_array_info,
|
test_get_image2d_array_info,
|
||||||
|
test_queue_properties,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -199,7 +200,7 @@ const char *basefn_names[] = {
|
|||||||
"get_image1d_info",
|
"get_image1d_info",
|
||||||
"get_image1d_array_info",
|
"get_image1d_array_info",
|
||||||
"get_image2d_array_info",
|
"get_image2d_array_info",
|
||||||
|
"queue_properties",
|
||||||
"all",
|
"all",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -105,4 +105,5 @@ extern int test_get_image1d_info( cl_device_id deviceID, cl_context context
|
|||||||
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
extern int test_get_image1d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||||
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
extern int test_get_image2d_array_info( cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements );
|
||||||
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
extern int test_get_kernel_arg_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||||
|
extern int test_queue_properties( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements );
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
174
test_conformance/api/test_queue_properties.cpp
Normal file
174
test_conformance/api/test_queue_properties.cpp
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
//
|
||||||
|
// Copyright (c) 2018 The Khronos Group Inc.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
|
||||||
|
|
||||||
|
#include "testBase.h"
|
||||||
|
#include "../../test_common/harness/typeWrappers.h"
|
||||||
|
#include "../../test_common/harness/conversions.h"
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
/*
|
||||||
|
Test for the cl_khr_create_command_queue extension. It validates that devices with OpenCL 1.X can use the clCreateCommandQueueWithPropertiesKHR function.
|
||||||
|
Based on device capabilities, the test creates queues with NULL properties, the CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE property and
|
||||||
|
the CL_QUEUE_PROFILING_ENABLE property. Finally, a simple kernel is executed on each such queue.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// OpenCL C source of the kernel executed on every queue under test: each
// work-item copies a single int from src to dst at its global id.
const char *queue_test_kernel[] = {
    "__kernel void vec_cpy(__global int *src, __global int *dst)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    " dst[tid] = src[tid];\n"
    "\n"
    "}\n"
};
|
||||||
|
|
||||||
|
// Creates a command queue through clCreateCommandQueueWithPropertiesKHR with
// the given property list, runs the supplied kernel on it and verifies that
// the destination buffer ends up holding a copy of the source data.
//
//   context         context in which the queue and the buffers are created
//   queue_prop_def  property list handed to the queue creation call (may be NULL)
//   deviceID        device the queue is created for
//   kernel          kernel whose arguments 0 and 1 are set to the source and
//                   destination buffers
//   num_elements    number of int elements to copy; also used as the global
//                   work size
//
// Returns 0 on success and -1 when the extension entry point cannot be
// resolved or the copied data does not match. The test_error checks are
// defined elsewhere; they are expected to return early on an OpenCL error.
int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop_def, cl_device_id deviceID, clKernelWrapper& kernel, size_t num_elements)
{
    clMemWrapper streams[2];
    int error;
    // Source data. It backs streams[0] through CL_MEM_USE_HOST_PTR, so it
    // must not be used as the read-back target.
    std::vector<int> input(num_elements);
    // Read-back target, pre-filled with a value that never occurs in the
    // input so elements that were not written are detected.
    std::vector<int> output(num_elements, -1);
    const size_t buffer_size = num_elements * sizeof(int);
    clCreateCommandQueueWithPropertiesKHR_fn clCreateCommandQueueWithPropertiesKHR = NULL;
    cl_platform_id platform;
    clEventWrapper event;

    error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");

    // The entry point is an extension function, so it has to be looked up at run time.
    clCreateCommandQueueWithPropertiesKHR = (clCreateCommandQueueWithPropertiesKHR_fn) clGetExtensionFunctionAddressForPlatform(platform, "clCreateCommandQueueWithPropertiesKHR");
    if (clCreateCommandQueueWithPropertiesKHR == NULL)
    {
        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
        return -1;
    }

    clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error);
    test_error(error, "clCreateCommandQueueWithPropertiesKHR failed");

    for (size_t i = 0; i < num_elements; ++i)
    {
        input[i] = (int)i;
    }

    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, buffer_size, input.data(), &error);
    test_error( error, "clCreateBuffer failed." );
    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, buffer_size, NULL, &error);
    test_error( error, "clCreateBuffer failed." );

    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
    test_error( error, "clSetKernelArg failed." );

    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
    test_error( error, "clSetKernelArg failed." );

    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &num_elements, NULL, 0, NULL, &event);
    test_error( error, "clEnqueueNDRangeKernel failed." );

    // Wait explicitly: the queue may have been created out-of-order, in which
    // case the read below is not implicitly ordered after the kernel.
    error = clWaitForEvents(1, &event);
    test_error(error, "clWaitForEvents failed.");

    // The size argument is in bytes, not elements.
    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, buffer_size, output.data(), 0, NULL, NULL);
    test_error( error, "clEnqueueReadBuffer failed." );

    for (size_t i = 0; i < num_elements; ++i)
    {
        if (output[i] != (int)i)
        {
            log_error("ERROR: Incorrect vector copy result.\n");
            return -1;
        }
    }

    return 0;
}
|
||||||
|
|
||||||
|
// Conformance test for the cl_khr_create_command_queue extension.
//
// When the device does not report the extension the test logs that fact and
// returns 0. Otherwise it builds the vec_cpy kernel and runs it through
// enqueue_kernel on a queue created with NULL properties, and then once for
// each queue property combination the device reports as supported
// (out-of-order execution, profiling, and both together).
//
//   deviceID      device under test
//   context       context used to build the kernel and create the queues
//   queue         unused; every queue used here is created by enqueue_kernel
//   num_elements  number of elements to copy; values <= 0 fall back to 128
//
// Returns 0 on success. The test_error checks are defined elsewhere; they are
// expected to return early when a step fails.
int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    if (num_elements <= 0)
    {
        num_elements = 128;
    }
    int error = 0;

    clProgramWrapper program;
    clKernelWrapper kernel;
    size_t strSize;
    std::string strExt(0, '\0');
    // CL_DEVICE_QUEUE_PROPERTIES is a bitfield of cl_command_queue_properties;
    // it is an integer type, so it is zero-initialised rather than set to NULL.
    cl_command_queue_properties device_props = 0;
    // Property list: { name, value, terminating 0 }. The value slot is
    // rewritten for each configuration below.
    cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 };

    // Query extension
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, 0, NULL, &strSize);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    strExt.resize(strSize);
    error = clGetDeviceInfo(deviceID, CL_DEVICE_EXTENSIONS, strExt.size(), &strExt[0], NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_EXTENSIONS failed");
    log_info("CL_DEVICE_EXTENSIONS:\n%s\n\n", strExt.c_str());

    if (strExt.find("cl_khr_create_command_queue") == string::npos)
    {
        log_info("extension cl_khr_create_command_queue is not supported.\n");
        return 0;
    }

    error = create_single_kernel_helper(context, &program, &kernel, 1, queue_test_kernel, "vec_cpy");
    test_error(error, "create_single_kernel_helper failed");

    log_info("Queue property NULL. Testing ... \n");
    error = enqueue_kernel(context, NULL, deviceID, kernel, (size_t)num_elements);
    test_error(error, "enqueue_kernel failed");

    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES, sizeof(device_props), &device_props, NULL);
    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");

    const bool has_out_of_order = (device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
    const bool has_profiling = (device_props & CL_QUEUE_PROFILING_ENABLE) != 0;

    if (has_out_of_order)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE not supported \n");
    }

    if (has_profiling)
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
    }

    if (has_out_of_order && has_profiling)
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE & CL_QUEUE_PROFILING_ENABLE supported. Testing ... \n");
        queue_prop_def[1] = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE;
        error = enqueue_kernel(context, queue_prop_def, deviceID, kernel, (size_t)num_elements);
        test_error(error, "enqueue_kernel failed");
    }
    else
    {
        log_info("Queue property CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE or CL_QUEUE_PROFILING_ENABLE not supported \n");
    }

    return 0;
}
|
||||||
0
test_conformance/basic/run_array
Normal file → Executable file
0
test_conformance/basic/run_array
Normal file → Executable file
0
test_conformance/basic/run_array_image_copy
Normal file → Executable file
0
test_conformance/basic/run_array_image_copy
Normal file → Executable file
0
test_conformance/basic/run_image
Normal file → Executable file
0
test_conformance/basic/run_image
Normal file → Executable file
0
test_conformance/basic/run_multi_read_image
Normal file → Executable file
0
test_conformance/basic/run_multi_read_image
Normal file → Executable file
@@ -207,13 +207,12 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
|
|||||||
log_error( "ERROR: Results of copy did not validate!\n" );
|
log_error( "ERROR: Results of copy did not validate!\n" );
|
||||||
sprintf(values + strlen( values), "%d -> [", i);
|
sprintf(values + strlen( values), "%d -> [", i);
|
||||||
for (int j=0; j<(int)elementSize; j++)
|
for (int j=0; j<(int)elementSize; j++)
|
||||||
sprintf(values + strlen( values), "%2x ", inchar[i*elementSize+j]);
|
sprintf(values + strlen( values), "%2x ", inchar[j]);
|
||||||
sprintf(values + strlen(values), "] != [");
|
sprintf(values + strlen(values), "] != [");
|
||||||
for (int j=0; j<(int)elementSize; j++)
|
for (int j=0; j<(int)elementSize; j++)
|
||||||
sprintf(values + strlen( values), "%2x ", outchar[i*elementSize+j]);
|
sprintf(values + strlen( values), "%2x ", outchar[j]);
|
||||||
sprintf(values + strlen(values), "]");
|
sprintf(values + strlen(values), "]");
|
||||||
log_error("%s\n", values);
|
log_error("%s\n", values);
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -126,7 +126,7 @@ const size_table vector_table[] =
|
|||||||
|
|
||||||
const char *ptr_table[] =
|
const char *ptr_table[] =
|
||||||
{
|
{
|
||||||
"void*",
|
"global void*",
|
||||||
"size_t",
|
"size_t",
|
||||||
"sizeof(int)", // check return type of sizeof
|
"sizeof(int)", // check return type of sizeof
|
||||||
"ptrdiff_t"
|
"ptrdiff_t"
|
||||||
@@ -356,7 +356,7 @@ int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue,
|
|||||||
err = get_type_size( context, queue, "half", &test );
|
err = get_type_size( context, queue, "half", &test );
|
||||||
if( err )
|
if( err )
|
||||||
return err;
|
return err;
|
||||||
if( test != 8 )
|
if( test != 2 )
|
||||||
{
|
{
|
||||||
log_error( "\nFAILED: half has size %lld, but must be 2!\n", test );
|
log_error( "\nFAILED: half has size %lld, but must be 2!\n", test );
|
||||||
return -1;
|
return -1;
|
||||||
|
|||||||
@@ -143,8 +143,8 @@ const char* compile_static_var = "static constant float foo = 2.78;\n";
|
|||||||
const char* compile_static_struct = "static constant struct bar {float x, y, z, r; int color; } foo = {3.14159};\n";
|
const char* compile_static_struct = "static constant struct bar {float x, y, z, r; int color; } foo = {3.14159};\n";
|
||||||
const char* compile_static_function = "static int foo(int x, int y) { return x*x + y*y; }\n";
|
const char* compile_static_function = "static int foo(int x, int y) { return x*x + y*y; }\n";
|
||||||
|
|
||||||
const char* compile_regular_var = "constant float foo;\n";
|
const char* compile_regular_var = "constant float foo = 4.0f;\n";
|
||||||
const char* compile_regular_struct = "constant struct bar {float x, y, z, r; int color; } foo;\n";
|
const char* compile_regular_struct = "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, 0.f, 0};\n";
|
||||||
const char* compile_regular_function = "int foo(int x, int y) { return x*x + y*y; }\n";
|
const char* compile_regular_function = "int foo(int x, int y) { return x*x + y*y; }\n";
|
||||||
|
|
||||||
const char* link_static_var_access = // use with compile_static_var
|
const char* link_static_var_access = // use with compile_static_var
|
||||||
|
|||||||
@@ -34,6 +34,7 @@ const char *known_extensions[] = {
|
|||||||
"cl_khr_3d_image_writes",
|
"cl_khr_3d_image_writes",
|
||||||
"cl_khr_byte_addressable_store",
|
"cl_khr_byte_addressable_store",
|
||||||
"cl_khr_fp16",
|
"cl_khr_fp16",
|
||||||
|
"cl_khr_spir",
|
||||||
|
|
||||||
//API-only extensions after this point. If you add above here, modify first_API_extension below.
|
//API-only extensions after this point. If you add above here, modify first_API_extension below.
|
||||||
"cl_khr_gl_sharing",
|
"cl_khr_gl_sharing",
|
||||||
@@ -42,13 +43,20 @@ const char *known_extensions[] = {
|
|||||||
"cl_khr_icd",
|
"cl_khr_icd",
|
||||||
"cl_khr_egl_image",
|
"cl_khr_egl_image",
|
||||||
"cl_khr_egl_event",
|
"cl_khr_egl_event",
|
||||||
|
"cl_khr_create_command_queue",
|
||||||
|
"cl_khr_priority_hints",
|
||||||
|
"cl_khr_throttle_hints",
|
||||||
|
"cl_khr_il_program",
|
||||||
|
"cl_khr_mipmap_image",
|
||||||
|
"cl_khr_mipmap_image_writes",
|
||||||
};
|
};
|
||||||
|
|
||||||
size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);
|
size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*);
|
||||||
size_t first_API_extension = 10;
|
size_t first_API_extension = 11;
|
||||||
|
|
||||||
const char *known_embedded_extensions[] = {
|
const char *known_embedded_extensions[] = {
|
||||||
"cles_khr_int64",
|
"cles_khr_int64",
|
||||||
|
"cles_khr_2d_image_array_writes",
|
||||||
NULL
|
NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -56,7 +56,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
|
||||||
#include <xmmintrin.h>
|
#include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__PPC__)
|
#if defined(__PPC__)
|
||||||
@@ -396,6 +396,8 @@ static void PrintArch( void )
|
|||||||
vlog( "\tARCH:\tx86_64\n" );
|
vlog( "\tARCH:\tx86_64\n" );
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ )
|
||||||
vlog( "\tARCH:\tarm\n" );
|
vlog( "\tARCH:\tarm\n" );
|
||||||
|
#elif defined( __aarch64__ )
|
||||||
|
vlog( "\tARCH:\taarch64\n" );
|
||||||
#else
|
#else
|
||||||
vlog( "\tARCH:\tunknown\n" );
|
vlog( "\tARCH:\tunknown\n" );
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -17,9 +17,9 @@ add_executable(conformance_test_conversions
|
|||||||
../../test_common/harness/msvc9.c
|
../../test_common/harness/msvc9.c
|
||||||
../../test_common/harness/mingw_compat.c
|
../../test_common/harness/mingw_compat.c
|
||||||
../../test_common/harness/errorHelpers.c
|
../../test_common/harness/errorHelpers.c
|
||||||
|
../../test_common/harness/parseParameters.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
if(WIN32)
|
|
||||||
set_source_files_properties(
|
set_source_files_properties(
|
||||||
Sleep.c test_conversions.c basic_test_conversions.c
|
Sleep.c test_conversions.c basic_test_conversions.c
|
||||||
../../test_common/harness/ThreadPool.c
|
../../test_common/harness/ThreadPool.c
|
||||||
@@ -28,7 +28,6 @@ set_source_files_properties(
|
|||||||
../../test_common/harness/msvc9.c
|
../../test_common/harness/msvc9.c
|
||||||
../../test_common/harness/errorHelpers.c
|
../../test_common/harness/errorHelpers.c
|
||||||
PROPERTIES LANGUAGE CXX)
|
PROPERTIES LANGUAGE CXX)
|
||||||
endif(WIN32)
|
|
||||||
|
|
||||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)")
|
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)")
|
||||||
if(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)
|
if(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)
|
||||||
|
|||||||
@@ -743,16 +743,11 @@ static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((
|
|||||||
static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; }
|
static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; }
|
||||||
static void ulong2float( void *out, void *in)
|
static void ulong2float( void *out, void *in)
|
||||||
{
|
{
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER) && defined(_M_X64)
|
||||||
cl_ulong l = ((cl_ulong*) in)[0];
|
cl_ulong l = ((cl_ulong*) in)[0];
|
||||||
float result;
|
float result;
|
||||||
|
|
||||||
cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
|
cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
|
||||||
#if defined(_M_X64)
|
|
||||||
_mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
|
_mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
|
||||||
#else
|
|
||||||
result = sl;
|
|
||||||
#endif
|
|
||||||
((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
|
((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
|
||||||
#else
|
#else
|
||||||
cl_ulong l = ((cl_ulong*) in)[0];
|
cl_ulong l = ((cl_ulong*) in)[0];
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
#include "../../test_common/harness/compat.h"
|
#include "../../test_common/harness/compat.h"
|
||||||
#include "../../test_common/harness/rounding_mode.h"
|
#include "../../test_common/harness/rounding_mode.h"
|
||||||
#include "../../test_common/harness/ThreadPool.h"
|
#include "../../test_common/harness/ThreadPool.h"
|
||||||
|
#include "../../test_common/harness/parseParameters.h"
|
||||||
#if defined (_WIN32)
|
#if defined (_WIN32)
|
||||||
#define MAX(x,y) ((x>y)?x:y);
|
#define MAX(x,y) ((x>y)?x:y);
|
||||||
#define MIN(x,y) ((x<y)?x:y);
|
#define MIN(x,y) ((x<y)?x:y);
|
||||||
@@ -103,6 +104,7 @@ cl_mem gOutBuffers[ kCallStyleCount ];
|
|||||||
size_t gComputeDevices = 0;
|
size_t gComputeDevices = 0;
|
||||||
uint32_t gDeviceFrequency = 0;
|
uint32_t gDeviceFrequency = 0;
|
||||||
int gWimpyMode = 0;
|
int gWimpyMode = 0;
|
||||||
|
int gWimpyReductionFactor = 128;
|
||||||
int gSkipTesting = 0;
|
int gSkipTesting = 0;
|
||||||
int gForceFTZ = 0;
|
int gForceFTZ = 0;
|
||||||
int gMultithread = 1;
|
int gMultithread = 1;
|
||||||
@@ -414,6 +416,9 @@ static int ParseArgs( int argc, const char **argv )
|
|||||||
case 'w':
|
case 'w':
|
||||||
gWimpyMode ^= 1;
|
gWimpyMode ^= 1;
|
||||||
break;
|
break;
|
||||||
|
case '[':
|
||||||
|
parseWimpyReductionFactor(arg, gWimpyReductionFactor);
|
||||||
|
break;
|
||||||
case 'z':
|
case 'z':
|
||||||
gForceFTZ ^= 1;
|
gForceFTZ ^= 1;
|
||||||
break;
|
break;
|
||||||
@@ -509,6 +514,7 @@ static int ParseArgs( int argc, const char **argv )
|
|||||||
vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
|
vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
|
||||||
vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
|
vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
|
||||||
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
||||||
|
vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -535,6 +541,7 @@ static void PrintUsage( void )
|
|||||||
vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" );
|
vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" );
|
||||||
vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" );
|
vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" );
|
||||||
vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" );
|
vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" );
|
||||||
|
vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
|
||||||
vlog( "\t\t-z\tToggle flush to zero mode (Default: per device)\n" );
|
vlog( "\t\t-z\tToggle flush to zero mode (Default: per device)\n" );
|
||||||
vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" );
|
vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" );
|
||||||
vlog( "\n" );
|
vlog( "\n" );
|
||||||
@@ -556,6 +563,8 @@ static void PrintArch( void )
|
|||||||
vlog( "ARCH:\tx86_64\n" );
|
vlog( "ARCH:\tx86_64\n" );
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ )
|
||||||
vlog( "ARCH:\tarm\n" );
|
vlog( "ARCH:\tarm\n" );
|
||||||
|
#elif defined( __aarch64__ )
|
||||||
|
vlog( "ARCH:\taarch64\n" );
|
||||||
#elif defined (_WIN32)
|
#elif defined (_WIN32)
|
||||||
vlog( "ARCH:\tWindows\n" );
|
vlog( "ARCH:\tWindows\n" );
|
||||||
#else
|
#else
|
||||||
@@ -1136,6 +1145,7 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r
|
|||||||
|
|
||||||
gTestCount++;
|
gTestCount++;
|
||||||
size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] );
|
size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] );
|
||||||
|
size_t step = blockCount;
|
||||||
uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]);
|
uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]);
|
||||||
cl_event writeInputBuffer = NULL;
|
cl_event writeInputBuffer = NULL;
|
||||||
|
|
||||||
@@ -1194,15 +1204,12 @@ static int DoTest( Type outType, Type inType, SaturationMode sat, RoundingMode r
|
|||||||
if( 8*gTypeSizes[ inType ] > 32 )
|
if( 8*gTypeSizes[ inType ] > 32 )
|
||||||
lastCase = 0x100000000ULL;
|
lastCase = 0x100000000ULL;
|
||||||
|
|
||||||
|
if ( gWimpyMode )
|
||||||
|
step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
|
||||||
vlog( "Testing... " );
|
vlog( "Testing... " );
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
for( i = 0; i < (uint64_t)lastCase; i += blockCount )
|
for( i = 0; i < (uint64_t)lastCase; i += step )
|
||||||
{
|
{
|
||||||
if (gWimpyMode) {
|
|
||||||
uint64_t blockIndex = (i / blockCount) & 0xFF;
|
|
||||||
if (blockIndex != 0 && blockIndex != 0xFF)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( 0 == ( i & ((lastCase >> 3) -1))) {
|
if( 0 == ( i & ((lastCase >> 3) -1))) {
|
||||||
vlog(".");
|
vlog(".");
|
||||||
|
|||||||
@@ -200,7 +200,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
|
|||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
/* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */
|
/* Generate some streams. Note: deliberately do some random data in w to verify that it gets ignored */
|
||||||
for( i = 0; i < TEST_SIZE * vecsize; i++ )
|
for( i = 0; i < size * vecsize; i++ )
|
||||||
{
|
{
|
||||||
inDataA[ i ] = get_random_double( -512.f, 512.f, d );
|
inDataA[ i ] = get_random_double( -512.f, 512.f, d );
|
||||||
inDataB[ i ] = get_random_double( -512.f, 512.f, d );
|
inDataB[ i ] = get_random_double( -512.f, 512.f, d );
|
||||||
@@ -234,7 +234,7 @@ int test_geom_cross_double(cl_device_id deviceID, cl_context context, cl_command
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Run the kernel */
|
/* Run the kernel */
|
||||||
threads[0] = TEST_SIZE;
|
threads[0] = size;
|
||||||
|
|
||||||
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
|
||||||
test_error( error, "Unable to get work group size to use" );
|
test_error( error, "Unable to get work group size to use" );
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ static const char *kernelpattern_image_write_1Darray_half =
|
|||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
static const char *kernelpattern_image_write_3D =
|
static const char *kernelpattern_image_write_3D =
|
||||||
|
"#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n"
|
||||||
"__kernel void sample_test( __global %s4 *source, write_only image3d_t dest )\n"
|
"__kernel void sample_test( __global %s4 *source, write_only image3d_t dest )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int tidX = get_global_id(0);\n"
|
" int tidX = get_global_id(0);\n"
|
||||||
@@ -96,6 +97,7 @@ static const char *kernelpattern_image_write_3D =
|
|||||||
"}\n";
|
"}\n";
|
||||||
|
|
||||||
static const char *kernelpattern_image_write_3D_half =
|
static const char *kernelpattern_image_write_3D_half =
|
||||||
|
"#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable \n"
|
||||||
"__kernel void sample_test( __global half4 *source, write_only image3d_t dest )\n"
|
"__kernel void sample_test( __global half4 *source, write_only image3d_t dest )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
" int tidX = get_global_id(0);\n"
|
" int tidX = get_global_id(0);\n"
|
||||||
|
|||||||
@@ -4,16 +4,15 @@ add_executable(conformance_test_half
|
|||||||
../../test_common/harness/msvc9.c
|
../../test_common/harness/msvc9.c
|
||||||
../../test_common/harness/mingw_compat.c
|
../../test_common/harness/mingw_compat.c
|
||||||
../../test_common/harness/errorHelpers.c
|
../../test_common/harness/errorHelpers.c
|
||||||
|
../../test_common/harness/parseParameters.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
if(MSVC)
|
|
||||||
set_source_files_properties(
|
set_source_files_properties(
|
||||||
cl_utils.c Test_vLoadHalf.c Test_roundTrip.c
|
cl_utils.c Test_vLoadHalf.c Test_roundTrip.c
|
||||||
Test_vStoreHalf.c main.c
|
Test_vStoreHalf.c main.c
|
||||||
../../test_common/harness/mingw_compat.c
|
../../test_common/harness/mingw_compat.c
|
||||||
../../test_common/harness/msvc9.c
|
../../test_common/harness/msvc9.c
|
||||||
PROPERTIES LANGUAGE CXX)
|
PROPERTIES LANGUAGE CXX)
|
||||||
endif(MSVC)
|
|
||||||
TARGET_LINK_LIBRARIES(conformance_test_half
|
TARGET_LINK_LIBRARIES(conformance_test_half
|
||||||
${CLConform_LIBRARIES})
|
${CLConform_LIBRARIES})
|
||||||
|
|
||||||
|
|||||||
@@ -848,7 +848,7 @@ int Test_vStoreHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const c
|
|||||||
size_t stride = blockCount;
|
size_t stride = blockCount;
|
||||||
|
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
stride = 0x10000000U;
|
stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
|
||||||
|
|
||||||
// we handle 64-bit types a bit differently.
|
// we handle 64-bit types a bit differently.
|
||||||
if( lastCase == 0 )
|
if( lastCase == 0 )
|
||||||
@@ -1502,7 +1502,7 @@ int Test_vStoreaHalf_private( f2h referenceFunc, d2h doubleReferenceFunc, const
|
|||||||
size_t stride = blockCount;
|
size_t stride = blockCount;
|
||||||
|
|
||||||
if (gWimpyMode)
|
if (gWimpyMode)
|
||||||
stride = 0x10000000U;
|
stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
|
||||||
|
|
||||||
// we handle 64-bit types a bit differently.
|
// we handle 64-bit types a bit differently.
|
||||||
if( lastCase == 0 )
|
if( lastCase == 0 )
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ size_t gWorkGroupSize = 0;
|
|||||||
int gTestCount = 0;
|
int gTestCount = 0;
|
||||||
int gFailCount = 0;
|
int gFailCount = 0;
|
||||||
bool gWimpyMode = false;
|
bool gWimpyMode = false;
|
||||||
|
int gWimpyReductionFactor = 512;
|
||||||
int gTestDouble = 0;
|
int gTestDouble = 0;
|
||||||
uint32_t gDeviceIndex = 0;
|
uint32_t gDeviceIndex = 0;
|
||||||
|
|
||||||
|
|||||||
@@ -88,6 +88,7 @@ extern int gReportTimes;
|
|||||||
// size of 32 bit ranges to a much smaller set. This is meant to be used
|
// size of 32 bit ranges to a much smaller set. This is meant to be used
|
||||||
// as a smoke test
|
// as a smoke test
|
||||||
extern bool gWimpyMode;
|
extern bool gWimpyMode;
|
||||||
|
extern int gWimpyReductionFactor;
|
||||||
|
|
||||||
uint64_t ReadTime( void );
|
uint64_t ReadTime( void );
|
||||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "../../test_common/harness/mingw_compat.h"
|
#include "../../test_common/harness/mingw_compat.h"
|
||||||
|
#include "../../test_common/harness/parseParameters.h"
|
||||||
#if defined (__MINGW32__)
|
#if defined (__MINGW32__)
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#endif
|
#endif
|
||||||
@@ -217,7 +218,9 @@ static int ParseArgs( int argc, const char **argv )
|
|||||||
case 'w': // Wimpy mode
|
case 'w': // Wimpy mode
|
||||||
gWimpyMode = true;
|
gWimpyMode = true;
|
||||||
break;
|
break;
|
||||||
|
case '[':
|
||||||
|
parseWimpyReductionFactor( arg, gWimpyReductionFactor);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
|
vlog_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
|
||||||
PrintUsage();
|
PrintUsage();
|
||||||
@@ -253,6 +256,7 @@ static int ParseArgs( int argc, const char **argv )
|
|||||||
vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
|
vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
|
||||||
vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
|
vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
|
||||||
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
|
||||||
|
vlog( "*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -263,6 +267,7 @@ static void PrintUsage( void )
|
|||||||
vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" );
|
vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" );
|
||||||
vlog( "\t\t-t\tToggle reporting performance data.\n" );
|
vlog( "\t\t-t\tToggle reporting performance data.\n" );
|
||||||
vlog( "\t\t-w\tRun in wimpy mode\n" );
|
vlog( "\t\t-w\tRun in wimpy mode\n" );
|
||||||
|
vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
|
||||||
vlog( "\t\t-h\tHelp\n" );
|
vlog( "\t\t-h\tHelp\n" );
|
||||||
vlog( "\n" );
|
vlog( "\n" );
|
||||||
}
|
}
|
||||||
@@ -282,6 +287,8 @@ static void PrintArch( void )
|
|||||||
vlog( "ARCH:\tx86_64\n" );
|
vlog( "ARCH:\tx86_64\n" );
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ )
|
||||||
vlog( "ARCH:\tarm\n" );
|
vlog( "ARCH:\tarm\n" );
|
||||||
|
#elif defined( __aarch64__ )
|
||||||
|
vlog( "\tARCH:\taarch64\n" );
|
||||||
#else
|
#else
|
||||||
#error unknown arch
|
#error unknown arch
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -74,7 +74,6 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
|
|||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo = { 0 };
|
image_descriptor imageInfo = { 0 };
|
||||||
RandomSeed seed(gRandomSeed);
|
RandomSeed seed(gRandomSeed);
|
||||||
size_t rowPadding = gEnablePitch ? 48 : 0;
|
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
imageInfo.format = format;
|
imageInfo.format = format;
|
||||||
@@ -87,10 +86,15 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
@@ -119,6 +123,7 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
|
|||||||
|
|
||||||
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
imageInfo.width = sizes[ idx ][ 0 ];
|
imageInfo.width = sizes[ idx ][ 0 ];
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|
||||||
@@ -142,6 +147,7 @@ int test_copy_image_set_1D( cl_device_id device, cl_image_format *format )
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong size;
|
cl_ulong size;
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
|
|||||||
@@ -77,7 +77,6 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
|
|||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo = { 0 };
|
image_descriptor imageInfo = { 0 };
|
||||||
RandomSeed seed(gRandomSeed);
|
RandomSeed seed(gRandomSeed);
|
||||||
size_t rowPadding = gEnablePitch ? 48 : 0;
|
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
imageInfo.format = format;
|
imageInfo.format = format;
|
||||||
@@ -90,10 +89,15 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 1D array size from device" );
|
test_error( error, "Unable to get max image 1D array size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
@@ -126,6 +130,7 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
|
|||||||
|
|
||||||
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
imageInfo.width = sizes[ idx ][ 0 ];
|
imageInfo.width = sizes[ idx ][ 0 ];
|
||||||
imageInfo.arraySize = sizes[ idx ][ 2 ];
|
imageInfo.arraySize = sizes[ idx ][ 2 ];
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -151,6 +156,7 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_image_format *format )
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong size;
|
cl_ulong size;
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
|
|||||||
@@ -77,7 +77,6 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
|
|||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo = { 0 };
|
image_descriptor imageInfo = { 0 };
|
||||||
RandomSeed seed(gRandomSeed);
|
RandomSeed seed(gRandomSeed);
|
||||||
size_t rowPadding = gEnablePitch ? 48 : 0;
|
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
imageInfo.format = format;
|
imageInfo.format = format;
|
||||||
@@ -90,10 +89,15 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
@@ -125,6 +129,7 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
|
|||||||
|
|
||||||
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
imageInfo.width = sizes[ idx ][ 0 ];
|
imageInfo.width = sizes[ idx ][ 0 ];
|
||||||
imageInfo.height = sizes[ idx ][ 1 ];
|
imageInfo.height = sizes[ idx ][ 1 ];
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -149,6 +154,7 @@ int test_copy_image_set_2D( cl_device_id device, cl_image_format *format )
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong size;
|
cl_ulong size;
|
||||||
|
size_t rowPadding = gEnablePitch ? 48 : 0;
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
|
|||||||
@@ -142,8 +142,6 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
|
|||||||
image_descriptor srcImageInfo = { 0 };
|
image_descriptor srcImageInfo = { 0 };
|
||||||
image_descriptor dstImageInfo = { 0 };
|
image_descriptor dstImageInfo = { 0 };
|
||||||
RandomSeed seed( gRandomSeed );
|
RandomSeed seed( gRandomSeed );
|
||||||
size_t rowPadding = gEnablePitch ? 256 : 0;
|
|
||||||
size_t slicePadding = gEnablePitch ? 3 : 0;
|
|
||||||
|
|
||||||
srcImageInfo.format = dstImageInfo.format = format;
|
srcImageInfo.format = dstImageInfo.format = format;
|
||||||
|
|
||||||
@@ -154,6 +152,10 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
|
for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
|
||||||
@@ -162,6 +164,9 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
|
|||||||
{
|
{
|
||||||
for( dstImageInfo.arraySize = 4; dstImageInfo.arraySize < 9; dstImageInfo.arraySize++ )
|
for( dstImageInfo.arraySize = 4; dstImageInfo.arraySize < 9; dstImageInfo.arraySize++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 256 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.arraySize, rowPadding, slicePadding );
|
set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.arraySize, rowPadding, slicePadding );
|
||||||
set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding );
|
set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding );
|
||||||
if( gDebugTrace )
|
if( gDebugTrace )
|
||||||
@@ -185,18 +190,27 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
|
|||||||
else if( gTestMaxImages )
|
else if( gTestMaxImages )
|
||||||
{
|
{
|
||||||
// Try a specific set of maximum sizes
|
// Try a specific set of maximum sizes
|
||||||
size_t numbeOfSizes;
|
size_t numberOfSizes2DArray, numberOfSizes2D;
|
||||||
size_t sizes[100][3];
|
size_t sizes2DArray[100][3], sizes2D[100][3];
|
||||||
|
|
||||||
// Try to allocate a bit smaller images because we need the 2D ones as well for the copy.
|
// Try to allocate a bit smaller images because we need the 2D ones as well for the copy.
|
||||||
get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D_ARRAY, dstImageInfo.format);
|
get_max_sizes(&numberOfSizes2DArray, 100, sizes2DArray, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D_ARRAY, dstImageInfo.format);
|
||||||
|
get_max_sizes(&numberOfSizes2D, 100, sizes2D, maxWidth, maxHeight, 1, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D, dstImageInfo.format);
|
||||||
|
|
||||||
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
for( size_t i = 0; i < numberOfSizes2D; i++ )
|
||||||
{
|
{
|
||||||
set_image_dimensions( &dstImageInfo, sizes[ idx ][ 0 ], sizes[ idx ][ 1 ], sizes[ idx ][ 2 ], rowPadding, slicePadding );
|
for( size_t j = 0; j < numberOfSizes2DArray; j++ )
|
||||||
set_image_dimensions( &srcImageInfo, (size_t)random_in_range( (int)sizes[ idx ][ 0 ], (int)maxWidth/4, seed ), (size_t)random_in_range( (int)sizes[ idx ][ 1 ], (int)maxHeight/4, seed ), 0, rowPadding, slicePadding );
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 256 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
|
set_image_dimensions( &dstImageInfo, sizes2DArray[ j ][ 0 ], sizes2DArray[ j ][ 1 ], sizes2DArray[ j ][ 2 ], rowPadding, slicePadding );
|
||||||
|
set_image_dimensions( &srcImageInfo, sizes2D[ i ][ 0 ], sizes2D[ i ][ 1 ], 0, rowPadding, slicePadding );
|
||||||
|
|
||||||
cl_ulong dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.arraySize * 4;
|
cl_ulong dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.arraySize * 4;
|
||||||
if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) )
|
cl_ulong srcSize = (cl_ulong)srcImageInfo.rowPitch * (cl_ulong)srcImageInfo.height * 4;
|
||||||
|
|
||||||
|
if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) && srcSize < maxAllocSize && srcSize < ( memSize / 3 ) )
|
||||||
{
|
{
|
||||||
if (reverse)
|
if (reverse)
|
||||||
log_info( "Testing %d x %d x %d to %d x %d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height );
|
log_info( "Testing %d x %d x %d to %d x %d\n", (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize, (int)srcImageInfo.width, (int)srcImageInfo.height );
|
||||||
@@ -227,7 +241,7 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
|
|||||||
log_info("Not testing max size %d x %d to %d x %d x %d due to memory constraints.\n",
|
log_info("Not testing max size %d x %d to %d x %d x %d due to memory constraints.\n",
|
||||||
(int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize);
|
(int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.arraySize);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -235,6 +249,9 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_image_format *forma
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong srcSize, dstSize;
|
cl_ulong srcSize, dstSize;
|
||||||
|
size_t rowPadding = gEnablePitch ? 256 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
|
|||||||
@@ -138,8 +138,6 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
|
|||||||
image_descriptor srcImageInfo = { 0 };
|
image_descriptor srcImageInfo = { 0 };
|
||||||
image_descriptor dstImageInfo = { 0 };
|
image_descriptor dstImageInfo = { 0 };
|
||||||
RandomSeed seed( gRandomSeed );
|
RandomSeed seed( gRandomSeed );
|
||||||
size_t rowPadding = gEnablePitch ? 256 : 0;
|
|
||||||
size_t slicePadding = gEnablePitch ? 3 : 0;
|
|
||||||
|
|
||||||
srcImageInfo.format = dstImageInfo.format = format;
|
srcImageInfo.format = dstImageInfo.format = format;
|
||||||
|
|
||||||
@@ -152,6 +150,10 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D or 3D size from device" );
|
test_error( error, "Unable to get max image 2D or 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
|
for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
|
||||||
@@ -160,6 +162,9 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
|
|||||||
{
|
{
|
||||||
for( dstImageInfo.depth = 4; dstImageInfo.depth < 9; dstImageInfo.depth++ )
|
for( dstImageInfo.depth = 4; dstImageInfo.depth < 9; dstImageInfo.depth++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 256 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth, rowPadding, slicePadding );
|
set_image_dimensions( &dstImageInfo, dstImageInfo.width, dstImageInfo.height, dstImageInfo.depth, rowPadding, slicePadding );
|
||||||
set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding );
|
set_image_dimensions( &srcImageInfo, dstImageInfo.width, dstImageInfo.height, 0, rowPadding, slicePadding );
|
||||||
if( gDebugTrace )
|
if( gDebugTrace )
|
||||||
@@ -179,18 +184,27 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
|
|||||||
else if( gTestMaxImages )
|
else if( gTestMaxImages )
|
||||||
{
|
{
|
||||||
// Try a specific set of maximum sizes
|
// Try a specific set of maximum sizes
|
||||||
size_t numbeOfSizes;
|
size_t numberOfSizes3D, numberOfSizes2D;
|
||||||
size_t sizes[100][3];
|
size_t sizes3D[100][3], sizes2D[100][3];
|
||||||
|
|
||||||
// Try to allocate a bit smaller images because we need the 2D ones as well for the copy.
|
// Try to allocate a bit smaller images because we need the 2D ones as well for the copy.
|
||||||
get_max_sizes(&numbeOfSizes, 100, sizes, max3DWidth, max3DHeight, max3DDepth, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE3D, dstImageInfo.format);
|
get_max_sizes(&numberOfSizes3D, 100, sizes3D, max3DWidth, max3DHeight, max3DDepth, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE3D, dstImageInfo.format);
|
||||||
|
get_max_sizes(&numberOfSizes2D, 100, sizes2D, maxWidth, maxHeight, 1, 1, maxAllocSize/2, memSize/2, CL_MEM_OBJECT_IMAGE2D, srcImageInfo.format);
|
||||||
|
|
||||||
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
for( size_t i = 0; i < numberOfSizes2D; i++ )
|
||||||
|
for( size_t j = 0; j < numberOfSizes3D; j++ )
|
||||||
{
|
{
|
||||||
set_image_dimensions( &dstImageInfo, sizes[ idx ][ 0 ], sizes[ idx ][ 1 ], sizes[ idx ][ 2 ], rowPadding, slicePadding );
|
size_t rowPadding = gEnablePitch ? 256 : 0;
|
||||||
set_image_dimensions( &srcImageInfo, (size_t)random_in_range( (int)sizes[ idx ][ 0 ], (int)maxWidth/4, seed ), (size_t)random_in_range( (int)sizes[ idx ][ 1 ], (int)maxHeight/4, seed ), 0, rowPadding, slicePadding );
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
|
|
||||||
|
set_image_dimensions( &dstImageInfo, sizes3D[ j ][ 0 ], sizes3D[ j ][ 1 ], sizes3D[ j ][ 2 ], rowPadding, slicePadding );
|
||||||
|
set_image_dimensions( &srcImageInfo, sizes2D[ i ][ 0 ], sizes2D[ i ][ 1 ], 0, rowPadding, slicePadding );
|
||||||
|
|
||||||
cl_ulong dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.depth * 4;
|
cl_ulong dstSize = (cl_ulong)dstImageInfo.slicePitch * (cl_ulong)dstImageInfo.depth * 4;
|
||||||
if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) )
|
cl_ulong srcSize = (cl_ulong)srcImageInfo.rowPitch * (cl_ulong)srcImageInfo.height * 4;
|
||||||
|
|
||||||
|
if( dstSize < maxAllocSize && dstSize < ( memSize / 3 ) && srcSize < maxAllocSize && srcSize < ( memSize / 3 ) )
|
||||||
{
|
{
|
||||||
log_info( "Testing %d x %d to %d x %d x %d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth );
|
log_info( "Testing %d x %d to %d x %d x %d\n", (int)srcImageInfo.width, (int)srcImageInfo.height, (int)dstImageInfo.width, (int)dstImageInfo.height, (int)dstImageInfo.depth );
|
||||||
if( gDebugTrace )
|
if( gDebugTrace )
|
||||||
@@ -216,6 +230,9 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_image_format *format, boo
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong srcSize, dstSize;
|
cl_ulong srcSize, dstSize;
|
||||||
|
size_t rowPadding = gEnablePitch ? 256 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
|
|||||||
@@ -42,8 +42,6 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
|
|||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo = { 0 };
|
image_descriptor imageInfo = { 0 };
|
||||||
RandomSeed seed( gRandomSeed );
|
RandomSeed seed( gRandomSeed );
|
||||||
size_t rowPadding = gEnablePitch ? 80 : 0;
|
|
||||||
size_t slicePadding = gEnablePitch ? 3 : 0;
|
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
imageInfo.format = format;
|
imageInfo.format = format;
|
||||||
@@ -57,10 +55,17 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D array size from device" );
|
test_error( error, "Unable to get max image 2D array size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 80 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
@@ -95,6 +100,9 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
|
|||||||
|
|
||||||
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 80 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
imageInfo.width = sizes[ idx ][ 0 ];
|
imageInfo.width = sizes[ idx ][ 0 ];
|
||||||
imageInfo.height = sizes[ idx ][ 1 ];
|
imageInfo.height = sizes[ idx ][ 1 ];
|
||||||
imageInfo.arraySize = sizes[ idx ][ 2 ];
|
imageInfo.arraySize = sizes[ idx ][ 2 ];
|
||||||
@@ -108,7 +116,7 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
|
|||||||
} while ((imageInfo.rowPitch % pixelSize) != 0);
|
} while ((imageInfo.rowPitch % pixelSize) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
imageInfo.slicePitch = imageInfo.height * (imageInfo.rowPitch + slicePadding);
|
imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding);
|
||||||
log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
||||||
if( gDebugTrace )
|
if( gDebugTrace )
|
||||||
log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
||||||
@@ -121,6 +129,9 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_image_format *format )
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong size;
|
cl_ulong size;
|
||||||
|
size_t rowPadding = gEnablePitch ? 80 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
|
|||||||
@@ -42,8 +42,6 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
|
|||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo = { 0 };
|
image_descriptor imageInfo = { 0 };
|
||||||
RandomSeed seed( gRandomSeed );
|
RandomSeed seed( gRandomSeed );
|
||||||
size_t rowPadding = gEnablePitch ? 80 : 0;
|
|
||||||
size_t slicePadding = gEnablePitch ? 3 : 0;
|
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
imageInfo.format = format;
|
imageInfo.format = format;
|
||||||
@@ -57,10 +55,17 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 80 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
@@ -94,6 +99,9 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
|
|||||||
|
|
||||||
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
for( size_t idx = 0; idx < numbeOfSizes; idx++ )
|
||||||
{
|
{
|
||||||
|
size_t rowPadding = gEnablePitch ? 80 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
imageInfo.width = sizes[ idx ][ 0 ];
|
imageInfo.width = sizes[ idx ][ 0 ];
|
||||||
imageInfo.height = sizes[ idx ][ 1 ];
|
imageInfo.height = sizes[ idx ][ 1 ];
|
||||||
imageInfo.depth = sizes[ idx ][ 2 ];
|
imageInfo.depth = sizes[ idx ][ 2 ];
|
||||||
@@ -107,7 +115,7 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
|
|||||||
} while ((imageInfo.rowPitch % pixelSize) != 0);
|
} while ((imageInfo.rowPitch % pixelSize) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
imageInfo.slicePitch = imageInfo.height * (imageInfo.rowPitch + slicePadding);
|
imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + slicePadding);
|
||||||
log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
log_info( "Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
||||||
if( gDebugTrace )
|
if( gDebugTrace )
|
||||||
log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
|
||||||
@@ -120,6 +128,9 @@ int test_copy_image_set_3D( cl_device_id device, cl_image_format *format )
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong size;
|
cl_ulong size;
|
||||||
|
size_t rowPadding = gEnablePitch ? 80 : 0;
|
||||||
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
|
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
|
|||||||
@@ -167,6 +167,10 @@ int test_copy_image_set_3D_2D_array( cl_device_id device, cl_image_format *forma
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D image array or 3D size from device" );
|
test_error( error, "Unable to get max image 2D image array or 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
|
for( dstImageInfo.width = 4; dstImageInfo.width < 17; dstImageInfo.width++ )
|
||||||
|
|||||||
@@ -72,9 +72,10 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
{
|
{
|
||||||
size_t maxWidth;
|
size_t maxWidth;
|
||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo;
|
image_descriptor imageInfo = {0};
|
||||||
RandomSeed seed(gRandomSeed);
|
RandomSeed seed(gRandomSeed);
|
||||||
size_t rowPadding = gEnablePitch ? 48 : 0;
|
size_t rowPadding_default = 48;
|
||||||
|
size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
memset(&imageInfo, 0x0, sizeof(image_descriptor));
|
memset(&imageInfo, 0x0, sizeof(image_descriptor));
|
||||||
@@ -87,6 +88,10 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if ( gTestSmallImages )
|
if ( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
@@ -95,6 +100,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -124,6 +130,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -152,6 +159,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|||||||
@@ -74,9 +74,10 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
{
|
{
|
||||||
size_t maxWidth, maxArraySize;
|
size_t maxWidth, maxArraySize;
|
||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo;
|
image_descriptor imageInfo = {0};
|
||||||
RandomSeed seed(gRandomSeed);
|
RandomSeed seed(gRandomSeed);
|
||||||
size_t rowPadding = gEnablePitch ? 48 : 0;
|
size_t rowPadding_default = 48;
|
||||||
|
size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
memset(&imageInfo, 0x0, sizeof(image_descriptor));
|
memset(&imageInfo, 0x0, sizeof(image_descriptor));
|
||||||
@@ -90,6 +91,10 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 1D array size from device" );
|
test_error( error, "Unable to get max image 1D array size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if ( gTestSmallImages )
|
if ( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
@@ -98,6 +103,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -132,6 +138,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -162,6 +169,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|||||||
@@ -74,9 +74,10 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
{
|
{
|
||||||
size_t maxWidth, maxHeight;
|
size_t maxWidth, maxHeight;
|
||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo;
|
image_descriptor imageInfo = {0};
|
||||||
RandomSeed seed(gRandomSeed);
|
RandomSeed seed(gRandomSeed);
|
||||||
size_t rowPadding = gEnablePitch ? 48 : 0;
|
size_t rowPadding_default = 48;
|
||||||
|
size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
memset(&imageInfo, 0x0, sizeof(image_descriptor));
|
memset(&imageInfo, 0x0, sizeof(image_descriptor));
|
||||||
@@ -90,6 +91,10 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if ( gTestSmallImages )
|
if ( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
@@ -98,6 +103,7 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -131,6 +137,7 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -160,6 +167,7 @@ int test_fill_image_set_2D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|||||||
@@ -75,9 +75,10 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
{
|
{
|
||||||
size_t maxWidth, maxHeight, maxArraySize;
|
size_t maxWidth, maxHeight, maxArraySize;
|
||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo;
|
image_descriptor imageInfo = {0};
|
||||||
RandomSeed seed( gRandomSeed );
|
RandomSeed seed( gRandomSeed );
|
||||||
size_t rowPadding = gEnablePitch ? 80 : 0;
|
size_t rowPadding_default = 80;
|
||||||
|
size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
|
||||||
size_t slicePadding = gEnablePitch ? 3 : 0;
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
@@ -93,6 +94,10 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D array size from device" );
|
test_error( error, "Unable to get max image 2D array size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if ( gTestSmallImages )
|
if ( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
@@ -101,6 +106,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -137,6 +143,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -169,6 +176,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|||||||
@@ -75,9 +75,10 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
{
|
{
|
||||||
size_t maxWidth, maxHeight, maxDepth;
|
size_t maxWidth, maxHeight, maxDepth;
|
||||||
cl_ulong maxAllocSize, memSize;
|
cl_ulong maxAllocSize, memSize;
|
||||||
image_descriptor imageInfo;
|
image_descriptor imageInfo = {0};
|
||||||
RandomSeed seed( gRandomSeed );
|
RandomSeed seed( gRandomSeed );
|
||||||
size_t rowPadding = gEnablePitch ? 80 : 0;
|
size_t rowPadding_default = 80;
|
||||||
|
size_t rowPadding = gEnablePitch ? rowPadding_default : 0;
|
||||||
size_t slicePadding = gEnablePitch ? 3 : 0;
|
size_t slicePadding = gEnablePitch ? 3 : 0;
|
||||||
size_t pixelSize;
|
size_t pixelSize;
|
||||||
|
|
||||||
@@ -93,6 +94,10 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if ( gTestSmallImages )
|
if ( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for ( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
@@ -101,6 +106,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -138,6 +144,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
@@ -169,6 +176,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_image_format *format, Explic
|
|||||||
|
|
||||||
if (gEnablePitch)
|
if (gEnablePitch)
|
||||||
{
|
{
|
||||||
|
rowPadding = rowPadding_default;
|
||||||
do {
|
do {
|
||||||
rowPadding++;
|
rowPadding++;
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
imageInfo.rowPitch = imageInfo.width * pixelSize + rowPadding;
|
||||||
|
|||||||
@@ -49,6 +49,10 @@ int test_get_image_info_1D( cl_device_id device, cl_image_format *format, cl_mem
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 1D size from device" );
|
test_error( error, "Unable to get max image 1D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -47,6 +47,10 @@ int test_get_image_info_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 1D array size from device" );
|
test_error( error, "Unable to get max image 1D array size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -291,6 +291,10 @@ int test_get_image_info_2D( cl_device_id device, cl_image_format *format, cl_mem
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D width or max image 3D height or max memory allocation size or global memory size from device" );
|
test_error( error, "Unable to get max image 2D width or max image 3D height or max memory allocation size or global memory size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -50,6 +50,10 @@ int test_get_image_info_3D( cl_device_id device, cl_image_format *format, cl_mem
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -138,6 +138,10 @@ int test_read_image_set_1D( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -144,6 +144,10 @@ int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -144,6 +144,10 @@ int test_read_image_set_2D( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -118,6 +118,10 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -118,6 +118,10 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format )
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
if( gTestSmallImages )
|
if( gTestSmallImages )
|
||||||
{
|
{
|
||||||
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
|
||||||
|
|||||||
@@ -136,7 +136,7 @@ AddressFn ClampToEdgeLinearFn
|
|||||||
volatile float gFloatHome;
|
volatile float gFloatHome;
|
||||||
float RepeatNormalizedAddressFn( float fValue, size_t maxValue )
|
float RepeatNormalizedAddressFn( float fValue, size_t maxValue )
|
||||||
{
|
{
|
||||||
#ifndef _MSC_VER // Use original if not the VS compiler.
|
#if !defined( __i386__ ) && !defined( __x86_64__ ) // Use original if not the x86 compiler.
|
||||||
// General computation for repeat
|
// General computation for repeat
|
||||||
return (fValue - floorf( fValue )) * (float) maxValue; // Reduce to [0, 1.f]
|
return (fValue - floorf( fValue )) * (float) maxValue; // Reduce to [0, 1.f]
|
||||||
#else // Otherwise, use this instead:
|
#else // Otherwise, use this instead:
|
||||||
@@ -352,9 +352,14 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
|
|||||||
int x1_dim = (fixed_dim == 2) ? 1 : 2;
|
int x1_dim = (fixed_dim == 2) ? 1 : 2;
|
||||||
|
|
||||||
// Choose two other sizes for these dimensions
|
// Choose two other sizes for these dimensions
|
||||||
double x1 = fmin(fmin(A/M,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);
|
|
||||||
double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]);
|
double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]);
|
||||||
|
// GPUs have certain restrictions on minimum width (row alignment) of images which has given us issues
|
||||||
|
// testing small widths in this test (say we set width to 3 for testing, and compute size based on this width and decide
|
||||||
|
// it fits within vram ... but GPU driver decides that, due to row alignment requirements, it has to use
|
||||||
|
// width of 16 which doesnt fit in vram). For this purpose we are not testing width < 16 for this test.
|
||||||
|
if(x0_dim == 0 && x0 < 16)
|
||||||
|
x0 = 16;
|
||||||
|
double x1 = fmin(fmin(A/M/x0,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);
|
||||||
// Store the size
|
// Store the size
|
||||||
sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
|
sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
|
||||||
sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
|
sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
|
||||||
@@ -384,13 +389,6 @@ void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int issubnormal(float a)
|
|
||||||
{
|
|
||||||
union { cl_int i; cl_float f; } u;
|
|
||||||
u.f = a;
|
|
||||||
return (u.i & 0x7f800000U) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
|
float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
|
||||||
if (sampler->filter_mode == CL_FILTER_NEAREST)
|
if (sampler->filter_mode == CL_FILTER_NEAREST)
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
@@ -1254,15 +1252,11 @@ bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset,
|
|||||||
case CL_ADDRESS_REPEAT:
|
case CL_ADDRESS_REPEAT:
|
||||||
x = RepeatNormalizedAddressFn( x, width );
|
x = RepeatNormalizedAddressFn( x, width );
|
||||||
if (height != 0) {
|
if (height != 0) {
|
||||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
||||||
y *= (float)height+yAddressOffset;
|
|
||||||
else
|
|
||||||
y = RepeatNormalizedAddressFn( y, height );
|
y = RepeatNormalizedAddressFn( y, height );
|
||||||
}
|
}
|
||||||
if (depth != 0) {
|
if (depth != 0) {
|
||||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
||||||
z *= (float)depth+zAddressOffset;
|
|
||||||
else
|
|
||||||
z = RepeatNormalizedAddressFn( z, depth );
|
z = RepeatNormalizedAddressFn( z, depth );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1298,15 +1292,11 @@ bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset,
|
|||||||
case CL_ADDRESS_MIRRORED_REPEAT:
|
case CL_ADDRESS_MIRRORED_REPEAT:
|
||||||
x = MirroredRepeatNormalizedAddressFn( x, width );
|
x = MirroredRepeatNormalizedAddressFn( x, width );
|
||||||
if (height != 0) {
|
if (height != 0) {
|
||||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
||||||
y *= (float)height+yAddressOffset;
|
|
||||||
else
|
|
||||||
y = MirroredRepeatNormalizedAddressFn( y, height );
|
y = MirroredRepeatNormalizedAddressFn( y, height );
|
||||||
}
|
}
|
||||||
if (depth != 0) {
|
if (depth != 0) {
|
||||||
if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
||||||
z *= (float)depth+zAddressOffset;
|
|
||||||
else
|
|
||||||
z = MirroredRepeatNormalizedAddressFn( z, depth );
|
z = MirroredRepeatNormalizedAddressFn( z, depth );
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1334,9 +1324,11 @@ bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset,
|
|||||||
default:
|
default:
|
||||||
// Also, remultiply to the original coords. This simulates any truncation in
|
// Also, remultiply to the original coords. This simulates any truncation in
|
||||||
// the pass to OpenCL
|
// the pass to OpenCL
|
||||||
x *= (float)width+xAddressOffset;
|
x = (x * (float)width) + xAddressOffset;
|
||||||
y *= (float)height+yAddressOffset;
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
|
||||||
z *= (float)depth+zAddressOffset;
|
y = (y * (float)height) + yAddressOffset;
|
||||||
|
if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
|
||||||
|
z = (z * (float)depth) + zAddressOffset;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1522,16 +1514,12 @@ FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *i
|
|||||||
// The image array types require special care:
|
// The image array types require special care:
|
||||||
|
|
||||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||||
y = unnormalize_coordinate("array index", y, yAddressOffset,
|
|
||||||
(float)imageInfo->arraySize, CL_ADDRESS_CLAMP_TO_EDGE, verbose);
|
|
||||||
z = 0; // don't care -- unused for 1D arrays
|
z = 0; // don't care -- unused for 1D arrays
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||||
y = unnormalize_coordinate("y", y, yAddressOffset, (float)imageInfo->height,
|
y = unnormalize_coordinate("y", y, yAddressOffset, (float)imageInfo->height,
|
||||||
imageSampler->addressing_mode, verbose);
|
imageSampler->addressing_mode, verbose);
|
||||||
z = unnormalize_coordinate("array index", z, zAddressOffset,
|
|
||||||
(float)imageInfo->arraySize, CL_ADDRESS_CLAMP_TO_EDGE, verbose);
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Everybody else:
|
// Everybody else:
|
||||||
|
|||||||
@@ -491,7 +491,6 @@ extern char *create_random_image_data( ExplicitType dataType, image_descriptor *
|
|||||||
extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
|
extern void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine );
|
||||||
extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
|
extern float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler);
|
||||||
extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
|
extern float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter );
|
||||||
extern int issubnormal(float);
|
|
||||||
|
|
||||||
|
|
||||||
#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
|
#define errMax( _x , _y ) ( (_x) != (_x) ? (_x) : (_x) > (_y) ? (_x) : (_y) )
|
||||||
|
|||||||
@@ -748,6 +748,10 @@ int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( outputType == kInt )
|
if( outputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
|
|||||||
@@ -855,6 +855,10 @@ int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D array size from device" );
|
test_error( error, "Unable to get max image 2D array size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( outputType == kInt )
|
if( outputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
|
|||||||
@@ -31,6 +31,28 @@ extern cl_mem_flags gMemFlagsToUse;
|
|||||||
#define MAX_TRIES 1
|
#define MAX_TRIES 1
|
||||||
#define MAX_CLAMPED 1
|
#define MAX_CLAMPED 1
|
||||||
|
|
||||||
|
// Utility function to clamp down image sizes for certain tests to avoid
|
||||||
|
// using too much memory.
|
||||||
|
static size_t reduceImageSizeRange(size_t maxDimSize) {
|
||||||
|
size_t DimSize = maxDimSize/128;
|
||||||
|
if (DimSize < (size_t) 16)
|
||||||
|
return 16;
|
||||||
|
else if (DimSize > (size_t) 64)
|
||||||
|
return 64;
|
||||||
|
else
|
||||||
|
return DimSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t reduceImageDepth(size_t maxDepth) {
|
||||||
|
size_t Depth = maxDepth/32;
|
||||||
|
if (Depth < (size_t) 8)
|
||||||
|
return 8;
|
||||||
|
else if (Depth > (size_t) 32)
|
||||||
|
return 32;
|
||||||
|
else
|
||||||
|
return Depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
const char *read2DArrayKernelSourcePattern =
|
const char *read2DArrayKernelSourcePattern =
|
||||||
"__kernel void sample_kernel( read_only image2d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results )\n"
|
"__kernel void sample_kernel( read_only image2d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results )\n"
|
||||||
@@ -834,6 +856,10 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( outputType == kInt )
|
if( outputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
@@ -921,6 +947,9 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
|
||||||
|
int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
|
||||||
|
int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);
|
||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong size;
|
cl_ulong size;
|
||||||
@@ -928,9 +957,9 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 128, seed );
|
imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
|
||||||
imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 128, seed );
|
imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
|
||||||
imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, seed );
|
imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, seed );
|
||||||
|
|
||||||
imageInfo.rowPitch = imageInfo.width * pixelSize;
|
imageInfo.rowPitch = imageInfo.width * pixelSize;
|
||||||
imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
|
imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
|
||||||
|
|||||||
@@ -31,6 +31,24 @@ extern cl_mem_flags gMemFlagsToUse;
|
|||||||
#define MAX_TRIES 1
|
#define MAX_TRIES 1
|
||||||
#define MAX_CLAMPED 1
|
#define MAX_CLAMPED 1
|
||||||
|
|
||||||
|
// Utility function to clamp down image sizes for certain tests to avoid
|
||||||
|
// using too much memory.
|
||||||
|
static size_t reduceImageSizeRange(size_t maxDimSize, RandomSeed& seed) {
|
||||||
|
size_t DimSize = random_log_in_range(16, (int) maxDimSize/32, seed);
|
||||||
|
if (DimSize > (size_t) 128)
|
||||||
|
return 128;
|
||||||
|
else
|
||||||
|
return DimSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t reduceImageDepth(size_t maxDimSize, RandomSeed& seed) {
|
||||||
|
size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
|
||||||
|
if (DimSize > (size_t) 32)
|
||||||
|
return 32;
|
||||||
|
else
|
||||||
|
return DimSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
const char *read3DKernelSourcePattern =
|
const char *read3DKernelSourcePattern =
|
||||||
"__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results )\n"
|
"__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results )\n"
|
||||||
@@ -837,6 +855,10 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( outputType == kInt )
|
if( outputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
@@ -931,9 +953,9 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_
|
|||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, seed );
|
imageInfo.width = reduceImageSizeRange(maxWidth, seed );
|
||||||
imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, seed );
|
imageInfo.height = reduceImageSizeRange(maxHeight, seed );
|
||||||
imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, seed );
|
imageInfo.depth = reduceImageDepth(maxDepth, seed );
|
||||||
|
|
||||||
imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
|
imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
|
||||||
imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
|
imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
|
||||||
|
|||||||
@@ -413,6 +413,10 @@ int test_write_image_1D_set( cl_device_id device, cl_image_format *format, Expli
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( inputType == kInt )
|
if( inputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
|
|||||||
@@ -422,6 +422,10 @@ int test_write_image_1D_array_set( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( inputType == kInt )
|
if( inputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
|
|||||||
@@ -30,6 +30,28 @@ extern cl_mem_flags gMemFlagsToUse;
|
|||||||
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
|
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
|
||||||
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
|
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
|
||||||
|
|
||||||
|
// Utility function to clamp down image sizes for certain tests to avoid
|
||||||
|
// using too much memory.
|
||||||
|
static size_t reduceImageSizeRange(size_t maxDimSize) {
|
||||||
|
size_t DimSize = maxDimSize/32;
|
||||||
|
if (DimSize < (size_t) 16)
|
||||||
|
return 16;
|
||||||
|
else if (DimSize > (size_t) 128)
|
||||||
|
return 128;
|
||||||
|
else
|
||||||
|
return DimSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t reduceImageDepth(size_t maxDepth) {
|
||||||
|
size_t Depth = maxDepth/32;
|
||||||
|
if (Depth < (size_t) 8)
|
||||||
|
return 8;
|
||||||
|
else if (Depth > (size_t) 32)
|
||||||
|
return 32;
|
||||||
|
else
|
||||||
|
return Depth;
|
||||||
|
}
|
||||||
|
|
||||||
const char *write2DArrayKernelSourcePattern =
|
const char *write2DArrayKernelSourcePattern =
|
||||||
"__kernel void sample_kernel( __global %s4 *input, write_only image2d_array_t output )\n"
|
"__kernel void sample_kernel( __global %s4 *input, write_only image2d_array_t output )\n"
|
||||||
"{\n"
|
"{\n"
|
||||||
@@ -398,6 +420,10 @@ int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( inputType == kInt )
|
if( inputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
@@ -472,13 +498,16 @@ int test_write_image_2D_array_set( cl_device_id device, cl_image_format *format,
|
|||||||
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
|
||||||
{
|
{
|
||||||
cl_ulong size;
|
cl_ulong size;
|
||||||
|
int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
|
||||||
|
int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
|
||||||
|
int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);
|
||||||
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
// Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
|
||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
|
imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, d );
|
||||||
imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
|
imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, d );
|
||||||
imageInfo.arraySize = (size_t)random_log_in_range( 16, (int)maxArraySize / 32, d );
|
imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, d );
|
||||||
|
|
||||||
imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
|
imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
|
||||||
imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
|
imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
|
||||||
|
|||||||
@@ -30,6 +30,24 @@ extern cl_mem_flags gMemFlagsToUse;
|
|||||||
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
|
extern int verify_write_results( size_t &i, int &numTries, int &totalErrors, char *&imagePtr, void *resultValues, size_t y, size_t z,
|
||||||
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
|
ExplicitType inputType, image_descriptor *imageInfo, bool verifyRounding );
|
||||||
|
|
||||||
|
// Utility function to clamp down image sizes for certain tests to avoid
|
||||||
|
// using too much memory.
|
||||||
|
static size_t reduceImageSizeRange(size_t maxDimSize, MTdata& seed) {
|
||||||
|
size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
|
||||||
|
if (DimSize > (size_t) 128)
|
||||||
|
return 128;
|
||||||
|
else
|
||||||
|
return DimSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) {
|
||||||
|
size_t DimSize = random_log_in_range(8, (int) maxDimSize/32, seed);
|
||||||
|
if (DimSize > (size_t) 32)
|
||||||
|
return 32;
|
||||||
|
else
|
||||||
|
return DimSize;
|
||||||
|
}
|
||||||
|
|
||||||
const char *write3DKernelSourcePattern =
|
const char *write3DKernelSourcePattern =
|
||||||
"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
|
"#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
|
||||||
"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output )\n"
|
"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output )\n"
|
||||||
@@ -397,6 +415,10 @@ int test_write_image_3D_set( cl_device_id device, cl_image_format *format, Expli
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( inputType == kInt )
|
if( inputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
@@ -475,9 +497,9 @@ int test_write_image_3D_set( cl_device_id device, cl_image_format *format, Expli
|
|||||||
// image, the result array, plus offset arrays, will fit in the global ram space
|
// image, the result array, plus offset arrays, will fit in the global ram space
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
imageInfo.width = (size_t)random_log_in_range( 16, (int)maxWidth / 32, d );
|
imageInfo.width = reduceImageSizeRange(maxWidth, d );
|
||||||
imageInfo.height = (size_t)random_log_in_range( 16, (int)maxHeight / 32, d );
|
imageInfo.height = reduceImageSizeRange(maxHeight, d );
|
||||||
imageInfo.depth = (size_t)random_log_in_range( 16, (int)maxDepth / 32, d );
|
imageInfo.depth = reduceImageDepth(maxDepth, d );
|
||||||
|
|
||||||
imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
|
imageInfo.rowPitch = imageInfo.width * get_pixel_size( imageInfo.format );
|
||||||
imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
|
imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
|
||||||
|
|||||||
@@ -422,6 +422,10 @@ int test_write_image_set( cl_device_id device, cl_image_format *format, Explicit
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if( inputType == kInt )
|
if( inputType == kInt )
|
||||||
readFormat = "i";
|
readFormat = "i";
|
||||||
|
|||||||
@@ -166,6 +166,10 @@ int test_read_image_set_1D( cl_device_id device, cl_image_format *format, image_
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 1D size from device" );
|
test_error( error, "Unable to get max image 1D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if ( outputType == kInt )
|
if ( outputType == kInt )
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -172,6 +172,10 @@ int test_read_image_set_1D_array( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D size from device" );
|
test_error( error, "Unable to get max image 2D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if ( outputType == kInt )
|
if ( outputType == kInt )
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -185,6 +185,11 @@ int test_read_image_set_1D_buffer( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth1D, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth1D, NULL );
|
||||||
test_error( error, "Unable to get max image 1D buffer size from device" );
|
test_error( error, "Unable to get max image 1D buffer size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// note: image_buffer test uses image1D for results validation.
|
// note: image_buffer test uses image1D for results validation.
|
||||||
// So the test can't use the biggest possible size for image_buffer if it's bigger than the max image1D size
|
// So the test can't use the biggest possible size for image_buffer if it's bigger than the max image1D size
|
||||||
maxWidth = (maxWidth > maxWidth1D) ? maxWidth1D : maxWidth;
|
maxWidth = (maxWidth > maxWidth1D) ? maxWidth1D : maxWidth;
|
||||||
|
|||||||
@@ -156,6 +156,10 @@ int test_read_image_set_2D_array( cl_device_id device, cl_image_format *format,
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 2D array size from device" );
|
test_error( error, "Unable to get max image 2D array size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if ( outputType == kInt )
|
if ( outputType == kInt )
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -161,6 +161,10 @@ int test_read_image_set_3D( cl_device_id device, cl_image_format *format, image_
|
|||||||
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
|
||||||
test_error( error, "Unable to get max image 3D size from device" );
|
test_error( error, "Unable to get max image 3D size from device" );
|
||||||
|
|
||||||
|
if (memSize > (cl_ulong)SIZE_MAX) {
|
||||||
|
memSize = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
// Determine types
|
// Determine types
|
||||||
if ( outputType == kInt )
|
if ( outputType == kInt )
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -477,10 +477,42 @@ test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_
|
|||||||
{
|
{
|
||||||
globalThreadData * pThreadInfo = NULL;
|
globalThreadData * pThreadInfo = NULL;
|
||||||
cl_int result=0;
|
cl_int result=0;
|
||||||
|
cl_uint threadcount = GetThreadCount();
|
||||||
|
|
||||||
|
// This test will run threadcount threads concurrently; each thread will
|
||||||
|
// execute test_integer_ops() which will allocate 2 OpenCL buffers on the
|
||||||
|
// device; each buffer has size num_elements * type_size * vectorSize. We
|
||||||
|
// need to make sure that the total device memory allocated by all threads
|
||||||
|
// does not exceed the maximum memory on the device. If it does, we decrease
|
||||||
|
// num_elements until all threads combined will not over-subscribe device
|
||||||
|
// memory.
|
||||||
|
cl_ulong maxDeviceGlobalMem;
|
||||||
|
result =
|
||||||
|
clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
|
||||||
|
sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL);
|
||||||
|
if (result != CL_SUCCESS) {
|
||||||
|
log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n",
|
||||||
|
result);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) {
|
||||||
|
maxDeviceGlobalMem = (cl_ulong)SIZE_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Let's not take all device memory - reduce by 75%
|
||||||
|
maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2;
|
||||||
|
// Now reduce num_elements so that the total device memory usage does not
|
||||||
|
// exceed 75% of global device memory.
|
||||||
|
size_t type_size = get_explicit_type_size(type);
|
||||||
|
while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize >
|
||||||
|
maxDeviceGlobalMem) {
|
||||||
|
num_elements >>= 1;
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t startIndx = (uint64_t)0;
|
uint64_t startIndx = (uint64_t)0;
|
||||||
uint64_t endIndx = (1ULL<<num_runs_shift);
|
uint64_t endIndx = (1ULL<<num_runs_shift);
|
||||||
uint64_t jobcount = (endIndx-startIndx)/num_elements;
|
uint64_t jobcount = (endIndx-startIndx)/num_elements;
|
||||||
cl_uint threadcount = GetThreadCount();
|
|
||||||
|
|
||||||
if(jobcount==0)
|
if(jobcount==0)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -590,6 +590,8 @@ static void PrintArch( void )
|
|||||||
vlog( "\tARCH:\tx86_64\n" );
|
vlog( "\tARCH:\tx86_64\n" );
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ )
|
||||||
vlog( "\tARCH:\tarm\n" );
|
vlog( "\tARCH:\tarm\n" );
|
||||||
|
#elif defined( __aarch64__ )
|
||||||
|
vlog( "\tARCH:\taarch64\n" );
|
||||||
#else
|
#else
|
||||||
vlog( "\tARCH:\tunknown\n" );
|
vlog( "\tARCH:\tunknown\n" );
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1763,7 +1763,7 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
|
|||||||
// *signgamp = 1;
|
// *signgamp = 1;
|
||||||
ix = hx&0x7fffffff;
|
ix = hx&0x7fffffff;
|
||||||
if(ix>=0x7ff00000) return x*x;
|
if(ix>=0x7ff00000) return x*x;
|
||||||
if((ix|lx)==0) return one/zero;
|
if((ix|lx)==0) return INFINITY;
|
||||||
if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */
|
if(ix<0x3b900000) { /* |x|<2**-70, return -log(|x|) */
|
||||||
if(hx<0) {
|
if(hx<0) {
|
||||||
// *signgamp = -1;
|
// *signgamp = -1;
|
||||||
@@ -1772,9 +1772,10 @@ static const double //two52 = 4.50359962737049600000e+15, /* 0x43300000, 0x00000
|
|||||||
}
|
}
|
||||||
if(hx<0) {
|
if(hx<0) {
|
||||||
if(ix>=0x43300000) /* |x|>=2**52, must be -integer */
|
if(ix>=0x43300000) /* |x|>=2**52, must be -integer */
|
||||||
return one/zero;
|
return INFINITY;
|
||||||
t = reference_sinpi(x);
|
t = reference_sinpi(x);
|
||||||
if(t==zero) return one/zero; /* -integer */
|
if(t==zero)
|
||||||
|
return INFINITY; /* -integer */
|
||||||
nadj = reference_log(pi/reference_fabs(t*x));
|
nadj = reference_log(pi/reference_fabs(t*x));
|
||||||
// if(t<zero) *signgamp = -1;
|
// if(t<zero) *signgamp = -1;
|
||||||
x = -x;
|
x = -x;
|
||||||
@@ -5414,5 +5415,3 @@ int reference_notl( long double x )
|
|||||||
int r = !x;
|
int r = !x;
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
0
test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
Normal file → Executable file
0
test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
Normal file → Executable file
@@ -1,3 +1,5 @@
|
|||||||
|
add_compile_options(-std=c++11)
|
||||||
|
|
||||||
add_executable(conformance_test_printf
|
add_executable(conformance_test_printf
|
||||||
test_printf.c
|
test_printf.c
|
||||||
util_printf.c
|
util_printf.c
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#if ! defined( _WIN32)
|
#if ! defined( _WIN32)
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
@@ -349,9 +350,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
|
|||||||
//-----------------------------------------
|
//-----------------------------------------
|
||||||
static bool isLongSupported(cl_device_id device_id)
|
static bool isLongSupported(cl_device_id device_id)
|
||||||
{
|
{
|
||||||
//profile type && device extention for long support checking
|
|
||||||
char *profileType = NULL,*devExt = NULL;
|
|
||||||
|
|
||||||
size_t tempSize = 0;
|
size_t tempSize = 0;
|
||||||
cl_int status;
|
cl_int status;
|
||||||
bool extSupport = true;
|
bool extSupport = true;
|
||||||
@@ -370,7 +368,7 @@ static bool isLongSupported(cl_device_id device_id)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
profileType = new char[tempSize];
|
std::unique_ptr<char[]> profileType(new char[tempSize]);
|
||||||
if(profileType == NULL)
|
if(profileType == NULL)
|
||||||
{
|
{
|
||||||
log_error("Failed to allocate memory(profileType)");
|
log_error("Failed to allocate memory(profileType)");
|
||||||
@@ -381,11 +379,11 @@ static bool isLongSupported(cl_device_id device_id)
|
|||||||
device_id,
|
device_id,
|
||||||
CL_DEVICE_PROFILE,
|
CL_DEVICE_PROFILE,
|
||||||
sizeof(char) * tempSize,
|
sizeof(char) * tempSize,
|
||||||
profileType,
|
profileType.get(),
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
|
|
||||||
if(!strcmp("EMBEDDED_PROFILE",profileType))
|
if(!strcmp("EMBEDDED_PROFILE",profileType.get()))
|
||||||
{
|
{
|
||||||
// Device extention
|
// Device extention
|
||||||
status = clGetDeviceInfo(
|
status = clGetDeviceInfo(
|
||||||
@@ -401,7 +399,7 @@ static bool isLongSupported(cl_device_id device_id)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
devExt = new char[tempSize];
|
std::unique_ptr<char[]> devExt(new char[tempSize]);
|
||||||
if(devExt == NULL)
|
if(devExt == NULL)
|
||||||
{
|
{
|
||||||
log_error("Failed to allocate memory(devExt)");
|
log_error("Failed to allocate memory(devExt)");
|
||||||
@@ -412,16 +410,14 @@ static bool isLongSupported(cl_device_id device_id)
|
|||||||
device_id,
|
device_id,
|
||||||
CL_DEVICE_EXTENSIONS,
|
CL_DEVICE_EXTENSIONS,
|
||||||
sizeof(char) * tempSize,
|
sizeof(char) * tempSize,
|
||||||
devExt,
|
devExt.get(),
|
||||||
NULL);
|
NULL);
|
||||||
|
|
||||||
extSupport = (strstr(devExt,"cles_khr_int64") != NULL);
|
extSupport = (strstr(devExt.get(),"cles_khr_int64") != NULL);
|
||||||
|
|
||||||
delete devExt;
|
|
||||||
delete profileType;
|
|
||||||
}
|
}
|
||||||
return extSupport;
|
return extSupport;
|
||||||
}
|
}
|
||||||
|
|
||||||
//-----------------------------------------
|
//-----------------------------------------
|
||||||
// is64bAddressSpace
|
// is64bAddressSpace
|
||||||
//-----------------------------------------
|
//-----------------------------------------
|
||||||
@@ -455,7 +451,9 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
|
|||||||
int err;
|
int err;
|
||||||
cl_program program;
|
cl_program program;
|
||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
cl_mem d_out;
|
cl_mem d_out, d_a;
|
||||||
|
int has_d_out = 0;
|
||||||
|
int has_d_a = 0;
|
||||||
char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
|
char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
|
||||||
|
|
||||||
// Define an index space (global work size) of threads for execution.
|
// Define an index space (global work size) of threads for execution.
|
||||||
@@ -474,12 +472,13 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
|
|||||||
if(isKernelArgument(allTestCase[testId],testNum))
|
if(isKernelArgument(allTestCase[testId],testNum))
|
||||||
{
|
{
|
||||||
int a = 2;
|
int a = 2;
|
||||||
cl_mem d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
|
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
|
||||||
sizeof(int), &a, &err);
|
sizeof(int), &a, &err);
|
||||||
if(err!= CL_SUCCESS || d_a == NULL) {
|
if(err!= CL_SUCCESS || d_a == NULL) {
|
||||||
log_error("clCreateBuffer failed\n");
|
log_error("clCreateBuffer failed\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
has_d_a = 1;
|
||||||
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
|
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
|
||||||
if(err!= CL_SUCCESS) {
|
if(err!= CL_SUCCESS) {
|
||||||
log_error("clSetKernelArg failed\n");
|
log_error("clSetKernelArg failed\n");
|
||||||
@@ -490,11 +489,12 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
|
|||||||
if(isKernelPFormat(allTestCase[testId],testNum))
|
if(isKernelPFormat(allTestCase[testId],testNum))
|
||||||
{
|
{
|
||||||
d_out = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
d_out = clCreateBuffer(context, CL_MEM_READ_WRITE,
|
||||||
sizeof(long), NULL, &err);
|
sizeof(cl_long), NULL, &err);
|
||||||
if(err!= CL_SUCCESS || d_out == NULL) {
|
if(err!= CL_SUCCESS || d_out == NULL) {
|
||||||
log_error("clCreateBuffer failed\n");
|
log_error("clCreateBuffer failed\n");
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
has_d_out = 1;
|
||||||
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_out);
|
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_out);
|
||||||
if(err!= CL_SUCCESS) {
|
if(err!= CL_SUCCESS) {
|
||||||
log_error("clSetKernelArg failed\n");
|
log_error("clSetKernelArg failed\n");
|
||||||
@@ -562,6 +562,12 @@ static int doTest(cl_command_queue queue, cl_context context, const unsigned int
|
|||||||
err = ++s_test_fail;
|
err = ++s_test_fail;
|
||||||
}
|
}
|
||||||
exit:
|
exit:
|
||||||
|
if(has_d_out)
|
||||||
|
if(clReleaseMemObject(d_out) != CL_SUCCESS)
|
||||||
|
log_error("clReleaseMemObject failed\n");
|
||||||
|
if(has_d_a)
|
||||||
|
if(clReleaseMemObject(d_a) != CL_SUCCESS)
|
||||||
|
log_error("clReleaseMemObject failed\n");
|
||||||
if(clReleaseKernel(kernel) != CL_SUCCESS)
|
if(clReleaseKernel(kernel) != CL_SUCCESS)
|
||||||
log_error("clReleaseKernel failed\n");
|
log_error("clReleaseKernel failed\n");
|
||||||
if(clReleaseProgram(program) != CL_SUCCESS)
|
if(clReleaseProgram(program) != CL_SUCCESS)
|
||||||
@@ -598,6 +604,8 @@ static void printArch( void )
|
|||||||
log_info( "ARCH:\tx86_64\n" );
|
log_info( "ARCH:\tx86_64\n" );
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ )
|
||||||
log_info( "ARCH:\tarm\n" );
|
log_info( "ARCH:\tarm\n" );
|
||||||
|
#elif defined( __aarch64__ )
|
||||||
|
vlog( "\tARCH:\taarch64\n" );
|
||||||
#else
|
#else
|
||||||
#error unknown arch
|
#error unknown arch
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -618,11 +618,6 @@ struct printDataGenParameters printStringGenParameters[] = {
|
|||||||
//%% specification
|
//%% specification
|
||||||
|
|
||||||
{"%s","\"%%\""},
|
{"%s","\"%%\""},
|
||||||
|
|
||||||
//null string
|
|
||||||
|
|
||||||
{"%s","(void*)0"}
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//---------------------------------------------------------
|
//---------------------------------------------------------
|
||||||
@@ -638,9 +633,6 @@ const char * correctBufferString[] = {
|
|||||||
"f",
|
"f",
|
||||||
|
|
||||||
"%%",
|
"%%",
|
||||||
|
|
||||||
"(null)"
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//---------------------------------------------------------
|
//---------------------------------------------------------
|
||||||
@@ -865,10 +857,8 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
|
|||||||
char* eCorrectBuffer = strstr((char*)pTestCase->_correctBuffer[testId],correctExp);
|
char* eCorrectBuffer = strstr((char*)pTestCase->_correctBuffer[testId],correctExp);
|
||||||
if(eCorrectBuffer == NULL)
|
if(eCorrectBuffer == NULL)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
eCorrectBuffer+=2;
|
eCorrectBuffer+=2;
|
||||||
exp += 2;
|
exp += 2;
|
||||||
|
|
||||||
//Exponent always contains at least two digits
|
//Exponent always contains at least two digits
|
||||||
if(strlen(exp) < 2)
|
if(strlen(exp) < 2)
|
||||||
return false;
|
return false;
|
||||||
@@ -878,7 +868,10 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
|
|||||||
return strcmp(eCorrectBuffer,exp);
|
return strcmp(eCorrectBuffer,exp);
|
||||||
}
|
}
|
||||||
if(!strcmp(pTestCase->_correctBuffer[testId],"inf"))
|
if(!strcmp(pTestCase->_correctBuffer[testId],"inf"))
|
||||||
return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity");
|
return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf");
|
||||||
|
if(!strcmp(pTestCase->_correctBuffer[testId],"nan") || !strcmp(pTestCase->_correctBuffer[testId],"-nan")) {
|
||||||
|
return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)")&&strcmp(analysisBuffer,"-nan(ind)");
|
||||||
|
}
|
||||||
return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]);
|
return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
0
test_conformance/run_conformance.py
Normal file → Executable file
0
test_conformance/run_conformance.py
Normal file → Executable file
@@ -5,6 +5,8 @@ add_executable(conformance_test_select
|
|||||||
../../test_common/harness/msvc9.c
|
../../test_common/harness/msvc9.c
|
||||||
../../test_common/harness/kernelHelpers.c
|
../../test_common/harness/kernelHelpers.c
|
||||||
../../test_common/harness/errorHelpers.c
|
../../test_common/harness/errorHelpers.c
|
||||||
|
../../test_common/harness/parseParameters.cpp
|
||||||
|
../../test_common/harness/testHarness.c
|
||||||
)
|
)
|
||||||
|
|
||||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" AND NOT MSVC)
|
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" AND NOT MSVC)
|
||||||
@@ -12,15 +14,14 @@ set_source_files_properties(
|
|||||||
COMPILE_FLAGS -msse2)
|
COMPILE_FLAGS -msse2)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WIN32)
|
|
||||||
set_source_files_properties(
|
set_source_files_properties(
|
||||||
test_select.c
|
test_select.c
|
||||||
util_select.c
|
util_select.c
|
||||||
../../test_common/harness/msvc9.c
|
../../test_common/harness/msvc9.c
|
||||||
../../test_common/harness/kernelHelpers.c
|
../../test_common/harness/kernelHelpers.c
|
||||||
../../test_common/harness/errorHelpers.c
|
../../test_common/harness/errorHelpers.c
|
||||||
|
../../test_common/harness/testHarness.c
|
||||||
PROPERTIES LANGUAGE CXX)
|
PROPERTIES LANGUAGE CXX)
|
||||||
endif(WIN32)
|
|
||||||
|
|
||||||
TARGET_LINK_LIBRARIES(conformance_test_select
|
TARGET_LINK_LIBRARIES(conformance_test_select
|
||||||
${CLConform_LIBRARIES})
|
${CLConform_LIBRARIES})
|
||||||
|
|||||||
@@ -34,14 +34,11 @@
|
|||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include "test_select.h"
|
#include "test_select.h"
|
||||||
|
|
||||||
#if defined(_WIN32)
|
|
||||||
#include "../../test_common/harness/testHarness.h"
|
#include "../../test_common/harness/testHarness.h"
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "../../test_common/harness/kernelHelpers.h"
|
#include "../../test_common/harness/kernelHelpers.h"
|
||||||
#include "../../test_common/harness/mt19937.h"
|
#include "../../test_common/harness/mt19937.h"
|
||||||
cl_uint gRandomSeed = 0;
|
#include "../../test_common/harness/parseParameters.h"
|
||||||
cl_uint gIsEmbedded = 0;
|
|
||||||
|
|
||||||
//-----------------------------------------
|
//-----------------------------------------
|
||||||
// Static functions
|
// Static functions
|
||||||
@@ -79,6 +76,7 @@ static int doTest(cl_command_queue queue, cl_context context,
|
|||||||
// range. Otherwise, we test a subset of the range
|
// range. Otherwise, we test a subset of the range
|
||||||
// [-min_short, min_short]
|
// [-min_short, min_short]
|
||||||
static bool s_wimpy_mode = false;
|
static bool s_wimpy_mode = false;
|
||||||
|
static int s_wimpy_reduction_factor = 256;
|
||||||
|
|
||||||
// Tests are broken into the major test which is based on the
|
// Tests are broken into the major test which is based on the
|
||||||
// src and cmp type and their corresponding vector types and
|
// src and cmp type and their corresponding vector types and
|
||||||
@@ -352,7 +350,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
|
|||||||
|
|
||||||
cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE;
|
cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE;
|
||||||
size_t block_elements = BUFFER_SIZE / type_size[stype];
|
size_t block_elements = BUFFER_SIZE / type_size[stype];
|
||||||
size_t step = s_wimpy_mode ? 256 : 1;
|
size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1;
|
||||||
cl_ulong cmp_stride = block_elements * step;
|
cl_ulong cmp_stride = block_elements * step;
|
||||||
|
|
||||||
// It is more efficient to create the tests all at once since we
|
// It is more efficient to create the tests all at once since we
|
||||||
@@ -519,6 +517,7 @@ static void printUsage( void )
|
|||||||
log_info("test_select: [-cghw] [test_name|start_test_num] \n");
|
log_info("test_select: [-cghw] [test_name|start_test_num] \n");
|
||||||
log_info(" default is to run the full test on the default device\n");
|
log_info(" default is to run the full test on the default device\n");
|
||||||
log_info(" -w run in wimpy mode (smoke test)\n");
|
log_info(" -w run in wimpy mode (smoke test)\n");
|
||||||
|
log_info(" -[2^n] Set wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", s_wimpy_reduction_factor);
|
||||||
log_info(" test_name will run only one test of that name\n");
|
log_info(" test_name will run only one test of that name\n");
|
||||||
log_info(" start_test_num will start running from that num\n");
|
log_info(" start_test_num will start running from that num\n");
|
||||||
}
|
}
|
||||||
@@ -539,6 +538,8 @@ static void printArch( void )
|
|||||||
log_info( "ARCH:\tx86_64\n" );
|
log_info( "ARCH:\tx86_64\n" );
|
||||||
#elif defined( __arm__ )
|
#elif defined( __arm__ )
|
||||||
log_info( "ARCH:\tarm\n" );
|
log_info( "ARCH:\tarm\n" );
|
||||||
|
#elif defined( __aarch64__ )
|
||||||
|
log_info( "ARCH:\taarch64\n" );
|
||||||
#else
|
#else
|
||||||
#error unknown arch
|
#error unknown arch
|
||||||
#endif
|
#endif
|
||||||
@@ -554,12 +555,6 @@ static void printArch( void )
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
|
|
||||||
{
|
|
||||||
log_info( "%s\n", errinfo );
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//-----------------------------------------
|
//-----------------------------------------
|
||||||
// main
|
// main
|
||||||
//-----------------------------------------
|
//-----------------------------------------
|
||||||
@@ -620,6 +615,9 @@ int main(int argc, char* argv[]) {
|
|||||||
case 'w': // Wimpy mode
|
case 'w': // Wimpy mode
|
||||||
s_wimpy_mode = true;
|
s_wimpy_mode = true;
|
||||||
break;
|
break;
|
||||||
|
case '[':
|
||||||
|
parseWimpyReductionFactor(arg, s_wimpy_reduction_factor);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
|
log_error( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
|
||||||
printUsage();
|
printUsage();
|
||||||
@@ -694,6 +692,7 @@ int main(int argc, char* argv[]) {
|
|||||||
log_info("*** WARNING: Testing in Wimpy mode! ***\n");
|
log_info("*** WARNING: Testing in Wimpy mode! ***\n");
|
||||||
log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n");
|
log_info("*** Wimpy mode is not sufficient to verify correctness. ***\n");
|
||||||
log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n");
|
log_info("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n");
|
||||||
|
log_info("*** Wimpy Reduction Factor: %-27u ***\n\n", s_wimpy_reduction_factor);
|
||||||
}
|
}
|
||||||
|
|
||||||
cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL);
|
cl_context context = clCreateContext(NULL, 1, &device_id, notify_callback, NULL, NULL);
|
||||||
|
|||||||
94
test_conformance/spir/CMakeLists.txt
Normal file
94
test_conformance/spir/CMakeLists.txt
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
function (install_spir_artifacts suite_name)
|
||||||
|
install(FILES "${suite_name}.zip" DESTINATION "${CLConf_OUT_DIR}"
|
||||||
|
COMPONENT OpenCLCTS)
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
add_executable(
|
||||||
|
conformance_test_spir
|
||||||
|
main.cpp
|
||||||
|
datagen.cpp
|
||||||
|
run_build_test.cpp
|
||||||
|
run_services.cpp
|
||||||
|
kernelargs.cpp
|
||||||
|
../../test_common/harness/errorHelpers.c
|
||||||
|
../../test_common/harness/kernelHelpers.c
|
||||||
|
../../test_common/harness/mt19937.c
|
||||||
|
../../test_common/harness/msvc9.c
|
||||||
|
../../test_common/harness/os_helpers.cpp
|
||||||
|
../../test_common/harness/testHarness.c
|
||||||
|
../../test_common/miniz/miniz.c)
|
||||||
|
|
||||||
|
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)" AND NOT MSVC)
|
||||||
|
set_source_files_properties(
|
||||||
|
COMPILE_FLAGS -msse2)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(UNIX)
|
||||||
|
set_target_properties(conformance_test_spir PROPERTIES
|
||||||
|
COMPILE_FLAGS "-fexceptions -frtti")
|
||||||
|
elseif(MSVC)
|
||||||
|
set_target_properties(conformance_test_spir PROPERTIES
|
||||||
|
COMPILE_FLAGS "/GR /EHs /EHc")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
TARGET_LINK_LIBRARIES(conformance_test_spir
|
||||||
|
${CLConform_LIBRARIES})
|
||||||
|
|
||||||
|
# Need to copy the spir zips to sit beside the executable
|
||||||
|
add_custom_command(TARGET conformance_test_spir POST_BUILD
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/khr.csv" "$<TARGET_FILE_DIR:conformance_test_spir>/khr.csv"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/api.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/api.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/atomics.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/atomics.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/basic.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/basic.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/compile_and_link.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/compile_and_link.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/commonfns.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/commonfns.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/conversions.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/conversions.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/geometrics.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/geometrics.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/enum_values.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/enum_values.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/half.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/half.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/kernel_attributes.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/kernel_attributes.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/kernel_image_methods.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/kernel_image_methods.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/images_kernel_read_write.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/images_kernel_read_write.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/images_samplerlessRead.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/images_samplerlessRead.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/integer_ops.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/integer_ops.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/math_brute_force.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/math_brute_force.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/printf.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/printf.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/profiling.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/profiling.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/relationals.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/relationals.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/select.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/select.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/sampler_enumeration.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/sampler_enumeration.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/vec_align.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/vec_align.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/vec_step.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/vec_step.zip"
|
||||||
|
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/binary_type.zip" "$<TARGET_FILE_DIR:conformance_test_spir>/binary_type.zip")
|
||||||
|
|
||||||
|
add_dependencies(OpenCLCTS conformance_test_spir)
|
||||||
|
install(TARGETS conformance_test_spir
|
||||||
|
DESTINATION "${CLConf_OUT_DIR}"
|
||||||
|
COMPONENT OpenCLCTS)
|
||||||
|
|
||||||
|
install_spir_artifacts(api)
|
||||||
|
install_spir_artifacts(atomics)
|
||||||
|
install_spir_artifacts(basic)
|
||||||
|
install_spir_artifacts(compile_and_link)
|
||||||
|
install_spir_artifacts(commonfns)
|
||||||
|
install_spir_artifacts(conversions)
|
||||||
|
install_spir_artifacts(geometrics)
|
||||||
|
install_spir_artifacts(enum_values)
|
||||||
|
install_spir_artifacts(half)
|
||||||
|
install_spir_artifacts(kernel_attributes)
|
||||||
|
install_spir_artifacts(kernel_image_methods)
|
||||||
|
install_spir_artifacts(images_kernel_read_write)
|
||||||
|
install_spir_artifacts(images_samplerlessRead)
|
||||||
|
install_spir_artifacts(integer_ops)
|
||||||
|
install_spir_artifacts(math_brute_force)
|
||||||
|
install_spir_artifacts(printf)
|
||||||
|
install_spir_artifacts(profiling)
|
||||||
|
install_spir_artifacts(relationals)
|
||||||
|
install_spir_artifacts(select)
|
||||||
|
install_spir_artifacts(sampler_enumeration)
|
||||||
|
install_spir_artifacts(vec_align)
|
||||||
|
install_spir_artifacts(vec_step)
|
||||||
|
install_spir_artifacts(binary_type)
|
||||||
|
install(FILES "khr.csv" DESTINATION "${CLConf_OUT_DIR}" COMPONENT OpenCLCTS)
|
||||||
|
#Add any other runtime directories you need here.
|
||||||
|
# end of file #
|
||||||
45
test_conformance/spir/Makefile
Normal file
45
test_conformance/spir/Makefile
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
ifdef BUILD_WITH_ATF
|
||||||
|
ATF = -framework ATF
|
||||||
|
USE_ATF = -DUSE_ATF
|
||||||
|
endif
|
||||||
|
|
||||||
|
SRCS = main.cpp datagen.cpp kernelargs.cpp run_build_test.cpp run_services.cpp \
|
||||||
|
../../test_common/miniz/miniz.c \
|
||||||
|
../../test_common/harness/testHarness.c \
|
||||||
|
../../test_common/harness/errorHelpers.c \
|
||||||
|
../../test_common/harness/typeWrappers.cpp \
|
||||||
|
../../test_common/harness/mt19937.c \
|
||||||
|
../../test_common/harness/os_helpers.c \
|
||||||
|
../../test_common/harness/kernelHelpers.c
|
||||||
|
|
||||||
|
SOURCES = $(abspath $(SRCS))
|
||||||
|
|
||||||
|
|
||||||
|
LIBPATH += -L/System/Library/Frameworks/OpenCL.framework/Libraries
|
||||||
|
LIBPATH += -L.
|
||||||
|
|
||||||
|
FRAMEWORK = ${SOURCES}
|
||||||
|
HEADERS =
|
||||||
|
TARGET = test_spir
|
||||||
|
INCLUDE =
|
||||||
|
COMPILERFLAGS = -c -Wall -g -Wshorten-64-to-32
|
||||||
|
#COMPILERFLAGS = -c -Wall -g -DUSE_LOCAL_THREADS
|
||||||
|
CC = c++
|
||||||
|
CFLAGS = $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
|
||||||
|
CXXFLAGS= $(COMPILERFLAGS) $(RC_CFLAGS) ${USE_ATF}
|
||||||
|
LIBRARIES = -framework OpenCL -framework OpenGL -framework GLUT -framework AppKit ${ATF}
|
||||||
|
|
||||||
|
OBJECTS := ${SOURCES:.c=.o}
|
||||||
|
OBJECTS := ${OBJECTS:.cpp=.o}
|
||||||
|
|
||||||
|
TARGETOBJECT =
|
||||||
|
all: $(TARGET)
|
||||||
|
|
||||||
|
$(TARGET): $(OBJECTS)
|
||||||
|
$(CC) $(RC_CFLAGS) $(OBJECTS) -o $@ $(LIBPATH) $(LIBRARIES)
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f $(TARGET) $(OBJECTS)
|
||||||
|
|
||||||
|
.DEFAULT:
|
||||||
|
@echo The target \"$@\" does not exist in Makefile.
|
||||||
BIN
test_conformance/spir/api.zip
Normal file
BIN
test_conformance/spir/api.zip
Normal file
Binary file not shown.
BIN
test_conformance/spir/atomics.zip
Normal file
BIN
test_conformance/spir/atomics.zip
Normal file
Binary file not shown.
BIN
test_conformance/spir/basic.zip
Normal file
BIN
test_conformance/spir/basic.zip
Normal file
Binary file not shown.
BIN
test_conformance/spir/binary_type.zip
Normal file
BIN
test_conformance/spir/binary_type.zip
Normal file
Binary file not shown.
BIN
test_conformance/spir/commonfns.zip
Normal file
BIN
test_conformance/spir/commonfns.zip
Normal file
Binary file not shown.
BIN
test_conformance/spir/compile_and_link.zip
Normal file
BIN
test_conformance/spir/compile_and_link.zip
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user