mirror of
https://github.com/KhronosGroup/OpenCL-CTS.git
synced 2026-03-22 07:09:03 +00:00
Synchronise with Khronos-private Gitlab branch
The maintenance of the conformance tests is moving to Github. This commit contains all the changes that have been done in Gitlab since the first public release of the conformance tests. Signed-off-by: Kevin Petit <kevin.petit@arm.com>
This commit is contained in:
@@ -1,178 +1,178 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "FunctionList.h"
|
||||
#include "reference_math.h"
|
||||
|
||||
#define FTZ_ON 1
|
||||
#define FTZ_OFF 0
|
||||
#define EXACT 0.0f
|
||||
|
||||
#define STRINGIFY( _s) #_s
|
||||
|
||||
// ENTRY: one Func initializer for the built-in _name. The shell name and the
// kernel name are both the stringified _name; the reference functions are
// reference_<_name> and reference_<_name>l. _ulp is used for both float_ulps
// and double_ulps, _embedded_ulp for float_embedded_ulps, _rmode for ftz
// (FTZ_ON / FTZ_OFF) and _type for the vtbl pointer.
#define ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type ) { STRINGIFY(_name), STRINGIFY(_name), {reference_##_name}, {reference_##_name##l}, _ulp, _ulp, _embedded_ulp, _rmode, _type }
|
||||
// HALF_ENTRY: like ENTRY, but both names get a "half_" prefix, the float
// reference is still reference_<_name> (no half_ prefix), and dfunc is {NULL}.
#define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type ) { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {reference_##_name}, {NULL}, _ulp, _ulp, _embedded_ulp, _rmode, _type }
|
||||
// OPERATOR_ENTRY: like ENTRY, but the kernel name (nameInCode) is the string
// literal _operator (e.g. "+") instead of the stringified _name.
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type) { STRINGIFY(_name), _operator, {reference_##_name}, {reference_##_name##l}, _ulp, _ulp, _embedded_ulp, _rmode, _type }
|
||||
|
||||
#if defined( __cplusplus )
|
||||
extern "C" {
|
||||
#endif
|
||||
extern const vtbl _unary; // float foo( float )
|
||||
extern const vtbl _unary_u; // float foo( uint ), double foo( ulong )
|
||||
extern const vtbl _i_unary; // int foo( float )
|
||||
extern const vtbl _macro_unary; // int foo( float ), returns {0,1} for scalar, { 0, -1 } for vector
|
||||
extern const vtbl _binary; // float foo( float, float )
|
||||
extern const vtbl _binary_nextafter; // float foo( float, float ), special handling for nextafter
|
||||
extern const vtbl _binary_operator; // float .op. float
|
||||
extern const vtbl _macro_binary; // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector
|
||||
extern const vtbl _binary_i; // float foo( float, int )
|
||||
extern const vtbl _ternary; // float foo( float, float, float )
|
||||
extern const vtbl _unary_two_results; // float foo( float, float * )
|
||||
extern const vtbl _unary_two_results_i; // float foo( float, int * )
|
||||
extern const vtbl _binary_two_results_i; // float foo( float, float, int * )
|
||||
extern const vtbl _mad_tbl; // float mad( float, float, float )
|
||||
#if defined( __cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#define unaryF &_unary
|
||||
#define i_unaryF &_i_unary
|
||||
#define unaryF_u &_unary_u
|
||||
#define macro_unaryF &_macro_unary
|
||||
#define binaryF &_binary
|
||||
#define binaryF_nextafter &_binary_nextafter
|
||||
#define binaryOperatorF &_binary_operator
|
||||
#define binaryF_i &_binary_i
|
||||
#define macro_binaryF &_macro_binary
|
||||
#define ternaryF &_ternary
|
||||
#define unaryF_two_results &_unary_two_results
|
||||
#define unaryF_two_results_i &_unary_two_results_i
|
||||
#define binaryF_two_results_i &_binary_two_results_i
|
||||
#define mad_function &_mad_tbl
|
||||
|
||||
|
||||
const Func functionList[] = {
|
||||
ENTRY( acos, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( acosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( acospi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( asin, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( asinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atan, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atan2, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( cos, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( cospi, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
// ENTRY( erfc, 16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation
|
||||
// ENTRY( erf, 16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation
|
||||
ENTRY( exp, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( exp2, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( exp10, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( expm1, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( fabs, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( fdim, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( floor, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( fma, 0.0f, 0.0f, FTZ_OFF, ternaryF),
|
||||
ENTRY( fmax, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( fmin, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( fmod, 0.0f, 0.0f, FTZ_OFF, binaryF ),
|
||||
ENTRY( fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY( frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
|
||||
ENTRY( hypot, 4.0f, 4.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
|
||||
ENTRY( isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY( lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF),
|
||||
ENTRY( lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i),
|
||||
ENTRY( log, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( log2, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( log10, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( log1p, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( logb, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( mad, INFINITY, INFINITY, FTZ_OFF, mad_function),
|
||||
ENTRY( maxmag, 0.0f, 0.0f, FTZ_OFF, binaryF ),
|
||||
ENTRY( minmag, 0.0f, 0.0f, FTZ_OFF, binaryF ),
|
||||
ENTRY( modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results ),
|
||||
ENTRY( nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
|
||||
ENTRY( nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
|
||||
ENTRY( pow, 16.0f, 16.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY( powr, 16.0f, 16.0f, FTZ_OFF, binaryF),
|
||||
// ENTRY( reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( remainder, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
|
||||
ENTRY( rint, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY( round, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( sin, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( sincos, 4.0f, 4.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY( sinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( sinpi, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
{ "sqrt", "sqrt", {reference_sqrt}, {reference_sqrtl}, 3.0f, 0.0f, 4.0f, FTZ_OFF, unaryF },
|
||||
{ "sqrt_cr", "sqrt", {reference_sqrt}, {reference_sqrtl}, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF },
|
||||
ENTRY( tan, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( tanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF),
|
||||
// ENTRY( tgamma, 16.0f, 16.0f, FTZ_OFF, unaryF), // Commented this out until we can be sure this requirement is realistic
|
||||
ENTRY( trunc, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
|
||||
HALF_ENTRY( cos, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( divide, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
||||
HALF_ENTRY( exp, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( log, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( log2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( log10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( powr, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
||||
HALF_ENTRY( recip, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( sin, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( tan, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
|
||||
// basic operations
|
||||
OPERATOR_ENTRY( add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY( subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
{ "divide", "/", {reference_divide}, {reference_dividel}, 2.5f, 0.0f, 3.0f, FTZ_OFF, binaryOperatorF },
|
||||
{ "divide_cr", "/", {reference_divide}, {reference_dividel}, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF },
|
||||
OPERATOR_ENTRY( multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY( assignment, "", 0.0f, 0.0f, FTZ_OFF, unaryF), // A simple copy operation
|
||||
OPERATOR_ENTRY( not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
};
|
||||
|
||||
const size_t functionListCount = sizeof( functionList ) / sizeof( functionList[0] );
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "FunctionList.h"
|
||||
#include "reference_math.h"
|
||||
|
||||
#define FTZ_ON 1
|
||||
#define FTZ_OFF 0
|
||||
#define EXACT 0.0f
|
||||
|
||||
#define STRINGIFY( _s) #_s
|
||||
|
||||
// ENTRY: one Func initializer for the built-in _name. The shell name and the
// kernel name are both the stringified _name; the reference functions are
// reference_<_name> and reference_<_name>l. _ulp is used for both float_ulps
// and double_ulps, _embedded_ulp for float_embedded_ulps, _rmode for ftz
// (FTZ_ON / FTZ_OFF) and _type for the vtbl pointer.
#define ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type ) { STRINGIFY(_name), STRINGIFY(_name), {reference_##_name}, {reference_##_name##l}, _ulp, _ulp, _embedded_ulp, _rmode, _type }
|
||||
// HALF_ENTRY: like ENTRY, but both names get a "half_" prefix, the float
// reference is still reference_<_name> (no half_ prefix), and dfunc is {NULL}.
#define HALF_ENTRY( _name, _ulp, _embedded_ulp, _rmode, _type ) { "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), {reference_##_name}, {NULL}, _ulp, _ulp, _embedded_ulp, _rmode, _type }
|
||||
// OPERATOR_ENTRY: like ENTRY, but the kernel name (nameInCode) is the string
// literal _operator (e.g. "+") instead of the stringified _name.
#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type) { STRINGIFY(_name), _operator, {reference_##_name}, {reference_##_name##l}, _ulp, _ulp, _embedded_ulp, _rmode, _type }
|
||||
|
||||
#if defined( __cplusplus )
|
||||
extern "C" {
|
||||
#endif
|
||||
extern const vtbl _unary; // float foo( float )
|
||||
extern const vtbl _unary_u; // float foo( uint ), double foo( ulong )
|
||||
extern const vtbl _i_unary; // int foo( float )
|
||||
extern const vtbl _macro_unary; // int foo( float ), returns {0,1} for scalar, { 0, -1 } for vector
|
||||
extern const vtbl _binary; // float foo( float, float )
|
||||
extern const vtbl _binary_nextafter; // float foo( float, float ), special handling for nextafter
|
||||
extern const vtbl _binary_operator; // float .op. float
|
||||
extern const vtbl _macro_binary; // int foo( float, float ), returns {0,1} for scalar, { 0, -1 } for vector
|
||||
extern const vtbl _binary_i; // float foo( float, int )
|
||||
extern const vtbl _ternary; // float foo( float, float, float )
|
||||
extern const vtbl _unary_two_results; // float foo( float, float * )
|
||||
extern const vtbl _unary_two_results_i; // float foo( float, int * )
|
||||
extern const vtbl _binary_two_results_i; // float foo( float, float, int * )
|
||||
extern const vtbl _mad_tbl; // float mad( float, float, float )
|
||||
#if defined( __cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#define unaryF &_unary
|
||||
#define i_unaryF &_i_unary
|
||||
#define unaryF_u &_unary_u
|
||||
#define macro_unaryF &_macro_unary
|
||||
#define binaryF &_binary
|
||||
#define binaryF_nextafter &_binary_nextafter
|
||||
#define binaryOperatorF &_binary_operator
|
||||
#define binaryF_i &_binary_i
|
||||
#define macro_binaryF &_macro_binary
|
||||
#define ternaryF &_ternary
|
||||
#define unaryF_two_results &_unary_two_results
|
||||
#define unaryF_two_results_i &_unary_two_results_i
|
||||
#define binaryF_two_results_i &_binary_two_results_i
|
||||
#define mad_function &_mad_tbl
|
||||
|
||||
|
||||
const Func functionList[] = {
|
||||
ENTRY( acos, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( acosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( acospi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( asin, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( asinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atan, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( atan2, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( cos, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( cospi, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
// ENTRY( erfc, 16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation
|
||||
// ENTRY( erf, 16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation
|
||||
ENTRY( exp, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( exp2, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( exp10, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( expm1, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( fabs, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( fdim, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( floor, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( fma, 0.0f, 0.0f, FTZ_OFF, ternaryF),
|
||||
ENTRY( fmax, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( fmin, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( fmod, 0.0f, 0.0f, FTZ_OFF, binaryF ),
|
||||
ENTRY( fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY( frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
|
||||
ENTRY( hypot, 4.0f, 4.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
|
||||
ENTRY( isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
|
||||
ENTRY( ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY( lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF),
|
||||
ENTRY( lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i),
|
||||
ENTRY( log, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( log2, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( log10, 3.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( log1p, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( logb, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( mad, INFINITY, INFINITY, FTZ_OFF, mad_function),
|
||||
ENTRY( maxmag, 0.0f, 0.0f, FTZ_OFF, binaryF ),
|
||||
ENTRY( minmag, 0.0f, 0.0f, FTZ_OFF, binaryF ),
|
||||
ENTRY( modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results ),
|
||||
ENTRY( nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
|
||||
ENTRY( nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
|
||||
ENTRY( pow, 16.0f, 16.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY( powr, 16.0f, 16.0f, FTZ_OFF, binaryF),
|
||||
// ENTRY( reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( remainder, 0.0f, 0.0f, FTZ_OFF, binaryF),
|
||||
ENTRY( remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
|
||||
ENTRY( rint, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
|
||||
ENTRY( round, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
ENTRY( sin, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( sincos, 4.0f, 4.0f, FTZ_OFF, unaryF_two_results),
|
||||
ENTRY( sinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( sinpi, 4.0f, 4.0f, FTZ_OFF, unaryF),
|
||||
{ "sqrt", "sqrt", {reference_sqrt}, {reference_sqrtl}, 3.0f, 0.0f, 4.0f, FTZ_OFF, unaryF },
|
||||
{ "sqrt_cr", "sqrt", {reference_sqrt}, {reference_sqrtl}, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF },
|
||||
ENTRY( tan, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( tanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
|
||||
ENTRY( tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF),
|
||||
// ENTRY( tgamma, 16.0f, 16.0f, FTZ_OFF, unaryF), // Commented this out until we can be sure this requirement is realistic
|
||||
ENTRY( trunc, 0.0f, 0.0f, FTZ_OFF, unaryF),
|
||||
|
||||
HALF_ENTRY( cos, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( divide, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
||||
HALF_ENTRY( exp, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( log, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( log2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( log10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( powr, 8192.0f, 8192.0f, FTZ_ON, binaryF),
|
||||
HALF_ENTRY( recip, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( sin, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
HALF_ENTRY( tan, 8192.0f, 8192.0f, FTZ_ON, unaryF),
|
||||
|
||||
// basic operations
|
||||
OPERATOR_ENTRY( add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY( subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
{ "divide", "/", {reference_divide}, {reference_dividel}, 2.5f, 0.0f, 3.0f, FTZ_OFF, binaryOperatorF },
|
||||
{ "divide_cr", "/", {reference_divide}, {reference_dividel}, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF },
|
||||
OPERATOR_ENTRY( multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
|
||||
OPERATOR_ENTRY( assignment, "", 0.0f, 0.0f, FTZ_OFF, unaryF), // A simple copy operation
|
||||
OPERATOR_ENTRY( not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
|
||||
};
|
||||
|
||||
const size_t functionListCount = sizeof( functionList ) / sizeof( functionList[0] );
|
||||
|
||||
|
||||
|
||||
@@ -1,96 +1,96 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef FUNCTIONLIST_H
|
||||
#define FUNCTIONLIST_H
|
||||
|
||||
#include <math.h>
|
||||
#ifndef WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
// Pointer to a reference function, stored in Func.func. Every member is the
// same pointer viewed under a different call signature; the code that calls
// through it must pick the member matching the function actually stored.
// NOTE(review): member names appear to encode the signature (f = floating
// result/argument, i = int, u = unsigned, pf / pI = pointer to floating / int
// output) -- inferred from the declarations below, confirm against the callers.
typedef union fptr
|
||||
{
|
||||
void *p; // untyped view; as first member it is the one set by brace initializers such as {reference_##_name}
|
||||
double (*f_f)(double);
|
||||
double (*f_u)(cl_uint);
|
||||
int (*i_f)(double);
|
||||
int (*i_f_f)(float); // takes float, unlike i_f which takes double
|
||||
float (*f_ff_f)(float, float); // float-typed counterpart of f_ff
|
||||
double (*f_ff)(double, double);
|
||||
int (*i_ff)(double, double);
|
||||
double (*f_fi)(double, int);
|
||||
double (*f_fpf)(double, double*); // second result is written through the pointer
|
||||
double (*f_fpI)(double, int*); // second result is written through the pointer
|
||||
double (*f_ffpI)(double, double, int*);
|
||||
double (*f_fff)(double, double, double );
|
||||
float (*f_fma)(float, float, float, int); // NOTE(review): meaning of the trailing int is not visible here -- confirm
|
||||
}fptr;
|
||||
|
||||
// long double counterpart of fptr, stored in Func.dfunc. The ENTRY and
// OPERATOR_ENTRY macros initialize it with reference_<name>l; HALF_ENTRY
// initializes it with {NULL}. Member names follow the same scheme as fptr.
typedef union dptr
|
||||
{
|
||||
void *p; // untyped view; as first member it is the one set by brace initializers
|
||||
long double (*f_f)(long double);
|
||||
long double (*f_u)(cl_ulong);
|
||||
int (*i_f)(long double);
|
||||
long double (*f_ff)(long double, long double);
|
||||
int (*i_ff)(long double, long double);
|
||||
long double (*f_fi)(long double, int);
|
||||
long double (*f_fpf)(long double, long double*); // second result is written through the pointer
|
||||
long double (*f_fpI)(long double, int*); // second result is written through the pointer
|
||||
long double (*f_ffpI)(long double, long double, int*);
|
||||
long double (*f_fff)(long double, long double, long double);
|
||||
}dptr;
|
||||
|
||||
struct Func;
|
||||
|
||||
// Table of test entry points shared by all functions with one kind of
// signature. Instances (_unary, _binary, _ternary, ...) are declared extern in
// FunctionList.c and referenced from each Func through its vtbl pointer.
typedef struct vtbl
|
||||
{
|
||||
const char *type_name; // NOTE(review): presumably a printable name for this signature family -- confirm
|
||||
int (*TestFunc)( const struct Func *, MTdata ); // single precision test; receives the Func under test and an MTdata (declared in harness/mt19937.h)
|
||||
int (*DoubleTestFunc)( const struct Func *, MTdata); // may be NULL if function is single precision only
|
||||
}vtbl;
|
||||
|
||||
// One row of functionList: a function (or operator) under test, its reference
// implementations, its ULP values and the test table that exercises it.
// Field order matters: the ENTRY / HALF_ENTRY / OPERATOR_ENTRY macros and the
// hand-written rows in FunctionList.c initialize this struct positionally.
typedef struct Func
|
||||
{
|
||||
const char *name; // common name, to be used as an argument in the shell
|
||||
const char *nameInCode; // name as it appears in the __kernel; usually the same as name, but the operator string for operator entries (e.g. "*" for multiply) and "sqrt" / "/" for sqrt_cr / divide_cr
|
||||
fptr func; // reference function used for the single precision test
|
||||
dptr dfunc; // long double reference function; {NULL} for the half_ entries
|
||||
float float_ulps; // NOTE(review): presumably the allowed single precision error in ULPs; INFINITY is used for lgamma, lgamma_r and mad -- confirm
|
||||
float double_ulps; // double precision counterpart (stored as float); the ENTRY macros set it equal to float_ulps
|
||||
float float_embedded_ulps; // NOTE(review): presumably the single precision value used for the embedded profile -- confirm
|
||||
int ftz; // FTZ_ON (1) or FTZ_OFF (0) as defined in FunctionList.c
|
||||
const vtbl *vtbl; // test entry points for this function's signature family
|
||||
}Func;
|
||||
|
||||
|
||||
extern const Func functionList[];
|
||||
|
||||
extern const size_t functionListCount;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef FUNCTIONLIST_H
|
||||
#define FUNCTIONLIST_H
|
||||
|
||||
#include <math.h>
|
||||
#ifndef WIN32
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include "../../test_common/harness/mt19937.h"
|
||||
|
||||
// Pointer to a reference function, stored in Func.func. Every member is the
// same pointer viewed under a different call signature; the code that calls
// through it must pick the member matching the function actually stored.
// NOTE(review): member names appear to encode the signature (f = floating
// result/argument, i = int, u = unsigned, pf / pI = pointer to floating / int
// output) -- inferred from the declarations below, confirm against the callers.
typedef union fptr
|
||||
{
|
||||
void *p; // untyped view; as first member it is the one set by brace initializers such as {reference_##_name}
|
||||
double (*f_f)(double);
|
||||
double (*f_u)(cl_uint);
|
||||
int (*i_f)(double);
|
||||
int (*i_f_f)(float); // takes float, unlike i_f which takes double
|
||||
float (*f_ff_f)(float, float); // float-typed counterpart of f_ff
|
||||
double (*f_ff)(double, double);
|
||||
int (*i_ff)(double, double);
|
||||
double (*f_fi)(double, int);
|
||||
double (*f_fpf)(double, double*); // second result is written through the pointer
|
||||
double (*f_fpI)(double, int*); // second result is written through the pointer
|
||||
double (*f_ffpI)(double, double, int*);
|
||||
double (*f_fff)(double, double, double );
|
||||
float (*f_fma)(float, float, float, int); // NOTE(review): meaning of the trailing int is not visible here -- confirm
|
||||
}fptr;
|
||||
|
||||
// long double counterpart of fptr, stored in Func.dfunc. The ENTRY and
// OPERATOR_ENTRY macros initialize it with reference_<name>l; HALF_ENTRY
// initializes it with {NULL}. Member names follow the same scheme as fptr.
typedef union dptr
|
||||
{
|
||||
void *p; // untyped view; as first member it is the one set by brace initializers
|
||||
long double (*f_f)(long double);
|
||||
long double (*f_u)(cl_ulong);
|
||||
int (*i_f)(long double);
|
||||
long double (*f_ff)(long double, long double);
|
||||
int (*i_ff)(long double, long double);
|
||||
long double (*f_fi)(long double, int);
|
||||
long double (*f_fpf)(long double, long double*); // second result is written through the pointer
|
||||
long double (*f_fpI)(long double, int*); // second result is written through the pointer
|
||||
long double (*f_ffpI)(long double, long double, int*);
|
||||
long double (*f_fff)(long double, long double, long double);
|
||||
}dptr;
|
||||
|
||||
struct Func;
|
||||
|
||||
// Table of test entry points shared by all functions with one kind of
// signature. Instances (_unary, _binary, _ternary, ...) are declared extern in
// FunctionList.c and referenced from each Func through its vtbl pointer.
typedef struct vtbl
|
||||
{
|
||||
const char *type_name; // NOTE(review): presumably a printable name for this signature family -- confirm
|
||||
int (*TestFunc)( const struct Func *, MTdata ); // single precision test; receives the Func under test and an MTdata (declared in harness/mt19937.h)
|
||||
int (*DoubleTestFunc)( const struct Func *, MTdata); // may be NULL if function is single precision only
|
||||
}vtbl;
|
||||
|
||||
// One row of functionList: a function (or operator) under test, its reference
// implementations, its ULP values and the test table that exercises it.
// Field order matters: the ENTRY / HALF_ENTRY / OPERATOR_ENTRY macros and the
// hand-written rows in FunctionList.c initialize this struct positionally.
typedef struct Func
|
||||
{
|
||||
const char *name; // common name, to be used as an argument in the shell
|
||||
const char *nameInCode; // name as it appears in the __kernel; usually the same as name, but the operator string for operator entries (e.g. "*" for multiply) and "sqrt" / "/" for sqrt_cr / divide_cr
|
||||
fptr func; // reference function used for the single precision test
|
||||
dptr dfunc; // long double reference function; {NULL} for the half_ entries
|
||||
float float_ulps; // NOTE(review): presumably the allowed single precision error in ULPs; INFINITY is used for lgamma, lgamma_r and mad -- confirm
|
||||
float double_ulps; // double precision counterpart (stored as float); the ENTRY macros set it equal to float_ulps
|
||||
float float_embedded_ulps; // NOTE(review): presumably the single precision value used for the embedded profile -- confirm
|
||||
int ftz; // FTZ_ON (1) or FTZ_OFF (0) as defined in FunctionList.c
|
||||
const vtbl *vtbl; // test entry points for this function's signature family
|
||||
}Func;
|
||||
|
||||
|
||||
extern const Func functionList[];
|
||||
|
||||
extern const size_t functionListCount;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@@ -1,36 +1,36 @@
|
||||
project
|
||||
: requirements
|
||||
-<library>/harness//harness <use>/harness//harness
|
||||
<library>/Runtime//OpenCL.lib
|
||||
# <toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
exe bruteforce
|
||||
: binary.c
|
||||
binary_i.c
|
||||
binaryOperator.c
|
||||
binary_two_results_i.c
|
||||
FunctionList.c
|
||||
i_unary.c
|
||||
macro_binary.c
|
||||
macro_unary.c
|
||||
mad.c
|
||||
main.c
|
||||
reference_math.c
|
||||
Sleep.c
|
||||
ternary.c
|
||||
unary.c
|
||||
unary_two_results.c
|
||||
unary_two_results_i.c
|
||||
unary_u.c
|
||||
Utility.c
|
||||
/harness//mt19937.c
|
||||
: <target-os>windows:<source>/harness//msvc9.c
|
||||
;
|
||||
|
||||
install dist
|
||||
: bruteforce
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/math_brute_force
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/math_brute_force
|
||||
;
|
||||
project
|
||||
: requirements
|
||||
-<library>/harness//harness <use>/harness//harness
|
||||
<library>/Runtime//OpenCL.lib
|
||||
# <toolset>gcc:<cflags>-xc++
|
||||
<toolset>msvc:<cflags>"/TP"
|
||||
;
|
||||
|
||||
exe bruteforce
|
||||
: binary.c
|
||||
binary_i.c
|
||||
binaryOperator.c
|
||||
binary_two_results_i.c
|
||||
FunctionList.c
|
||||
i_unary.c
|
||||
macro_binary.c
|
||||
macro_unary.c
|
||||
mad.c
|
||||
main.c
|
||||
reference_math.c
|
||||
Sleep.c
|
||||
ternary.c
|
||||
unary.c
|
||||
unary_two_results.c
|
||||
unary_two_results_i.c
|
||||
unary_u.c
|
||||
Utility.c
|
||||
/harness//mt19937.c
|
||||
: <target-os>windows:<source>/harness//msvc9.c
|
||||
;
|
||||
|
||||
install dist
|
||||
: bruteforce
|
||||
: <variant>debug:<location>$(DIST)/debug/tests/test_conformance/math_brute_force
|
||||
<variant>release:<location>$(DIST)/release/tests/test_conformance/math_brute_force
|
||||
;
|
||||
|
||||
@@ -1,33 +1,33 @@
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
CC = cc
|
||||
CFLAGS = -g -Wall -Wshorten-64-to-32 $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF}
|
||||
LIBRARIES = -framework OpenCL -framework ApplicationServices -framework IOKit -I/System/Library/Frameworks/OpenCL.framework/Headers ${RC_CFLAGS} ${ATF}
|
||||
|
||||
release:
|
||||
echo "Build Release"
|
||||
$(CC) *.c ../../test_common/harness/mt19937.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/ThreadPool.c -Os $(CFLAGS) -o bruteforce $(LIBRARIES)
|
||||
|
||||
debug:
|
||||
echo "Build Debug"
|
||||
$(CC) *.c ../../test_common/harness/mt19937.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/ThreadPool.c -O0 $(CFLAGS) -D_DEBUG=1 -o bruteforce_debug $(LIBRARIES)
|
||||
|
||||
test: release
|
||||
arch -i386 ./bruteforce -c > cpu.log &
|
||||
arch -i386 ./bruteforce -g > gpu.log &
|
||||
echo "Testing 32-bit mode in progress. This may take up to 1 day to complete. See cpu.log and gpu.log for results."
|
||||
|
||||
test64: release
|
||||
arch -x86_64 ./bruteforce -c > cpu64.log &
|
||||
arch -x86_64 ./bruteforce -g > gpu64.log &
|
||||
echo "Testing 64-bit mode in progress. This may take up to 1 day to complete. See cpu64.log and gpu64.log for results."
|
||||
|
||||
|
||||
clean:
|
||||
rm -f ./bruteforce_debug
|
||||
rm -f ./bruteforce
|
||||
|
||||
all: release
|
||||
ifdef BUILD_WITH_ATF
|
||||
ATF = -framework ATF
|
||||
USE_ATF = -DUSE_ATF
|
||||
endif
|
||||
|
||||
CC = cc
|
||||
CFLAGS = -g -Wall -Wshorten-64-to-32 $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF}
|
||||
LIBRARIES = -framework OpenCL -framework ApplicationServices -framework IOKit -I/System/Library/Frameworks/OpenCL.framework/Headers ${RC_CFLAGS} ${ATF}
|
||||
|
||||
release:
|
||||
echo "Build Release"
|
||||
$(CC) *.c ../../test_common/harness/mt19937.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/ThreadPool.c -Os $(CFLAGS) -o bruteforce $(LIBRARIES)
|
||||
|
||||
debug:
|
||||
echo "Build Debug"
|
||||
$(CC) *.c ../../test_common/harness/mt19937.c ../../test_common/harness/rounding_mode.c ../../test_common/harness/ThreadPool.c -O0 $(CFLAGS) -D_DEBUG=1 -o bruteforce_debug $(LIBRARIES)
|
||||
|
||||
test: release
|
||||
arch -i386 ./bruteforce -c > cpu.log &
|
||||
arch -i386 ./bruteforce -g > gpu.log &
|
||||
echo "Testing 32-bit mode in progress. This may take up to 1 day to complete. See cpu.log and gpu.log for results."
|
||||
|
||||
test64: release
|
||||
arch -x86_64 ./bruteforce -c > cpu64.log &
|
||||
arch -x86_64 ./bruteforce -g > gpu64.log &
|
||||
echo "Testing 64-bit mode in progress. This may take up to 1 day to complete. See cpu64.log and gpu64.log for results."
|
||||
|
||||
|
||||
clean:
|
||||
rm -f ./bruteforce_debug
|
||||
rm -f ./bruteforce
|
||||
|
||||
all: release
|
||||
|
||||
@@ -1,150 +1,150 @@
|
||||
Copyright: (c) 2009-2011 by Apple Inc. All Rights Reserved.
|
||||
|
||||
math_brute_force test Feb 24, 2009
|
||||
=====================
|
||||
|
||||
Usage:
|
||||
|
||||
Please run the executable with --help for usage information.
|
||||
|
||||
|
||||
|
||||
System Requirements:
|
||||
|
||||
This test requires support for correctly rounded single and double precision arithmetic.
|
||||
The current version also requires a reasonably accurate operating system math library to
|
||||
be present. The OpenCL implementation must be able to compile kernels online. The test assumes
|
||||
that the host system stores its floating point data according to the IEEE-754 binary single and
|
||||
double precision floating point formats.
|
||||
|
||||
|
||||
Test Completion Time:
|
||||
|
||||
This test takes a while. Modern desktop systems can usually finish it in 1-3
|
||||
days. Engineers doing OpenCL math library software development may find wimpy mode (-w)
|
||||
a useful screen to quickly look for problems in a new implementation, before committing
|
||||
to a lengthy test run. Likewise, it is possible to run just a range of tests, or specific
|
||||
tests. See Usage above.
|
||||
|
||||
|
||||
Test Design:
|
||||
|
||||
This test is designed to do a somewhat exhaustive examination of the single
|
||||
and double precision math library functions in OpenCL, for all vector lengths. Math
|
||||
library functions are compared against results from a higher precision reference
|
||||
function to determine correctness. All possible inputs are examined for unary
|
||||
single precision functions. Other functions are tested against a table of difficult
|
||||
values, followed by a few billion random values. If an error is found in a function,
|
||||
the test for that function terminates early, reports an error, and moves on to the
|
||||
next test, if any.
|
||||
|
||||
The test currently doesn't support half precision math functions covered in section
|
||||
9 of the OpenCL 1.0 specification, but does cover the half_func functions covered in
|
||||
section six. It also doesn't test the native_<funcname> functions, for which any result
|
||||
is conformant.
|
||||
|
||||
For the OpenCL 1.0 time frame, the reference library shall be the operating system
|
||||
math library, as modified by the test itself to conform to the OpenCL specification.
|
||||
That will help ensure that all devices on a particular operating system are returning
|
||||
similar results. Going forward to future OpenCL releases, it is planned to gradually
|
||||
introduce a reference math library directly into the test, so as to reduce inter-
|
||||
platform variance between OpenCL implementations.
|
||||
|
||||
Generally speaking, this test will consider a result correct if it is one of the following:
|
||||
|
||||
1) bitwise identical to the output of the reference function,
|
||||
rounded to the appropriate precision
|
||||
|
||||
2) within the allowed ulp error tolerance of the infinitely precise
|
||||
result (as estimated by the reference function)
|
||||
|
||||
3) If the reference result is a NaN, then any NaN is deemed correct.
|
||||
|
||||
4) if the device is running in FTZ mode, then the result is also correct
|
||||
if the infinitely precise result (as estimated by the reference
|
||||
function) is subnormal, and the returned result is a zero
|
||||
|
||||
5) if the device is running in FTZ mode, then we also calculate the
|
||||
estimate of the infinitely precise result with the reference function
|
||||
with subnormal inputs flushed to +- zero. If any of those results
|
||||
are within the error tolerance of the returned result, then it is
|
||||
deemed correct
|
||||
|
||||
6) half_func functions may flush per 4&5 above, even if the device is not
|
||||
in FTZ mode.
|
||||
|
||||
7) Functions are allowed to prematurely overflow to infinity, so long as
|
||||
the estimated infinitely precise result is within the stated ulp
|
||||
error limit of the maximum finite representable value of appropriate
|
||||
sign
|
||||
|
||||
8) Functions are allowed to prematurely underflow (and if in FTZ mode,
|
||||
have behavior covered by 4&5 above), so long as the estimated
|
||||
infinitely precise result is within the stated ulp error limit
|
||||
of the minimum normal representable value of appropriate sign
|
||||
|
||||
9) Some functions have limited range. Results of inputs outside that range
|
||||
are considered correct, so long as a result is returned.
|
||||
|
||||
10) Some functions have infinite error bounds. Results of these functions
|
||||
are considered correct, so long as a result is returned.
|
||||
|
||||
11) The test currently does not discriminate based on the sign of zero
|
||||
We anticipate a later test will.
|
||||
|
||||
12) The test currently does not check to make sure that edge cases called
|
||||
out in the standard (e.g. pow(1.0, any) = 1.0) are exactly correct.
|
||||
We anticipate a later test will.
|
||||
|
||||
13) The test doesn't check IEEE flags or exceptions. See section 7.3 of the
|
||||
OpenCL standard.
|
||||
|
||||
|
||||
|
||||
Performance Measurement:
|
||||
|
||||
There is also some optional timing code available, currently turned off by default.
|
||||
This code may be useful for tracking internal performance regressions, but is not required to
|
||||
be part of the conformance submission.
|
||||
|
||||
|
||||
If the test is believed to be in error:
|
||||
|
||||
The above correctness heuristics shall not be construed to be an alternative to the correctness
|
||||
criteria established by the OpenCL standard. An implementation shall be judged correct
|
||||
or not on appeal based on whether it is within prescribed error bounds of the infinitely
|
||||
precise result. (The ulp is defined in section 7.4 of the spec.) If the input value corresponds
|
||||
to an edge case listed in OpenCL specification sections covering edge case behavior, or
|
||||
similar sections in the C99 TC2 standard (section F.9 and G.6), then the function shall return
|
||||
exactly that result, and the sign of a zero result shall be correct. In the event that the test
|
||||
is found to be faulty, resulting in a spurious failure result, the committee shall make a reasonable
|
||||
attempt to fix the test. If no practical and timely remedy can be found, then the implementation
|
||||
shall be granted a waiver.
|
||||
|
||||
|
||||
Guidelines for reference function error tolerances:
|
||||
|
||||
Errors are measured in ulps, and stored in a single precision representation. So as
|
||||
to avoid introducing error into the error measurement due to error in the reference function
|
||||
itself, the reference function should attempt to deliver 24 bits more precision than the test
|
||||
function return type. (All functions are currently either required to be correctly rounded or
|
||||
may have >= 1 ulp of error. This places the 1's bit at the LSB of the result, with 23 bits of
|
||||
sub-ulp accuracy. One more bit is required to avoid accrual of extra error due to round-to-
|
||||
nearest behavior. If we start to require sub-ulp precision, then the accuracy requirements
|
||||
for reference functions increase.) Therefore reference functions for single precision should
|
||||
have 24+24=48 bits of accuracy, and reference functions for double precision should ideally
|
||||
have 53+24 = 77 bits of accuracy.
|
||||
|
||||
A double precision system math library function should be sufficient to safely verify a single
|
||||
precision OpenCL math library function. A long double precision math library function may or
|
||||
may not be sufficient to verify a double precision OpenCL math library function, depending on
|
||||
the precision of the long double type. A later version of these tests is expected to replace
|
||||
long double with a head+tail double double representation that can represent sufficient precision,
|
||||
on all platforms that support double.
|
||||
|
||||
|
||||
Revision history:
|
||||
|
||||
Feb 24, 2009 IRO Created README
|
||||
Added some reference functions so the test will run on Windows.
|
||||
|
||||
Copyright: (c) 2009-2011 by Apple Inc. All Rights Reserved.
|
||||
|
||||
math_brute_force test Feb 24, 2009
|
||||
=====================
|
||||
|
||||
Usage:
|
||||
|
||||
Please run the executable with --help for usage information.
|
||||
|
||||
|
||||
|
||||
System Requirements:
|
||||
|
||||
This test requires support for correctly rounded single and double precision arithmetic.
|
||||
The current version also requires a reasonably accurate operating system math library to
|
||||
be present. The OpenCL implementation must be able to compile kernels online. The test assumes
|
||||
that the host system stores its floating point data according to the IEEE-754 binary single and
|
||||
double precision floating point formats.
|
||||
|
||||
|
||||
Test Completion Time:
|
||||
|
||||
This test takes a while. Modern desktop systems can usually finish it in 1-3
|
||||
days. Engineers doing OpenCL math library software development may find wimpy mode (-w)
|
||||
a useful screen to quickly look for problems in a new implementation, before committing
|
||||
to a lengthy test run. Likewise, it is possible to run just a range of tests, or specific
|
||||
tests. See Usage above.
|
||||
|
||||
|
||||
Test Design:
|
||||
|
||||
This test is designed to do a somewhat exhaustive examination of the single
|
||||
and double precision math library functions in OpenCL, for all vector lengths. Math
|
||||
library functions are compared against results from a higher precision reference
|
||||
function to determine correctness. All possible inputs are examined for unary
|
||||
single precision functions. Other functions are tested against a table of difficult
|
||||
values, followed by a few billion random values. If an error is found in a function,
|
||||
the test for that function terminates early, reports an error, and moves on to the
|
||||
next test, if any.
|
||||
|
||||
The test currently doesn't support half precision math functions covered in section
|
||||
9 of the OpenCL 1.0 specification, but does cover the half_func functions covered in
|
||||
section six. It also doesn't test the native_<funcname> functions, for which any result
|
||||
is conformant.
|
||||
|
||||
For the OpenCL 1.0 time frame, the reference library shall be the operating system
|
||||
math library, as modified by the test itself to conform to the OpenCL specification.
|
||||
That will help ensure that all devices on a particular operating system are returning
|
||||
similar results. Going forward to future OpenCL releases, it is planned to gradually
|
||||
introduce a reference math library directly into the test, so as to reduce inter-
|
||||
platform variance between OpenCL implementations.
|
||||
|
||||
Generally speaking, this test will consider a result correct if it is one of the following:
|
||||
|
||||
1) bitwise identical to the output of the reference function,
|
||||
rounded to the appropriate precision
|
||||
|
||||
2) within the allowed ulp error tolerance of the infinitely precise
|
||||
result (as estimated by the reference function)
|
||||
|
||||
3) If the reference result is a NaN, then any NaN is deemed correct.
|
||||
|
||||
4) if the device is running in FTZ mode, then the result is also correct
|
||||
if the infinitely precise result (as estimated by the reference
|
||||
function) is subnormal, and the returned result is a zero
|
||||
|
||||
5) if the device is running in FTZ mode, then we also calculate the
|
||||
estimate of the infinitely precise result with the reference function
|
||||
with subnormal inputs flushed to +- zero. If any of those results
|
||||
are within the error tolerance of the returned result, then it is
|
||||
deemed correct
|
||||
|
||||
6) half_func functions may flush per 4&5 above, even if the device is not
|
||||
in FTZ mode.
|
||||
|
||||
7) Functions are allowed to prematurely overflow to infinity, so long as
|
||||
the estimated infinitely precise result is within the stated ulp
|
||||
error limit of the maximum finite representable value of appropriate
|
||||
sign
|
||||
|
||||
8) Functions are allowed to prematurely underflow (and if in FTZ mode,
|
||||
have behavior covered by 4&5 above), so long as the estimated
|
||||
infinitely precise result is within the stated ulp error limit
|
||||
of the minimum normal representable value of appropriate sign
|
||||
|
||||
9) Some functions have limited range. Results of inputs outside that range
|
||||
are considered correct, so long as a result is returned.
|
||||
|
||||
10) Some functions have infinite error bounds. Results of these functions
|
||||
are considered correct, so long as a result is returned.
|
||||
|
||||
11) The test currently does not discriminate based on the sign of zero
|
||||
We anticipate a later test will.
|
||||
|
||||
12) The test currently does not check to make sure that edge cases called
|
||||
out in the standard (e.g. pow(1.0, any) = 1.0) are exactly correct.
|
||||
We anticipate a later test will.
|
||||
|
||||
13) The test doesn't check IEEE flags or exceptions. See section 7.3 of the
|
||||
OpenCL standard.
|
||||
|
||||
|
||||
|
||||
Performance Measurement:
|
||||
|
||||
There is also some optional timing code available, currently turned off by default.
|
||||
This code may be useful for tracking internal performance regressions, but is not required to
|
||||
be part of the conformance submission.
|
||||
|
||||
|
||||
If the test is believed to be in error:
|
||||
|
||||
The above correctness heuristics shall not be construed to be an alternative to the correctness
|
||||
criteria established by the OpenCL standard. An implementation shall be judged correct
|
||||
or not on appeal based on whether it is within prescribed error bounds of the infinitely
|
||||
precise result. (The ulp is defined in section 7.4 of the spec.) If the input value corresponds
|
||||
to an edge case listed in OpenCL specification sections covering edge case behavior, or
|
||||
similar sections in the C99 TC2 standard (section F.9 and G.6), then the function shall return
|
||||
exactly that result, and the sign of a zero result shall be correct. In the event that the test
|
||||
is found to be faulty, resulting in a spurious failure result, the committee shall make a reasonable
|
||||
attempt to fix the test. If no practical and timely remedy can be found, then the implementation
|
||||
shall be granted a waiver.
|
||||
|
||||
|
||||
Guidelines for reference function error tolerances:
|
||||
|
||||
Errors are measured in ulps, and stored in a single precision representation. So as
|
||||
to avoid introducing error into the error measurement due to error in the reference function
|
||||
itself, the reference function should attempt to deliver 24 bits more precision than the test
|
||||
function return type. (All functions are currently either required to be correctly rounded or
|
||||
may have >= 1 ulp of error. This places the 1's bit at the LSB of the result, with 23 bits of
|
||||
sub-ulp accuracy. One more bit is required to avoid accrual of extra error due to round-to-
|
||||
nearest behavior. If we start to require sub-ulp precision, then the accuracy requirements
|
||||
for reference functions increase.) Therefore reference functions for single precision should
|
||||
have 24+24=48 bits of accuracy, and reference functions for double precision should ideally
|
||||
have 53+24 = 77 bits of accuracy.
|
||||
|
||||
A double precision system math library function should be sufficient to safely verify a single
|
||||
precision OpenCL math library function. A long double precision math library function may or
|
||||
may not be sufficient to verify a double precision OpenCL math library function, depending on
|
||||
the precision of the long double type. A later version of these tests is expected to replace
|
||||
long double with a head+tail double double representation that can represent sufficient precision,
|
||||
on all platforms that support double.
|
||||
|
||||
|
||||
Revision history:
|
||||
|
||||
Feb 24, 2009 IRO Created README
|
||||
Added some reference functions so the test will run on Windows.
|
||||
|
||||
|
||||
@@ -1,118 +1,118 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "Sleep.h"
|
||||
#include "Utility.h"
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <IOKit/pwr_mgt/IOPMLib.h>
|
||||
#include <IOKit/IOMessage.h>
|
||||
|
||||
struct
|
||||
{
|
||||
io_connect_t connection;
|
||||
IONotificationPortRef port;
|
||||
io_object_t iterator;
|
||||
}sleepInfo;
|
||||
|
||||
void sleepCallback( void * refcon,
|
||||
io_service_t service,
|
||||
natural_t messageType,
|
||||
void * messageArgument );
|
||||
|
||||
void sleepCallback( void * refcon UNUSED,
|
||||
io_service_t service UNUSED,
|
||||
natural_t messageType,
|
||||
void * messageArgument )
|
||||
{
|
||||
|
||||
IOReturn result;
|
||||
/*
|
||||
service -- The IOService whose state has changed.
|
||||
messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family.
|
||||
messageArgument -- An argument for the message, dependent on the messageType.
|
||||
*/
|
||||
switch ( messageType )
|
||||
{
|
||||
case kIOMessageSystemWillSleep:
|
||||
// Handle demand sleep (such as sleep caused by running out of
|
||||
// batteries, closing the lid of a laptop, or selecting
|
||||
// sleep from the Apple menu.
|
||||
IOAllowPowerChange(sleepInfo.connection,(long)messageArgument);
|
||||
vlog( "Hard sleep occurred.\n" );
|
||||
break;
|
||||
case kIOMessageCanSystemSleep:
|
||||
// In this case, the computer has been idle for several minutes
|
||||
// and will sleep soon so you must either allow or cancel
|
||||
// this notification. Important: if you don’t respond, there will
|
||||
// be a 30-second timeout before the computer sleeps.
|
||||
// IOCancelPowerChange(root_port,(long)messageArgument);
|
||||
result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument);
|
||||
if( kIOReturnSuccess != result )
|
||||
vlog( "sleep prevention failed. (%d)\n", result);
|
||||
break;
|
||||
case kIOMessageSystemHasPoweredOn:
|
||||
// Handle wakeup.
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Registers for system power notifications so that sleepCallback() can veto idle
// sleep while the test is running.
//
// On Apple platforms the registration state is stored in sleepInfo and the
// notification port's run-loop source is added to the current run loop. If the
// registration fails, "failed." is logged and nothing is attached to the run loop.
// On other platforms only a "not implemented" message is logged.
void PreventSleep( void )
{
#if defined( __APPLE__ )
    vlog( "Disabling sleep... " );
    sleepInfo.iterator = (io_object_t) 0;
    sleepInfo.port = NULL;
    sleepInfo.connection = IORegisterForSystemPower
                            (
                                &sleepInfo,             // void * refcon
                                &sleepInfo.port,        // IONotificationPortRef * thePortRef
                                sleepCallback,          // IOServiceInterestCallback callback
                                &sleepInfo.iterator     // io_object_t * notifier
                            );

    if( (io_connect_t) 0 == sleepInfo.connection )
    {
        // Registration failed, so sleepInfo.port may still be NULL. Asking for its
        // run-loop source would dereference an invalid port, so stop here.
        vlog( "failed.\n" );
        return;
    }

    vlog( "done.\n" );

    CFRunLoopAddSource( CFRunLoopGetCurrent(),
                        IONotificationPortGetRunLoopSource( sleepInfo.port ),
                        kCFRunLoopDefaultMode );
#else
    vlog( "*** PreventSleep() is not implemented on this platform.\n" );
#endif
}
|
||||
|
||||
// Undoes PreventSleep(): deregisters the power notification and releases the
// connection and notification port that IORegisterForSystemPower() handed out.
//
// The outcome of the deregistration is logged. On non-Apple platforms only a
// "not implemented" message is logged.
void ResumeSleep( void )
{
#if defined( __APPLE__ )
    IOReturn result = IODeregisterForSystemPower ( &sleepInfo.iterator );
    if( 0 != result )
        vlog( "Got error %d restoring sleep \n", result );
    else
        vlog( "Sleep restored.\n" );

    // IORegisterForSystemPower() implicitly opened a connection to the root power
    // domain; close it so the connection is not leaked.
    if( (io_connect_t) 0 != sleepInfo.connection )
    {
        IOServiceClose( sleepInfo.connection );
        sleepInfo.connection = (io_connect_t) 0;
    }

    // Destroy the notification port allocated by the registration. The fields are
    // reset so that a repeated call does not release the same objects twice.
    if( NULL != sleepInfo.port )
    {
        IONotificationPortDestroy( sleepInfo.port );
        sleepInfo.port = NULL;
    }
#else
    vlog( "*** ResumeSleep() is not implemented on this platform.\n" );
#endif
}
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "Sleep.h"
|
||||
#include "Utility.h"
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <IOKit/pwr_mgt/IOPMLib.h>
|
||||
#include <IOKit/IOMessage.h>
|
||||
|
||||
struct
|
||||
{
|
||||
io_connect_t connection;
|
||||
IONotificationPortRef port;
|
||||
io_object_t iterator;
|
||||
}sleepInfo;
|
||||
|
||||
void sleepCallback( void * refcon,
|
||||
io_service_t service,
|
||||
natural_t messageType,
|
||||
void * messageArgument );
|
||||
|
||||
void sleepCallback( void * refcon UNUSED,
|
||||
io_service_t service UNUSED,
|
||||
natural_t messageType,
|
||||
void * messageArgument )
|
||||
{
|
||||
|
||||
IOReturn result;
|
||||
/*
|
||||
service -- The IOService whose state has changed.
|
||||
messageType -- A messageType enum, defined by IOKit/IOMessage.h or by the IOService's family.
|
||||
messageArgument -- An argument for the message, dependent on the messageType.
|
||||
*/
|
||||
switch ( messageType )
|
||||
{
|
||||
case kIOMessageSystemWillSleep:
|
||||
// Handle demand sleep (such as sleep caused by running out of
|
||||
// batteries, closing the lid of a laptop, or selecting
|
||||
// sleep from the Apple menu.
|
||||
IOAllowPowerChange(sleepInfo.connection,(long)messageArgument);
|
||||
vlog( "Hard sleep occurred.\n" );
|
||||
break;
|
||||
case kIOMessageCanSystemSleep:
|
||||
// In this case, the computer has been idle for several minutes
|
||||
// and will sleep soon so you must either allow or cancel
|
||||
// this notification. Important: if you don’t respond, there will
|
||||
// be a 30-second timeout before the computer sleeps.
|
||||
// IOCancelPowerChange(root_port,(long)messageArgument);
|
||||
result = IOCancelPowerChange(sleepInfo.connection,(long)messageArgument);
|
||||
if( kIOReturnSuccess != result )
|
||||
vlog( "sleep prevention failed. (%d)\n", result);
|
||||
break;
|
||||
case kIOMessageSystemHasPoweredOn:
|
||||
// Handle wakeup.
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Registers for system power notifications so that sleepCallback() can veto idle
// sleep while the test is running.
//
// On Apple platforms the registration state is stored in sleepInfo and the
// notification port's run-loop source is added to the current run loop. If the
// registration fails, "failed." is logged and nothing is attached to the run loop.
// On other platforms only a "not implemented" message is logged.
void PreventSleep( void )
{
#if defined( __APPLE__ )
    vlog( "Disabling sleep... " );
    sleepInfo.iterator = (io_object_t) 0;
    sleepInfo.port = NULL;
    sleepInfo.connection = IORegisterForSystemPower
                            (
                                &sleepInfo,             // void * refcon
                                &sleepInfo.port,        // IONotificationPortRef * thePortRef
                                sleepCallback,          // IOServiceInterestCallback callback
                                &sleepInfo.iterator     // io_object_t * notifier
                            );

    if( (io_connect_t) 0 == sleepInfo.connection )
    {
        // Registration failed, so sleepInfo.port may still be NULL. Asking for its
        // run-loop source would dereference an invalid port, so stop here.
        vlog( "failed.\n" );
        return;
    }

    vlog( "done.\n" );

    CFRunLoopAddSource( CFRunLoopGetCurrent(),
                        IONotificationPortGetRunLoopSource( sleepInfo.port ),
                        kCFRunLoopDefaultMode );
#else
    vlog( "*** PreventSleep() is not implemented on this platform.\n" );
#endif
}
|
||||
|
||||
// Undoes PreventSleep(): deregisters the power notification and releases the
// connection and notification port that IORegisterForSystemPower() handed out.
//
// The outcome of the deregistration is logged. On non-Apple platforms only a
// "not implemented" message is logged.
void ResumeSleep( void )
{
#if defined( __APPLE__ )
    IOReturn result = IODeregisterForSystemPower ( &sleepInfo.iterator );
    if( 0 != result )
        vlog( "Got error %d restoring sleep \n", result );
    else
        vlog( "Sleep restored.\n" );

    // IORegisterForSystemPower() implicitly opened a connection to the root power
    // domain; close it so the connection is not leaked.
    if( (io_connect_t) 0 != sleepInfo.connection )
    {
        IOServiceClose( sleepInfo.connection );
        sleepInfo.connection = (io_connect_t) 0;
    }

    // Destroy the notification port allocated by the registration. The fields are
    // reset so that a repeated call does not release the same objects twice.
    if( NULL != sleepInfo.port )
    {
        IONotificationPortDestroy( sleepInfo.port );
        sleepInfo.port = NULL;
    }
#else
    vlog( "*** ResumeSleep() is not implemented on this platform.\n" );
#endif
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,24 +1,24 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef SLEEP_H
|
||||
#define SLEEP_H
|
||||
|
||||
void PreventSleep( void );
|
||||
void ResumeSleep( void );
|
||||
|
||||
#endif /* SLEEP_H */
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef SLEEP_H
|
||||
#define SLEEP_H
|
||||
|
||||
void PreventSleep( void );
|
||||
void ResumeSleep( void );
|
||||
|
||||
#endif /* SLEEP_H */
|
||||
|
||||
|
||||
|
||||
@@ -1,151 +1,151 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "Utility.h"
|
||||
|
||||
#if defined(__PPC__)
|
||||
// Global variable used to hold the FPU control register state. The FPSCR register cannot
// be used because not all Power implementations retain or observe the NI (non-IEEE
// mode) bit.
|
||||
__thread fpu_control_t fpu_control = 0;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Multiplies two doubles and returns the product as a head/tail pair.
//
//   rhi -- receives u*v rounded to double.
//   rlo -- receives the correction term computed from the split operands.
//   u,v -- the factors.
//
// Each factor is split into a high and a low part with the constant 2^27 + 1, and
// the partial products are accumulated against the rounded product.
void MulD(double *rhi, double *rlo, double u, double v)
{
    const double splitter = 134217729.0;    // 1 + 2^27

    // Split u into uHead + uTail.
    const double uScaled = u * splitter;
    const double uHead   = (u - uScaled) + uScaled;
    const double uTail   = u - uHead;

    // Split v into vHead + vTail.
    const double vScaled = v * splitter;
    const double vHead   = (v - vScaled) + vScaled;
    const double vTail   = v - vHead;

    const double product = u * v;

    *rhi = product;
    // The summation order is significant; it is kept exactly as in the original.
    *rlo = (((uHead*vHead - product) + (uHead*vTail)) + (uTail*vHead)) + (uTail*vTail);
}
|
||||
|
||||
// Adds two doubles and returns the sum as a head/tail pair.
//
//   rhi -- receives a + b rounded to double.
//   rlo -- receives the part of the smaller-magnitude operand that did not make
//          it into the rounded sum.
//   a,b -- the addends.
void AddD(double *rhi, double *rlo, double a, double b)
{
    const double sum = a + b;

    // Order the operands by magnitude; on a tie (or NaN) b is treated as the larger.
    double larger  = b;
    double smaller = a;
    if (fabs(a) > fabs(b))
    {
        larger  = a;
        smaller = b;
    }

    *rhi = sum;
    *rlo = smaller - (sum - larger);
}
|
||||
|
||||
// Multiplies two head/tail (double-double) values.
//
//   rhi, rlo -- receive the head and tail of the product.
//   xh, xl   -- head and tail of the first factor.
//   yh, yl   -- head and tail of the second factor.
//
// The heads are multiplied using the same 2^27 + 1 split as MulD; the cross terms
// xh*yl and xl*yh are folded into the tail, and the result is renormalised.
// The xl*yl term is not included.
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double splitter = 134217729.0;    // 1 + 2^27

    // Split xh into xHead + xTail.
    const double xScaled = xh * splitter;
    const double xHead   = (xh - xScaled) + xScaled;
    const double xTail   = xh - xHead;

    // Split yh into yHead + yTail.
    const double yScaled = yh * splitter;
    const double yHead   = (yh - yScaled) + yScaled;
    const double yTail   = yh - yHead;

    const double head = xh * yh;
    double tail = (((xHead*yHead - head) + (xHead*yTail)) + (xTail*yHead)) + (xTail*yTail);
    tail += xh*yl + xl*yh;

    // Renormalise so the head carries the rounded sum.
    const double total = head + tail;
    *rhi = total;
    *rlo = (head - total) + tail;
}
|
||||
|
||||
// Adds two head/tail (double-double) values.
//
//   rhi, rlo -- receive the head and tail of the sum.
//   xh, xl   -- head and tail of the first addend.
//   yh, yl   -- head and tail of the second addend.
//
// The heads are added first; the remainder of that addition and both tails are
// then accumulated, starting from the head with the larger magnitude.
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double headSum = xh + yh;
    double remainder;

    // The evaluation order of each chain is significant and is kept as in the original.
    if (fabs(xh) > fabs(yh))
        remainder = xh - headSum + yh + yl + xl;
    else
        remainder = yh - headSum + xh + xl + yl;

    const double total = headSum + remainder;
    *rhi = total;
    *rlo = (headSum - total) + remainder;
}
|
||||
|
||||
// Divides a by b and returns the quotient as a head/tail pair.
//
//   chi -- receives a / b rounded to double.
//   clo -- receives a correction: the head of (a - chi*b), divided by b.
//   a,b -- dividend and divisor.
void DivideDD(double *chi, double *clo, double a, double b)
{
    double prodHi, prodLo;      // quotient * b as head + tail
    double remHi, remLo;        // a - quotient * b as head + tail

    const double quotient = a / b;
    *chi = quotient;

    MulD(&prodHi, &prodLo, quotient, b);
    AddDD(&remHi, &remLo, -prodHi, -prodLo, a, 0.0);

    // Only the head of the remainder contributes to the correction term.
    *clo = remHi / b;
}
|
||||
|
||||
// These functions compare two floats/doubles. Since some platforms may choose to
// flush denormals to zero before comparison, a comparison like a < b may give the wrong
// result in "certain cases" where we do need the correct comparison result when the operands
// are denormals. These functions compare floats/doubles using their signed integer/long int
// representation. In other cases, when flushing to zero is fine, these should not be used.
// Also, these don't check for NaNs and assume NaNs are handled separately as a special edge case
// by the caller which calls these functions.
// Return 0 if both are equal, 1 if x > y and -1 if x < y.
|
||||
|
||||
inline
|
||||
int compareFloats(float x, float y)
|
||||
{
|
||||
int32f_t a, b;
|
||||
|
||||
a.f = x;
|
||||
b.f = y;
|
||||
|
||||
if( a.i & 0x80000000 )
|
||||
a.i = 0x80000000 - a.i;
|
||||
if( b.i & 0x80000000 )
|
||||
b.i = 0x80000000 - b.i;
|
||||
|
||||
if( a.i == b.i )
|
||||
return 0;
|
||||
|
||||
return a.i < b.i ? -1 : 1;
|
||||
}
|
||||
|
||||
inline
|
||||
int compareDoubles(double x, double y)
|
||||
{
|
||||
int64d_t a, b;
|
||||
|
||||
a.d = x;
|
||||
b.d = y;
|
||||
|
||||
if( a.l & 0x8000000000000000LL )
|
||||
a.l = 0x8000000000000000LL - a.l;
|
||||
if( b.l & 0x8000000000000000LL )
|
||||
b.l = 0x8000000000000000LL - b.l;
|
||||
|
||||
if( a.l == b.l )
|
||||
return 0;
|
||||
|
||||
return a.l < b.l ? -1 : 1;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#include "Utility.h"
|
||||
|
||||
#if defined(__PPC__)
|
||||
// Global variable used to hold the FPU control register state. The FPSCR register cannot
// be used because not all Power implementations retain or observe the NI (non-IEEE
// mode) bit.
|
||||
__thread fpu_control_t fpu_control = 0;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Multiplies two doubles and returns the product as a head/tail pair.
//
//   rhi -- receives u*v rounded to double.
//   rlo -- receives the correction term computed from the split operands.
//   u,v -- the factors.
//
// Each factor is split into a high and a low part with the constant 2^27 + 1, and
// the partial products are accumulated against the rounded product.
void MulD(double *rhi, double *rlo, double u, double v)
{
    const double splitter = 134217729.0;    // 1 + 2^27

    // Split u into uHead + uTail.
    const double uScaled = u * splitter;
    const double uHead   = (u - uScaled) + uScaled;
    const double uTail   = u - uHead;

    // Split v into vHead + vTail.
    const double vScaled = v * splitter;
    const double vHead   = (v - vScaled) + vScaled;
    const double vTail   = v - vHead;

    const double product = u * v;

    *rhi = product;
    // The summation order is significant; it is kept exactly as in the original.
    *rlo = (((uHead*vHead - product) + (uHead*vTail)) + (uTail*vHead)) + (uTail*vTail);
}
|
||||
|
||||
// Adds two doubles and returns the sum as a head/tail pair.
//
//   rhi -- receives a + b rounded to double.
//   rlo -- receives the part of the smaller-magnitude operand that did not make
//          it into the rounded sum.
//   a,b -- the addends.
void AddD(double *rhi, double *rlo, double a, double b)
{
    const double sum = a + b;

    // Order the operands by magnitude; on a tie (or NaN) b is treated as the larger.
    double larger  = b;
    double smaller = a;
    if (fabs(a) > fabs(b))
    {
        larger  = a;
        smaller = b;
    }

    *rhi = sum;
    *rlo = smaller - (sum - larger);
}
|
||||
|
||||
// Multiplies two head/tail (double-double) values.
//
//   rhi, rlo -- receive the head and tail of the product.
//   xh, xl   -- head and tail of the first factor.
//   yh, yl   -- head and tail of the second factor.
//
// The heads are multiplied using the same 2^27 + 1 split as MulD; the cross terms
// xh*yl and xl*yh are folded into the tail, and the result is renormalised.
// The xl*yl term is not included.
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double splitter = 134217729.0;    // 1 + 2^27

    // Split xh into xHead + xTail.
    const double xScaled = xh * splitter;
    const double xHead   = (xh - xScaled) + xScaled;
    const double xTail   = xh - xHead;

    // Split yh into yHead + yTail.
    const double yScaled = yh * splitter;
    const double yHead   = (yh - yScaled) + yScaled;
    const double yTail   = yh - yHead;

    const double head = xh * yh;
    double tail = (((xHead*yHead - head) + (xHead*yTail)) + (xTail*yHead)) + (xTail*yTail);
    tail += xh*yl + xl*yh;

    // Renormalise so the head carries the rounded sum.
    const double total = head + tail;
    *rhi = total;
    *rlo = (head - total) + tail;
}
|
||||
|
||||
// AddDD -- add two (high, low) double pairs.
//
//   rhi, rlo  out: the sum as a renormalised (high, low) pair
//   xh, xl    in:  first addend, high and low part
//   yh, yl    in:  second addend, high and low part
//
// The high parts are added first. The low-order term is then built starting
// from whichever high part has the larger magnitude, left to right, exactly
// in the order written below.
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
{
    const double r = xh + yh;
    double s;

    if (fabs(xh) > fabs(yh))
        s = xh - r + yh + yl + xl;
    else
        s = yh - r + xh + xl + yl;

    // Renormalise so that the high word carries the rounded total.
    *rhi = r + s;
    *rlo = (r - (*rhi)) + s;
}
|
||||
|
||||
// DivideDD -- divide a by b and return the quotient as a (high, low) pair.
//
//   chi  out: a / b rounded to double
//   clo  out: first-order correction, computed as (a - chi*b) / b
//   a, b in:  dividend and divisor
//
// chi*b is formed as a (high, low) pair with MulD and subtracted from a with
// AddDD; only the high word of that remainder is divided by b.
void DivideDD(double *chi, double *clo, double a, double b)
{
    double prod_hi, prod_lo;
    double rem_hi, rem_lo;
    const double quotient = a / b;

    *chi = quotient;

    // quotient * b as a (high, low) pair
    MulD(&prod_hi, &prod_lo, quotient, b);

    // remainder = a - quotient * b
    AddDD(&rem_hi, &rem_lo, -prod_hi, -prod_lo, a, 0.0);

    *clo = rem_hi / b;
}
|
||||
|
||||
// These functions compare two floats/doubles. Since some platforms may choose to
// flush denormals to zero before comparison, a comparison like a < b may give the wrong
// result in certain cases where we do need a correct comparison result when the operands
// are denormals. These functions compare floats/doubles using their signed integer/long int
// representation. In other cases, when flushing to zero is fine, these should not be used.
// Also, these don't check for NaNs and assume NaNs are handled separately as a special edge case
// by the caller which calls these functions.
// Return 0 if both are equal, 1 if x > y and -1 if x < y.
|
||||
|
||||
inline
|
||||
int compareFloats(float x, float y)
|
||||
{
|
||||
int32f_t a, b;
|
||||
|
||||
a.f = x;
|
||||
b.f = y;
|
||||
|
||||
if( a.i & 0x80000000 )
|
||||
a.i = 0x80000000 - a.i;
|
||||
if( b.i & 0x80000000 )
|
||||
b.i = 0x80000000 - b.i;
|
||||
|
||||
if( a.i == b.i )
|
||||
return 0;
|
||||
|
||||
return a.i < b.i ? -1 : 1;
|
||||
}
|
||||
|
||||
inline
|
||||
int compareDoubles(double x, double y)
|
||||
{
|
||||
int64d_t a, b;
|
||||
|
||||
a.d = x;
|
||||
b.d = y;
|
||||
|
||||
if( a.l & 0x8000000000000000LL )
|
||||
a.l = 0x8000000000000000LL - a.l;
|
||||
if( b.l & 0x8000000000000000LL )
|
||||
b.l = 0x8000000000000000LL - b.l;
|
||||
|
||||
if( a.l == b.l )
|
||||
return 0;
|
||||
|
||||
return a.l < b.l ? -1 : 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,280 +1,280 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef UTILITY_H
|
||||
#define UTILITY_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#if !defined(_MSC_VER)
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
#include "../../test_common/harness/fpcontrol.h"
|
||||
|
||||
#if defined( _WIN32) && defined (_MSC_VER)
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include "../../test_common/harness/ThreadPool.h"
|
||||
#define BUFFER_SIZE (1024*1024*2)
|
||||
|
||||
#if defined( __GNUC__ )
|
||||
#define UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
#define UNUSED
|
||||
#endif
|
||||
|
||||
#define VECTOR_SIZE_COUNT 6
|
||||
extern const char *sizeNames[VECTOR_SIZE_COUNT];
|
||||
extern const int sizeValues[VECTOR_SIZE_COUNT];
|
||||
|
||||
extern cl_device_type gDeviceType;
|
||||
extern cl_device_id gDevice;
|
||||
extern cl_context gContext;
|
||||
extern cl_command_queue gQueue;
|
||||
extern void *gIn;
|
||||
extern void *gIn2;
|
||||
extern void *gIn3;
|
||||
extern void *gOut_Ref;
|
||||
extern void *gOut_Ref2;
|
||||
extern void *gOut[VECTOR_SIZE_COUNT];
|
||||
extern void *gOut2[VECTOR_SIZE_COUNT];
|
||||
extern cl_mem gInBuffer;
|
||||
extern cl_mem gInBuffer2;
|
||||
extern cl_mem gInBuffer3;
|
||||
extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
|
||||
extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
|
||||
extern uint32_t gComputeDevices;
|
||||
extern uint32_t gSimdSize;
|
||||
extern int gSkipCorrectnessTesting;
|
||||
extern int gMeasureTimes;
|
||||
extern int gReportAverageTimes;
|
||||
extern int gForceFTZ;
|
||||
extern int gWimpyMode;
|
||||
extern int gHasDouble;
|
||||
extern int gIsInRTZMode;
|
||||
extern int gInfNanSupport;
|
||||
extern int gIsEmbedded;
|
||||
extern uint32_t gMaxVectorSizeIndex;
|
||||
extern uint32_t gMinVectorSizeIndex;
|
||||
extern uint32_t gDeviceFrequency;
|
||||
extern cl_device_fp_config gFloatCapabilities;
|
||||
extern cl_device_fp_config gDoubleCapabilities;
|
||||
|
||||
#if !defined( _MSC_VER)
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
#define LOWER_IS_BETTER 0
|
||||
#define HIGHER_IS_BETTER 1
|
||||
|
||||
#if USE_ATF
|
||||
|
||||
#include <ATF/ATF.h>
|
||||
#define test_start() ATFTestStart()
|
||||
#define test_finish() ATFTestFinish()
|
||||
#define vlog( ... ) ATFLogInfo(__VA_ARGS__)
|
||||
#define vlog_error( ... ) ATFLogError(__VA_ARGS__)
|
||||
#define vlog_perf( _number, _higherIsBetter, _units, _nameFmt, ... ) ATFLogPerformanceNumber(_number, _higherIsBetter, _units, _nameFmt, __VA_ARGS__ )
|
||||
|
||||
#else
|
||||
|
||||
#define test_start()
|
||||
#define test_finish()
|
||||
#define vlog( ... ) printf( __VA_ARGS__ )
|
||||
#define vlog_error( ... ) printf( __VA_ARGS__ )
|
||||
#define vlog_perf( _number, _higherIsBetter, _units, _nameFmt, ... ) printf( "\t%8.2f", _number )
|
||||
|
||||
void _logPerf(double number, int higherIsBetter, const char *units, const char *nameFormat, ...);
|
||||
#endif
|
||||
|
||||
#if defined (_MSC_VER )
|
||||
//Deal with missing scalbn on windows
|
||||
#define scalbnf( _a, _i ) ldexpf( _a, _i )
|
||||
#define scalbn( _a, _i ) ldexp( _a, _i )
|
||||
#define scalbnl( _a, _i ) ldexpl( _a, _i )
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
float Ulp_Error( float test, double reference );
|
||||
//float Ulp_Error_Half( float test, double reference );
|
||||
float Ulp_Error_Double( double test, long double reference );
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
uint64_t GetTime( void );
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p );
|
||||
int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p );
|
||||
|
||||
// Used to convert a bucket of bits into a search pattern through double.
//
// The top 20 bits of `bits` become the top 20 bits of the 64-bit pattern and
// the bottom 12 bits become its bottom 12 bits (0x89abcdef -> 0x89abc00000000def).
// Bit 11 of `bits` is then sign-extended through the gap: when it is set,
// 0x1000 is subtracted, which borrows through the zero middle and turns it
// into all 1s (decrementing the upper field by one).
// The resulting 64-bit pattern is returned reinterpreted as a double.
static inline double DoubleFromUInt32( uint32_t bits );
static inline double DoubleFromUInt32( uint32_t bits )
{
    union{ uint64_t u; double d;} pun;
    const uint64_t low12 = bits & 0xfffU;
    const uint64_t high20 = (uint64_t) (bits & ~0xfffU) << 32;

    pun.u = high20 | low12;

    // sign extend the leading bit of the low segment through the middle region
    pun.u -= (bits & 0x800U) << 1;

    return pun.d;
}
|
||||
|
||||
void _LogBuildError( cl_program p, int line, const char *file );
|
||||
#define LogBuildError( program ) _LogBuildError( program, __LINE__, __FILE__ )
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
|
||||
#endif
|
||||
#ifndef MIN
|
||||
#define MIN(_a, _b) ((_a) < (_b) ? (_a) : (_b))
|
||||
#endif
|
||||
|
||||
#define PERF_LOOP_COUNT 100
|
||||
|
||||
// Note: though this takes a double, this is for use with single precision tests.
//
// Returns nonzero when x, converted to float, is a nonzero subnormal.
static inline int IsFloatSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Do this in integer to avoid problems with FTZ behavior.
    union{ float d; uint32_t u;} pun;
    pun.d = fabsf((float)x);
    // Subtracting 1 wraps zero around to 0xffffffff, so a single unsigned
    // compare accepts exactly the bit patterns 0x00000001..0x007fffff.
    return (pun.u - 1) < 0x007fffffU;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return fabs(x) < (double) FLT_MIN && x != 0.0;
#endif
}
|
||||
|
||||
|
||||
// Returns nonzero when x, converted to double, is a nonzero subnormal.
// Though this takes a long double, the test is made at double precision.
static inline int IsDoubleSubnormal( long double x )
{
#if 2 == FLT_RADIX
    // Do this in integer to avoid problems with FTZ behavior.
    union{ double d; uint64_t u;} pun;
    pun.d = fabs((double) x);
    // Subtracting 1 wraps zero around to the maximum value, so a single
    // unsigned compare accepts exactly 0x1..0x000fffffffffffff.
    return (pun.u - 1) < 0x000fffffffffffffULL;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return fabs(x) < (double) DBL_MIN && x != 0.0;
#endif
}
|
||||
|
||||
// The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero.
// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero.
//
// Returns nonzero when |x|, reduced by `ulps` steps of 2^-149, lies below 2^-126.
static inline int IsFloatResultSubnormal( double x, float ulps )
{
    const double step = MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149);
    const double cutoff = MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );

    x = fabs(x) - step * (double) ulps;
    return x < cutoff;
}
|
||||
|
||||
// Double-precision counterpart of IsFloatResultSubnormal.
//
// Returns nonzero when |x|, reduced by `ulps` steps of 2^-1074, lies below 2^-1022.
static inline int IsDoubleResultSubnormal( long double x, float ulps )
{
    const long double step = MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074);
    const long double cutoff = MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 );

    x = fabsl(x) - step * (long double) ulps;
    return x < cutoff;
}
|
||||
|
||||
static inline int IsFloatInfinity(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) == 0x7F800000U);
|
||||
}
|
||||
|
||||
static inline int IsFloatMaxFloat(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
|
||||
}
|
||||
|
||||
static inline int IsFloatNaN(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
||||
}
|
||||
|
||||
extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
|
||||
|
||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
// (that's x87 default state). This causes problems with the tests that
// convert long and ulong to float and double or otherwise deal with values
// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
//
// Only implemented for MinGW builds; on every other platform this is a no-op.
static inline void Force64BitFPUPrecision(void)
{
#if __MINGW32__
    // The usual method is to use _controlfp as follows:
    //     #include <float.h>
    //     _controlfp(_PC_64, _MCW_PC);
    //
    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
    // divergent code just use inline assembly which works for both.
    unsigned short int saved_cw = 0;
    unsigned short int wide_cw = 0;

    // Read the current x87 control word.
    __asm__ __volatile__ ("fstcw %0":"=m" (saved_cw));
    // Set both precision-control bits (mask 0x0300): 64-bit precision.
    wide_cw = saved_cw | 0x0300;
    // Load the modified control word back.
    __asm__ __volatile__ ("fldcw %0"::"m" (wide_cw));
#else
    /* Implement for other platforms if needed */
#endif
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#else
|
||||
extern
|
||||
#endif
|
||||
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
|
||||
|
||||
typedef union
|
||||
{
|
||||
int32_t i;
|
||||
float f;
|
||||
}int32f_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
int64_t l;
|
||||
double d;
|
||||
}int64d_t;
|
||||
|
||||
void MulD(double *rhi, double *rlo, double u, double v);
|
||||
void AddD(double *rhi, double *rlo, double a, double b);
|
||||
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
||||
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
||||
void DivideDD(double *chi, double *clo, double a, double b);
|
||||
int compareFloats(float x, float y);
|
||||
int compareDoubles(double x, double y);
|
||||
|
||||
#endif /* UTILITY_H */
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef UTILITY_H
|
||||
#define UTILITY_H
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/opencl.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#if !defined(_MSC_VER)
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include "../../test_common/harness/rounding_mode.h"
|
||||
#include "../../test_common/harness/fpcontrol.h"
|
||||
|
||||
#if defined( _WIN32) && defined (_MSC_VER)
|
||||
#include "../../test_common/harness/testHarness.h"
|
||||
#endif
|
||||
|
||||
#include "../../test_common/harness/compat.h"
|
||||
#include "../../test_common/harness/ThreadPool.h"
|
||||
#define BUFFER_SIZE (1024*1024*2)
|
||||
|
||||
#if defined( __GNUC__ )
|
||||
#define UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
#define UNUSED
|
||||
#endif
|
||||
|
||||
#define VECTOR_SIZE_COUNT 6
|
||||
extern const char *sizeNames[VECTOR_SIZE_COUNT];
|
||||
extern const int sizeValues[VECTOR_SIZE_COUNT];
|
||||
|
||||
extern cl_device_type gDeviceType;
|
||||
extern cl_device_id gDevice;
|
||||
extern cl_context gContext;
|
||||
extern cl_command_queue gQueue;
|
||||
extern void *gIn;
|
||||
extern void *gIn2;
|
||||
extern void *gIn3;
|
||||
extern void *gOut_Ref;
|
||||
extern void *gOut_Ref2;
|
||||
extern void *gOut[VECTOR_SIZE_COUNT];
|
||||
extern void *gOut2[VECTOR_SIZE_COUNT];
|
||||
extern cl_mem gInBuffer;
|
||||
extern cl_mem gInBuffer2;
|
||||
extern cl_mem gInBuffer3;
|
||||
extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
|
||||
extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
|
||||
extern uint32_t gComputeDevices;
|
||||
extern uint32_t gSimdSize;
|
||||
extern int gSkipCorrectnessTesting;
|
||||
extern int gMeasureTimes;
|
||||
extern int gReportAverageTimes;
|
||||
extern int gForceFTZ;
|
||||
extern int gWimpyMode;
|
||||
extern int gHasDouble;
|
||||
extern int gIsInRTZMode;
|
||||
extern int gInfNanSupport;
|
||||
extern int gIsEmbedded;
|
||||
extern uint32_t gMaxVectorSizeIndex;
|
||||
extern uint32_t gMinVectorSizeIndex;
|
||||
extern uint32_t gDeviceFrequency;
|
||||
extern cl_device_fp_config gFloatCapabilities;
|
||||
extern cl_device_fp_config gDoubleCapabilities;
|
||||
|
||||
#if !defined( _MSC_VER)
|
||||
#include <fenv.h>
|
||||
#endif
|
||||
|
||||
#define LOWER_IS_BETTER 0
|
||||
#define HIGHER_IS_BETTER 1
|
||||
|
||||
#if USE_ATF
|
||||
|
||||
#include <ATF/ATF.h>
|
||||
#define test_start() ATFTestStart()
|
||||
#define test_finish() ATFTestFinish()
|
||||
#define vlog( ... ) ATFLogInfo(__VA_ARGS__)
|
||||
#define vlog_error( ... ) ATFLogError(__VA_ARGS__)
|
||||
#define vlog_perf( _number, _higherIsBetter, _units, _nameFmt, ... ) ATFLogPerformanceNumber(_number, _higherIsBetter, _units, _nameFmt, __VA_ARGS__ )
|
||||
|
||||
#else
|
||||
|
||||
#define test_start()
|
||||
#define test_finish()
|
||||
#define vlog( ... ) printf( __VA_ARGS__ )
|
||||
#define vlog_error( ... ) printf( __VA_ARGS__ )
|
||||
#define vlog_perf( _number, _higherIsBetter, _units, _nameFmt, ... ) printf( "\t%8.2f", _number )
|
||||
|
||||
void _logPerf(double number, int higherIsBetter, const char *units, const char *nameFormat, ...);
|
||||
#endif
|
||||
|
||||
#if defined (_MSC_VER )
|
||||
//Deal with missing scalbn on windows
|
||||
#define scalbnf( _a, _i ) ldexpf( _a, _i )
|
||||
#define scalbn( _a, _i ) ldexp( _a, _i )
|
||||
#define scalbnl( _a, _i ) ldexpl( _a, _i )
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
float Ulp_Error( float test, double reference );
|
||||
//float Ulp_Error_Half( float test, double reference );
|
||||
float Ulp_Error_Double( double test, long double reference );
|
||||
#ifdef __cplusplus
|
||||
} //extern "C"
|
||||
#endif
|
||||
|
||||
uint64_t GetTime( void );
|
||||
double SubtractTime( uint64_t endTime, uint64_t startTime );
|
||||
int MakeKernel( const char **c, cl_uint count, const char *name, cl_kernel *k, cl_program *p );
|
||||
int MakeKernels( const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p );
|
||||
|
||||
// Used to convert a bucket of bits into a search pattern through double.
//
// The top 20 bits of `bits` become the top 20 bits of the 64-bit pattern and
// the bottom 12 bits become its bottom 12 bits (0x89abcdef -> 0x89abc00000000def).
// Bit 11 of `bits` is then sign-extended through the gap: when it is set,
// 0x1000 is subtracted, which borrows through the zero middle and turns it
// into all 1s (decrementing the upper field by one).
// The resulting 64-bit pattern is returned reinterpreted as a double.
static inline double DoubleFromUInt32( uint32_t bits );
static inline double DoubleFromUInt32( uint32_t bits )
{
    union{ uint64_t u; double d;} pun;
    const uint64_t low12 = bits & 0xfffU;
    const uint64_t high20 = (uint64_t) (bits & ~0xfffU) << 32;

    pun.u = high20 | low12;

    // sign extend the leading bit of the low segment through the middle region
    pun.u -= (bits & 0x800U) << 1;

    return pun.d;
}
|
||||
|
||||
void _LogBuildError( cl_program p, int line, const char *file );
|
||||
#define LogBuildError( program ) _LogBuildError( program, __LINE__, __FILE__ )
|
||||
|
||||
#ifndef MAX
|
||||
#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
|
||||
#endif
|
||||
#ifndef MIN
|
||||
#define MIN(_a, _b) ((_a) < (_b) ? (_a) : (_b))
|
||||
#endif
|
||||
|
||||
#define PERF_LOOP_COUNT 100
|
||||
|
||||
// Note: though this takes a double, this is for use with single precision tests.
//
// Returns nonzero when x, converted to float, is a nonzero subnormal.
static inline int IsFloatSubnormal( double x )
{
#if 2 == FLT_RADIX
    // Do this in integer to avoid problems with FTZ behavior.
    union{ float d; uint32_t u;} pun;
    pun.d = fabsf((float)x);
    // Subtracting 1 wraps zero around to 0xffffffff, so a single unsigned
    // compare accepts exactly the bit patterns 0x00000001..0x007fffff.
    return (pun.u - 1) < 0x007fffffU;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return fabs(x) < (double) FLT_MIN && x != 0.0;
#endif
}
|
||||
|
||||
|
||||
// Returns nonzero when x, converted to double, is a nonzero subnormal.
// Though this takes a long double, the test is made at double precision.
static inline int IsDoubleSubnormal( long double x )
{
#if 2 == FLT_RADIX
    // Do this in integer to avoid problems with FTZ behavior.
    union{ double d; uint64_t u;} pun;
    pun.d = fabs((double) x);
    // Subtracting 1 wraps zero around to the maximum value, so a single
    // unsigned compare accepts exactly 0x1..0x000fffffffffffff.
    return (pun.u - 1) < 0x000fffffffffffffULL;
#else
    // rely on floating point hardware for non-radix2 non-IEEE-754 hardware -- will fail if you flush subnormals to zero
    return fabs(x) < (double) DBL_MIN && x != 0.0;
#endif
}
|
||||
|
||||
// The spec is fairly clear that we may enforce a hard cutoff to prevent premature flushing to zero.
// However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + ulp_limit to be flushed to zero.
//
// Returns nonzero when |x|, reduced by `ulps` steps of 2^-149, lies below 2^-126.
static inline int IsFloatResultSubnormal( double x, float ulps )
{
    const double step = MAKE_HEX_DOUBLE( 0x1.0p-149, 0x1, -149);
    const double cutoff = MAKE_HEX_DOUBLE( 0x1.0p-126, 0x1, -126 );

    x = fabs(x) - step * (double) ulps;
    return x < cutoff;
}
|
||||
|
||||
// Double-precision counterpart of IsFloatResultSubnormal.
//
// Returns nonzero when |x|, reduced by `ulps` steps of 2^-1074, lies below 2^-1022.
static inline int IsDoubleResultSubnormal( long double x, float ulps )
{
    const long double step = MAKE_HEX_LONG( 0x1.0p-1074, 0x1, -1074);
    const long double cutoff = MAKE_HEX_LONG( 0x1.0p-1022, 0x1, -1022 );

    x = fabsl(x) - step * (long double) ulps;
    return x < cutoff;
}
|
||||
|
||||
static inline int IsFloatInfinity(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) == 0x7F800000U);
|
||||
}
|
||||
|
||||
static inline int IsFloatMaxFloat(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
|
||||
}
|
||||
|
||||
static inline int IsFloatNaN(double x)
|
||||
{
|
||||
union { cl_float d; cl_uint u; } u;
|
||||
u.d = (cl_float) x;
|
||||
return ((u.u & 0x7fffffffU) > 0x7F800000U);
|
||||
}
|
||||
|
||||
extern cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
|
||||
|
||||
// Windows (since long double got deprecated) sets the x87 to 53-bit precision
// (that's x87 default state). This causes problems with the tests that
// convert long and ulong to float and double or otherwise deal with values
// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
//
// Only implemented for MinGW builds; on every other platform this is a no-op.
static inline void Force64BitFPUPrecision(void)
{
#if __MINGW32__
    // The usual method is to use _controlfp as follows:
    //     #include <float.h>
    //     _controlfp(_PC_64, _MCW_PC);
    //
    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
    // divergent code just use inline assembly which works for both.
    unsigned short int saved_cw = 0;
    unsigned short int wide_cw = 0;

    // Read the current x87 control word.
    __asm__ __volatile__ ("fstcw %0":"=m" (saved_cw));
    // Set both precision-control bits (mask 0x0300): 64-bit precision.
    wide_cw = saved_cw | 0x0300;
    // Load the modified control word back.
    __asm__ __volatile__ ("fldcw %0"::"m" (wide_cw));
#else
    /* Implement for other platforms if needed */
#endif
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
#else
|
||||
extern
|
||||
#endif
|
||||
void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
|
||||
|
||||
typedef union
|
||||
{
|
||||
int32_t i;
|
||||
float f;
|
||||
}int32f_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
int64_t l;
|
||||
double d;
|
||||
}int64d_t;
|
||||
|
||||
void MulD(double *rhi, double *rlo, double u, double v);
|
||||
void AddD(double *rhi, double *rlo, double a, double b);
|
||||
void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
||||
void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl);
|
||||
void DivideDD(double *chi, double *clo, double a, double b);
|
||||
int compareFloats(float x, float y);
|
||||
int compareDoubles(double x, double y);
|
||||
|
||||
#endif /* UTILITY_H */
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -54,7 +54,7 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count,
|
||||
" out[i] = ", name, "( in1[i], in2[i] );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global float* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
@@ -96,31 +96,31 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count,
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
}
|
||||
|
||||
static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" out[i] = ", name, "( in1[i], in2[i] );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
@@ -161,34 +161,34 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
}
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
};
|
||||
|
||||
// Number of entries in the specialValuesFloat table.
static size_t specialValuesFloatCount = (sizeof specialValuesFloat) / (sizeof *specialValuesFloat);
|
||||
@@ -271,7 +271,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
|
||||
test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode );
|
||||
test_info.skipNanInf = test_info.isFDim && ! gInfNanSupport;
|
||||
test_info.isNextafter = isNextafter;
|
||||
@@ -336,7 +336,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
// Run the kernels
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
@@ -352,7 +352,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -361,7 +361,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
@@ -412,7 +412,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -426,7 +426,7 @@ int TestFunc_Float_Float_Float_common(const Func *f, MTdata d, int isNextafter)
|
||||
vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
@@ -455,7 +455,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -492,30 +492,30 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if (job_id <= (cl_uint)indx)
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
float *fp2 = (float *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesFloat[x];
|
||||
@@ -536,7 +536,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -554,15 +554,15 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -576,7 +576,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -606,37 +606,37 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
memset( &oldMode, 0, sizeof( oldMode ) );
|
||||
if( ftz )
|
||||
ForceFTZ( &oldMode );
|
||||
|
||||
|
||||
// Set the rounding mode to match the device
|
||||
if (gIsInRTZMode)
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
}
|
||||
|
||||
|
||||
cl_int copysign_test = 0;
|
||||
if(!strcmp(name, "copysign"))
|
||||
copysign_test = 1;
|
||||
|
||||
#define ref_func(s, s2) (copysign_test ? func.f_ff_f( s, s2 ) : func.f_ff( s, s2 ))
|
||||
|
||||
if(!strcmp(name, "copysign"))
|
||||
copysign_test = 1;
|
||||
|
||||
#define ref_func(s, s2) (copysign_test ? func.f_ff_f( s, s2 ) : func.f_ff( s, s2 ))
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
float *r = (float *)gOut_Ref + thread_id * buffer_elements;
|
||||
float *s = (float *)gIn + thread_id * buffer_elements;
|
||||
float *s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
if( skipNanInf )
|
||||
{
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] = (float) ref_func( s[j], s2[j] );
|
||||
overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||
}
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] = (float) ref_func( s[j], s2[j] );
|
||||
overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (float) ref_func( s[j], s2[j] );
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (float) ref_func( s[j], s2[j] );
|
||||
}
|
||||
|
||||
|
||||
if( isFDim && ftz )
|
||||
RestoreFPState( &oldMode );
|
||||
|
||||
@@ -648,7 +648,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -657,7 +657,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//Verify data
|
||||
@@ -667,27 +667,27 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_uint *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
float test = ((float*) q)[j];
|
||||
double correct = ref_func( s[j], s2[j] );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if ( skipNanInf)
|
||||
if ( skipNanInf)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
IsFloatInfinity(correct) || IsFloatNaN(correct) ||
|
||||
IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) ||
|
||||
IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) )
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
float err = Ulp_Error( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -714,36 +714,36 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
float value = copysignf(twoToMinus126, s2[j]);
|
||||
fail = fail && (test != value);
|
||||
if (!fail)
|
||||
if (!fail)
|
||||
err = 0.0f;
|
||||
}
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// retry per section 6.5.3.3
|
||||
if( IsFloatSubnormal( s[j] ) )
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
|
||||
|
||||
if( skipNanInf )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
|
||||
correct2 = ref_func( 0.0, s2[j] );
|
||||
correct3 = ref_func( -0.0, s2[j] );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if( skipNanInf )
|
||||
{
|
||||
if( fetestexcept(FE_OVERFLOW) )
|
||||
continue;
|
||||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
|
||||
IsFloatInfinity(correct3) || IsFloatNaN(correct3) )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
err3 = Ulp_Error( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
@@ -751,7 +751,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -759,13 +759,13 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( ! fail )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
//try with both args as zero
|
||||
if( IsFloatSubnormal( s2[j] ) )
|
||||
{
|
||||
double correct4, correct5;
|
||||
float err4, err5;
|
||||
|
||||
float err4, err5;
|
||||
|
||||
if( skipNanInf )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
@@ -773,21 +773,21 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
correct3 = ref_func( -0.0, 0.0 );
|
||||
correct4 = ref_func( 0.0, -0.0 );
|
||||
correct5 = ref_func( -0.0, -0.0 );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if( skipNanInf )
|
||||
{
|
||||
if( fetestexcept(FE_OVERFLOW) )
|
||||
continue;
|
||||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
|
||||
IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
|
||||
IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
|
||||
IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
err3 = Ulp_Error( test, correct3 );
|
||||
err4 = Ulp_Error( test, correct4 );
|
||||
@@ -820,18 +820,18 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
if( skipNanInf )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
|
||||
correct2 = ref_func( s[j], 0.0 );
|
||||
correct3 = ref_func( s[j], -0.0 );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if ( skipNanInf)
|
||||
if ( skipNanInf)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
IsFloatInfinity(correct) || IsFloatNaN(correct) ||
|
||||
IsFloatInfinity(correct2)|| IsFloatNaN(correct2) )
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
@@ -841,7 +841,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -852,12 +852,12 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -868,8 +868,8 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isFDim && gIsInRTZMode)
|
||||
|
||||
if (isFDim && gIsInRTZMode)
|
||||
(void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
@@ -878,9 +878,9 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -893,28 +893,28 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
if( overflow )
|
||||
free( overflow );
|
||||
free( overflow );
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
|
||||
};
|
||||
|
||||
@@ -942,7 +942,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
|
||||
test_info.isFDim = 0 == strcmp( "fdim", f->nameInCode );
|
||||
test_info.skipNanInf = 0;
|
||||
test_info.isNextafter = isNextafter;
|
||||
@@ -1007,7 +1007,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
@@ -1022,7 +1022,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -1031,7 +1031,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
//Init input arrays
|
||||
@@ -1081,7 +1081,7 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -1097,14 +1097,14 @@ int TestFunc_Double_Double_Double_common(const Func *f, MTdata d, int isNextafte
|
||||
for( ; j < gMaxVectorSizeIndex; j++ )
|
||||
vlog( "\t -- " );
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
|
||||
|
||||
exit:
|
||||
// Release
|
||||
// Release
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
@@ -1127,7 +1127,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -1163,29 +1163,29 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_double *fp2 = (cl_double *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesDouble[x];
|
||||
@@ -1206,7 +1206,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = genrand_int64(d);
|
||||
p2[j] = genrand_int64(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -1224,15 +1224,15 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -1246,7 +1246,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -1264,14 +1264,14 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
if( gSkipCorrectnessTesting )
|
||||
return CL_SUCCESS;
|
||||
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_double *s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
cl_double *s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (cl_double) func.f_ff( s[j], s2[j] );
|
||||
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
|
||||
for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -1280,7 +1280,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -1289,7 +1289,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_ulong *t = (cl_ulong *)r;
|
||||
@@ -1298,7 +1298,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
@@ -1306,7 +1306,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
long double correct = func.f_ff( s[j], s2[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -1333,11 +1333,11 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
cl_double value = copysign(twoToMinus1022, s2[j]);
|
||||
fail = fail && (test != value);
|
||||
if (!fail)
|
||||
if (!fail)
|
||||
err = 0.0f;
|
||||
}
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// retry per section 6.5.3.3
|
||||
if( IsDoubleSubnormal( s[j] ) )
|
||||
@@ -1351,7 +1351,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -1359,7 +1359,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( ! fail )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
//try with both args as zero
|
||||
if( IsDoubleSubnormal( s2[j] ) )
|
||||
{
|
||||
@@ -1403,7 +1403,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -1414,12 +1414,12 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -1430,16 +1430,16 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -1452,7 +1452,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
int TestFunc_Float_Float_Float(const Func *f, MTdata d)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -28,7 +28,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata);
|
||||
int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
extern "C"
|
||||
#endif
|
||||
|
||||
const vtbl _binary_operator = { "binaryOperator", TestFunc_Float_Float_Float_Operator, TestFunc_Double_Double_Double_Operator };
|
||||
@@ -38,7 +38,7 @@ static int BuildKernelDouble( const char *name, const char *operator_symbol, int
|
||||
|
||||
static int BuildKernel( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
|
||||
{
|
||||
const char *c[] = {
|
||||
const char *c[] = {
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global float", sizeNames[vectorSize], "* out, __global float", sizeNames[vectorSize], "* in1, __global float", sizeNames[vectorSize], "* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
@@ -86,24 +86,24 @@ static int BuildKernel( const char *name, const char *operator_symbol, int vecto
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
}
|
||||
|
||||
static int BuildKernelDouble( const char *name, const char *operator_symbol, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
|
||||
{
|
||||
const char *c[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
const char *c[] = {
|
||||
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
@@ -111,7 +111,7 @@ static int BuildKernelDouble( const char *name, const char *operator_symbol, int
|
||||
"}\n"
|
||||
};
|
||||
const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"__kernel void ", name, "_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global double* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
@@ -152,18 +152,18 @@ static int BuildKernelDouble( const char *name, const char *operator_symbol, int
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "%s_kernel%s", name, sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
@@ -224,20 +224,20 @@ typedef struct TestInfo
|
||||
static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
};
|
||||
|
||||
static size_t specialValuesFloatCount = sizeof( specialValuesFloat ) / sizeof( specialValuesFloat[0] );
|
||||
@@ -264,7 +264,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
test_info.f = f;
|
||||
test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -325,7 +325,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
@@ -340,7 +340,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -349,7 +349,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
@@ -400,7 +400,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -414,7 +414,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d)
|
||||
vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
@@ -443,7 +443,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -476,30 +476,30 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
float *fp2 = (float *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesFloat[x];
|
||||
@@ -520,7 +520,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -538,15 +538,15 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -560,7 +560,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -587,33 +587,33 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
memset( &oldMode, 0, sizeof( oldMode ) );
|
||||
if( ftz )
|
||||
ForceFTZ( &oldMode );
|
||||
|
||||
|
||||
// Set the rounding mode to match the device
|
||||
RoundingMode oldRoundMode = kRoundToNearestEven;
|
||||
if (gIsInRTZMode)
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
|
||||
oldRoundMode = set_round(kRoundTowardZero, kfloat);
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
float *r = (float *)gOut_Ref + thread_id * buffer_elements;
|
||||
float *s = (float *)gIn + thread_id * buffer_elements;
|
||||
float *s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
if( gInfNanSupport )
|
||||
if( gInfNanSupport )
|
||||
{
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (float) func.f_ff( s[j], s2[j] );
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
{
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
{
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
r[j] = (float) func.f_ff( s[j], s2[j] );
|
||||
overflow[j] = FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (gIsInRTZMode)
|
||||
(void)set_round(oldRoundMode, kfloat);
|
||||
(void)set_round(oldRoundMode, kfloat);
|
||||
|
||||
if( ftz )
|
||||
RestoreFPState( &oldMode );
|
||||
@@ -626,7 +626,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -635,7 +635,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_uint *t = (cl_uint *)r;
|
||||
@@ -644,30 +644,30 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_uint *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
float test = ((float*) q)[j];
|
||||
double correct = func.f_ff( s[j], s2[j] );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if ( !gInfNanSupport)
|
||||
if ( !gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
IsFloatInfinity(correct) || IsFloatNaN(correct) ||
|
||||
IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j]) ||
|
||||
IsFloatInfinity(s[j]) || IsFloatNaN(s[j]) )
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Per section 10 paragraph 6, accept embedded devices always returning positive 0.0.
|
||||
if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue;
|
||||
// Per section 10 paragraph 6, accept embedded devices always returning positive 0.0.
|
||||
if (gIsEmbedded && (t[j] == 0x80000000) && (q[j] == 0x00000000)) continue;
|
||||
|
||||
float err = Ulp_Error( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -683,25 +683,25 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
double correct2, correct3;
|
||||
float err2, err3;
|
||||
|
||||
|
||||
if( !gInfNanSupport )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
|
||||
correct2 = func.f_ff( 0.0, s2[j] );
|
||||
correct3 = func.f_ff( -0.0, s2[j] );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if( !gInfNanSupport )
|
||||
{
|
||||
if( fetestexcept(FE_OVERFLOW) )
|
||||
continue;
|
||||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
|
||||
IsFloatInfinity(correct3) || IsFloatNaN(correct3) )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
err3 = Ulp_Error( test, correct3 );
|
||||
fail = fail && ((!(fabsf(err2) <= ulps)) && (!(fabsf(err3) <= ulps)));
|
||||
@@ -709,7 +709,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -717,13 +717,13 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( ! fail )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
//try with both args as zero
|
||||
if( IsFloatSubnormal( s2[j] ) )
|
||||
{
|
||||
double correct4, correct5;
|
||||
float err4, err5;
|
||||
|
||||
float err4, err5;
|
||||
|
||||
if( !gInfNanSupport )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
@@ -731,21 +731,21 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
correct3 = func.f_ff( -0.0, 0.0 );
|
||||
correct4 = func.f_ff( 0.0, -0.0 );
|
||||
correct5 = func.f_ff( -0.0, -0.0 );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if( !gInfNanSupport )
|
||||
{
|
||||
if( fetestexcept(FE_OVERFLOW) )
|
||||
continue;
|
||||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( IsFloatInfinity(correct2) || IsFloatNaN(correct2) ||
|
||||
IsFloatInfinity(correct3) || IsFloatNaN(correct3) ||
|
||||
IsFloatInfinity(correct4) || IsFloatNaN(correct4) ||
|
||||
IsFloatInfinity(correct5) || IsFloatNaN(correct5) )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
err3 = Ulp_Error( test, correct3 );
|
||||
err4 = Ulp_Error( test, correct4 );
|
||||
@@ -778,18 +778,18 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
if( !gInfNanSupport )
|
||||
feclearexcept(FE_OVERFLOW);
|
||||
|
||||
|
||||
correct2 = func.f_ff( s[j], 0.0 );
|
||||
correct3 = func.f_ff( s[j], -0.0 );
|
||||
|
||||
|
||||
// Per section 10 paragraph 6, accept any result if an input or output is a infinity or NaN or overflow
|
||||
if ( !gInfNanSupport)
|
||||
if ( !gInfNanSupport)
|
||||
{
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
// Note: no double rounding here. Reference functions calculate in single precision.
|
||||
if( overflow[j] ||
|
||||
IsFloatInfinity(correct) || IsFloatNaN(correct) ||
|
||||
IsFloatInfinity(correct2)|| IsFloatNaN(correct2) )
|
||||
continue;
|
||||
continue;
|
||||
}
|
||||
|
||||
err2 = Ulp_Error( test, correct2 );
|
||||
@@ -799,7 +799,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -809,13 +809,13 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -833,9 +833,9 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -848,28 +848,28 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
if( overflow )
|
||||
free( overflow );
|
||||
free( overflow );
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
// Positive values around 3.0 .. 1.0, mirroring the matching row of the negative half.
// The entry just above +1.0 is +(1 + 2^-52); it previously repeated the negative value.
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
|
||||
};
|
||||
|
||||
@@ -897,7 +897,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -959,7 +959,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->name, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
@@ -974,7 +974,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -983,7 +983,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
//Init input arrays
|
||||
@@ -1033,7 +1033,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -1049,14 +1049,14 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d)
|
||||
for( ; j < gMaxVectorSizeIndex; j++ )
|
||||
vlog( "\t -- " );
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
|
||||
|
||||
exit:
|
||||
// Release
|
||||
// Release
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
@@ -1079,7 +1079,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -1113,29 +1113,29 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_ulong *p2 = (cl_ulong *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_double *fp2 = (cl_double *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesDouble[x];
|
||||
@@ -1156,7 +1156,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = genrand_int64(d);
|
||||
p2[j] = genrand_int64(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -1174,15 +1174,15 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -1196,7 +1196,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -1214,14 +1214,14 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
if( gSkipCorrectnessTesting )
|
||||
return CL_SUCCESS;
|
||||
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_double *s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
cl_double *s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (cl_double) func.f_ff( s[j], s2[j] );
|
||||
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
|
||||
for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -1230,7 +1230,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -1239,7 +1239,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_ulong *t = (cl_ulong *)r;
|
||||
@@ -1248,7 +1248,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
@@ -1256,7 +1256,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
long double correct = func.f_ff( s[j], s2[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -1267,7 +1267,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// retry per section 6.5.3.3
|
||||
if( IsDoubleSubnormal( s[j] ) )
|
||||
{
|
||||
@@ -1280,7 +1280,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -1288,7 +1288,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( ! fail )
|
||||
err = 0.0f;
|
||||
}
|
||||
|
||||
|
||||
//try with both args as zero
|
||||
if( IsDoubleSubnormal( s2[j] ) )
|
||||
{
|
||||
@@ -1332,7 +1332,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -1342,12 +1342,12 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -1358,16 +1358,16 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -1380,7 +1380,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -29,7 +29,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata);
|
||||
int TestFunc_Double_Double_Int(const Func *f, MTdata);
|
||||
|
||||
#if defined( __cplusplus)
|
||||
extern "C"
|
||||
extern "C"
|
||||
#endif
|
||||
// Table entry for the "binary_i" test family: its name string followed by the
// float and double test entry points declared above.
// NOTE(review): the field order of vtbl is not visible here -- confirm against its declaration.
const vtbl _binary_i = { "binary_i", TestFunc_Float_Float_Int, TestFunc_Double_Double_Int };
|
||||
|
||||
@@ -44,7 +44,7 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count,
|
||||
" out[i] = ", name, "( in1[i], in2[i] );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global float* out, __global float* in, __global int* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
@@ -87,31 +87,31 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count,
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
}
|
||||
|
||||
static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n"
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global int", sizeNames[vectorSize], "* in2 )\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" out[i] = ", name, "( in1[i], in2[i] );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global int* in2)\n"
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global double* out, __global double* in, __global int* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
" if( i + 1 < get_global_size(0) )\n"
|
||||
@@ -153,17 +153,17 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
@@ -193,25 +193,25 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo
|
||||
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
};
|
||||
// Element count of the specialValuesFloat table, derived from the array's size
// so it tracks any rows added to or removed from the initializer.
static size_t specialValuesFloatCount =
    sizeof( specialValuesFloat ) / sizeof( *specialValuesFloat );
|
||||
|
||||
|
||||
static const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147,
|
||||
// Table of integer edge-case values: zero, ten positive values, and the
// negations of those ten. TestFloat multiplies this table's element count by
// specialValuesFloatCount to size its edge-case pass.
static const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147,
|
||||
-1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147 };
|
||||
// Element count of the specialValuesInt table, derived from the array's size
// so it tracks any values added to or removed from the initializer.
static size_t specialValuesIntCount =
    sizeof( specialValuesInt ) / sizeof( *specialValuesInt );
|
||||
|
||||
@@ -327,11 +327,11 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
// Run the kernels
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
|
||||
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
{
|
||||
@@ -342,7 +342,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -353,7 +353,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
@@ -404,7 +404,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -418,7 +418,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d)
|
||||
vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "%sf%s", f->name, sizeNames[j] );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
@@ -447,7 +447,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -480,28 +480,28 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesIntCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
if( job_id <= (cl_uint)indx )
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesIntCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
cl_int *ip2 = (cl_int *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesFloat[x];
|
||||
@@ -522,7 +522,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -540,15 +540,15 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -562,7 +562,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -596,7 +596,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -605,7 +605,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_uint *t = (cl_uint *)r;
|
||||
@@ -614,7 +614,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_uint *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
@@ -622,7 +622,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
double correct = func.f_fi( s[j], s2[j] );
|
||||
float err = Ulp_Error( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -647,7 +647,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsFloatResultSubnormal( correct2, ulps ) || IsFloatResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -657,12 +657,12 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -680,9 +680,9 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -695,32 +695,32 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
|
||||
};
|
||||
// Number of entries in the specialValuesDouble table, derived from the array itself.
static size_t specialValuesDoubleCount = sizeof( specialValuesDouble ) / sizeof( *specialValuesDouble );
|
||||
|
||||
// Integer edge-case operands used together with specialValuesDouble in TestDouble
// (the total number of special-value combinations is the product of the two counts).
// NOTE(review): -11024 breaks the symmetry with +1022/+1023/+1024 and may be a typo
// for -1024; it is left unchanged here because it is test data -- confirm.
static const int specialValuesInt2[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX,
                                         -1, -2, -3, -1022, -1023, -11024, -INT_MAX };
// The count must be taken from specialValuesInt2 itself. Sizing it from another table
// yields the wrong count whenever the two tables differ in length, and indexing
// specialValuesInt2 with that count can then run past the end of the array.
static size_t specialValuesInt2Count = sizeof( specialValuesInt2 ) / sizeof( specialValuesInt2[0] );
|
||||
|
||||
@@ -746,7 +746,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
test_info.f = f;
|
||||
test_info.ulps = f->double_ulps;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -809,12 +809,12 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
// Run the kernels
|
||||
if( !gSkipCorrectnessTesting )
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
|
||||
|
||||
|
||||
// Accumulate the arithmetic errors
|
||||
for( i = 0; i < test_info.threadCount; i++ )
|
||||
{
|
||||
@@ -825,7 +825,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
@@ -836,7 +836,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
//Init input arrays
|
||||
@@ -886,7 +886,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
@@ -902,14 +902,14 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d)
|
||||
for( ; j < gMaxVectorSizeIndex; j++ )
|
||||
vlog( "\t -- " );
|
||||
}
|
||||
|
||||
|
||||
if( ! gSkipCorrectnessTesting )
|
||||
vlog( "\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2 );
|
||||
vlog( "\n" );
|
||||
|
||||
|
||||
exit:
|
||||
// Release
|
||||
// Release
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
@@ -932,7 +932,7 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
@@ -966,28 +966,28 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
|
||||
cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesInt2Count;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
if( job_id <= (cl_uint)indx )
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesInt2Count;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
cl_double *fp = (cl_double *)p;
|
||||
cl_int *ip2 = (cl_int *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesDouble[x];
|
||||
@@ -1008,7 +1008,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
p[j] = DoubleFromUInt32(genrand_int32(d));
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
@@ -1026,15 +1026,15 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -1048,7 +1048,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
@@ -1066,14 +1066,14 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
if( gSkipCorrectnessTesting )
|
||||
return CL_SUCCESS;
|
||||
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
cl_double *r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_double *s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
cl_int *s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = (cl_double) func.f_fi( s[j], s2[j] );
|
||||
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
|
||||
for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -1082,7 +1082,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for the last buffer
|
||||
@@ -1091,7 +1091,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_ulong *t = (cl_ulong *)r;
|
||||
@@ -1100,7 +1100,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
for( k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
cl_ulong *q = out[k];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( t[j] != q[j] )
|
||||
{
|
||||
@@ -1108,7 +1108,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
long double correct = func.f_fi( s[j], s2[j] );
|
||||
float err = Ulp_Error_Double( test, correct );
|
||||
int fail = ! (fabsf(err) <= ulps);
|
||||
|
||||
|
||||
if( fail && ftz )
|
||||
{
|
||||
// retry per section 6.5.3.2
|
||||
@@ -1131,7 +1131,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
err = err2;
|
||||
if( fabsf( err3 ) < fabsf(err ) )
|
||||
err = err3;
|
||||
|
||||
|
||||
// retry per section 6.5.3.4
|
||||
if( IsDoubleResultSubnormal( correct2, ulps ) || IsDoubleResultSubnormal( correct3, ulps ) )
|
||||
{
|
||||
@@ -1141,12 +1141,12 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( fabsf(err ) > tinfo->maxError )
|
||||
{
|
||||
tinfo->maxError = fabsf(err);
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
tinfo->maxErrorValue = s[j];
|
||||
tinfo->maxErrorValue2 = s2[j];
|
||||
}
|
||||
if( fail )
|
||||
{
|
||||
@@ -1157,16 +1157,16 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
@@ -1179,7 +1179,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
|
||||
exit:
|
||||
return error;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
@@ -28,7 +28,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata);
|
||||
int TestMacro_Int_Double_Double(const Func *f, MTdata);
|
||||
|
||||
#if defined( __cplusplus)
|
||||
extern "C"
|
||||
extern "C"
|
||||
#endif
|
||||
// Table entry labelled "macro_binary" that references the two drivers declared above:
// TestMacro_Int_Float_Float and TestMacro_Int_Double_Double.
// NOTE(review): the vtbl field layout is declared elsewhere -- presumably
// { name, float test, double test }; confirm against the vtbl definition.
const vtbl _macro_binary = { "macro_binary", TestMacro_Int_Float_Float, TestMacro_Int_Double_Double };
|
||||
|
||||
@@ -43,7 +43,7 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count,
|
||||
" out[i] = ", name, "( in1[i], in2[i] );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
const char *c3[] = { "__kernel void math_kernel", sizeNames[vectorSize], "( __global int* out, __global float* in, __global float* in2)\n"
|
||||
"{\n"
|
||||
" size_t i = get_global_id(0);\n"
|
||||
@@ -82,33 +82,33 @@ static int BuildKernel( const char *name, int vectorSize, cl_uint kernel_count,
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
||||
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p); }
|
||||
|
||||
|
||||
static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_count, cl_kernel *k, cl_program *p )
|
||||
{
|
||||
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global long", sizeNames[vectorSize], "* out, __global double", sizeNames[vectorSize], "* in1, __global double", sizeNames[vectorSize], "* in2 )\n"
|
||||
"{\n"
|
||||
" int i = get_global_id(0);\n"
|
||||
" out[i] = ", name, "( in1[i], in2[i] );\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
const char *c3[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
|
||||
"__kernel void math_kernel", sizeNames[vectorSize], "( __global long* out, __global double* in, __global double* in2)\n"
|
||||
"{\n"
|
||||
@@ -148,21 +148,21 @@ static int BuildKernelDouble( const char *name, int vectorSize, cl_uint kernel_c
|
||||
" }\n"
|
||||
"}\n"
|
||||
};
|
||||
|
||||
|
||||
const char **kern = c;
|
||||
size_t kernSize = sizeof(c)/sizeof(c[0]);
|
||||
|
||||
|
||||
if( sizeValues[vectorSize] == 3 )
|
||||
{
|
||||
kern = c3;
|
||||
kernSize = sizeof(c3)/sizeof(c3[0]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
char testName[32];
|
||||
snprintf( testName, sizeof( testName ) -1, "math_kernel%s", sizeNames[vectorSize] );
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
|
||||
return MakeKernels(kern, (cl_uint) kernSize, testName, kernel_count, k, p);
|
||||
}
|
||||
|
||||
typedef struct BuildKernelInfo
|
||||
@@ -192,20 +192,20 @@ static cl_int BuildKernel_DoubleFn( cl_uint job_id, cl_uint thread_id UNUSED, vo
|
||||
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
static const float specialValuesFloat[] = {
|
||||
-NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
|
||||
-3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
|
||||
+NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
|
||||
+3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
|
||||
MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
|
||||
MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
|
||||
MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
|
||||
};
|
||||
|
||||
static const size_t specialValuesFloatCount = sizeof(specialValuesFloat) / sizeof(specialValuesFloat[0]);
|
||||
@@ -231,7 +231,7 @@ typedef struct TestInfo
|
||||
cl_uint step; // step between each chunk and the next.
|
||||
cl_uint scale; // stride between individual test values
|
||||
int ftz; // non-zero if running in flush to zero mode
|
||||
|
||||
|
||||
}TestInfo;
|
||||
|
||||
static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *p );
|
||||
@@ -241,9 +241,9 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
|
||||
vlog( "%15s", f->name );
|
||||
|
||||
|
||||
// Init test_info
|
||||
memset( &test_info, 0, sizeof( test_info ) );
|
||||
test_info.threadCount = GetThreadCount();
|
||||
@@ -252,7 +252,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
|
||||
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -289,7 +289,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%ld, %ld}\n", region.origin, region.size );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
|
||||
@@ -308,27 +308,27 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_FloatFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
|
||||
|
||||
// Run the kernels
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestFloat, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
|
||||
if( gWimpyMode )
|
||||
vlog( "Wimp pass." );
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
//Init input arrays
|
||||
@@ -339,7 +339,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
|
||||
@@ -350,8 +350,8 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -360,7 +360,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
|
||||
if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
|
||||
if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
|
||||
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for( i = 0; i < PERF_LOOP_COUNT; i++ )
|
||||
@@ -371,21 +371,21 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if( (error = clFinish(gQueue) ) )
|
||||
{
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
if( time < bestTime )
|
||||
bestTime = time;
|
||||
}
|
||||
|
||||
|
||||
if( gReportAverageTimes )
|
||||
bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( float ) );
|
||||
@@ -393,7 +393,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d)
|
||||
}
|
||||
}
|
||||
vlog( "\n" );
|
||||
|
||||
|
||||
exit:
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -402,7 +402,7 @@ exit:
|
||||
{
|
||||
for( j = 0; j < test_info.threadCount; j++ )
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
|
||||
free( test_info.k[i] );
|
||||
}
|
||||
}
|
||||
@@ -417,10 +417,10 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -437,7 +437,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
|
||||
|
||||
// start the map of the output arrays
|
||||
cl_event e[ VECTOR_SIZE_COUNT ];
|
||||
cl_int *out[ VECTOR_SIZE_COUNT ];
|
||||
@@ -448,29 +448,29 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
|
||||
cl_uint *p2 = (cl_uint *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
int totalSpecialValueCount = specialValuesFloatCount * specialValuesFloatCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
float *fp = (float *)p;
|
||||
float *fp2 = (float *)p2;
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesFloatCount;
|
||||
y = (job_id * buffer_elements) / specialValuesFloatCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
fp[j] = specialValuesFloat[x];
|
||||
@@ -484,41 +484,41 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//Init any remaining values.
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
p[j] = genrand_int32(d);
|
||||
p2[j] = genrand_int32(d);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -527,38 +527,38 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// run the kernel
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
|
||||
|
||||
if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 2 failed\n" );
|
||||
|
||||
if( gSkipCorrectnessTesting )
|
||||
return CL_SUCCESS;
|
||||
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
cl_int *r = (cl_int *)gOut_Ref + thread_id * buffer_elements;
|
||||
float *s = (float *)gIn + thread_id * buffer_elements;
|
||||
float *s2 = (float *)gIn2 + thread_id * buffer_elements;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = func.i_ff( s[j], s2[j] );
|
||||
|
||||
|
||||
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
|
||||
for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -567,23 +567,23 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Wait for the last buffer
|
||||
out[j] = (cl_int*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
|
||||
if( error || NULL == out[j] )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_int *t = (cl_int *)r;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
{
|
||||
cl_int *q = out[0];
|
||||
|
||||
|
||||
if( gMinVectorSizeIndex == 0 && t[j] != q[j] )
|
||||
{
|
||||
if( ftz )
|
||||
@@ -596,7 +596,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
int correct2 = func.i_ff( 0.0f, -0.0f );
|
||||
int correct3 = func.i_ff( -0.0f, 0.0f );
|
||||
int correct4 = func.i_ff( -0.0f, -0.0f );
|
||||
|
||||
|
||||
if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
|
||||
continue;
|
||||
}
|
||||
@@ -615,9 +615,9 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( correct == q[j] || correct2 == q[j] )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
uint32_t err = t[j] - q[j];
|
||||
if( q[j] > t[j] )
|
||||
err = q[j] - t[j];
|
||||
@@ -625,7 +625,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
error = -1;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
q = out[k];
|
||||
@@ -642,7 +642,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
int correct2 = -func.i_ff( 0.0f, -0.0f );
|
||||
int correct3 = -func.i_ff( -0.0f, 0.0f );
|
||||
int correct4 = -func.i_ff( -0.0f, -0.0f );
|
||||
|
||||
|
||||
if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
|
||||
continue;
|
||||
}
|
||||
@@ -661,7 +661,7 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( correct == q[j] || correct2 == q[j] )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
cl_uint err = -t[j] - q[j];
|
||||
if( q[j] > -t[j] )
|
||||
@@ -678,40 +678,40 @@ static cl_int TestFloat( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
|
||||
|
||||
|
||||
if( 0 == ( base & 0x0fffffff) )
|
||||
{
|
||||
vlog("." );
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
// A table of more difficult cases to get right
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
static const double specialValuesDouble[] = {
|
||||
-NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
|
||||
-3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
// Positive counterpart of the -3.0 .. -0x1.fffffffffffffp-1 row; the entry just above +1.0 must carry a '+' sign.
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
|
||||
+NAN, +INFINITY, +DBL_MAX, MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12), MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
|
||||
MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
|
||||
// Positive counterpart of the -3.0 .. -0x1.fffffffffffffp-1 row; the entry just above +1.0 must carry a '+' sign.
+3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(+0x1.0000000000001p0, +0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
|
||||
MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
|
||||
MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
|
||||
};
|
||||
|
||||
@@ -725,9 +725,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
TestInfo test_info;
|
||||
cl_int error;
|
||||
size_t i, j;
|
||||
|
||||
|
||||
vlog( "%14sD", f->name );
|
||||
|
||||
|
||||
// Init test_info
|
||||
memset( &test_info, 0, sizeof( test_info ) );
|
||||
test_info.threadCount = GetThreadCount();
|
||||
@@ -736,7 +736,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
test_info.step = (cl_uint) test_info.subBufferSize * test_info.scale;
|
||||
test_info.f = f;
|
||||
test_info.ftz = f->ftz || gForceFTZ;
|
||||
|
||||
|
||||
// cl_kernels aren't thread safe, so we make one for each vector size for every thread
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
@@ -773,7 +773,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
vlog_error( "Error: Unable to create sub-buffer of gInBuffer for region {%ld, %ld}\n", region.origin, region.size );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
|
||||
@@ -792,26 +792,26 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
|
||||
test_info.tinfo[i].d = init_genrand(genrand_int32(d));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Init the kernels
|
||||
BuildKernelInfo build_info = { gMinVectorSizeIndex, test_info.threadCount, test_info.k, test_info.programs, f->nameInCode };
|
||||
if( (error = ThreadPool_Do( BuildKernel_DoubleFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info ) ))
|
||||
goto exit;
|
||||
|
||||
|
||||
if( !gSkipCorrectnessTesting )
|
||||
{
|
||||
error = ThreadPool_Do( TestDouble, (cl_uint) ((1ULL<<32) / test_info.step), &test_info );
|
||||
|
||||
|
||||
if( error )
|
||||
goto exit;
|
||||
|
||||
|
||||
if( gWimpyMode )
|
||||
vlog( "Wimp pass." );
|
||||
else
|
||||
vlog( "passed." );
|
||||
}
|
||||
|
||||
|
||||
if( gMeasureTimes )
|
||||
{
|
||||
//Init input arrays
|
||||
@@ -822,7 +822,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
p[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
|
||||
p2[j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, BUFFER_SIZE, gIn, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer ***\n", error );
|
||||
@@ -833,8 +833,8 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
vlog_error( "\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error );
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// Run the kernels
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -843,7 +843,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
if( ( error = clSetKernelArg( test_info.k[j][0], 0, sizeof( gOutBuffer[j] ), &gOutBuffer[j] ) )) { LogBuildError(test_info.programs[j]); goto exit; }
|
||||
if( ( error = clSetKernelArg( test_info.k[j][0], 1, sizeof( gInBuffer ), &gInBuffer ) )) { LogBuildError(test_info.programs[j]); goto exit; }
|
||||
if( ( error = clSetKernelArg( test_info.k[j][0], 2, sizeof( gInBuffer2 ), &gInBuffer2 ) )) { LogBuildError(test_info.programs[j]); goto exit; }
|
||||
|
||||
|
||||
double sum = 0.0;
|
||||
double bestTime = INFINITY;
|
||||
for( i = 0; i < PERF_LOOP_COUNT; i++ )
|
||||
@@ -854,21 +854,21 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Make sure OpenCL is done
|
||||
if( (error = clFinish(gQueue) ) )
|
||||
{
|
||||
vlog_error( "Error %d at clFinish\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
uint64_t endTime = GetTime();
|
||||
double time = SubtractTime( endTime, startTime );
|
||||
sum += time;
|
||||
if( time < bestTime )
|
||||
bestTime = time;
|
||||
}
|
||||
|
||||
|
||||
if( gReportAverageTimes )
|
||||
bestTime = sum / PERF_LOOP_COUNT;
|
||||
double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (BUFFER_SIZE / sizeof( double ) );
|
||||
@@ -877,11 +877,11 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d)
|
||||
for( ; j < gMaxVectorSizeIndex; j++ )
|
||||
vlog( "\t -- " );
|
||||
}
|
||||
|
||||
|
||||
vlog( "\n" );
|
||||
|
||||
|
||||
exit:
|
||||
// Release
|
||||
// Release
|
||||
for( i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++ )
|
||||
{
|
||||
clReleaseProgram(test_info.programs[i]);
|
||||
@@ -889,7 +889,7 @@ exit:
|
||||
{
|
||||
for( j = 0; j < test_info.threadCount; j++ )
|
||||
clReleaseKernel(test_info.k[i][j]);
|
||||
|
||||
|
||||
free( test_info.k[i] );
|
||||
}
|
||||
}
|
||||
@@ -904,10 +904,10 @@ exit:
|
||||
clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
|
||||
clReleaseCommandQueue(test_info.tinfo[i].tQueue);
|
||||
}
|
||||
|
||||
|
||||
free( test_info.tinfo );
|
||||
}
|
||||
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -924,7 +924,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
cl_uint j, k;
|
||||
cl_int error;
|
||||
const char *name = job->f->name;
|
||||
|
||||
|
||||
Force64BitFPUPrecision();
|
||||
|
||||
// start the map of the output arrays
|
||||
@@ -937,27 +937,27 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush failed\n" );
|
||||
|
||||
|
||||
//Init input array
|
||||
double *p = (double *)gIn + thread_id * buffer_elements;
|
||||
double *p2 = (double *)gIn2 + thread_id * buffer_elements;
|
||||
j = 0;
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
int totalSpecialValueCount = specialValuesDoubleCount * specialValuesDoubleCount;
|
||||
int indx = (totalSpecialValueCount - 1) / buffer_elements;
|
||||
|
||||
if( job_id <= (cl_uint)indx )
|
||||
{ // test edge cases
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
uint32_t x, y;
|
||||
|
||||
x = (job_id * buffer_elements) % specialValuesDoubleCount;
|
||||
y = (job_id * buffer_elements) / specialValuesDoubleCount;
|
||||
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
p[j] = specialValuesDouble[x];
|
||||
@@ -971,41 +971,41 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//Init any remaining values.
|
||||
for( ; j < buffer_elements; j++ )
|
||||
{
|
||||
((cl_ulong*)p)[j] = genrand_int64(d);
|
||||
((cl_ulong*)p2)[j] = genrand_int64(d);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0, buffer_size, p, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
if( (error = clEnqueueWriteBuffer( tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0, buffer_size, p2, 0, NULL, NULL) ))
|
||||
{
|
||||
vlog_error( "Error: clEnqueueWriteBuffer failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
//Wait for the map to finish
|
||||
if( (error = clWaitForEvents(1, e + j) ))
|
||||
{
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
vlog_error( "Error: clWaitForEvents failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
if( (error = clReleaseEvent( e[j] ) ))
|
||||
{
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
vlog_error( "Error: clReleaseEvent failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// Fill the result buffer with garbage, so that old results don't carry over
|
||||
uint32_t pattern = 0xffffdead;
|
||||
memset_pattern4(out[j], &pattern, buffer_size);
|
||||
@@ -1014,38 +1014,38 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
vlog_error( "Error: clEnqueueMapBuffer failed! err: %d\n", error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
// run the kernel
|
||||
size_t vectorCount = (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
|
||||
cl_kernel kernel = job->k[j][thread_id]; //each worker thread has its own copy of the cl_kernel
|
||||
cl_program program = job->programs[j];
|
||||
|
||||
|
||||
if( ( error = clSetKernelArg( kernel, 0, sizeof( tinfo->outBuf[j] ), &tinfo->outBuf[j] ))){ LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 1, sizeof( tinfo->inBuf ), &tinfo->inBuf ) )) { LogBuildError(program); return error; }
|
||||
if( ( error = clSetKernelArg( kernel, 2, sizeof( tinfo->inBuf2 ), &tinfo->inBuf2 ) )) { LogBuildError(program); return error; }
|
||||
|
||||
|
||||
if( (error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL, &vectorCount, NULL, 0, NULL, NULL)))
|
||||
{
|
||||
vlog_error( "FAILED -- could not execute kernel\n" );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Get that moving
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 2 failed\n" );
|
||||
|
||||
|
||||
if( gSkipCorrectnessTesting )
|
||||
return CL_SUCCESS;
|
||||
|
||||
|
||||
//Calculate the correctly rounded reference result
|
||||
cl_long *r = (cl_long *)gOut_Ref + thread_id * buffer_elements;
|
||||
cl_double *s = (cl_double *)gIn + thread_id * buffer_elements;
|
||||
cl_double *s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
r[j] = dfunc.i_ff( s[j], s2[j] );
|
||||
|
||||
|
||||
|
||||
|
||||
// Read the data back -- no need to wait for the first N-1 buffers. This is an in order queue.
|
||||
for( j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
@@ -1054,23 +1054,23 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Wait for the last buffer
|
||||
out[j] = (cl_long*) clEnqueueMapBuffer( tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0, buffer_size, 0, NULL, NULL, &error);
|
||||
if( error || NULL == out[j] )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error );
|
||||
goto exit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//Verify data
|
||||
cl_long *t = (cl_long *)r;
|
||||
for( j = 0; j < buffer_elements; j++ )
|
||||
{
|
||||
cl_long *q = (cl_long *) out[0];
|
||||
|
||||
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( gMinVectorSizeIndex == 0 && t[j] != q[j] )
|
||||
{
|
||||
@@ -1084,7 +1084,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
int64_t correct2 = dfunc.i_ff( 0.0f, -0.0f );
|
||||
int64_t correct3 = dfunc.i_ff( -0.0f, 0.0f );
|
||||
int64_t correct4 = dfunc.i_ff( -0.0f, -0.0f );
|
||||
|
||||
|
||||
if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
|
||||
continue;
|
||||
}
|
||||
@@ -1103,9 +1103,9 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( correct == q[j] || correct2 == q[j] )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
uint64_t err = t[j] - q[j];
|
||||
if( q[j] > t[j] )
|
||||
err = q[j] - t[j];
|
||||
@@ -1113,10 +1113,10 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
error = -1;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
for( k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++ )
|
||||
{
|
||||
{
|
||||
q = (cl_long*) out[k];
|
||||
// If we aren't getting the correctly rounded result
|
||||
if( -t[j] != q[j] )
|
||||
@@ -1131,7 +1131,7 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
int64_t correct2 = -dfunc.i_ff( 0.0f, -0.0f );
|
||||
int64_t correct3 = -dfunc.i_ff( -0.0f, 0.0f );
|
||||
int64_t correct4 = -dfunc.i_ff( -0.0f, -0.0f );
|
||||
|
||||
|
||||
if( correct == q[j] || correct2 == q[j] || correct3 == q[j] || correct4 == q[j] )
|
||||
continue;
|
||||
}
|
||||
@@ -1150,9 +1150,9 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
if( correct == q[j] || correct2 == q[j] )
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
uint64_t err = -t[j] - q[j];
|
||||
if( q[j] > -t[j] )
|
||||
err = q[j] + t[j];
|
||||
@@ -1162,26 +1162,26 @@ static cl_int TestDouble( cl_uint job_id, cl_uint thread_id, void *data )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for( j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++ )
|
||||
{
|
||||
if( (error = clEnqueueUnmapMemObject( tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)) )
|
||||
{
|
||||
vlog_error( "Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n", j, error );
|
||||
return error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if( (error = clFlush(tinfo->tQueue) ))
|
||||
vlog( "clFlush 3 failed\n" );
|
||||
|
||||
|
||||
|
||||
|
||||
if( 0 == ( base & 0x0fffffff) )
|
||||
{
|
||||
vlog("." );
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
|
||||
exit:
|
||||
return error;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,217 +1,217 @@
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef REFERENCE_MATH_H
|
||||
#define REFERENCE_MATH_H
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
// -- for testing float --
|
||||
double reference_sinh( double x );
|
||||
double reference_sqrt( double x );
|
||||
double reference_tanh( double x );
|
||||
double reference_acos( double );
|
||||
double reference_asin( double );
|
||||
double reference_atan( double );
|
||||
double reference_atan2( double, double );
|
||||
double reference_ceil( double );
|
||||
double reference_cosh( double );
|
||||
double reference_exp( double );
|
||||
double reference_fabs( double );
|
||||
double reference_acospi( double );
|
||||
double reference_asinpi( double );
|
||||
double reference_atanpi( double );
|
||||
double reference_atan2pi( double, double );
|
||||
double reference_cospi( double );
|
||||
double reference_divide( double, double );
|
||||
double reference_fract( double, double * );
|
||||
float reference_fma( float, float, float, int );
|
||||
double reference_mad( double, double, double );
|
||||
double reference_nextafter(double, double );
|
||||
double reference_recip( double );
|
||||
double reference_rootn( double, int );
|
||||
double reference_rsqrt( double );
|
||||
double reference_sincos( double, double * );
|
||||
double reference_sinpi( double );
|
||||
double reference_tanpi( double );
|
||||
double reference_pow(double x, double y);
|
||||
double reference_pown( double, int );
|
||||
double reference_powr( double, double );
|
||||
double reference_cos( double );
|
||||
double reference_sin( double );
|
||||
double reference_tan( double );
|
||||
double reference_log( double );
|
||||
double reference_log10( double );
|
||||
double reference_modf( double, double *n );
|
||||
|
||||
double reference_fdim( double, double );
|
||||
double reference_add( double, double );
|
||||
double reference_subtract( double, double );
|
||||
double reference_divide( double, double );
|
||||
double reference_multiply( double, double );
|
||||
double reference_remquo( double, double, int* );
|
||||
double reference_lgamma_r( double, int* );
|
||||
|
||||
int reference_isequal( double, double );
|
||||
int reference_isfinite( double );
|
||||
int reference_isgreater( double, double );
|
||||
int reference_isgreaterequal( double, double );
|
||||
int reference_isinf( double );
|
||||
int reference_isless( double, double );
|
||||
int reference_islessequal( double, double );
|
||||
int reference_islessgreater( double, double );
|
||||
int reference_isnan( double );
|
||||
int reference_isnormal( double );
|
||||
int reference_isnotequal( double, double );
|
||||
int reference_isordered( double, double );
|
||||
int reference_isunordered( double, double );
|
||||
int reference_signbit( float );
|
||||
|
||||
double reference_acosh( double x );
|
||||
double reference_asinh( double x );
|
||||
double reference_atanh( double x );
|
||||
double reference_cbrt(double x);
|
||||
float reference_copysign( float x, float y);
|
||||
double reference_copysignd( double x, double y);
|
||||
double reference_exp10( double );
|
||||
double reference_exp2( double x );
|
||||
double reference_expm1( double x );
|
||||
double reference_fmax( double x, double y );
|
||||
double reference_fmin( double x, double y );
|
||||
double reference_hypot( double x, double y );
|
||||
double reference_lgamma( double x);
|
||||
int reference_ilogb( double );
|
||||
double reference_log2( double x );
|
||||
double reference_log1p( double x );
|
||||
double reference_logb( double x );
|
||||
double reference_maxmag( double x, double y );
|
||||
double reference_minmag( double x, double y );
|
||||
double reference_nan( cl_uint x );
|
||||
double reference_reciprocal( double x );
|
||||
double reference_remainder( double x, double y );
|
||||
double reference_rint( double x );
|
||||
double reference_round( double x );
|
||||
double reference_trunc( double x );
|
||||
double reference_floor( double x );
|
||||
double reference_fmod( double x, double y );
|
||||
double reference_frexp( double x, int *n );
|
||||
double reference_ldexp( double x, int n );
|
||||
|
||||
double reference_assignment( double x );
|
||||
int reference_not( double x );
|
||||
|
||||
// -- for testing double --
|
||||
|
||||
long double reference_sinhl( long double x );
|
||||
long double reference_sqrtl( long double x );
|
||||
long double reference_tanhl( long double x );
|
||||
long double reference_acosl( long double );
|
||||
long double reference_asinl( long double );
|
||||
long double reference_atanl( long double );
|
||||
long double reference_atan2l( long double, long double );
|
||||
long double reference_ceill( long double );
|
||||
long double reference_coshl( long double );
|
||||
long double reference_expl( long double );
|
||||
long double reference_fabsl( long double );
|
||||
long double reference_acospil( long double );
|
||||
long double reference_asinpil( long double );
|
||||
long double reference_atanpil( long double );
|
||||
long double reference_atan2pil( long double, long double );
|
||||
long double reference_cospil( long double );
|
||||
long double reference_dividel( long double, long double );
|
||||
long double reference_fractl( long double, long double * );
|
||||
long double reference_fmal( long double, long double, long double );
|
||||
long double reference_madl( long double, long double, long double );
|
||||
long double reference_nextafterl(long double, long double );
|
||||
long double reference_recipl( long double );
|
||||
long double reference_rootnl( long double, int );
|
||||
long double reference_rsqrtl( long double );
|
||||
long double reference_sincosl( long double, long double * );
|
||||
long double reference_sinpil( long double );
|
||||
long double reference_tanpil( long double );
|
||||
long double reference_powl(long double x, long double y);
|
||||
long double reference_pownl( long double, int );
|
||||
long double reference_powrl( long double, long double );
|
||||
long double reference_cosl( long double );
|
||||
long double reference_sinl(long double );
|
||||
long double reference_tanl( long double );
|
||||
long double reference_logl( long double );
|
||||
long double reference_log10l( long double );
|
||||
long double reference_modfl( long double, long double *n );
|
||||
|
||||
|
||||
long double reference_fdiml( long double, long double );
|
||||
long double reference_addl( long double, long double );
|
||||
long double reference_subtractl( long double, long double );
|
||||
long double reference_dividel( long double, long double );
|
||||
long double reference_multiplyl( long double, long double );
|
||||
long double reference_remquol( long double, long double, int* );
|
||||
long double reference_lgamma_rl( long double, int* );
|
||||
|
||||
|
||||
int reference_isequall( long double, long double );
|
||||
int reference_isfinitel( long double );
|
||||
int reference_isgreaterl( long double, long double );
|
||||
int reference_isgreaterequall( long double, long double );
|
||||
int reference_isinfl( long double );
|
||||
int reference_islessl( long double, long double );
|
||||
int reference_islessequall( long double, long double );
|
||||
int reference_islessgreaterl( long double, long double );
|
||||
int reference_isnanl( long double );
|
||||
int reference_isnormall( long double );
|
||||
int reference_isnotequall( long double, long double );
|
||||
int reference_isorderedl( long double, long double );
|
||||
int reference_isunorderedl( long double, long double );
|
||||
int reference_signbitl( long double );
|
||||
|
||||
long double reference_acoshl( long double x );
|
||||
long double reference_asinhl( long double x );
|
||||
long double reference_atanhl( long double x );
|
||||
long double reference_cbrtl(long double x);
|
||||
long double reference_copysignl( long double x, long double y);
|
||||
long double reference_exp10l( long double );
|
||||
long double reference_exp2l( long double x );
|
||||
long double reference_expm1l( long double x );
|
||||
long double reference_fmaxl( long double x, long double y );
|
||||
long double reference_fminl( long double x, long double y );
|
||||
long double reference_hypotl( long double x, long double y );
|
||||
long double reference_lgammal( long double x);
|
||||
int reference_ilogbl( long double );
|
||||
long double reference_log2l( long double x );
|
||||
long double reference_log1pl( long double x );
|
||||
long double reference_logbl( long double x );
|
||||
long double reference_maxmagl( long double x, long double y );
|
||||
long double reference_minmagl( long double x, long double y );
|
||||
long double reference_nanl( cl_ulong x );
|
||||
long double reference_reciprocall( long double x );
|
||||
long double reference_remainderl( long double x, long double y );
|
||||
long double reference_rintl( long double x );
|
||||
long double reference_roundl( long double x );
|
||||
long double reference_truncl( long double x );
|
||||
long double reference_floorl( long double x );
|
||||
long double reference_fmodl( long double x, long double y );
|
||||
long double reference_frexpl( long double x, int *n );
|
||||
long double reference_ldexpl( long double x, int n );
|
||||
|
||||
long double reference_assignmentl( long double x );
|
||||
int reference_notl( long double x );
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// Copyright (c) 2017 The Khronos Group Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
#ifndef REFERENCE_MATH_H
|
||||
#define REFERENCE_MATH_H
|
||||
|
||||
#if defined( __APPLE__ )
|
||||
#include <OpenCL/opencl.h>
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
#endif
|
||||
|
||||
// -- for testing float --
|
||||
double reference_sinh( double x );
|
||||
double reference_sqrt( double x );
|
||||
double reference_tanh( double x );
|
||||
double reference_acos( double );
|
||||
double reference_asin( double );
|
||||
double reference_atan( double );
|
||||
double reference_atan2( double, double );
|
||||
double reference_ceil( double );
|
||||
double reference_cosh( double );
|
||||
double reference_exp( double );
|
||||
double reference_fabs( double );
|
||||
double reference_acospi( double );
|
||||
double reference_asinpi( double );
|
||||
double reference_atanpi( double );
|
||||
double reference_atan2pi( double, double );
|
||||
double reference_cospi( double );
|
||||
double reference_divide( double, double );
|
||||
double reference_fract( double, double * );
|
||||
float reference_fma( float, float, float, int );
|
||||
double reference_mad( double, double, double );
|
||||
double reference_nextafter(double, double );
|
||||
double reference_recip( double );
|
||||
double reference_rootn( double, int );
|
||||
double reference_rsqrt( double );
|
||||
double reference_sincos( double, double * );
|
||||
double reference_sinpi( double );
|
||||
double reference_tanpi( double );
|
||||
double reference_pow(double x, double y);
|
||||
double reference_pown( double, int );
|
||||
double reference_powr( double, double );
|
||||
double reference_cos( double );
|
||||
double reference_sin( double );
|
||||
double reference_tan( double );
|
||||
double reference_log( double );
|
||||
double reference_log10( double );
|
||||
double reference_modf( double, double *n );
|
||||
|
||||
double reference_fdim( double, double );
|
||||
double reference_add( double, double );
|
||||
double reference_subtract( double, double );
|
||||
double reference_divide( double, double );
|
||||
double reference_multiply( double, double );
|
||||
double reference_remquo( double, double, int* );
|
||||
double reference_lgamma_r( double, int* );
|
||||
|
||||
int reference_isequal( double, double );
|
||||
int reference_isfinite( double );
|
||||
int reference_isgreater( double, double );
|
||||
int reference_isgreaterequal( double, double );
|
||||
int reference_isinf( double );
|
||||
int reference_isless( double, double );
|
||||
int reference_islessequal( double, double );
|
||||
int reference_islessgreater( double, double );
|
||||
int reference_isnan( double );
|
||||
int reference_isnormal( double );
|
||||
int reference_isnotequal( double, double );
|
||||
int reference_isordered( double, double );
|
||||
int reference_isunordered( double, double );
|
||||
int reference_signbit( float );
|
||||
|
||||
double reference_acosh( double x );
|
||||
double reference_asinh( double x );
|
||||
double reference_atanh( double x );
|
||||
double reference_cbrt(double x);
|
||||
float reference_copysign( float x, float y);
|
||||
double reference_copysignd( double x, double y);
|
||||
double reference_exp10( double );
|
||||
double reference_exp2( double x );
|
||||
double reference_expm1( double x );
|
||||
double reference_fmax( double x, double y );
|
||||
double reference_fmin( double x, double y );
|
||||
double reference_hypot( double x, double y );
|
||||
double reference_lgamma( double x);
|
||||
int reference_ilogb( double );
|
||||
double reference_log2( double x );
|
||||
double reference_log1p( double x );
|
||||
double reference_logb( double x );
|
||||
double reference_maxmag( double x, double y );
|
||||
double reference_minmag( double x, double y );
|
||||
double reference_nan( cl_uint x );
|
||||
double reference_reciprocal( double x );
|
||||
double reference_remainder( double x, double y );
|
||||
double reference_rint( double x );
|
||||
double reference_round( double x );
|
||||
double reference_trunc( double x );
|
||||
double reference_floor( double x );
|
||||
double reference_fmod( double x, double y );
|
||||
double reference_frexp( double x, int *n );
|
||||
double reference_ldexp( double x, int n );
|
||||
|
||||
double reference_assignment( double x );
|
||||
int reference_not( double x );
|
||||
|
||||
// -- for testing double --
|
||||
|
||||
long double reference_sinhl( long double x );
|
||||
long double reference_sqrtl( long double x );
|
||||
long double reference_tanhl( long double x );
|
||||
long double reference_acosl( long double );
|
||||
long double reference_asinl( long double );
|
||||
long double reference_atanl( long double );
|
||||
long double reference_atan2l( long double, long double );
|
||||
long double reference_ceill( long double );
|
||||
long double reference_coshl( long double );
|
||||
long double reference_expl( long double );
|
||||
long double reference_fabsl( long double );
|
||||
long double reference_acospil( long double );
|
||||
long double reference_asinpil( long double );
|
||||
long double reference_atanpil( long double );
|
||||
long double reference_atan2pil( long double, long double );
|
||||
long double reference_cospil( long double );
|
||||
long double reference_dividel( long double, long double );
|
||||
long double reference_fractl( long double, long double * );
|
||||
long double reference_fmal( long double, long double, long double );
|
||||
long double reference_madl( long double, long double, long double );
|
||||
long double reference_nextafterl(long double, long double );
|
||||
long double reference_recipl( long double );
|
||||
long double reference_rootnl( long double, int );
|
||||
long double reference_rsqrtl( long double );
|
||||
long double reference_sincosl( long double, long double * );
|
||||
long double reference_sinpil( long double );
|
||||
long double reference_tanpil( long double );
|
||||
long double reference_powl(long double x, long double y);
|
||||
long double reference_pownl( long double, int );
|
||||
long double reference_powrl( long double, long double );
|
||||
long double reference_cosl( long double );
|
||||
long double reference_sinl(long double );
|
||||
long double reference_tanl( long double );
|
||||
long double reference_logl( long double );
|
||||
long double reference_log10l( long double );
|
||||
long double reference_modfl( long double, long double *n );
|
||||
|
||||
|
||||
long double reference_fdiml( long double, long double );
|
||||
long double reference_addl( long double, long double );
|
||||
long double reference_subtractl( long double, long double );
|
||||
long double reference_dividel( long double, long double );
|
||||
long double reference_multiplyl( long double, long double );
|
||||
long double reference_remquol( long double, long double, int* );
|
||||
long double reference_lgamma_rl( long double, int* );
|
||||
|
||||
|
||||
int reference_isequall( long double, long double );
|
||||
int reference_isfinitel( long double );
|
||||
int reference_isgreaterl( long double, long double );
|
||||
int reference_isgreaterequall( long double, long double );
|
||||
int reference_isinfl( long double );
|
||||
int reference_islessl( long double, long double );
|
||||
int reference_islessequall( long double, long double );
|
||||
int reference_islessgreaterl( long double, long double );
|
||||
int reference_isnanl( long double );
|
||||
int reference_isnormall( long double );
|
||||
int reference_isnotequall( long double, long double );
|
||||
int reference_isorderedl( long double, long double );
|
||||
int reference_isunorderedl( long double, long double );
|
||||
int reference_signbitl( long double );
|
||||
|
||||
long double reference_acoshl( long double x );
|
||||
long double reference_asinhl( long double x );
|
||||
long double reference_atanhl( long double x );
|
||||
long double reference_cbrtl(long double x);
|
||||
long double reference_copysignl( long double x, long double y);
|
||||
long double reference_exp10l( long double );
|
||||
long double reference_exp2l( long double x );
|
||||
long double reference_expm1l( long double x );
|
||||
long double reference_fmaxl( long double x, long double y );
|
||||
long double reference_fminl( long double x, long double y );
|
||||
long double reference_hypotl( long double x, long double y );
|
||||
long double reference_lgammal( long double x);
|
||||
int reference_ilogbl( long double );
|
||||
long double reference_log2l( long double x );
|
||||
long double reference_log1pl( long double x );
|
||||
long double reference_logbl( long double x );
|
||||
long double reference_maxmagl( long double x, long double y );
|
||||
long double reference_minmagl( long double x, long double y );
|
||||
long double reference_nanl( cl_ulong x );
|
||||
long double reference_reciprocall( long double x );
|
||||
long double reference_remainderl( long double x, long double y );
|
||||
long double reference_rintl( long double x );
|
||||
long double reference_roundl( long double x );
|
||||
long double reference_truncl( long double x );
|
||||
long double reference_floorl( long double x );
|
||||
long double reference_fmodl( long double x, long double y );
|
||||
long double reference_frexpl( long double x, int *n );
|
||||
long double reference_ldexpl( long double x, int n );
|
||||
|
||||
long double reference_assignmentl( long double x );
|
||||
int reference_notl( long double x );
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
220
test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
Normal file → Executable file
220
test_conformance/math_brute_force/run_math_brute_force_in_parallel.py
Normal file → Executable file
@@ -1,110 +1,110 @@
|
||||
#! /usr/bin/python
|
||||
|
||||
# // OpenCL Conformance Tests
|
||||
# //
|
||||
# // Copyright: (c) 2009-2011 by Apple Inc. All Rights Reserved.
|
||||
# //
|
||||
|
||||
import os, re, sys, subprocess, time
|
||||
|
||||
# A script to run the entirety of math_brute_force, running each separate job in parallel.
|
||||
|
||||
def DEBUG(text, level=1):
    """Print a debug message if it is within the configured verbosity.

    text:  the message to print.
    level: verbosity level of this message; it is printed only when the
           module-level DEBUG_LEVEL is greater than or equal to it.
    """
    if level > DEBUG_LEVEL:
        return
    print(text)
|
||||
|
||||
def write_info(text):
    """Report an informational message on stdout and in the ATF log.

    text: the message. It is written to stdout without a newline being
          appended, and additionally wrapped in an <Info> element in the
          ATF log when the module-level ATF flag is set.
    """
    # sys.stdout.write() replaces the Python 2-only "print text," statement,
    # which is a SyntaxError under Python 3. A separating space is emitted
    # unless the text already ends its line, so consecutive messages do not
    # run together.
    sys.stdout.write(text)
    if not text.endswith("\n"):
        sys.stdout.write(" ")
    if ATF:
        ATF_log.write("<Info>" + text + "</Info>\n")
        # Flush after every entry so the log is current while tests run.
        ATF_log.flush()
|
||||
|
||||
def write_error(text):
    """Report an error message on stdout and in the ATF log.

    text: the message. It is written to stdout prefixed with "ERROR:" and
          without a newline being appended, and additionally wrapped in an
          <Error> element in the ATF log when the module-level ATF flag is
          set.
    """
    # sys.stdout.write() replaces the Python 2-only print statement, which is
    # a SyntaxError under Python 3. A separating space is emitted unless the
    # text already ends its line.
    sys.stdout.write("ERROR:" + text)
    if not text.endswith("\n"):
        sys.stdout.write(" ")
    if ATF:
        ATF_log.write("<Error>" + text + "</Error>\n")
        # Flush after every entry so the log is current while tests run.
        ATF_log.flush()
|
||||
|
||||
def start_atf():
    """Open the ATF XML log when the ATF_RESULTSDIRECTORY variable is set.

    Updates the module-level ATF flag and ATF_log file object:
      * if the environment variable is missing, or the log file cannot be
        opened, ATF is set to False and nothing is written;
      * otherwise ATF is set to True, ATF_log refers to the opened
        "TestLog.xml" inside that directory, and the opening <Log> and
        <TestStart/> elements are written.
    """
    global ATF, ATF_log
    DEBUG("start_atf()")
    output_path = os.environ.get("ATF_RESULTSDIRECTORY")
    if output_path is None:
        ATF = False
        DEBUG("\tATF not defined", 0)
        return
    ATF_output_file_name = "TestLog.xml"
    try:
        # os.path.join() supplies the separator; plain concatenation produced
        # a wrong path whenever the directory had no trailing slash.
        ATF_log = open(os.path.join(output_path, ATF_output_file_name), "w")
    except IOError:
        DEBUG("Could not open ATF file " + ATF_output_file_name, 0)
        ATF = False
        return
    ATF = True
    DEBUG("ATF Enabled")
    # Generate the XML header
    ATF_log.write("<Log>\n")
    ATF_log.write("<TestStart/>\n")
    DEBUG("Done start_atf()")
|
||||
|
||||
def stop_atf():
    """Write the closing XML elements to the ATF log and close it.

    Does nothing beyond the debug trace when the module-level ATF flag is
    not set.
    """
    DEBUG("stop_atf()")
    if ATF:
        # ATF is only the boolean "enabled" flag; the open file object is
        # ATF_log. Writing through ATF raised AttributeError.
        ATF_log.write("<TestFinish/>\n")
        ATF_log.write("</Log>\n")
        ATF_log.close()
|
||||
|
||||
def get_time():
    """Return the current local time as "<weekday name> HH:MM:SS"."""
    now = time.localtime()
    stamp_format = "%A %H:%M:%S"
    return time.strftime(stamp_format, now)
|
||||
|
||||
def start_test(id):
    """Launch one test run as a shell subprocess and register it.

    id: the test number. It is appended twice to the module-level `test`
        command string to form the command line.

    On success the Popen object is stored in running_tests[id]. If Popen
    raises OSError the failure is reported through write_error() and nothing
    is registered.
    """
    test_id = str(id)
    DEBUG("start_test(" + test_id + ")")
    command = " ".join((test, test_id, test_id))
    try:
        write_info(get_time() + " Executing " + command + "...")
        # NOTE(review): stdout and stderr are captured through pipes and are
        # not read inside this function; presumably the caller drains them
        # after the process exits -- confirm large outputs cannot stall it.
        process = subprocess.Popen(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError:
        write_error("Failed to execute " + command)
        return
    running_tests[id] = process
    running_count = len(running_tests.keys())
    DEBUG("start_test(" + test_id + ") added: " + str(running_tests[id]) +
          ", now " + str(running_count) + " tests running")
|
||||
|
||||
|
||||
|
||||
|
||||
# Verbosity threshold used by DEBUG(): messages with a level above it are dropped.
DEBUG_LEVEL = 2
# Command prefix; start_test() appends the test ID to it twice.
test = "./bruteforce -w"
# Maximum number of test processes kept running at the same time.
instances = 4
# Test IDs 0..max_test_ID (inclusive) are run.
max_test_ID = 12
# Maps test ID -> Popen object of a started, not yet collected test.
running_tests = {}
# ATF log file object and "ATF logging enabled" flag; set by start_atf().
ATF_log = None
ATF = False


def _as_text(line):
    """Return one line of subprocess output as str (pipes may yield bytes)."""
    if isinstance(line, str):
        return line
    return line.decode("utf-8", "replace")


# Start the ATF log
start_atf()
next_test = 0
next_test_to_finish = 0

# Keep going until every test has been started and collected, in ID order.
while next_test <= max_test_ID or next_test_to_finish <= max_test_ID:
    # If we want to run more tests, start them
    while len(running_tests) < instances and next_test <= max_test_ID:
        start_test(next_test)
        next_test = next_test + 1
        time.sleep(1)
    # Check if the oldest test has finished
    p = running_tests.get(next_test_to_finish)
    if p is None:
        # start_test() could not launch this ID (it has already reported the
        # error); skip it instead of failing with KeyError.
        next_test_to_finish = next_test_to_finish + 1
        continue
    if p.poll() is not None:
        write_info(get_time() + " Test " + str(next_test_to_finish) + " finished.")
        del running_tests[next_test_to_finish]
        next_test_to_finish = next_test_to_finish + 1
        # Write the results from the test out
        for line in p.stdout.readlines():
            write_info(_as_text(line))
        for line in p.stderr.readlines():
            write_error(_as_text(line))

    time.sleep(1)

# Stop the ATF log
stop_atf()
|
||||
#! /usr/bin/python
|
||||
|
||||
# // OpenCL Conformance Tests
|
||||
# //
|
||||
# // Copyright: (c) 2009-2011 by Apple Inc. All Rights Reserved.
|
||||
# //
|
||||
|
||||
import os, re, sys, subprocess, time
|
||||
|
||||
# A script to run the entirety of math_brute_force, but to run each separate job in parallel.
|
||||
|
||||
def DEBUG(text, level=1):
    """Print a debug message if it is within the configured verbosity.

    text:  the message to print.
    level: verbosity level of this message; it is printed only when the
           module-level DEBUG_LEVEL is greater than or equal to it.
    """
    if level > DEBUG_LEVEL:
        return
    print(text)
|
||||
|
||||
def write_info(text):
    """Report an informational message on stdout and in the ATF log.

    text: the message. It is written to stdout without a newline being
          appended, and additionally wrapped in an <Info> element in the
          ATF log when the module-level ATF flag is set.
    """
    # sys.stdout.write() replaces the Python 2-only "print text," statement,
    # which is a SyntaxError under Python 3. A separating space is emitted
    # unless the text already ends its line, so consecutive messages do not
    # run together.
    sys.stdout.write(text)
    if not text.endswith("\n"):
        sys.stdout.write(" ")
    if ATF:
        ATF_log.write("<Info>" + text + "</Info>\n")
        # Flush after every entry so the log is current while tests run.
        ATF_log.flush()
|
||||
|
||||
def write_error(text):
    """Report an error message on stdout and in the ATF log.

    text: the message. It is written to stdout prefixed with "ERROR:" and
          without a newline being appended, and additionally wrapped in an
          <Error> element in the ATF log when the module-level ATF flag is
          set.
    """
    # sys.stdout.write() replaces the Python 2-only print statement, which is
    # a SyntaxError under Python 3. A separating space is emitted unless the
    # text already ends its line.
    sys.stdout.write("ERROR:" + text)
    if not text.endswith("\n"):
        sys.stdout.write(" ")
    if ATF:
        ATF_log.write("<Error>" + text + "</Error>\n")
        # Flush after every entry so the log is current while tests run.
        ATF_log.flush()
|
||||
|
||||
def start_atf():
    """Open the ATF XML log when the ATF_RESULTSDIRECTORY variable is set.

    Updates the module-level ATF flag and ATF_log file object:
      * if the environment variable is missing, or the log file cannot be
        opened, ATF is set to False and nothing is written;
      * otherwise ATF is set to True, ATF_log refers to the opened
        "TestLog.xml" inside that directory, and the opening <Log> and
        <TestStart/> elements are written.
    """
    global ATF, ATF_log
    DEBUG("start_atf()")
    output_path = os.environ.get("ATF_RESULTSDIRECTORY")
    if output_path is None:
        ATF = False
        DEBUG("\tATF not defined", 0)
        return
    ATF_output_file_name = "TestLog.xml"
    try:
        # os.path.join() supplies the separator; plain concatenation produced
        # a wrong path whenever the directory had no trailing slash.
        ATF_log = open(os.path.join(output_path, ATF_output_file_name), "w")
    except IOError:
        DEBUG("Could not open ATF file " + ATF_output_file_name, 0)
        ATF = False
        return
    ATF = True
    DEBUG("ATF Enabled")
    # Generate the XML header
    ATF_log.write("<Log>\n")
    ATF_log.write("<TestStart/>\n")
    DEBUG("Done start_atf()")
|
||||
|
||||
def stop_atf():
    """Write the closing XML elements to the ATF log and close it.

    Does nothing beyond the debug trace when the module-level ATF flag is
    not set.
    """
    DEBUG("stop_atf()")
    if ATF:
        # ATF is only the boolean "enabled" flag; the open file object is
        # ATF_log. Writing through ATF raised AttributeError.
        ATF_log.write("<TestFinish/>\n")
        ATF_log.write("</Log>\n")
        ATF_log.close()
|
||||
|
||||
def get_time():
    """Return the current local time as "<weekday name> HH:MM:SS"."""
    now = time.localtime()
    stamp_format = "%A %H:%M:%S"
    return time.strftime(stamp_format, now)
|
||||
|
||||
def start_test(id):
    """Launch one test run as a shell subprocess and register it.

    id: the test number. It is appended twice to the module-level `test`
        command string to form the command line.

    On success the Popen object is stored in running_tests[id]. If Popen
    raises OSError the failure is reported through write_error() and nothing
    is registered.
    """
    test_id = str(id)
    DEBUG("start_test(" + test_id + ")")
    command = " ".join((test, test_id, test_id))
    try:
        write_info(get_time() + " Executing " + command + "...")
        # NOTE(review): stdout and stderr are captured through pipes and are
        # not read inside this function; presumably the caller drains them
        # after the process exits -- confirm large outputs cannot stall it.
        process = subprocess.Popen(
            command,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError:
        write_error("Failed to execute " + command)
        return
    running_tests[id] = process
    running_count = len(running_tests.keys())
    DEBUG("start_test(" + test_id + ") added: " + str(running_tests[id]) +
          ", now " + str(running_count) + " tests running")
|
||||
|
||||
|
||||
|
||||
|
||||
# Verbosity threshold used by DEBUG(): messages with a level above it are dropped.
DEBUG_LEVEL = 2
# Command prefix; start_test() appends the test ID to it twice.
test = "./bruteforce -w"
# Maximum number of test processes kept running at the same time.
instances = 4
# Test IDs 0..max_test_ID (inclusive) are run.
max_test_ID = 12
# Maps test ID -> Popen object of a started, not yet collected test.
running_tests = {}
# ATF log file object and "ATF logging enabled" flag; set by start_atf().
ATF_log = None
ATF = False


def _as_text(line):
    """Return one line of subprocess output as str (pipes may yield bytes)."""
    if isinstance(line, str):
        return line
    return line.decode("utf-8", "replace")


# Start the ATF log
start_atf()
next_test = 0
next_test_to_finish = 0

# Keep going until every test has been started and collected, in ID order.
while next_test <= max_test_ID or next_test_to_finish <= max_test_ID:
    # If we want to run more tests, start them
    while len(running_tests) < instances and next_test <= max_test_ID:
        start_test(next_test)
        next_test = next_test + 1
        time.sleep(1)
    # Check if the oldest test has finished
    p = running_tests.get(next_test_to_finish)
    if p is None:
        # start_test() could not launch this ID (it has already reported the
        # error); skip it instead of failing with KeyError.
        next_test_to_finish = next_test_to_finish + 1
        continue
    if p.poll() is not None:
        write_info(get_time() + " Test " + str(next_test_to_finish) + " finished.")
        del running_tests[next_test_to_finish]
        next_test_to_finish = next_test_to_finish + 1
        # Write the results from the test out
        for line in p.stdout.readlines():
            write_info(_as_text(line))
        for line in p.stderr.readlines():
            write_error(_as_text(line))

    time.sleep(1)

# Stop the ATF log
stop_atf()
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user