//
// Copyright (c) 2017 The Khronos Group Inc.
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP
#define TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP

#include "../common.hpp"
#include "../funcs_test_utils.hpp"

#include <cmath>
#include <type_traits>

// float4 cross(float4 p0, float4 p1)
//
// Host-side reference for cross(). The functor reports use_ulp() == false and
// supplies a per-component tolerance through delta(); how the harness applies
// that tolerance is defined outside this file.
struct geometric_func_cross : public binary_func<cl_float4, cl_float4, cl_float4>
{
    // Picks the relative tolerance factor from the device's default rounding mode.
    geometric_func_cross(cl_device_id device)
    {
        // Baseline relative tolerance factor: 3 * FLT_EPSILON.
        this->m_delta = 3.0f * CL_FLT_EPSILON;
        // RTZ devices accrue approximately double the amount of error per operation.  Allow for that.
        if(get_default_rounding_mode(device) == CL_FP_ROUND_TO_ZERO)
        {
            this->m_delta *= 2.0f;
        }
    }

    // Name of the function under test.
    std::string str()
    {
        return "cross";
    }

    // Include line returned for the device-side source.
    std::string headers()
    {
        return "#include <opencl_geometric>\n";
    }

    // Reference result: single-precision cross product of the first three
    // components of p0 and p1; the fourth component is set to 0.
    cl_float4 operator()(const cl_float4& p0, const cl_float4& p1)
    {
        cl_float4 r;
        r.s[0] = (p0.s[1] * p1.s[2]) - (p0.s[2] * p1.s[1]);
        r.s[1] = (p0.s[2] * p1.s[0]) - (p0.s[0] * p1.s[2]);
        r.s[2] = (p0.s[0] * p1.s[1]) - (p0.s[1] * p1.s[0]);
        r.s[3] = 0.0f;
        return r;
    }

    // Limits built from +/-1000 for the first (max1/min1) and second
    // (max2/min2) argument - presumably the bounds for generated inputs.
    cl_float4 max1()
    {
        return detail::def_limit<cl_float4>(1000.0f);
    }

    cl_float4 max2()
    {
        return detail::def_limit<cl_float4>(1000.0f);
    }

    cl_float4 min1()
    {
        return detail::def_limit<cl_float4>(-1000.0f);
    }

    cl_float4 min2()
    {
        return detail::def_limit<cl_float4>(-1000.0f);
    }

    // The tolerance comes from delta() rather than from an ULP count.
    bool use_ulp()
    {
        return false;
    }

    // Per-component tolerance: |m_delta * expected|.
    // p0 and p1 are unused.
    cl_double4 delta(const cl_float4& p0, const cl_float4& p1, const cl_float4& expected)
    {
        (void) p0; (void) p1;
        auto e = detail::make_value<cl_double4>(m_delta);
        cl_double4 tolerance = detail::multiply<cl_double4>(e, expected);
        // A tolerance is a magnitude: a negative expected component must not
        // turn into a negative bound.
        for(size_t i = 0; i < 4; i++)
        {
            tolerance.s[i] = std::fabs(tolerance.s[i]);
        }
        return tolerance;
    }

private:
    // Relative tolerance factor chosen in the constructor.
    cl_double m_delta;
};

// float dot(float4 p0, float4 p1);
//
// Host-side reference for dot(). The functor reports use_ulp() == false and
// supplies its tolerance through delta(); how the harness applies that
// tolerance is defined outside this file.
struct geometric_func_dot : public binary_func<cl_float4, cl_float4, cl_float>
{

    // Name of the function under test.
    std::string str()
    {
        return "dot";
    }

    // Include line returned for the device-side source.
    std::string headers()
    {
        return "#include <opencl_geometric>\n";
    }

    // Reference result: the four component products accumulated in single
    // precision, in component order.
    cl_float operator()(const cl_float4& p0, const cl_float4& p1)
    {
        cl_float r;
        r = p0.s[0] * p1.s[0];
        r += p0.s[1] * p1.s[1];
        r += p0.s[2] * p1.s[2];
        r += p0.s[3] * p1.s[3];
        return r;
    }

    // Limits built from +/-1000 for the first (max1/min1) and second
    // (max2/min2) argument - presumably the bounds for generated inputs.
    cl_float4 max1()
    {
        return detail::def_limit<cl_float4>(1000.0f);
    }

    cl_float4 max2()
    {
        return detail::def_limit<cl_float4>(1000.0f);
    }

    cl_float4 min1()
    {
        return detail::def_limit<cl_float4>(-1000.0f);
    }

    cl_float4 min2()
    {
        return detail::def_limit<cl_float4>(-1000.0f);
    }

    // The tolerance comes from delta() rather than from an ULP count.
    bool use_ulp()
    {
        return false;
    }

    // Tolerance: |expected| * (4 + (4 - 1)) * FLT_EPSILON, i.e. one epsilon
    // per multiplication and per addition of a 4-component dot product.
    // p0 and p1 are unused.
    cl_double delta(const cl_float4& p0, const cl_float4& p1, cl_float expected)
    {
        (void) p0; (void) p1;
        // A tolerance is a magnitude: a negative expected value must not turn
        // into a negative bound.
        return std::fabs(expected * ((4.0f + (4.0f - 1.0f)) * CL_FLT_EPSILON));
    }
};

// float distance(float4 p0, float4 p1);
//
// Host-side reference for distance(): the Euclidean distance between p0 and
// p1, evaluated in double precision and narrowed to float on return.
struct geometric_func_distance : public binary_func<cl_float4, cl_float4, cl_float>
{
    // Name of the function under test.
    std::string str() { return "distance"; }

    // Include line returned for the device-side source.
    std::string headers() { return "#include <opencl_geometric>\n"; }

    // Reference result: sqrt of the sum of squared component differences.
    cl_float operator()(const cl_float4& p0, const cl_float4& p1)
    {
        cl_double sum_of_squares = 0.0;
        for(size_t c = 0; c < 4; ++c)
        {
            const cl_double diff =
                static_cast<cl_double>(p0.s[c]) - static_cast<cl_double>(p1.s[c]);
            sum_of_squares += diff * diff;
        }
        return static_cast<cl_float>(std::sqrt(sum_of_squares));
    }

    // Limits built from +/-1000 for the first (max1/min1) and second
    // (max2/min2) argument - presumably the bounds for generated inputs.
    cl_float4 max1() { return detail::def_limit<cl_float4>(1000.0f); }
    cl_float4 max2() { return detail::def_limit<cl_float4>(1000.0f); }
    cl_float4 min1() { return detail::def_limit<cl_float4>(-1000.0f); }
    cl_float4 min2() { return detail::def_limit<cl_float4>(-1000.0f); }

    // ULP budget: sqrt error plus the accumulated error of the four
    // multiplications and three additions.
    float ulp()
    {
        const float sqrt_error = 3.0f;
        const float multiply_error = 1.5f * 4.0f;
        const float add_error = 0.5f * 3.0f;
        return sqrt_error + multiply_error + add_error;
    }
};

// float length(float4 p);
//
// Host-side reference for length(): the Euclidean norm of p, evaluated in
// double precision and narrowed to float on return.
struct geometric_func_length : public unary_func<cl_float4,cl_float>
{
    // Name of the function under test.
    std::string str() { return "length"; }

    // Include line returned for the device-side source.
    std::string headers() { return "#include <opencl_geometric>\n"; }

    // Reference result: sqrt of the sum of squared components.
    cl_float operator()(const cl_float4& p)
    {
        cl_double sum_of_squares = 0.0;
        for(const cl_float component : p.s)
        {
            const cl_double wide = static_cast<cl_double>(component);
            sum_of_squares += wide * wide;
        }
        return static_cast<cl_float>(std::sqrt(sum_of_squares));
    }

    // Limits built from +/-1000 - presumably the bounds for generated inputs.
    cl_float4 max1() { return detail::def_limit<cl_float4>(1000.0f); }
    cl_float4 min1() { return detail::def_limit<cl_float4>(-1000.0f); }

    // ULP budget: sqrt error plus half of the error accumulated under the
    // root (four multiplications, three additions), since sqrt(x + e) scales
    // the effect of e by 0.5.
    float ulp()
    {
        const float sqrt_error = 3.0f;
        const float multiply_error = 0.5f * 4.0f;
        const float add_error = 0.5f * 3.0f;
        return sqrt_error + 0.5f * (multiply_error + add_error);
    }
};

// float4 normalize(float4 p);
//
// Host-side reference for normalize(), including the NaN, infinity and
// all-zero edge cases. The finite path is evaluated in double precision and
// narrowed to float when stored.
struct geometric_func_normalize : public unary_func<cl_float4,cl_float4>
{
    // Name of the function under test.
    std::string str()
    {
        return "normalize";
    }

    // Include line returned for the device-side source.
    std::string headers()
    {
        return "#include <opencl_geometric>\n";
    }

    // Reference result for normalize(p).
    cl_float4 operator()(const cl_float4& p)
    {
        cl_double t = 0.0f;
        cl_float4 r;

        // normalize( v ) returns a vector full of NaNs if any element is a NaN.
        for(size_t i = 0; i < 4; i++)
        {
            if((std::isnan)(p.s[i]))
            {
                // Replicate the first NaN found into every component.
                for(size_t j = 0; j < 4; j++)
                {
                    r.s[j] = p.s[i];
                }
                return r;
            }
        }

        // normalize( v ) for which any element in v is infinite shall proceed as
        // if the elements in v were replaced as follows:
        // for( i = 0; i < sizeof(v) / sizeof(v[0] ); i++ )
        //     v[i] = isinf(v[i]) ? copysign(1.0, v[i]) : 0.0 * v [i];
        for(size_t i = 0; i < 4; i++)
        {
            if((std::isinf)(p.s[i]))
            {
                for(size_t j = 0; j < 4; j++)
                {
                    r.s[j] = (std::isinf)(p.s[j]) ? (std::copysign)(1.0, p.s[j]) : 0.0 * p.s[j];
                }
                // The substituted vector is finite, so this recursion ends
                // after one level.
                r = (*this)(r);
                return r;
            }
        }

        // Squared length, accumulated in double precision.
        for(size_t i = 0; i < 4; i++)
        {
            t += static_cast<cl_double>(p.s[i]) * static_cast<cl_double>(p.s[i]);
        }

        // normalize( v ) returns v if all elements of v are zero.
        if(t == 0.0f)
        {
            // Return the input itself so the sign of each zero is preserved.
            return p;
        }

        t = std::sqrt(t);
        for(size_t i = 0; i < 4; i++)
        {
            r.s[i] = static_cast<cl_double>(p.s[i]) / t;
        }

        return r;
    }

    // Limits built from +/-1000 - presumably the bounds for generated inputs.
    cl_float4 max1()
    {
        return detail::def_limit<cl_float4>(1000.0f);
    }

    cl_float4 min1()
    {
        return detail::def_limit<cl_float4>(-1000.0f);
    }

    // Hand-picked inputs: the zero vector, a single infinity, all infinities,
    // a mix of infinity and NaN, and an ordinary finite vector.
    std::vector<cl_float4> in_special_cases()
    {
        return {
            {0.0f, 0.0f, 0.0f, 0.0f},
            {std::numeric_limits<float>::infinity(), 0.0f, 0.0f, 0.0f},
            {
                std::numeric_limits<float>::infinity(),
                std::numeric_limits<float>::infinity(),
                std::numeric_limits<float>::infinity(),
                std::numeric_limits<float>::infinity()
            },
            {
                std::numeric_limits<float>::infinity(),
                1.0f,
                0.0f,
                std::numeric_limits<float>::quiet_NaN()
            },
            {-1.0f, -1.0f, 0.0f,-300.0f}
        };
    }

    // ULP budget for the comparison.
    float ulp()
    {
        return
            2.5f + // error in rsqrt + error in multiply
            (0.5f * 4.0f) + // cumulative error for multiplications
            (0.5f * 3.0f);  // cumulative error for additions
    }
};

// Runs each geometric function functor through the harness macros in a fixed
// order: cross, dot, distance, length, normalize.
// Returns CL_SUCCESS when `error` is still CL_SUCCESS afterwards, -1 otherwise.
// NOTE(review): `error` and `last_error` are presumably updated by the
// TEST_*_FUNC_MACRO expansions - confirm against funcs_test_utils.hpp.
AUTO_TEST_CASE(test_geometric_funcs)
(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
    int error = CL_SUCCESS;
    int last_error = CL_SUCCESS;

    // Binary functions.
    // float4 cross(float4 p0, float4 p1) - tolerance depends on the device.
    TEST_BINARY_FUNC_MACRO((geometric_func_cross(device)))
    // float dot(float4 p0, float4 p1)
    TEST_BINARY_FUNC_MACRO((geometric_func_dot()))
    // float distance(float4 p0, float4 p1)
    TEST_BINARY_FUNC_MACRO((geometric_func_distance()))

    // Unary functions.
    // float length(float4 p)
    TEST_UNARY_FUNC_MACRO((geometric_func_length()))
    // float4 normalize(float4 p)
    TEST_UNARY_FUNC_MACRO((geometric_func_normalize()))

    // Any failure is reported as -1.
    return (error != CL_SUCCESS) ? -1 : error;
}

#endif // TEST_CONFORMANCE_CLCPP_GEOMETRIC_FUNCS_GEOMETRIC_FUNCS_HPP