MathUtils.h#

Parent directory (math)

Includes required GLM library components and defines the rest of the information necessary for PowerVR Math needs.

Includes#

  • PVRCore/glm.h

  • PVRCore/types/Types.h

  • cmath

  • cstdint

Included By#

Namespaces#

Functions#

Source Code#

#pragma once
#include <PVRCore/types/Types.h>
#include <cmath>
#include <cstdint>
#include "PVRCore/glm.h"

namespace pvr {
namespace math {

template<typename T>
inline T gcd(T lhs, T rhs)
{
    T tmprhs;
    while (true)
    {
        if (rhs == 0) { return lhs; }
        tmprhs = rhs;
        rhs = lhs % rhs;
        lhs = tmprhs;
    }
}

template<typename T>
inline T lcm(T lhs, T rhs)
{
    return (lhs / gcd(lhs, rhs)) * rhs;
}

template<typename T>
inline T lcm_with_max(T lhs, T rhs)
{
    T strict = (lhs / gcd(lhs, rhs)) * rhs;
    if (strict == 0) { strict = std::max(lhs, rhs); }
    return strict;
}

inline int32_t makePowerOfTwoHigh(int32_t iVal)
{
    int iTmp = 1;
    do
    {
        iTmp <<= 1;
    } while (iTmp < iVal);
    return iTmp;
}

inline int32_t makePowerOfTwoLow(int32_t iVal)
{
    int iTmp = 1;
    do
    {
        iTmp <<= 1;
    } while (iTmp < iVal);
    return iTmp;
    iTmp >>= 1;
}

inline int32_t ndcToPixel(float ndc, int32_t screenSize) { return static_cast<int32_t>(ndc * screenSize * .5f + screenSize * .5f); }

inline float pixelToNdc(int32_t pixelCoord, int32_t screenSize) { return (2.f / screenSize) * (pixelCoord - screenSize * .5f); }

inline float quadraticEaseOut(float start, float end, float factor)
{
    float fTInv = 1.0f - factor;
    return ((start - end) * fTInv * fTInv) + end;
}

inline float quadraticEaseIn(float start, float end, float factor) { return ((end - start) * factor * factor) + start; }

template<typename genType>
bool intersectLinePlane(genType const& origin, genType const& dir, genType const& planeOrigin, genType const& planeNormal, typename genType::value_type& intersectionDistance,
    typename genType::value_type epsilon = std::numeric_limits<typename genType::value_type>::epsilon())
{
    using namespace glm;
    typename genType::value_type d = dot(dir, planeNormal);

    if (glm::abs(d) > epsilon)
    {
        intersectionDistance = dot(planeOrigin - origin, planeNormal) / d;
        return true;
    }
    return false;
}

template<typename Vec2>
Vec2 getPerpendicular(Vec2 const& aVector)
{
    return Vec2(aVector.y, -aVector.x);
}

inline glm::mat4 perspective(Api api, float fovy, float aspect, float near1, float far1, float rotate = .0f)
{
    glm::mat4 mat = glm::perspective(fovy, aspect, near1, far1);
    if (api == Api::Vulkan)
    {
        mat[1] *= -1.f; // negate the y axis's y component, because vulkan coordinate system is +y down.
        // We would normally negate the entire row, but the rest of the components are zero.
    }
    return (rotate == 0.f ? mat : glm::rotate(rotate, glm::vec3(0.0f, 0.0f, 1.0f)) * mat);
}

inline glm::mat4 perspectiveFov(Api api, float fovy, float width, float height, float near1, float far1, float rotate = .0f)
{
    return perspective(api, fovy, width / height, near1, far1, rotate);
}

inline glm::mat4 ortho(Api api, float left, float right, float bottom, float top, float rotate = 0.0f)
{
    if (api == pvr::Api::Vulkan)
    {
        std::swap(bottom, top); // Vulkan origin y is top
    }
    glm::mat4 proj = glm::ortho<float>(left, right, bottom, top);
    return (rotate == 0.0f ? proj : glm::rotate(rotate, glm::vec3(0.0f, 0.0f, 1.0f)) * proj);
}
} // namespace math
} // namespace pvr

namespace {
inline void addCoefficients(const uint64_t pascalSum, const size_t halfCoefficientsMinusOne, const size_t numCoefficients, const std::vector<uint64_t>& coefficients,
    std::vector<double>& weights, std::vector<double>& offsets)
{
    // Handle cases where the coefficients vector contains coefficients which are to be truncated. i.e. we get rid of the outer set of coefficients
    size_t unneededCoefficients = (coefficients.size() - static_cast<size_t>(numCoefficients)) / 2;
    size_t i = unneededCoefficients;
    for (; i < (size_t)halfCoefficientsMinusOne; i++)
    {
        weights.emplace_back(static_cast<double>(coefficients[i]) / pascalSum);
        double offset = static_cast<double>(i) - static_cast<double>(halfCoefficientsMinusOne);
        offsets.emplace_back(offset);
    }

    weights.emplace_back(static_cast<double>(coefficients[i]) / pascalSum);
    offsets.emplace_back(static_cast<double>(0));

    for (i = static_cast<size_t>(halfCoefficientsMinusOne) + 1; i < static_cast<size_t>(numCoefficients) + unneededCoefficients; i++)
    {
        weights.emplace_back(static_cast<double>(coefficients[i]) / pascalSum);
        offsets.emplace_back(static_cast<double>(i - halfCoefficientsMinusOne));
    }
}
} // namespace

namespace pvr {
namespace math {

inline uint64_t generatePascalTriangleRow(size_t row, std::vector<uint64_t>& pascalCoefficients)
{
    // Each entry of any given row of the Pascal Triangle is constructed by adding the number above and to the left with the number above and to the right.
    // Entries which fall outside of the Pascal Triangle are treated as having a 0 value. The first row consists of a single entry with a value of 1.
    // The following shows the first 4 rows of the Pascal Triangle
    //        1     row 0 ... sum = 1
    //       1 1    row 1 ... sum = 2
    //      1 2 1   row 2 ... sum = 4
    //     1 3 3 1  row 3 ... sum = 8
    pascalCoefficients.emplace_back(1u);
    uint64_t sum = pascalCoefficients.back();
    for (size_t i = 0; i < row; i++)
    {
        uint64_t val = pascalCoefficients[i] * (row - i) / (i + 1u);
        pascalCoefficients.emplace_back(val);
        sum += pascalCoefficients.back();
    }

    return sum;
}

inline void adjustOffsetsAndWeightsForLinearSampling(const size_t halfCoefficientsMinusOne, std::vector<double>& weights, std::vector<double>& offsets)
{
    std::vector<double> adjustedWeights;
    std::vector<double> adjustedOffsets;

    // if kernel size minus 1 is divisible by 2 then we have a central sample with offset 0
    if (halfCoefficientsMinusOne % 2u == 0u)
    {
        size_t i = 0u;
        // Ensure not zero
        for (; (halfCoefficientsMinusOne > 0) && (i < halfCoefficientsMinusOne - 1u); i += 2u)
        {
            adjustedWeights.emplace_back(weights[i] + weights[i + 1]);
            double adjustedOffset = ((offsets[i] * weights[i]) + (offsets[i + 1u] * weights[i + 1u])) / adjustedWeights.back();
            adjustedOffsets.emplace_back(adjustedOffset);
        }

        adjustedWeights.emplace_back(weights[halfCoefficientsMinusOne]);
        adjustedOffsets.emplace_back(0.0f);

        for (i = halfCoefficientsMinusOne + 1u; i < static_cast<uint64_t>(offsets.size()); i += 2u)
        {
            adjustedWeights.emplace_back(weights[i] + weights[i + 1u]);
            double adjustedOffset = ((offsets[i] * weights[i]) + (offsets[i + 1u] * weights[i + 1u])) / adjustedWeights.back();
            adjustedOffsets.emplace_back(adjustedOffset);
        }
    }
    else // otherwise we have to duplicate the central sample *but* this means we can handle 3x3 using 2x2 samples
    {
        size_t i = 0;
        for (; i < halfCoefficientsMinusOne; i += 2u)
        {
            double adjustedOffset = 0.0;
            if (i == halfCoefficientsMinusOne - 1u)
            {
                adjustedWeights.emplace_back(weights[i] + weights[i + 1u] * 0.5);
                adjustedOffset = ((offsets[i] * weights[i]) + (offsets[i + 1u] * weights[i + 1u] * 0.5)) / adjustedWeights.back();
            }
            else
            {
                adjustedWeights.emplace_back(weights[i] + weights[i + 1u]);
                adjustedOffset = ((offsets[i] * weights[i]) + (offsets[i + 1u] * weights[i + 1u])) / adjustedWeights.back();
            }
            adjustedOffsets.emplace_back(adjustedOffset);
        }

        for (i = halfCoefficientsMinusOne; i < offsets.size(); i += 2u)
        {
            double adjustedOffset = 0.0;
            if (i == halfCoefficientsMinusOne)
            {
                adjustedWeights.emplace_back(weights[i] * 0.5 + weights[i + 1u]);
                adjustedOffset = ((offsets[i] * weights[i] * 0.5) + (offsets[i + 1u] * weights[i + 1u])) / adjustedWeights.back();
            }
            else
            {
                adjustedWeights.emplace_back(weights[i] + weights[i + 1u]);
                adjustedOffset = ((offsets[i] * weights[i]) + (offsets[i + 1u] * weights[i + 1u])) / adjustedWeights.back();
            }
            adjustedOffsets.emplace_back(adjustedOffset);
        }
    }

    offsets.clear();
    weights.clear();

    for (size_t i = 0; i < adjustedWeights.size(); i++)
    {
        offsets.emplace_back(adjustedOffsets[i]);
        weights.emplace_back(adjustedWeights[i]);
    }
}

inline void generateGaussianKernelWeightsAndOffsets(uint32_t kernelSize, bool truncateCoefficients, bool useLinearSamplerOptimization, std::vector<double>& weights,
    std::vector<double>& offsets, float minimumAcceptableCoefficient = 0.0001f)
{
    // Odd kernel sizes are a requirement
    assert((kernelSize % 2u) == 1u);

    // The starting row of the Pascal Triangle being used
    size_t pascalRow = kernelSize - 1u;

    // The number of coefficients minus 1 halved
    // This variable is used to get the index of the coefficient used for an offset of 0
    // (numCoefficients - 1) / 2
    size_t halfCoefficientsMinusOne = pascalRow / 2u;

    // stores the set of pascal coefficients
    std::vector<uint64_t> pascalCoefficients;

    // the number of coefficients skipped due to minimum coefficient checks
    size_t numCoefficientsSkipped = 0u;

    // The simple case
    if (!truncateCoefficients)
    {
        // Generate the Pascal Triangle row and calculate the sum of the row
        uint64_t pascalSum = generatePascalTriangleRow(pascalRow, pascalCoefficients);
        // Store the Pascal Triangle coefficients for the given row
        addCoefficients(pascalSum, halfCoefficientsMinusOne, pascalCoefficients.size(), pascalCoefficients, weights, offsets);
    }
    else
    // Ignoring negligible coefficients - we'll now attempt to find a row which provides enough coefficients for the given kernel size whilst not falling below the
    // given minimal coefficient value.
    {
        size_t currentRow = pascalRow;
        bool foundSuitableRow = false;

        // only accept rows where we have kernelSize coefficients larger than the min coefficient
        while (!foundSuitableRow)
        {
            // clear the offsets and weights
            offsets.clear();
            weights.clear();

            // clear the set of pascal triangle coefficients
            pascalCoefficients.clear();

            // get the pascal coefficients for the current row of the triangle
            uint64_t pascalSum = generatePascalTriangleRow(currentRow, pascalCoefficients);

            halfCoefficientsMinusOne = currentRow / 2u;

            // check how many of the coefficients are negligible in size and therefore should be ignored
            numCoefficientsSkipped = 0u;
            for (size_t i = halfCoefficientsMinusOne; i < pascalCoefficients.size(); i++)
            {
                double currentWeight = static_cast<double>(pascalCoefficients[i]) / pascalSum;
                if (currentWeight < minimumAcceptableCoefficient) { numCoefficientsSkipped++; }
            }

            // if there aren't enough coefficients left to fulfill the requirements for the kernel size then continue to the next row
            if ((halfCoefficientsMinusOne + 1u) - numCoefficientsSkipped < (pascalRow / 2u + 1u))
            {
                currentRow += 2u;
                foundSuitableRow = false;
                continue;
            }
            // if negligible coefficients are to be removed we must also update the overall sums used to give them weighting
            // otherwise repeated blurring will result in darkening of the image
            uint64_t adjustedPascalSum = pascalSum;
            // the extra coefficients which aren't needed which do match the conditions for non-negligibility but would result in us taking extra coefficients
            size_t unrequiredCoefficients = ((pascalCoefficients.size() - kernelSize - (numCoefficientsSkipped * 2u)) / 2u);
            for (size_t i = 0; i < numCoefficientsSkipped + unrequiredCoefficients; i++) { adjustedPascalSum -= 2u * pascalCoefficients[pascalCoefficients.size() - 1u - i]; }

            // add the non-negligible coefficients to the weights and offsets buffers
            size_t numCoefficients = pascalCoefficients.size() - (2u * (numCoefficientsSkipped + unrequiredCoefficients));
            addCoefficients(adjustedPascalSum, halfCoefficientsMinusOne, numCoefficients, pascalCoefficients, weights, offsets);
            halfCoefficientsMinusOne = static_cast<uint64_t>((offsets.size() - 1u) / 2u);
            foundSuitableRow = true;
        }
    }

    // If using the Linear Sampling optimisation then adjust the Gaussian weights and offsets accordingly
    if (useLinearSamplerOptimization) { adjustOffsetsAndWeightsForLinearSampling(halfCoefficientsMinusOne, weights, offsets); }
}

inline glm::mat4 constructSRT(const glm::vec3& scale, const glm::quat& rotate, const glm::vec3& translation)
{
    return glm::translate(translation) * glm::toMat4(rotate) * glm::scale(scale);
}
} // namespace math
} // namespace pvr