Mixed Scalar/Vector Math

The normalize(), length(), distance(), and reflect() functions usually contain several internal function calls, such as dot(). There is an advantage to knowing how these functions are implemented.

For example, if it is known that two operations have a shared sub-expression, then it is possible to reduce the cycle count. However, this only happens if the input order allows it.

fragColor.x = length(t-v); // seven cycles
fragColor.y = distance(v, t);
{sopmad, sopmad, sopmad, sopmad}
{sop, sop, sopmov}
{sopmad, sopmad, sopmad, sopmad}
{sop, sop, sopmov}
{sop, sop}
{frsq}
{frcp}
-->
fragColor.x = length(t-v); // nine cycles
fragColor.y = distance(t, v);
{mov}
{wdf}
{sopmad, sopmad, sopmad, sopmad}
{sop, sop, sopmov}
{sop, sop, sopmov}
{sop, sop}
{frsq}
{frcp}
{mov}

// Instructions on Volcanic:
// Shared sub-expression case, v and t obtain their values from a common source:
fragColor.x = length(t - v);
fragColor.y = distance(v, t);
{mul}
{add}
{fma}
{add}
{fma}
{rsq}
{add}
{add}
{add}
{rcp}
{mul}
{fma}
{fma}
{rsq}
{rcp}
// No shared sub-expression case:
fragColor.x = length(t - v);
fragColor.y = distance(v, t);
{add}
{mul}
{add}
{fma}
{add}
{fma}
{rsq}
{add}
{add}
{add}
{rcp}
{mul}
{fma}
{fma}
{rsq}
{rcp}

Manually expanding these complex instructions can sometimes help the compiler optimise the code:

fragColor.xyz = normalize(t.xyz); // six cycles
{fmul, mov}
{fmad, mov}
{fmad, mov}
{frsq}
{fmul, fmul, mov, mov}
{fmul, mov}
-->
fragColor.xyz = inversesqrt( dot(t.xyz, t.xyz) ) * t.xyz; // five cycles
{sop, sop, sopmov}
{sop, sop}
{frsq}
{sop, sop}
{sop, sop}

// Instructions on Volcanic:
fragColor.xyz = normalize(t.xyz);
{mul}
{fma}
{fma}
{rsq}
{mul}
{mul}
{mul}
-->
fragColor.xyz = inversesqrt( dot(t.xyz, t.xyz) ) * t.xyz; // Same cycle number
{mul}
{fma}
{fma}
{rsq}
{mul}
{mul}
{mul}

Also, in expanded form it is possible to take advantage of grouping vector and scalar instructions together:

fragColor.xyz = 50.0 * normalize(t.xyz); // seven cycles
{fmul, mov}
{fmad, mov}
{fmad, mov}
{frsq}
{fmul, fmul, mov, mov}
{fmul, fmul, mov, mov}
{sop, sop}
-->
fragColor.xyz = (50.0 * inversesqrt( dot(t.xyz, t.xyz) )) * t.xyz; // six cycles
{sop, sop, sopmov}
{sop, sop}
{frsq}
{sop, sop, sopmov}
{sop, sop}
{sop, sop}

// Instructions on Volcanic:
fragColor.xyz = 50.0 * normalize(t.xyz);
{mul}
{fma}
{fma}
{rsq}
{mul}
{mul}
{mul}
{mul}
{mul}
{mul}
-->
fragColor.xyz = (50.0 * inversesqrt( dot(t.xyz, t.xyz) )) * t.xyz;
{mul}
{fma}
{fma}
{rsq}
{mul}
{mul}
{mul}
{mul}

The following list shows what the complex instructions can be expanded to.

cross() can be expanded to:

// Expanded form of the cross product of two 3-component vectors.
//   a, b    : input vectors.
//   returns : the vector a x b, perpendicular to both inputs.
vec3 cross( vec3 a, vec3 b )
{
    // Each component is the 2x2 determinant of the other two axes:
    //   x = a.y*b.z - a.z*b.y
    //   y = a.z*b.x - a.x*b.z
    //   z = a.x*b.y - a.y*b.x
    return vec3( a.y * b.z - b.y * a.z,
                 a.z * b.x - b.z * a.x,
                 a.x * b.y - b.x * a.y );
}

distance() can be expanded to:

// Expanded form of distance(): the Euclidean distance between points a and b.
//   a, b    : the two points.
//   returns : the length of the difference vector (a - b).
float distance( vec3 a, vec3 b )
{
    vec3 delta = a - b;
    // Squared length first; a single sqrt() then gives the distance.
    float lengthSquared = dot(delta, delta);
    return sqrt( lengthSquared );
}

dot() can be expanded to:

// Expanded form of dot(): the sum of the component-wise products of a and b.
//   a, b    : input vectors.
//   returns : a.x*b.x + a.y*b.y + a.z*b.z.
float dot( vec3 a, vec3 b )
{
    float xy = a.x * b.x + a.y * b.y;
    // The z term is added last, matching left-to-right evaluation of the sum.
    return xy + a.z * b.z;
}

faceforward() can be expanded to:

// Expanded form of faceforward(): orients a vector to point away from a
// surface, as seen along the incident direction.
//   n       : the vector to orient.
//   I       : the incident vector.
//   Nref    : the reference vector tested against I.
//   returns : n when dot(Nref, I) is negative, otherwise -n.
vec3 faceforward( vec3 n, vec3 I, vec3 Nref )
{
    // Compare against a float literal: GLSL ES has no implicit int -> float
    // conversion, so "< 0" would not compile there.
    if( dot( Nref, I ) < 0.0 )
    {
      return n;
    }
    else
    {
      return -n;
    }
}

length() can be expanded to:

// Expanded form of length(): the Euclidean length of v.
//   v       : input vector.
//   returns : the square root of v dotted with itself.
float length( vec3 v )
{
    float lengthSquared = dot(v, v);
    return sqrt( lengthSquared );
}

normalize() can be expanded to:

// Expanded form of normalize(): scales v by the reciprocal of its length.
//   v       : input vector.
//   returns : v divided by sqrt(dot(v, v)).
vec3 normalize( vec3 v )
{
    float magnitude = sqrt( dot(v, v) );
    return v / magnitude;
}

reflect() can be expanded to:

// Expanded form of reflect(): mirrors the incident vector I about the plane
// whose normal is N.
//   N       : surface normal.
//   I       : incident vector.
//   returns : I - 2 * dot(N, I) * N.
// NOTE(review): the GLSL built-in takes its arguments as reflect(I, N);
// this expansion lists N first - confirm which order is intended here.
vec3 reflect( vec3 N, vec3 I )
{
    float twiceProjection = 2.0 * dot(N, I);
    return I - twiceProjection * N;
}

refract() can be expanded to:

// Expanded form of refract(): computes the refraction vector for incident
// vector I at a surface with normal n.
//   n       : surface normal.
//   I       : incident vector.
//   eta     : ratio of indices of refraction.
//   returns : the refracted vector, or a zero vector when k < 0.0
//             (total internal reflection).
// NOTE(review): the GLSL built-in takes its arguments as refract(I, N, eta);
// this expansion lists the normal first - confirm which order is intended.
vec3 refract( vec3 n, vec3 I, float eta )
{
    // dot(n, I) is needed three times; compute it once.
    float cosIncident = dot(n, I);
    float k = 1.0 - eta * eta * (1.0 - cosIncident * cosIncident);
    if (k < 0.0)
        return vec3(0.0); // must be a vec3: a bare 0.0 does not match the return type
    else
        return eta * I - (eta * cosIncident + sqrt(k)) * n;
}