//Normalize one vector
//Scales the first three elements (X, Y, Z) of v[0] in place by 1 / length,
//where length is computed from X, Y and Z only. The fourth element (W) is
//written back unchanged.
//Relies on ReciprocalSquareRoot(), defined elsewhere, to supply 1 / sqrt()
//of each element; its accuracy and its behavior for a zero-length input are
//not visible from here.
void Normalize1( vector float v[1] )
{
    vector float v1, oneOverLength;
    vector float zero = (vector float) (0);
    vector unsigned char minusOne = vec_splat_u8(-1);

    //Build a mask that is all ones in the W slot and zero in X, Y and Z:
    //shifting (zero : minusOne) left by 4 bytes leaves 12 zero bytes followed
    //by 4 bytes of ones
    vector float wMask = vec_sld( zero, (vector float) minusOne, 4 );

    //load in the vector and clear W so that it does not contribute to the length
    v1 = vec_andc( v[0], wMask );

    //square each term
    v1 = vec_madd( v1, v1, zero );

    //Now add across the vector so that every slot contains the sum of squares
    v1 = vec_add( v1, vec_sld( v1, v1, 8) );
    v1 = vec_add( v1, vec_sld( v1, v1, 4) );

    //Find 1 / length
    oneOverLength = ReciprocalSquareRoot( v1 );

    //multiply this back into the starting vector
    //Every slot holds the same sum, so slot 0 is as good as any for the splat
    //W gets clobbered here, but it is patched up below
    v1 = vec_madd( v[0], vec_splat( oneOverLength, 0 ), zero );

    //Mask in the original W value
    v[0] = vec_sel( v1, v[0], (vector bool int) wMask );
}

//The scalar version.
//Normalizes data[0], data[1] and data[2] (X, Y, Z) in place by dividing them
//by the length of that three-component vector. data[3] is neither read nor
//written.
//A zero-length input divides by zero, so under IEEE-754 arithmetic the three
//outputs are not finite numbers in that case.
//Note that this could be optimized more
void ScalarNormalize( float data[4] )
{
    //No 'register' qualifier: it was deprecated in C++11 and removed in C++17,
    //and compilers allocate registers on their own anyway
    float x = data[0];
    float y = data[1];
    float z = data[2];

    float lengthSquared = x * x + y * y + z * z;

    //The division is carried out in double precision (1.0 is a double) and
    //then narrowed to float; the cast only makes that narrowing explicit
    float oneOverLength = (float) ( 1.0 / sqrt( lengthSquared ) );

    data[0] = x * oneOverLength;
    data[1] = y * oneOverLength;
    data[2] = z * oneOverLength;
}