What follows is a pair of code examples for finding a series of objects' speeds along the vector between each object and the origin, given the objects' 3D velocities and positions. The first version is scalar; the second is written to use the AltiVec unit. The AltiVec example does not handle arrays that are not 16-byte aligned or object counts that are not an even multiple of eight.
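For each object, the speed along the line to the origin is the projection of the velocity V onto the distance vector D from the listener. Both versions below compute it exactly as the code comments describe, replacing the divide and square root with a reciprocal square root estimate:

speed = (D dot V) / sqrt( D dot D ) = (D dot V) * (D dot D)^-0.5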

void CalcSpeed( float *velocities, float *distances, float *results, int count )
{

register float vX1, vY1, vZ1; //velocity 1
register float vX2, vY2, vZ2; //velocity 2
register float vX3, vY3, vZ3; //velocity 3
register float dX1, dY1, dZ1; //distance 1
register float dX2, dY2, dZ2; //distance 2
register float dX3, dY3, dZ3; //distance 3
register float DD1, DD2, DD3; //D dot D, 1 through 3
register float DV1, DV2, DV3; //D dot V, 1 through 3
register float rsqrt1, rsqrt2, rsqrt3;
register float one = 1.0;
register float oneHalf = 0.5;
register float *vel = (float*) velocities;
register float *pos = (float*) distances;

//Process three sounds per loop
for( ; count >= 3; count -= 3 )
{

//Load X, Y and Z components of three
//input distances from the listener

dX1 = pos[0];
dY1 = pos[1];
dZ1 = pos[2];
dX2 = pos[3];
dY2 = pos[4];
dZ2 = pos[5];
dX3 = pos[6];
dY3 = pos[7];
dZ3 = pos[8];
pos += 9;

//Load X, Y and Z velocity components for
//three input velocities

vX1 = vel[0];
vY1 = vel[1];
vZ1 = vel[2];
vX2 = vel[3];
vY2 = vel[4];
vZ2 = vel[5];
vX3 = vel[6];
vY3 = vel[7];
vZ3 = vel[8];
vel += 9;

//Calculate D dot V, and store in DV#
DV1 = vX1 * dX1 + vY1 * dY1 + vZ1 * dZ1;
DV2 = vX2 * dX2 + vY2 * dY2 + vZ2 * dZ2;
DV3 = vX3 * dX3 + vY3 * dY3 + vZ3 * dZ3;

//Calculate D dot D for each of the three vectors
DD1 = dX1 * dX1 + dY1 * dY1 + dZ1 * dZ1;
DD2 = dX2 * dX2 + dY2 * dY2 + dZ2 * dZ2;
DD3 = dX3 * dX3 + dY3 * dY3 + dZ3 * dZ3;

//Calculate (D dot D)^-0.5
rsqrt1 = __frsqrte( DD1 );
rsqrt2 = __frsqrte( DD2 );
rsqrt3 = __frsqrte( DD3 );

//Do one round of Newton-Raphson refinement
//because the results from frsqrte are only
//accurate to one part in 32. This should give
//us about one part in 1024 accuracy, which is
//enough for audio volume; the Sound Manager
//only needs one part in 256

rsqrt1 += oneHalf * rsqrt1 * ( one - DD1 * rsqrt1 * rsqrt1 );
rsqrt2 += oneHalf * rsqrt2 * ( one - DD2 * rsqrt2 * rsqrt2 );
rsqrt3 += oneHalf * rsqrt3 * ( one - DD3 * rsqrt3 * rsqrt3 );

//store out the result
//result = (D dot V) * 1.0 / sqrt( D dot D)

(results++)[0] = DV1 * rsqrt1;
(results++)[0] = DV2 * rsqrt2;
(results++)[0] = DV3 * rsqrt3;

}

//Process the remaining sounds
//This is the same algorithm as is used above

for( ; count > 0; count-- )
{

//Load X, Y and Z components of one
//input distance from the listener

dX1 = pos[0];
dY1 = pos[1];
dZ1 = pos[2];
pos += 3;

//Load X, Y and Z components of
//one input velocity

vX1 = vel[0];
vY1 = vel[1];
vZ1 = vel[2];
vel += 3;

//Calculate D dot V, and store in DV#
DV1 = vX1 * dX1 + vY1 * dY1 + vZ1 * dZ1;

//Calculate D dot D
DD1 = dX1 * dX1 + dY1 * dY1 + dZ1 * dZ1;

//Calculate (D dot D)^-0.5
rsqrt1 = __frsqrte( DD1 );

//Do one round of Newton-Raphson refinement
//because the results from frsqrte are only
//accurate to one part in 32. This should give
//us about one part in 1024 accuracy, which is
//enough for audio volume; the Sound Manager
//only needs one part in 256

rsqrt1 += oneHalf * rsqrt1 * ( one - DD1 * rsqrt1 * rsqrt1 );

//store out the result
//result = (D dot V) * 1.0 / sqrt( D dot D)

(results++)[0] = DV1 * rsqrt1;

}

}
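The refinement line above is one Newton-Raphson step for the reciprocal square root; a brief derivation, for reference. Applying Newton's method to f(y) = 1/y^2 - DD, whose positive root is y = (DD)^-0.5, gives

y1 = y0 + 0.5 * y0 * ( 1 - DD * y0 * y0 )

which is exactly the statement used in both loops, with y0 = __frsqrte( DD ). Each step roughly squares the relative error, which is how an estimate good to about one part in 32 becomes good to about one part in 1024.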

//velocities and distances are input as {x, y, z} interleaved 3-tuples
//This simplified function requires that velocities, distances
//and results all be 16-byte aligned. In addition, count must
//be an even multiple of 8.

void CalcSpeedVec( vector float *velocities, vector float *distances, vector float *results, int count )
{

int loopCount = count / ( 2 * vec_step( vector float ));
vector float zero = (vector float) vec_splat_u32(0);
vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));

while( loopCount-- )
{

vector float vLoad1, vLoad2, vLoad3, vLoad4, vLoad5, vLoad6;
vector float dLoad1, dLoad2, dLoad3, dLoad4, dLoad5, dLoad6;
vector float dX1, dY1, dZ1, dX2, dY2, dZ2; //x, y, and z distances
vector float vX1, vY1, vZ1, vX2, vY2, vZ2; //x, y, and z velocities
vector float D_dot_D1, D_dot_V1, D_dot_D2, D_dot_V2;

//Load eight vector[3]'s from the distance and velocity arrays
dLoad1 = vec_ld( 0 * sizeof( vector float ), distances );
dLoad2 = vec_ld( 1 * sizeof( vector float ), distances );
dLoad3 = vec_ld( 2 * sizeof( vector float ), distances );
dLoad4 = vec_ld( 3 * sizeof( vector float ), distances );
dLoad5 = vec_ld( 4 * sizeof( vector float ), distances );
dLoad6 = vec_ld( 5 * sizeof( vector float ), distances );
distances += 6;

vLoad1 = vec_ld( 0 * sizeof( vector float ), velocities );
vLoad2 = vec_ld( 1 * sizeof( vector float ), velocities );
vLoad3 = vec_ld( 2 * sizeof( vector float ), velocities );
vLoad4 = vec_ld( 3 * sizeof( vector float ), velocities );
vLoad5 = vec_ld( 4 * sizeof( vector float ), velocities );
vLoad6 = vec_ld( 5 * sizeof( vector float ), velocities );
velocities += 6;

//Now we transpose the format using a 4x3 matrix transpose
//to arrive at uniform vectors containing the four x's,
//four y's or four z's.
//
//If you store your data in a planar format, rather
//than interleaved like this, then you don't have to
//do this part. (A planar sketch appears after this function.)
//
// { X1, Y1, Z1, X2 }           { X1, X2, X3, X4 }
// { Y2, Z2, X3, Y3 }  ----->   { Y1, Y2, Y3, Y4 }
// { Z3, X4, Y4, Z4 }           { Z1, Z2, Z3, Z4 }
//
//It is possible to do each transpose in just five permutes
//rather than six. That would save a few cycles.
//Can you figure out how?

{

vector float temp1, temp2, temp3;

temp1 = vec_perm( dLoad1, dLoad2, mergeHighLow );   // X1, X3, Y1, Y3
temp2 = vec_perm( dLoad1, dLoad3, mergeLowHigh );   // Z1, Z3, X2, X4
temp3 = vec_perm( dLoad2, dLoad3, mergeHighLow );   // Y2, Y4, Z2, Z4
dX1 = vec_perm( temp1, temp2, mergeHighLow );       // X1, X2, X3, X4
dY1 = vec_perm( temp1, temp3, mergeLowHigh );       // Y1, Y2, Y3, Y4
dZ1 = vec_perm( temp2, temp3, mergeHighLow );       // Z1, Z2, Z3, Z4

temp1 = vec_perm( dLoad4, dLoad5, mergeHighLow );   // X1, X3, Y1, Y3
temp2 = vec_perm( dLoad4, dLoad6, mergeLowHigh );   // Z1, Z3, X2, X4
temp3 = vec_perm( dLoad5, dLoad6, mergeHighLow );   // Y2, Y4, Z2, Z4
dX2 = vec_perm( temp1, temp2, mergeHighLow );       // X1, X2, X3, X4
dY2 = vec_perm( temp1, temp3, mergeLowHigh );       // Y1, Y2, Y3, Y4
dZ2 = vec_perm( temp2, temp3, mergeHighLow );       // Z1, Z2, Z3, Z4

temp1 = vec_perm( vLoad1, vLoad2, mergeHighLow );   // X1, X3, Y1, Y3
temp2 = vec_perm( vLoad1, vLoad3, mergeLowHigh );   // Z1, Z3, X2, X4
temp3 = vec_perm( vLoad2, vLoad3, mergeHighLow );   // Y2, Y4, Z2, Z4
vX1 = vec_perm( temp1, temp2, mergeHighLow );       // X1, X2, X3, X4
vY1 = vec_perm( temp1, temp3, mergeLowHigh );       // Y1, Y2, Y3, Y4
vZ1 = vec_perm( temp2, temp3, mergeHighLow );       // Z1, Z2, Z3, Z4

temp1 = vec_perm( vLoad4, vLoad5, mergeHighLow );   // X1, X3, Y1, Y3
temp2 = vec_perm( vLoad4, vLoad6, mergeLowHigh );   // Z1, Z3, X2, X4
temp3 = vec_perm( vLoad5, vLoad6, mergeHighLow );   // Y2, Y4, Z2, Z4
vX2 = vec_perm( temp1, temp2, mergeHighLow );       // X1, X2, X3, X4
vY2 = vec_perm( temp1, temp3, mergeLowHigh );       // Y1, Y2, Y3, Y4
vZ2 = vec_perm( temp2, temp3, mergeHighLow );       // Z1, Z2, Z3, Z4

}

//Do the dot products: D dot D and D dot V
//Some of these will happen concurrently with the transpose operations above
//X part of the dot product

D_dot_D1 = vec_madd( dX1, dX1, zero );
D_dot_D2 = vec_madd( dX2, dX2, zero );
D_dot_V1 = vec_madd( dX1, vX1, zero );
D_dot_V2 = vec_madd( dX2, vX2, zero );

//Y part of the dot product
D_dot_D1 = vec_madd( dY1, dY1, D_dot_D1 );
D_dot_D2 = vec_madd( dY2, dY2, D_dot_D2 );
D_dot_V1 = vec_madd( dY1, vY1, D_dot_V1 );
D_dot_V2 = vec_madd( dY2, vY2, D_dot_V2 );

//Z part of the dot product
D_dot_D1 = vec_madd( dZ1, dZ1, D_dot_D1 );
D_dot_D2 = vec_madd( dZ2, dZ2, D_dot_D2 );
D_dot_V1 = vec_madd( dZ1, vZ1, D_dot_V1 );
D_dot_V2 = vec_madd( dZ2, vZ2, D_dot_V2 );

// Find the reciprocal square root estimate for D dot D.
// We lose a few cycles here due to data dependency stalls.
// Unrolling this loop to do 16 at a time rather than 8 would
// allow you to avoid these.
//
// result = (D dot V) * (D dot D)^-0.5
results[0] = vec_madd( D_dot_V1, vec_rsqrte( D_dot_D1), zero );
results[1] = vec_madd( D_dot_V2, vec_rsqrte( D_dot_D2), zero );
results += 2;

}

}
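As the transpose comments note, planar input (separate x, y and z arrays) removes the permutes entirely. Here is a minimal sketch of that variant, under the same 16-byte alignment restriction and assuming count is a multiple of four; the function name CalcSpeedVecPlanar and the six array parameters are hypothetical, not part of the example above:

void CalcSpeedVecPlanar( vector float *xd, vector float *yd, vector float *zd,
                         vector float *xv, vector float *yv, vector float *zv,
                         vector float *results, int count )
{
    int loopCount = count / vec_step( vector float );      //four results per pass
    vector float zero = (vector float) vec_splat_u32(0);

    while( loopCount-- )
    {
        vector float dX, dY, dZ, vX, vY, vZ;
        vector float D_dot_D, D_dot_V;

        //Each load already holds four x's, four y's or four z's,
        //so no transpose is needed
        dX = vec_ld( 0, xd );
        dY = vec_ld( 0, yd );
        dZ = vec_ld( 0, zd );
        vX = vec_ld( 0, xv );
        vY = vec_ld( 0, yv );
        vZ = vec_ld( 0, zv );
        xd++; yd++; zd++;
        xv++; yv++; zv++;

        //D dot D and D dot V for four objects at a time
        D_dot_D = vec_madd( dX, dX, zero );
        D_dot_V = vec_madd( dX, vX, zero );
        D_dot_D = vec_madd( dY, dY, D_dot_D );
        D_dot_V = vec_madd( dY, vY, D_dot_V );
        D_dot_D = vec_madd( dZ, dZ, D_dot_D );
        D_dot_V = vec_madd( dZ, vZ, D_dot_V );

        //result = (D dot V) * (D dot D)^-0.5
        results[0] = vec_madd( D_dot_V, vec_rsqrte( D_dot_D ), zero );
        results++;
    }
}

With no permutes left in the loop, all of the vector work lands on the loads, the multiply-adds and the reciprocal square root estimate.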

Here is the SimG4 trace for the function:

[SimG4 cycle-by-cycle pipeline display, cycles 1 through 266. The column alignment that maps each stage letter to its instruction has not survived here, so only the per-instruction listing below is reproduced.]

0:( 1)* or R4,R25,R25
2:( 1)* addi R3,R1,0x40
4:( 3)* mfspr R2,LR
6:( 3)* bcl+ 20,31,0x4
8:( 5)* stvx V31,R1,R10


10:( 5)* mfspr R10,8

11:( 5)* mtspr LR,R2
13:( 6)* addis R7,R31,0x0
15:( 7)* addze R2,R2
17:( 12)* addi R0,R7,0xa74
18:( 12)* cmpi 0,R2,0x0


20:( 13)* addi R6,R2,0xffff
22:( 14)* vspltisb V1,0x8
23:( 15)* vspltisw V31,0x0
26:( 19)* addi R11,R6,0x1
27:( 19)* addi R0,R0,0x10
29:( 21)* lvx V4,R0,R4
31:( 22)* lvx V13,R2,R4
33:( 23)* lvx V6,R7,R4
35:( 24)* lvx V11,R12,R4
37:( 25)* vperm V19,V4,V6,V14
39:( 26)* lvx V8,R6,R4
41:( 27)* lvx V10,R11,R4
43:( 28)* vperm V13,V12,V19,V15
45:( 29)* vperm V7,V12,V5,V14
47:( 30)* vperm V9,V11,V10,V14
49:( 31)* vperm V17,V11,V8,V15
51:( 32)* lvx V0,R2,R3
53:( 33)* lvx V5,R8,R3

55:( 34)* vperm V18,V8,V10,V15
57:( 35)* lvx V3,R7,R3
59:( 36)* vperm V10,V5,V0,V15
61:( 38)* lvx V19,R12,R3
63:( 39)* vperm V16,V4,V0,V14
65:( 40)* lvx V2,R6,R3
67:( 41)* vmaddfp V11,V1,V31,V1
69:( 42)* vperm V4,V16,V10,V15
71:( 43)* vperm V10,V17,V16,V15
73:( 44)* vperm V16,V3,V2,V14

74:( 44)* vmaddfp V18,V13,V31,V10
76:( 45)* vmaddfp V11,V9,V11,V9
78:( 46)* vmaddfp V12,V6,V0,V6
79:( 47)* vmaddfp V3,V1,V31,V17

80:( 49)* vperm V0,V19,V2,V15
81:( 49)* vperm V2,V10,V0,V14
83:( 50)* vmaddfp V10,V7,V18,V5
85:( 52)* vmaddfp V11,V8,V11,V8
86:( 54)* vmaddfp V17,V9,V3,V2
87:( 55)* vrsqrtefp V16,V12
88:( 56)* vmaddfp V7,V6,V10,V4
89:( 56)* vrsqrtefp V5,V11
90:( 56)* vmaddfp V3,V8,V17,V19
91:( 57)* vmaddfp V2,V7,V31,V16

92:( 58)* vmaddfp V0,V3,V31,V5
94:( 60)* stvx V0,R5,R0
96:( 62)* bc+ 16,0,0xfef4
98:( 65)* addi R2,R0,0x10
99:( 65)* lvx V13,R2,R4

100:( 66)* addi R7,R0,0x20
101:( 66)* lvx V6,R7,R4
102:( 67)* addi R12,R0,0x30
104:( 68)* vperm V12,V4,V13,V15
105:( 69)* vperm V19,V4,V6,V14
107:( 72)* lvx V8,R6,R4
109:( 73)* lvx V10,R11,R4
111:( 75)* vperm V13,V12,V19,V15
113:( 76)* vperm V7,V12,V5,V14
115:( 77)* vperm V9,V11,V10,V14
117:( 78)* vperm V17,V11,V8,V15
119:( 79)* lvx V0,R2,R3
121:( 80)* lvx V5,R8,R3

123:( 81)* vperm V18,V8,V10,V15
125:( 82)* lvx V3,R7,R3
127:( 83)* vperm V10,V5,V0,V15
129:( 85)* lvx V19,R12,R3
131:( 86)* vperm V16,V4,V0,V14
133:( 87)* lvx V2,R6,R3
135:( 88)* vmaddfp V11,V1,V31,V1
137:( 89)* vperm V4,V16,V10,V15
139:( 90)* vperm V10,V17,V16,V15
141:( 91)* vperm V16,V3,V2,V14

142:( 91)* vmaddfp V18,V13,V31,V10
144:( 92)* vmaddfp V11,V9,V11,V9
146:( 93)* vmaddfp V12,V6,V0,V6
147:( 94)* vmaddfp V3,V1,V31,V17

148:( 96)* vperm V0,V19,V2,V15
149:( 96)* vperm V2,V10,V0,V14
151:( 97)* vmaddfp V10,V7,V18,V5
153:( 99)* vmaddfp V11,V8,V11,V8
154:( 101)* vmaddfp V17,V9,V3,V2
155:( 102)* vrsqrtefp V16,V12
156:( 103)* vmaddfp V7,V6,V10,V4
157:( 103)* vrsqrtefp V5,V11
158:( 103)* vmaddfp V3,V8,V17,V19
159:( 104)* vmaddfp V2,V7,V31,V16

160:( 105)* vmaddfp V0,V3,V31,V5
162:( 107)* stvx V0,R5,R0
164:( 109)* bc+ 16,0,0xfef4
166:( 112)* addi R2,R0,0x10
167:( 112)* lvx V13,R2,R4

168:( 113)* addi R7,R0,0x20
169:( 113)* lvx V6,R7,R4
170:( 114)* addi R12,R0,0x30
172:( 115)* vperm V12,V4,V13,V15
173:( 116)* vperm V19,V4,V6,V14
175:( 119)* lvx V8,R6,R4
177:( 120)* lvx V10,R11,R4
179:( 122)* vperm V13,V12,V19,V15
181:( 123)* vperm V7,V12,V5,V14
183:( 124)* vperm V9,V11,V10,V14
185:( 125)* vperm V17,V11,V8,V15
187:( 126)* lvx V0,R2,R3
189:( 127)* lvx V5,R8,R3

191:( 128)* vperm V18,V8,V10,V15
193:( 129)* lvx V3,R7,R3
195:( 130)* vperm V10,V5,V0,V15
197:( 132)* lvx V19,R12,R3
199:( 133)* vperm V16,V4,V0,V14
201:( 134)* lvx V2,R6,R3
203:( 135)* vmaddfp V11,V1,V31,V1
205:( 136)* vperm V4,V16,V10,V15
207:( 137)* vperm V10,V17,V16,V15
209:( 138)* vperm V16,V3,V2,V14

210:( 138)* vmaddfp V18,V13,V31,V10
212:( 139)* vmaddfp V11,V9,V11,V9
214:( 140)* vmaddfp V12,V6,V0,V6
215:( 141)* vmaddfp V3,V1,V31,V17

216:( 143)* vperm V0,V19,V2,V15
217:( 143)* vperm V2,V10,V0,V14
219:( 144)* vmaddfp V10,V7,V18,V5
221:( 146)* vmaddfp V11,V8,V11,V8
222:( 148)* vmaddfp V17,V9,V3,V2
223:( 149)* vrsqrtefp V16,V12
224:( 150)* vmaddfp V7,V6,V10,V4
225:( 150)* vrsqrtefp V5,V11
226:( 150)* vmaddfp V3,V8,V17,V19
227:( 151)* vmaddfp V2,V7,V31,V16

228:( 152)* vmaddfp V0,V3,V31,V5
230:( 154)* stvx V0,R5,R0
232:( 156)* bc+ 16,0,0xfef4
234:( 159)* addi R2,R0,0x10
235:( 159)* lvx V13,R2,R4

236:( 160)* addi R7,R0,0x20
237:( 160)* lvx V6,R7,R4
238:( 161)* addi R12,R0,0x30
240:( 162)* vperm V12,V4,V13,V15
241:( 163)* vperm V19,V4,V6,V14
243:( 166)* lvx V8,R6,R4
245:( 167)* lvx V10,R11,R4
247:( 169)* vperm V13,V12,V19,V15
249:( 170)* vperm V7,V12,V5,V14
251:( 171)* vperm V9,V11,V10,V14
253:( 172)* vperm V17,V11,V8,V15
255:( 173)* lvx V0,R2,R3
257:( 174)* lvx V5,R8,R3

259:( 175)* vperm V18,V8,V10,V15
261:( 176)* lvx V3,R7,R3
263:( 177)* vperm V10,V5,V0,V15
265:( 179)* lvx V19,R12,R3
267:( 180)* vperm V16,V4,V0,V14
269:( 181)* lvx V2,R6,R3
271:( 182)* vmaddfp V11,V1,V31,V1
273:( 183)* vperm V4,V16,V10,V15
275:( 184)* vperm V10,V17,V16,V15
277:( 185)* vperm V16,V3,V2,V14

278:( 185)* vmaddfp V18,V13,V31,V10
280:( 186)* vmaddfp V11,V9,V11,V9
282:( 187)* vmaddfp V12,V6,V0,V6
283:( 188)* vmaddfp V3,V1,V31,V17

284:( 190)* vperm V0,V19,V2,V15
285:( 190)* vperm V2,V10,V0,V14
287:( 191)* vmaddfp V10,V7,V18,V5
289:( 193)* vmaddfp V11,V8,V11,V8
290:( 195)* vmaddfp V17,V9,V3,V2
291:( 196)* vrsqrtefp V16,V12
292:( 197)* vmaddfp V7,V6,V10,V4
293:( 197)* vrsqrtefp V5,V11
294:( 197)* vmaddfp V3,V8,V17,V19
295:( 198)* vmaddfp V2,V7,V31,V16

296:( 199)* vmaddfp V0,V3,V31,V5
298:( 201)* stvx V0,R5,R0
300:( 203)* bc+ 16,0,0xfef4
302:( 206)* addi R2,R0,0x10
303:( 206)* lvx V13,R2,R4

304:( 207)* addi R7,R0,0x20
305:( 207)* lvx V6,R7,R4
306:( 208)* addi R12,R0,0x30
308:( 209)* vperm V12,V4,V13,V15
309:( 210)* vperm V19,V4,V6,V14
311:( 213)* lvx V8,R6,R4
313:( 214)* lvx V10,R11,R4
315:( 216)* vperm V13,V12,V19,V15
317:( 217)* vperm V7,V12,V5,V14
319:( 218)* vperm V9,V11,V10,V14
321:( 219)* vperm V17,V11,V8,V15
323:( 220)* lvx V0,R2,R3
325:( 221)* lvx V5,R8,R3

327:( 222)* vperm V18,V8,V10,V15
329:( 223)* lvx V3,R7,R3
331:( 224)* vperm V10,V5,V0,V15
333:( 226)* lvx V19,R12,R3
335:( 227)* vperm V16,V4,V0,V14
337:( 228)* lvx V2,R6,R3
339:( 229)* vmaddfp V11,V1,V31,V1
341:( 230)* vperm V4,V16,V10,V15
343:( 231)* vperm V10,V17,V16,V15
345:( 232)* vperm V16,V3,V2,V14

346:( 232)* vmaddfp V18,V13,V31,V10
348:( 233)* vmaddfp V11,V9,V11,V9
350:( 234)* vmaddfp V12,V6,V0,V6
351:( 235)* vmaddfp V3,V1,V31,V17

352:( 237)* vperm V0,V19,V2,V15
353:( 237)* vperm V2,V10,V0,V14
355:( 238)* vmaddfp V10,V7,V18,V5
357:( 240)* vmaddfp V11,V8,V11,V8
358:( 242)* vmaddfp V17,V9,V3,V2
359:( 243)* vrsqrtefp V16,V12
360:( 244)* vmaddfp V7,V6,V10,V4
361:( 244)* vrsqrtefp V5,V11
362:( 244)* vmaddfp V3,V8,V17,V19
363:( 245)* vmaddfp V2,V7,V31,V16

364:( 246)* vmaddfp V0,V3,V31,V5
366:( 248)* stvx V0,R5,R0
368:( 250)* bc+ 16,0,0xfef4
370:( 253)* lwz R31,0xfffc(R1)
371:( 253)* mtspr 8,R10






372:( 254)* lvx V31,R1,R3




1:( 1)# or R5,R23,R23
3:( 1)# bl 0x54
5:( 3)# stw R31,0xfffc(R1)
7:( 4)# addi R10,R0,0xffe0
9:( 5)# mfspr R31,LR




12:( 6)# oris R9,R10,0xffff
14:( 7)# srawi R2,R6,3
16:( 10)# ori R8,R9,0xf001

19:( 13)# mtspr 8,R8


21:( 14)# lvx V14,R0,R0

24:( 16)# vxor V15,V14,V1

28:( 20)# mtspr CTR,R11
30:( 21)# addi R2,R0,0x10
32:( 23)# addi R7,R0,0x20
34:( 23)# addi R12,R0,0x30
36:( 24)# vperm V12,V4,V13,V15
38:( 25)# addi R6,R0,0x40
40:( 27)# addi R11,R0,0x50
42:( 27)# vperm V5,V13,V6,V15
44:( 28)# lvx V4,R0,R3
46:( 29)# addi R8,R0,0x10
48:( 31)# vmaddfp V12,V13,V31,V13
50:( 31)# addi R2,R0,0x20
52:( 32)# vperm V6,V19,V5,V15
54:( 33)# vperm V1,V17,V9,V15

56:( 35)# addi R7,R0,0x30
58:( 35)# vperm V8,V9,V18,V15
60:( 36)# addi R12,R0,0x40
62:( 38)# vperm V9,V17,V18,V14
64:( 40)# addi R6,R0,0x50
66:( 40)# vperm V17,V4,V5,V15
68:( 41)# vperm V5,V17,V10,V14
70:( 42)# vmaddfp V0,V7,V12,V7
72:( 44)# addi R4,R4,0x60


75:( 45)# vperm V10,V3,V19,V15
77:( 46)# vperm V17,V10,V16,V15




82:( 50)# addi R3,R3,0x60
84:( 51)# vperm V19,V16,V0,V15








93:( 59)# stvx V2,R0,R5
95:( 61)# addi R5,R5,0x20
97:( 64)# lvx V4,R0,R4





103:( 67)# lvx V11,R12,R4

106:( 71)# addi R6,R0,0x40
108:( 73)# addi R11,R0,0x50
110:( 74)# vperm V5,V13,V6,V15
112:( 75)# lvx V4,R0,R3
114:( 76)# addi R8,R0,0x10
116:( 78)# vmaddfp V12,V13,V31,V13
118:( 78)# addi R2,R0,0x20
120:( 79)# vperm V6,V19,V5,V15
122:( 80)# vperm V1,V17,V9,V15

124:( 82)# addi R7,R0,0x30
126:( 82)# vperm V8,V9,V18,V15
128:( 83)# addi R12,R0,0x40
130:( 85)# vperm V9,V17,V18,V14
132:( 87)# addi R6,R0,0x50
134:( 87)# vperm V17,V4,V5,V15
136:( 88)# vperm V5,V17,V10,V14
138:( 89)# vmaddfp V0,V7,V12,V7
140:( 91)# addi R4,R4,0x60


143:( 92)# vperm V10,V3,V19,V15
145:( 93)# vperm V17,V10,V16,V15




150:( 97)# addi R3,R3,0x60
152:( 98)# vperm V19,V16,V0,V15








161:( 106)# stvx V2,R0,R5
163:( 108)# addi R5,R5,0x20
165:( 110)# lvx V4,R0,R4





171:( 114)# lvx V11,R12,R4

174:( 118)# addi R6,R0,0x40
176:( 120)# addi R11,R0,0x50
178:( 121)# vperm V5,V13,V6,V15
180:( 122)# lvx V4,R0,R3
182:( 123)# addi R8,R0,0x10
184:( 125)# vmaddfp V12,V13,V31,V13
186:( 125)# addi R2,R0,0x20
188:( 126)# vperm V6,V19,V5,V15
190:( 127)# vperm V1,V17,V9,V15

192:( 129)# addi R7,R0,0x30
194:( 129)# vperm V8,V9,V18,V15
196:( 130)# addi R12,R0,0x40
198:( 132)# vperm V9,V17,V18,V14
200:( 134)# addi R6,R0,0x50
202:( 134)# vperm V17,V4,V5,V15
204:( 135)# vperm V5,V17,V10,V14
206:( 136)# vmaddfp V0,V7,V12,V7
208:( 138)# addi R4,R4,0x60


211:( 139)# vperm V10,V3,V19,V15
213:( 140)# vperm V17,V10,V16,V15




218:( 144)# addi R3,R3,0x60
220:( 145)# vperm V19,V16,V0,V15








229:( 153)# stvx V2,R0,R5
231:( 155)# addi R5,R5,0x20
233:( 157)# lvx V4,R0,R4





239:( 161)# lvx V11,R12,R4

242:( 165)# addi R6,R0,0x40
244:( 167)# addi R11,R0,0x50
246:( 168)# vperm V5,V13,V6,V15
248:( 169)# lvx V4,R0,R3
250:( 170)# addi R8,R0,0x10
252:( 172)# vmaddfp V12,V13,V31,V13
254:( 172)# addi R2,R0,0x20
256:( 173)# vperm V6,V19,V5,V15
258:( 174)# vperm V1,V17,V9,V15

260:( 176)# addi R7,R0,0x30
262:( 176)# vperm V8,V9,V18,V15
264:( 177)# addi R12,R0,0x40
266:( 179)# vperm V9,V17,V18,V14
268:( 181)# addi R6,R0,0x50
270:( 181)# vperm V17,V4,V5,V15
272:( 182)# vperm V5,V17,V10,V14
274:( 183)# vmaddfp V0,V7,V12,V7
276:( 185)# addi R4,R4,0x60


279:( 186)# vperm V10,V3,V19,V15
281:( 187)# vperm V17,V10,V16,V15




286:( 191)# addi R3,R3,0x60
288:( 192)# vperm V19,V16,V0,V15








297:( 200)# stvx V2,R0,R5
299:( 202)# addi R5,R5,0x20
301:( 204)# lvx V4,R0,R4





307:( 208)# lvx V11,R12,R4

310:( 212)# addi R6,R0,0x40
312:( 214)# addi R11,R0,0x50
314:( 215)# vperm V5,V13,V6,V15
316:( 216)# lvx V4,R0,R3
318:( 217)# addi R8,R0,0x10
320:( 219)# vmaddfp V12,V13,V31,V13
322:( 219)# addi R2,R0,0x20
324:( 220)# vperm V6,V19,V5,V15
326:( 221)# vperm V1,V17,V9,V15

328:( 223)# addi R7,R0,0x30
330:( 223)# vperm V8,V9,V18,V15
332:( 224)# addi R12,R0,0x40
334:( 226)# vperm V9,V17,V18,V14
336:( 228)# addi R6,R0,0x50
338:( 228)# vperm V17,V4,V5,V15
340:( 229)# vperm V5,V17,V10,V14
342:( 230)# vmaddfp V0,V7,V12,V7
344:( 232)# addi R4,R4,0x60


347:( 233)# vperm V10,V3,V19,V15
349:( 234)# vperm V17,V10,V16,V15




354:( 238)# addi R3,R3,0x60
356:( 239)# vperm V19,V16,V0,V15








365:( 247)# stvx V2,R0,R5
367:( 249)# addi R5,R5,0x20
369:( 251)# addi R3,R0,0xffe0








374:( 256)# mfspr R3,PMC1



Insufficient IB Insts
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
System Unit Busy
System Unit Busy
Tail Serialization
Tail Serialization
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Fixed Point Unit Busy
Maximum Allowed Dispatched
Tail Serialization
Tail Serialization
Maximum Allowed Dispatched
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
Maximum Rename VR's Reached
Maximum Rename VR's Reached
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
CB Full
CB Full
CB Full
CB Full
CB Full
Maximum Allowed Dispatched
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
Maximum Rename VR's Reached
Maximum Rename VR's Reached
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
CB Full
CB Full
CB Full
CB Full
CB Full
Maximum Allowed Dispatched
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
Maximum Rename VR's Reached
Maximum Rename VR's Reached
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
CB Full
CB Full
CB Full
CB Full
CB Full
Maximum Allowed Dispatched
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
Maximum Rename VR's Reached
Maximum Rename VR's Reached
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
CB Full
CB Full
CB Full
CB Full
CB Full
Maximum Allowed Dispatched
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Rename VR's Reached
Maximum Rename VR's Reached
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
Maximum Rename VR's Reached
Maximum Rename VR's Reached
VPU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Unit Busy
VAU Nonempty
Maximum Allowed Dispatched
Maximum Allowed Dispatched
Maximum Allowed Dispatched
CB Full
Tail Serialization
Tail Serialization
Tail Serialization
Tail Serialization
Tail Serialization
Tail Serialization
Tail Serialization
Maximum Allowed Dispatched
Insufficient IB Insts
Insufficient IB Insts
Insufficient IB Insts