home *** CD-ROM | disk | FTP | other *** search
- /* inv_mdct.cpp
-
- Inverse Discrete Cosine Transform for hybrid
- synthesis in MPEG Audio Layer III. Based on the public_c source,
- but almost completely redone by Jeff Tsay and Mikko Tommila.
-
- Last modified : 04/14/97 */
-
- #include <math.h>
-
- #include "all.h"
- #include "inv_mdct.h"
-
- #define PI12 0.261799387f
- #define PI36 0.087266462f
- #define COSPI3 0.500000000f
- #define COSPI6 0.866025403f
- #define DCTODD1 0.984807753f
- #define DCTODD2 -0.342020143f
- #define DCTODD3 -0.642787609f
- #define DCTEVEN1 0.939692620f
- #define DCTEVEN2 -0.173648177f
- #define DCTEVEN3 -0.766044443f
-
- /*
- This uses Byeong Gi Lee's Fast Cosine Transform algorithm to
- decompose the 36 point and 12 point IDCT's into 9 point and 3
- point IDCT's, respectively. Then the 9 point IDCT is computed
- by a modified version of Mikko Tommila's IDCT algorithm, based on
- the WFTA. See his comments before the first 9 point IDCT. The 3
- point IDCT is already efficient to implement. -- Jeff Tsay. */
-
- void inv_mdct(real *in, real *out, int32 block_type)
- {
- /*------------------------------------------------------------------*/
- /* */
- /* Function: Calculation of the inverse MDCT */
- /* In the case of short blocks the 3 output vectors are already */
- /* overlapped and added in this modul. */
- /* */
- /* New layer3 */
- /* */
- /*------------------------------------------------------------------*/
-
- real tmp[18];
- real *win_bt;
- int32 i;
- static real win[4][36];
- static bool MDCT_init = FALSE;
-
- if(!MDCT_init){
-
- // type 0
- for(i=0;i<36;i++)
- win[0][i] = (real) sin(PI36 *(i+0.5));
-
- // type 1
- for(i=0;i<18;i++)
- win[1][i] = (real) sin(PI36 *(i+0.5));
- for(i=18;i<24;i++)
- win[1][i] = 1.0f;
- for(i=24;i<30;i++)
- win[1][i] = (real) sin(PI12 *(i+0.5-18));
- for(i=30;i<36;i++)
- win[1][i] = 0.0f;
-
- // type 2 (not needed anymore)
- for(i=0;i<12;i++)
- win[2][i] = (real) sin(PI12*(i+0.5)) ;
- for(i=12;i<36;i++)
- win[2][i] = 0.0f;
-
- // type 3
- for(i=0;i<6;i++)
- win[3][i] = 0.0f;
- for(i=6;i<12;i++)
- win[3][i] = (real) sin(PI12 * (i+ 0.5 - 6.0));
- for(i=12;i<18;i++)
- win[3][i] = 1.0f;
- for(i=18;i<36;i++)
- win[3][i] = (real) sin(PI36 * (i + 0.5));
-
- for (i=0; i<4; i++) {
-
- real *win_bt = win[i];
-
- win_bt[26] *= -0.500476342f;
- win_bt[25] *= -0.504314480f;
- win_bt[24] *= -0.512139757f;
- win_bt[23] *= -0.524264562f;
- win_bt[22] *= -0.541196100f;
- win_bt[21] *= -0.563690973f;
- win_bt[20] *= -0.592844523f;
- win_bt[19] *= -0.630236207f;
- win_bt[18] *= -0.678170852f;
-
- win_bt[27] *= -0.500476342f;
- win_bt[28] *= -0.504314480f;
- win_bt[29] *= -0.512139757f;
- win_bt[30] *= -0.524264562f;
- win_bt[31] *= -0.541196100f;
- win_bt[32] *= -0.563690973f;
- win_bt[33] *= -0.592844523f;
- win_bt[34] *= -0.630236207f;
- win_bt[35] *= -0.678170852f;
-
- win_bt[0] *= -0.740093616f;
- win_bt[1] *= -0.821339815f;
- win_bt[2] *= -0.930579498f;
- win_bt[3] *= -1.082840285f;
- win_bt[4] *= -1.306562965f;
- win_bt[5] *= -1.662754762f;
- win_bt[6] *= -2.310113158f;
- win_bt[7] *= -3.830648788f;
- win_bt[8] *= -11.46279281f;
-
- win_bt[17] *= -0.740093616f;
- win_bt[16] *= -0.821339815f;
- win_bt[15] *= -0.930579498f;
- win_bt[14] *= -1.082840285f;
- win_bt[13] *= -1.306562965f;
- win_bt[12] *= -1.662754762f;
- win_bt[11] *= -2.310113158f;
- win_bt[10] *= -3.830648788f;
- win_bt[9] *= -11.46279281f;
-
- }
-
- MDCT_init = TRUE;
- }
-
- if(block_type == 2){
-
- for(int32 p=0;p<36;p+=9) {
- out[p] = out[p+1] = out[p+2] = out[p+3] =
- out[p+4] = out[p+5] = out[p+6] = out[p+7] =
- out[p+8] = 0.0f;
- }
-
- int32 six_i = 0;
-
- for(i=0;i<3;i++)
- {
-
- // 12 point IMDCT
-
- // Begin 12 point IDCT
-
- // Input aliasing for 12 pt IDCT
- in[15+i] += in[12+i]; in[12+i] += in[9+i]; in[9+i] += in[6+i];
- in[6+i] += in[3+i]; in[3+i] += in[0+i];
-
- // Input aliasing on odd indices (for 6 point IDCT)
- in[15+i] += in[9+i]; in[9+i] += in[3+i];
-
- // 3 point IDCT on even indices
-
- {
- real pp1, pp2, sum;
- pp2 = in[12+i] * 0.500000000f;
- pp1 = in[ 6+i] * 0.866025403f;
- sum = in[0+i] + pp2;
- tmp[1] = in[0+i] - in[12+i];
- tmp[0] = sum + pp1;
- tmp[2] = sum - pp1;
-
- // End 3 point IDCT on even indices
-
- // 3 point IDCT on odd indices (for 6 point IDCT)
-
- pp2 = in[15+i] * 0.500000000f;
- pp1 = in[ 9+i] * 0.866025403f;
- sum = in[ 3+i] + pp2;
- tmp[4] = in[3+i] - in[15+i];
- tmp[5] = sum + pp1;
- tmp[3] = sum - pp1;
- }
-
- // End 3 point IDCT on odd indices
-
- // Twiddle factors on odd indices (for 6 point IDCT)
-
- tmp[3] *= 1.931851653f;
- tmp[4] *= 0.707106781f;
- tmp[5] *= 0.517638090f;
-
- // Output butterflies on 2 3 point IDCT's (for 6 point IDCT)
-
- {
- real save = tmp[0];
- tmp[0] += tmp[5];
- tmp[5] = save - tmp[5];
- save = tmp[1];
- tmp[1] += tmp[4];
- tmp[4] = save - tmp[4];
- save = tmp[2];
- tmp[2] += tmp[3];
- tmp[3] = save - tmp[3];
- }
-
- // End 6 point IDCT
-
- // Twiddle factors on indices (for 12 point IDCT)
-
- tmp[0] *= 0.504314480f;
- tmp[1] *= 0.541196100f;
- tmp[2] *= 0.630236207f;
- tmp[3] *= 0.821339815f;
- tmp[4] *= 1.306562965f;
- tmp[5] *= 3.830648788f;
-
- // End 12 point IDCT
-
- // Shift to 12 point modified IDCT, multiply by window type 2
- tmp[8] = -tmp[0] * 0.793353340f;
- tmp[9] = -tmp[0] * 0.608761429f;
- tmp[7] = -tmp[1] * 0.923879532f;
- tmp[10] = -tmp[1] * 0.382683432f;
- tmp[6] = -tmp[2] * 0.991444861f;
- tmp[11] = -tmp[2] * 0.130526192f;
-
- tmp[0] = tmp[3];
- tmp[1] = tmp[4] * 0.382683432f;
- tmp[2] = tmp[5] * 0.608761429f;
-
- tmp[3] = -tmp[5] * 0.793353340f;
- tmp[4] = -tmp[4] * 0.923879532f;
- tmp[5] = -tmp[0] * 0.991444861f;
-
- tmp[0] *= 0.130526192f;
-
- out[six_i + 6] += tmp[0];
- out[six_i + 7] += tmp[1];
- out[six_i + 8] += tmp[2];
- out[six_i + 9] += tmp[3];
- out[six_i + 10] += tmp[4];
- out[six_i + 11] += tmp[5];
- out[six_i + 12] += tmp[6];
- out[six_i + 13] += tmp[7];
- out[six_i + 14] += tmp[8];
- out[six_i + 15] += tmp[9];
- out[six_i + 16] += tmp[10];
- out[six_i + 17] += tmp[11];
-
- six_i += 6;
- }
-
- } else {
-
- /*
- // 36 point IDCT
-
- // input aliasing for 36 point IDCT
- in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14]; in[14]+=in[13];
- in[13]+=in[12]; in[12]+=in[11]; in[11]+=in[10]; in[10]+=in[9];
- in[9] +=in[8]; in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5];
- in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2]; in[2] +=in[1];
- in[1] +=in[0];
-
- // 18 point IDCT for odd indices
-
- // input aliasing for 18 point IDCT
- in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
- in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1];
-
- {
- real tmp0,tmp1,tmp2,tmp3,tmp4,tmp0_,tmp1_,tmp2_,tmp3_;
- real tmp0o,tmp1o,tmp2o,tmp3o,tmp4o,tmp0_o,tmp1_o,tmp2_o,tmp3_o;
-
- // Fast 9 Point Inverse Discrete Cosine Transform
- //
- // By Francois-Raymond Boyer
- // mailto:boyerf@iro.umontreal.ca
- // http://www.iro.umontreal.ca/~boyerf
- //
- // The code has been optimized for Intel processors
- // (takes a lot of time to convert float to and from iternal FPU representation)
- //
- // It is a simple "factorization" of the IDCT matrix.
-
- // 9 point IDCT on even indices
- {
- // 5 points on odd indices (not realy an IDCT)
- real i0 = in[0]+in[0];
- real i0p12 = i0 + in[12];
-
- tmp0 = i0p12 + in[4]*1.8793852415718f + in[8]*1.532088886238f + in[16]*0.34729635533386f;
- tmp1 = i0 + in[4] - in[8] - in[12] - in[12] - in[16];
- tmp2 = i0p12 - in[4]*0.34729635533386f - in[8]*1.8793852415718f + in[16]*1.532088886238f;
- tmp3 = i0p12 - in[4]*1.532088886238f + in[8]*0.34729635533386f - in[16]*1.8793852415718f;
- tmp4 = in[0] - in[4] + in[8] - in[12] + in[16];
-
- // 4 points on even indices
- real i6_ = in[6]*1.732050808f; // Sqrt[3]
-
- tmp0_ = in[2]*1.9696155060244f + i6_ + in[10]*1.2855752193731f + in[14]*0.68404028665134f;
- tmp1_ = (in[2] - in[10] - in[14])*1.732050808f;
- tmp2_ = in[2]*1.2855752193731f - i6_ - in[10]*0.68404028665134f + in[14]*1.9696155060244f;
- tmp3_ = in[2]*0.68404028665134f - i6_ + in[10]*1.9696155060244f - in[14]*1.2855752193731f;
- }
-
- // 9 point IDCT on odd indices
- {
- // 5 points on odd indices (not realy an IDCT)
- real i0 = in[0+1]+in[0+1];
- real i0p12 = i0 + in[12+1];
-
- tmp0o = i0p12 + in[4+1]*1.8793852415718f + in[8+1]*1.532088886238f + in[16+1]*0.34729635533386f;
- tmp1o = i0 + in[4+1] - in[8+1] - in[12+1] - in[12+1] - in[16+1];
- tmp2o = i0p12 - in[4+1]*0.34729635533386f - in[8+1]*1.8793852415718f + in[16+1]*1.532088886238f;
- tmp3o = i0p12 - in[4+1]*1.532088886238f + in[8+1]*0.34729635533386f - in[16+1]*1.8793852415718f;
- tmp4o = (in[0+1] - in[4+1] + in[8+1] - in[12+1] + in[16+1])*0.707106781f; // Twiddled
-
- // 4 points on even indices
- real i6_ = in[6+1]*1.732050808f; // Sqrt[3]
-
- tmp0_o = in[2+1]*1.9696155060244f + i6_ + in[10+1]*1.2855752193731f + in[14+1]*0.68404028665134f;
- tmp1_o = (in[2+1] - in[10+1] - in[14+1])*1.732050808f;
- tmp2_o = in[2+1]*1.2855752193731f - i6_ - in[10+1]*0.68404028665134f + in[14+1]*1.9696155060244f;
- tmp3_o = in[2+1]*0.68404028665134f - i6_ + in[10+1]*1.9696155060244f - in[14+1]*1.2855752193731f;
- }
-
- // Twiddle factors on odd indices
- // and
- // Butterflies on 9 point IDCT's
- // and
- // twiddle factors for 36 point IDCT
-
- real e, o;
- e = tmp0 + tmp0_; o = (tmp0o + tmp0_o)*0.501909918f; tmp[0] = (e + o)*(-0.500476342f*.5f); tmp[17] = (e - o)*(-11.46279281f*.5f);
- e = tmp1 + tmp1_; o = (tmp1o + tmp1_o)*0.517638090f; tmp[1] = (e + o)*(-0.504314480f*.5f); tmp[16] = (e - o)*(-3.830648788f*.5f);
- e = tmp2 + tmp2_; o = (tmp2o + tmp2_o)*0.551688959f; tmp[2] = (e + o)*(-0.512139757f*.5f); tmp[15] = (e - o)*(-2.310113158f*.5f);
- e = tmp3 + tmp3_; o = (tmp3o + tmp3_o)*0.610387294f; tmp[3] = (e + o)*(-0.524264562f*.5f); tmp[14] = (e - o)*(-1.662754762f*.5f);
- tmp[4] = (tmp4 + tmp4o)*(-0.541196100f); tmp[13] = (tmp4 - tmp4o)*(-1.306562965f);
- e = tmp3 - tmp3_; o = (tmp3o - tmp3_o)*0.871723397f; tmp[5] = (e + o)*(-0.563690973f*.5f); tmp[12] = (e - o)*(-1.082840285f*.5f);
- e = tmp2 - tmp2_; o = (tmp2o - tmp2_o)*1.183100792f; tmp[6] = (e + o)*(-0.592844523f*.5f); tmp[11] = (e - o)*(-0.930579498f*.5f);
- e = tmp1 - tmp1_; o = (tmp1o - tmp1_o)*1.931851653f; tmp[7] = (e + o)*(-0.630236207f*.5f); tmp[10] = (e - o)*(-0.821339815f*.5f);
- e = tmp0 - tmp0_; o = (tmp0o - tmp0_o)*5.736856623f; tmp[8] = (e + o)*(-0.678170852f*.5f); tmp[9] = (e - o)*(-0.740093616f*.5f);
- }
- // end 36 point IDCT */
-
- // 36 point IDCT
-
- // input aliasing for 36 point IDCT
- in[17]+=in[16]; in[16]+=in[15]; in[15]+=in[14]; in[14]+=in[13];
- in[13]+=in[12]; in[12]+=in[11]; in[11]+=in[10]; in[10]+=in[9];
- in[9] +=in[8]; in[8] +=in[7]; in[7] +=in[6]; in[6] +=in[5];
- in[5] +=in[4]; in[4] +=in[3]; in[3] +=in[2]; in[2] +=in[1];
- in[1] +=in[0];
-
- // 18 point IDCT for odd indices
-
- // input aliasing for 18 point IDCT
- in[17]+=in[15]; in[15]+=in[13]; in[13]+=in[11]; in[11]+=in[9];
- in[9] +=in[7]; in[7] +=in[5]; in[5] +=in[3]; in[3] +=in[1];
-
- // 9 point IDCT on even indices
-
- // original:
-
- /* for(i=0; i<9; i++) {
- sum = 0.0;
-
- for(j=0;j<18;j+=2)
- sum += in[j] * cos(PI36 * (2*i + 1) * j);
-
- tmp[i] = sum;
- } */
-
- // 9 Point Inverse Discrete Cosine Transform
- //
- // This piece of code is Copyright 1997 Mikko Tommila and is freely usable
- // by anybody. The algorithm itself is of course in the public domain.
- //
- // Again derived heuristically from the 9-point WFTA.
- //
- // The algorithm is optimized (?) for speed, not for small rounding errors or
- // good readability.
- //
- // 36 additions, 11 multiplications
- //
- // Again this is very likely sub-optimal.
- //
- // The code is optimized to use a minimum number of temporary variables,
- // so it should compile quite well even on 8-register Intel x86 processors.
- // This makes the code quite obfuscated and very difficult to understand.
- //
- // References:
- // [1] S. Winograd: "On Computing the Discrete Fourier Transform",
- // Mathematics of Computation, Volume 32, Number 141, January 1978,
- // Pages 175-199
-
- // Some modifications for maplay by Jeff Tsay
- {
- real t0, t1, t2, t3, t4, t5, t6, t7;
-
- t1 = COSPI3 * in[12];
- t2 = COSPI3 * (in[8] + in[16] - in[4]);
-
- t3 = in[0] + t1;
- t4 = in[0] - t1 - t1;
- t5 = t4 - t2;
-
- t0 = DCTEVEN1 * (in[4] + in[8]);
- t1 = DCTEVEN2 * (in[8] - in[16]);
-
- tmp[4] = t4 + t2 + t2;
- t2 = DCTEVEN3 * (in[4] + in[16]);
-
- t6 = t3 - t0 - t2;
- t0 += t3 + t1;
- t3 += t2 - t1;
-
- t2 = DCTODD1 * (in[2] + in[10]);
- t4 = DCTODD2 * (in[10] - in[14]);
- t7 = COSPI6 * in[6];
-
- t1 = t2 + t4 + t7;
- tmp[0] = t0 + t1;
- tmp[8] = t0 - t1;
- t1 = DCTODD3 * (in[2] + in[14]);
- t2 += t1 - t7;
-
- tmp[3] = t3 + t2;
- t0 = COSPI6 * (in[10] + in[14] - in[2]);
- tmp[5] = t3 - t2;
-
- t4 -= t1 + t7;
-
- tmp[1] = t5 - t0;
- tmp[7] = t5 + t0;
- tmp[2] = t6 + t4;
- tmp[6] = t6 - t4;
- }
-
- // End 9 point IDCT on even indices
-
- // 9 point IDCT on odd indices
-
- // original:
- /* for(i=0; i<9; i++) {
- sum = 0.0;
-
- for(j=0;j<18;j+=2)
- sum += in[j+1] * cos(PI36 * (2*i + 1) * j);
-
- tmp[17-i] = sum;
- } */
-
- // This includes multiplication by the twiddle factors
- // at the end -- Jeff.
- {
- real t0, t1, t2, t3, t4, t5, t6, t7;
-
- t1 = COSPI3 * in[13];
- t2 = COSPI3 * (in[9] + in[17] - in[5]);
-
- t3 = in[1] + t1;
- t4 = in[1] - t1 - t1;
- t5 = t4 - t2;
-
- t0 = DCTEVEN1 * (in[5] + in[9]);
- t1 = DCTEVEN2 * (in[9] - in[17]);
-
- tmp[13] = (t4 + t2 + t2) * 0.707106781f;
- t2 = DCTEVEN3 * (in[5] + in[17]);
-
- t6 = t3 - t0 - t2;
- t0 += t3 + t1;
- t3 += t2 - t1;
-
- t2 = DCTODD1 * (in[3] + in[11]);
- t4 = DCTODD2 * (in[11] - in[15]);
- t7 = COSPI6 * in[7];
-
- t1 = t2 + t4 + t7;
- tmp[17] = (t0 + t1) * 0.501909918f;
- tmp[9] = (t0 - t1) * 5.736856623f;
- t1 = DCTODD3 * (in[3] + in[15]);
- t2 += t1 - t7;
-
- tmp[14] = (t3 + t2) * 0.610387294f;
- t0 = COSPI6 * (in[11] + in[15] - in[3]);
- tmp[12] = (t3 - t2) * 0.871723397f;
-
- t4 -= t1 + t7;
-
- tmp[16] = (t5 - t0) * 0.517638090f;
- tmp[10] = (t5 + t0) * 1.931851653f;
- tmp[15] = (t6 + t4) * 0.551688959f;
- tmp[11] = (t6 - t4) * 1.183100792f;
- }
-
- // End 9 point IDCT on odd indices
-
- // Butterflies on 9 point IDCT's
- for (int32 i=0;i<9;i++) {
- real save = tmp[i];
- tmp[i] += tmp[17-i];
- tmp[17-i] = save - tmp[17-i];
- }
- // end 18 point IDCT
-
- // twiddle factors for 36 point IDCT
- // (already taken care of in window)
-
- // tmp[0] *= -0.500476342f;
- // tmp[1] *= -0.504314480f;
- // tmp[2] *= -0.512139757f;
- // tmp[3] *= -0.524264562f;
- // tmp[4] *= -0.541196100f;
- // tmp[5] *= -0.563690973f;
- // tmp[6] *= -0.592844523f;
- // tmp[7] *= -0.630236207f;
- // tmp[8] *= -0.678170852f;
- // tmp[9] *= -0.740093616f;
- // tmp[10]*= -0.821339815f;
- // tmp[11]*= -0.930579498f;
- // tmp[12]*= -1.082840285f;
- // tmp[13]*= -1.306562965f;
- // tmp[14]*= -1.662754762f;
- // tmp[15]*= -2.310113158f;
- // tmp[16]*= -3.830648788f;
- // tmp[17]*= -11.46279281f;
-
- // end 36 point IDCT
-
- // shift to modified IDCT
- win_bt = win[block_type];
-
- out[0] =-tmp[9] * win_bt[0];
- out[1] =-tmp[10] * win_bt[1];
- out[2] =-tmp[11] * win_bt[2];
- out[3] =-tmp[12] * win_bt[3];
- out[4] =-tmp[13] * win_bt[4];
- out[5] =-tmp[14] * win_bt[5];
- out[6] =-tmp[15] * win_bt[6];
- out[7] =-tmp[16] * win_bt[7];
- out[8] =-tmp[17] * win_bt[8];
-
- out[9] = tmp[17] * win_bt[9];
- out[10]= tmp[16] * win_bt[10];
- out[11]= tmp[15] * win_bt[11];
- out[12]= tmp[14] * win_bt[12];
- out[13]= tmp[13] * win_bt[13];
- out[14]= tmp[12] * win_bt[14];
- out[15]= tmp[11] * win_bt[15];
- out[16]= tmp[10] * win_bt[16];
- out[17]= tmp[9] * win_bt[17];
- out[18]= tmp[8] * win_bt[18];
- out[19]= tmp[7] * win_bt[19];
- out[20]= tmp[6] * win_bt[20];
- out[21]= tmp[5] * win_bt[21];
- out[22]= tmp[4] * win_bt[22];
- out[23]= tmp[3] * win_bt[23];
- out[24]= tmp[2] * win_bt[24];
- out[25]= tmp[1] * win_bt[25];
- out[26]= tmp[0] * win_bt[26];
-
- out[27]= tmp[0] * win_bt[27];
- out[28]= tmp[1] * win_bt[28];
- out[29]= tmp[2] * win_bt[29];
- out[30]= tmp[3] * win_bt[30];
- out[31]= tmp[4] * win_bt[31];
- out[32]= tmp[5] * win_bt[32];
- out[33]= tmp[6] * win_bt[33];
- out[34]= tmp[7] * win_bt[34];
- out[35]= tmp[8] * win_bt[35];
- }
- }
-