/*--------------------------------------------------------------------------*/ /* ITU-T G.722.1 Fullband Extension Annex X. Source Code */ /* © 2008 Ericsson AB. and Polycom Inc. */ /* All rights reserved. */ /*--------------------------------------------------------------------------*/ #include "cnst.h" #include "complxop.h" #include "rom.h" /*--------------------------------------------------------------------------*/ /* Function wfft120 */ /* ~~~~~~~~~~~~~~~~~ */ /* */ /* Winograd 120 point FFT */ /*--------------------------------------------------------------------------*/ /* complex *xi (i/o) input and output of the FFT */ /*--------------------------------------------------------------------------*/ void wfft120(complex *xi) { short ind0, n3, n5; float t0r, t1r, t2r, t3r, t4r, t5r, t6r, t7r, q1r, x1r, x5r; float t0i, t1i, t2i, t3i, t4i, t5i, t6i, t7i, q1i, x1i, x5i; float *ps; complex x[144]; complex *p0, *p1, *p2, *p3, *p4, *p5, *p6, *p7; complex *pi0, *pi1, *pi2, *pi3, *pi4, *pi5, *pi6, *pi7; p0 = x; for (ind0 = 0; ind0 < 120; ind0 += 8) { pi0 = xi + indxPre[ind0+0]; pi1 = xi + indxPre[ind0+1]; pi2 = xi + indxPre[ind0+2]; pi3 = xi + indxPre[ind0+3]; pi4 = xi + indxPre[ind0+4]; pi5 = xi + indxPre[ind0+5]; pi6 = xi + indxPre[ind0+6]; pi7 = xi + indxPre[ind0+7]; t0r = pi0->r + pi4->r; t0i = pi0->i + pi4->i; t1r = pi1->r + pi5->r; t1i = pi1->i + pi5->i; t2r = pi2->r + pi6->r; t2i = pi2->i + pi6->i; t3r = pi3->r + pi7->r; t3i = pi3->i + pi7->i; t5r = pi1->r - pi5->r; t5i = pi1->i - pi5->i; t7r = pi3->r - pi7->r; t7i = pi3->i - pi7->i; p0->r = t0r + t2r; p0->i = t0i + t2i; p0++; p0->r = t1r + t3r; p0->i = t1i + t3i; p0++; p0->r = t0r - t2r; p0->i = t0i - t2i; p0++; p0->r = t1r - t3r; p0->i = t1i - t3i; p0++; p0->r = pi0->r - pi4->r; p0->i = pi0->i - pi4->i; p0++; p0->r = t5r + t7r; p0->i = t5i + t7i; p0++; p0->r = pi2->r - pi6->r; p0->i = pi2->i - pi6->i; p0++; p0->r = t5r - t7r; p0->i = t5i - t7i; p0++; } for (n5 = 0; n5 < 120; n5 += 24) { for (ind0 = n5; ind0 < (n5+8); ind0++) { p0 = x + ind0; p1 = x + ind0 + 8; p2 = x + ind0 + 16; x1r = p1->r; x1i = p1->i; p1->r = x1r + p2->r; p1->i = x1i + p2->i; p2->r = x1r - p2->r; p2->i = x1i - p2->i; cadd(*p0, *p1, p0); } } for (n3 = 0; n3 < 24; n3 += 8) { for (ind0 = n3; ind0 < (n3+8); ind0++) { p0 = x + ind0; p1 = x + ind0 + 24; p2 = x + ind0 + 48; p3 = x + ind0 + 72; p4 = x + ind0 + 96; p5 = x + ind0 + 120; ps = fft120cnst + ind0; t3r = p3->r + p2->r; t3i = p3->i + p2->i; t2r = p3->r - p2->r; t2i = p3->i - p2->i; t1r = p1->r + p4->r; t1i = p1->i + p4->i; csub(*p1, *p4, p2); p1->r = t1r + t3r; p1->i = t1i + t3i; p3->r = t1r - t3r; p3->i = t1i - t3i; x5r = p2->r + t2r; x5i = p2->i + t2i; cadd(*p0, *p1, p0); cmpys(*p0, *ps, p0); ps += 24; cmpys(*p1, *ps, p1); ps += 24; cmpys(*p2, *ps, p2); ps += 24; cmpys(*p3, *ps, p3); ps += 24; p4->r = t2r * *ps; p4->i = t2i * *ps; ps += 24; p5->r = x5r * *ps; p5->i = x5i * *ps; } } for (n3 = 0; n3 < 24; n3 += 8) { for (ind0 = n3; ind0 < (n3+8); ind0++) { p0 = x + ind0; p1 = x + ind0 + 24; p2 = x + ind0 + 48; p3 = x + ind0 + 72; p4 = x + ind0 + 96; p5 = x + ind0 + 120; cmpyj(p2); cmpyj(p4); cmpyj(p5); q1r = p0->r + p1->r; q1i = p0->i + p1->i; t1r = q1r + p3->r; t1i = q1i + p3->i; t4r = p2->r + p5->r; t4i = p2->i + p5->i; t3r = q1r - p3->r; t3i = q1i - p3->i; t2r = p4->r + p5->r; t2i = p4->i + p5->i; p1->r = t1r + t4r; p1->i = t1i + t4i; p4->r = t1r - t4r; p4->i = t1i - t4i; p3->r = t3r + t2r; p3->i = t3i + t2i; p2->r = t3r - t2r; p2->i = t3i - t2i; } } for (n5 = 0; n5 < 120; n5 += 24) { for (ind0 = n5; ind0 < (n5+8); ind0++) { p0 = x + ind0; p1 = x + ind0 + 8; p2 = x + ind0 + 16; t1r = p0->r + p1->r; t1i = p0->i + p1->i; cmpyj(p2); p1->r = t1r + p2->r; p1->i = t1i + p2->i; p2->r = t1r - p2->r; p2->i = t1i - p2->i; } } for (ind0 = 0; ind0 < 120; ind0 += 8) { p0 = x + ind0; p1 = x + ind0 + 1; p2 = x + ind0 + 2; p3 = x + ind0 + 3; p4 = x + ind0 + 4; p5 = x + ind0 + 5; p6 = x + ind0 + 6; p7 = x + ind0 + 7; pi0 = xi + indxPost[ind0+0]; pi1 = xi + indxPost[ind0+1]; pi2 = xi + indxPost[ind0+2]; pi3 = xi + indxPost[ind0+3]; pi4 = xi + indxPost[ind0+4]; pi5 = xi + indxPost[ind0+5]; pi6 = xi + indxPost[ind0+6]; pi7 = xi + indxPost[ind0+7]; cmpyj(p3); cmpyj(p5); cmpyj(p6); t5r = p4->r + p5->r; t5i = p4->i + p5->i; t4r = p4->r - p5->r; t4i = p4->i - p5->i; t7r = p6->r + p7->r; t7i = p6->i + p7->i; t6r = p6->r - p7->r; t6i = p6->i - p7->i; cadd(*p0, *p1, pi0); csub(*p0, *p1, pi4); cadd(*p2, *p3, pi2); csub(*p2, *p3, pi6); pi1->r = t5r + t7r; pi1->i = t5i + t7i; pi5->r = t4r + t6r; pi5->i = t4i + t6i; pi3->r = t5r - t7r; pi3->i = t5i - t7i; pi7->r = t4r - t6r; pi7->i = t4i - t6i; } return; } /*--------------------------------------------------------------------------*/ /* Function wfft480 */ /* ~~~~~~~~~~~~~~~~~ */ /* */ /* Winograd 480 points FFT */ /*--------------------------------------------------------------------------*/ /* complex *pI (i) input to the FFT */ /* complex *pO (o) output to the FFT */ /*--------------------------------------------------------------------------*/ static void wfft480(complex *pI, complex *pO) { short i, j; float temp; float x0r, x1r, x2r, x3r, t0r, t1r, t2r, t3r; float x0i, x1i, x2i, x3i, t0i, t1i, t2i, t3i; complex *pw1, *pw2, *pw3; complex *pt0, *pt1, *pt2, *pt3; pt0 = pI; pt1 = pI + 120; pt2 = pI + 240; pt3 = pI + 360; pw1 = ptwdf + 1; pw2 = ptwdf + 2; pw3 = ptwdf + 3; for(i = 0; i < 120; i++) { t0r = pt0->r + pt2->r; t0i = pt0->i + pt2->i; t2r = pt0->r - pt2->r; t2i = pt0->i - pt2->i; t1r = pt1->r + pt3->r; t1i = pt1->i + pt3->i; t3r = pt3->r - pt1->r; t3i = pt3->i - pt1->i; temp = t3r; t3r = -t3i; t3i = temp; x0r = t0r + t1r; x0i = t0i + t1i; x1r = t2r + t3r; x1i = t2i + t3i; x2r = t0r - t1r; x2i = t0i - t1i; x3r = t2r - t3r; x3i = t2i - t3i; // twiddle factors pt0->r = x0r; pt0->i = x0i; pt1->r = x1r * pw1->r - x1i * pw1->i; pt1->i = x1r * pw1->i + x1i * pw1->r; pt2->r = x2r * pw2->r - x2i * pw2->i; pt2->i = x2r * pw2->i + x2i * pw2->r; pt3->r = x3r * pw3->r - x3i * pw3->i; pt3->i = x3r * pw3->i + x3i * pw3->r; pt0++; pt1++; pt2++; pt3++; pw1 += 4; pw2 += 4; pw3 += 4; } for(i = 0; i < 480; i+=120) { wfft120(pI+i); } pt0 = pO; for(j = 0; j < 120; j++) { pt1 = pI + j; for(i = 0; i < 4; i++) { *pt0++ = *pt1; pt1 += 120; } } return; } /*--------------------------------------------------------------------------*/ /* Function dct4_960 */ /* ~~~~~~~~~~~~~~~~~~ */ /* */ /* DCT4 960 points */ /*--------------------------------------------------------------------------*/ /* float v[] (i) input of the DCT4 */ /* float coefs32[] (o) coefficient of the DCT4 */ /*--------------------------------------------------------------------------*/ void dct4_960(float v[MLT960_LENGTH], float coefs32[MLT960_LENGTH]) { short n; float f_int[MLT960_LENGTH]; for(n = 0; n < MLT960_LENGTH; n += 2) { f_int[n] = ((v[n] * dct480_table_1[n>>1].r - v[(MLT960_LENGTH_MINUS_1-n)] * dct480_table_1[n>>1].i)); f_int[n+1] = ((v[n] * dct480_table_1[n>>1].i + v[(MLT960_LENGTH_MINUS_1-n)] * dct480_table_1[n>>1].r)); } wfft480((complex *)f_int, (complex *)v); for(n = 0; n < MLT960_LENGTH; n += 2) { coefs32[n] = ((v[n] * dct480_table_2[n>>1].r - v[n+1] * dct480_table_2[n>>1].i))/4.0f; coefs32[MLT960_LENGTH_MINUS_1-n] = -(v[n] * dct480_table_2[n>>1].i + v[n+1] * dct480_table_2[n>>1].r)/4.0f; } } /*--------------------------------------------------------------------------*/ /* Function dct4_240 */ /* ~~~~~~~~~~~~~~~~~~ */ /* */ /* DCT4 240 points */ /*--------------------------------------------------------------------------*/ /* float v[] (i) input of the DCT4 */ /* float coefs32[] (o) coefficient of the DCT4 */ /*--------------------------------------------------------------------------*/ void dct4_240(float v[MLT240_LENGTH], float coefs32[MLT240_LENGTH]) { short n; float f_int[MLT240_LENGTH]; for (n=0; n < MLT240_LENGTH; n+=2) { f_int[n] = (v[n]*dct120_table_1[n>>1].r - v[(MLT240_LENGTH_MINUS_1-n)]*dct120_table_1[n>>1].i); f_int[n+1] = (v[n]*dct120_table_1[n>>1].i + v[(MLT240_LENGTH_MINUS_1-n)]*dct120_table_1[n>>1].r); } wfft120((complex *)f_int); for (n=0; n < MLT240_LENGTH; n+=2) { coefs32[n] = (f_int[n]*dct120_table_2[n>>1].r - f_int[n+1]*dct120_table_2[n>>1].i)/2.0f; coefs32[MLT240_LENGTH_MINUS_1-n] = -(f_int[n]*dct120_table_2[n>>1].i + f_int[n+1]*dct120_table_2[n>>1].r)/2.0f; } }