2019.06.24

This commit is contained in:
hnwangkg-ezio 2019-06-24 17:11:43 +08:00
commit 1f82e4a488
2990 changed files with 667957 additions and 444550 deletions

View file

@ -0,0 +1,242 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_bitreversal.c
*
* Description: This file has common tables like Bitreverse, reciprocal etc which are used across different functions
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftSize length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
* @param[in] *pBitRevTab points to the bit reversal table.
* @return none.
*/
void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab)
{
uint16_t fftLenBy2, fftLenBy2p1;
uint16_t i, j;
float32_t in;
/* Initializations */
j = 0u;
fftLenBy2 = fftSize >> 1u;
fftLenBy2p1 = (fftSize >> 1u) + 1u;
/* Bit Reversal Implementation */
for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
{
if(i < j)
{
/* pSrc[i] <-> pSrc[j]; */
in = pSrc[2u * i];
pSrc[2u * i] = pSrc[2u * j];
pSrc[2u * j] = in;
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[(2u * i) + 1u];
pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u];
pSrc[(2u * j) + 1u] = in;
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
in = pSrc[2u * (i + fftLenBy2p1)];
pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)];
pSrc[2u * (j + fftLenBy2p1)] = in;
/* pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */
in = pSrc[(2u * (i + fftLenBy2p1)) + 1u];
pSrc[(2u * (i + fftLenBy2p1)) + 1u] =
pSrc[(2u * (j + fftLenBy2p1)) + 1u];
pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in;
}
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[2u * (i + 1u)];
pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)];
pSrc[2u * (j + fftLenBy2)] = in;
/* pSrc[i+2u] <-> pSrc[j+2u] */
in = pSrc[(2u * (i + 1u)) + 1u];
pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u];
pSrc[(2u * (j + fftLenBy2)) + 1u] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTab;
/* Updating the bit reversal index depending on the fft length */
pBitRevTab += bitRevFactor;
}
}
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
* @param[in] fftLen length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
* @param[in] *pBitRevTab points to bit reversal table.
* @return none.
*/
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTable)
{
uint32_t fftLenBy2, fftLenBy2p1, i, j;
q31_t in;
/* Initializations */
j = 0u;
fftLenBy2 = fftLen / 2u;
fftLenBy2p1 = (fftLen / 2u) + 1u;
/* Bit Reversal Implementation */
for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
{
if(i < j)
{
/* pSrc[i] <-> pSrc[j]; */
in = pSrc[2u * i];
pSrc[2u * i] = pSrc[2u * j];
pSrc[2u * j] = in;
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[(2u * i) + 1u];
pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u];
pSrc[(2u * j) + 1u] = in;
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
in = pSrc[2u * (i + fftLenBy2p1)];
pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)];
pSrc[2u * (j + fftLenBy2p1)] = in;
/* pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */
in = pSrc[(2u * (i + fftLenBy2p1)) + 1u];
pSrc[(2u * (i + fftLenBy2p1)) + 1u] =
pSrc[(2u * (j + fftLenBy2p1)) + 1u];
pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in;
}
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[2u * (i + 1u)];
pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)];
pSrc[2u * (j + fftLenBy2)] = in;
/* pSrc[i+2u] <-> pSrc[j+2u] */
in = pSrc[(2u * (i + 1u)) + 1u];
pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u];
pSrc[(2u * (j + fftLenBy2)) + 1u] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTable;
/* Updating the bit reversal index depending on the fft length */
pBitRevTable += bitRevFactor;
}
}
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of Q15 data type.
* @param[in] fftLen length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
* @param[in] *pBitRevTab points to bit reversal table.
* @return none.
*/
void arm_bitreversal_q15(
q15_t * pSrc16,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab)
{
q31_t *pSrc = (q31_t *) pSrc16;
q31_t in;
uint32_t fftLenBy2, fftLenBy2p1;
uint32_t i, j;
/* Initializations */
j = 0u;
fftLenBy2 = fftLen / 2u;
fftLenBy2p1 = (fftLen / 2u) + 1u;
/* Bit Reversal Implementation */
for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
{
if(i < j)
{
/* pSrc[i] <-> pSrc[j]; */
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[i];
pSrc[i] = pSrc[j];
pSrc[j] = in;
/* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */
/* pSrc[i + fftLenBy2p1+1u] <-> pSrc[j + fftLenBy2p1+1u] */
in = pSrc[i + fftLenBy2p1];
pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
pSrc[j + fftLenBy2p1] = in;
}
/* pSrc[i+1u] <-> pSrc[j+fftLenBy2]; */
/* pSrc[i+2] <-> pSrc[j+fftLenBy2+1u] */
in = pSrc[i + 1u];
pSrc[i + 1u] = pSrc[j + fftLenBy2];
pSrc[j + fftLenBy2] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTab;
/* Updating the bit reversal index depending on the fft length */
pBitRevTab += bitRevFactor;
}
}

View file

@ -0,0 +1,632 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_f32.c
*
* Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
extern void arm_radix8_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier);
extern void arm_bitreversal_32(
uint32_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
/**
* @ingroup groupTransforms
*/
/**
* @defgroup ComplexFFT Complex FFT Functions
*
* \par
* The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
* Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster
* than the DFT, especially for long lengths.
* The algorithms described in this section
* operate on complex data. A separate set of functions is devoted to handling
* of real sequences.
* \par
* There are separate algorithms for handling floating-point, Q15, and Q31 data
* types. The algorithms available for each data type are described next.
* \par
* The FFT functions operate in-place. That is, the array holding the input data
* will also be used to hold the corresponding result. The input data is complex
* and contains <code>2*fftLen</code> interleaved values as shown below.
* <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
* The FFT result will be contained in the same array and the frequency domain
* values will have the same interleaving.
*
* \par Floating-point
* The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
* stages are performed along with a single radix-2 or radix-4 stage, as needed.
* The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
* a different twiddle factor table.
* \par
* The function uses the standard FFT definition and output values may grow by a
* factor of <code>fftLen</code> when computing the forward transform. The
* inverse transform includes a scale of <code>1/fftLen</code> as part of the
* calculation and this matches the textbook definition of the inverse FFT.
* \par
* Pre-initialized data structures containing twiddle factors and bit reversal
* tables are provided and defined in <code>arm_const_structs.h</code>. Include
* this header in your function and then pass one of the constant structures as
* an argument to arm_cfft_f32. For example:
* \par
* <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
* \par
* computes a 64-point inverse complex FFT including bit reversal.
* The data structures are treated as constant data and not modified during the
* calculation. The same data structure can be reused for multiple transforms
* including mixing forward and inverse transforms.
* \par
* Earlier releases of the library provided separate radix-2 and radix-4
* algorithms that operated on floating-point data. These functions are still
* provided but are deprecated. The older functions are slower and less general
* than the new functions.
* \par
* An example of initialization of the constants for the arm_cfft_f32 function follows:
* \code
* const static arm_cfft_instance_f32 *S;
* ...
* switch (length) {
* case 16:
* S = &arm_cfft_sR_f32_len16;
* break;
* case 32:
* S = &arm_cfft_sR_f32_len32;
* break;
* case 64:
* S = &arm_cfft_sR_f32_len64;
* break;
* case 128:
* S = &arm_cfft_sR_f32_len128;
* break;
* case 256:
* S = &arm_cfft_sR_f32_len256;
* break;
* case 512:
* S = &arm_cfft_sR_f32_len512;
* break;
* case 1024:
* S = &arm_cfft_sR_f32_len1024;
* break;
* case 2048:
* S = &arm_cfft_sR_f32_len2048;
* break;
* case 4096:
* S = &arm_cfft_sR_f32_len4096;
* break;
* }
* \endcode
* \par Q15 and Q31
* The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-4
* stages are performed along with a single radix-2 stage, as needed.
* The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
* a different twiddle factor table.
* \par
* The function uses the standard FFT definition and output values may grow by a
* factor of <code>fftLen</code> when computing the forward transform. The
* inverse transform includes a scale of <code>1/fftLen</code> as part of the
* calculation and this matches the textbook definition of the inverse FFT.
* \par
* Pre-initialized data structures containing twiddle factors and bit reversal
* tables are provided and defined in <code>arm_const_structs.h</code>. Include
* this header in your function and then pass one of the constant structures as
* an argument to arm_cfft_q31. For example:
* \par
* <code>arm_cfft_q31(arm_cfft_sR_q31_len64, pSrc, 1, 1)</code>
* \par
* computes a 64-point inverse complex FFT including bit reversal.
* The data structures are treated as constant data and not modified during the
* calculation. The same data structure can be reused for multiple transforms
* including mixing forward and inverse transforms.
* \par
* Earlier releases of the library provided separate radix-2 and radix-4
* algorithms that operated on floating-point data. These functions are still
* provided but are deprecated. The older functions are slower and less general
* than the new functions.
* \par
* An example of initialization of the constants for the arm_cfft_q31 function follows:
* \code
* const static arm_cfft_instance_q31 *S;
* ...
* switch (length) {
* case 16:
* S = &arm_cfft_sR_q31_len16;
* break;
* case 32:
* S = &arm_cfft_sR_q31_len32;
* break;
* case 64:
* S = &arm_cfft_sR_q31_len64;
* break;
* case 128:
* S = &arm_cfft_sR_q31_len128;
* break;
* case 256:
* S = &arm_cfft_sR_q31_len256;
* break;
* case 512:
* S = &arm_cfft_sR_q31_len512;
* break;
* case 1024:
* S = &arm_cfft_sR_q31_len1024;
* break;
* case 2048:
* S = &arm_cfft_sR_q31_len2048;
* break;
* case 4096:
* S = &arm_cfft_sR_q31_len4096;
* break;
* }
* \endcode
*
*/
void arm_cfft_radix8by2_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
uint32_t L = S->fftLen;
float32_t * pCol1, * pCol2, * pMid1, * pMid2;
float32_t * p2 = p1 + L;
const float32_t * tw = (float32_t *) S->pTwiddle;
float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
float32_t m0, m1, m2, m3;
uint32_t l;
pCol1 = p1;
pCol2 = p2;
// Define new length
L >>= 1;
// Initialize mid pointers
pMid1 = p1 + L;
pMid2 = p2 + L;
// do two dot Fourier transform
for ( l = L >> 2; l > 0; l-- )
{
t1[0] = p1[0];
t1[1] = p1[1];
t1[2] = p1[2];
t1[3] = p1[3];
t2[0] = p2[0];
t2[1] = p2[1];
t2[2] = p2[2];
t2[3] = p2[3];
t3[0] = pMid1[0];
t3[1] = pMid1[1];
t3[2] = pMid1[2];
t3[3] = pMid1[3];
t4[0] = pMid2[0];
t4[1] = pMid2[1];
t4[2] = pMid2[2];
t4[3] = pMid2[3];
*p1++ = t1[0] + t2[0];
*p1++ = t1[1] + t2[1];
*p1++ = t1[2] + t2[2];
*p1++ = t1[3] + t2[3]; // col 1
t2[0] = t1[0] - t2[0];
t2[1] = t1[1] - t2[1];
t2[2] = t1[2] - t2[2];
t2[3] = t1[3] - t2[3]; // for col 2
*pMid1++ = t3[0] + t4[0];
*pMid1++ = t3[1] + t4[1];
*pMid1++ = t3[2] + t4[2];
*pMid1++ = t3[3] + t4[3]; // col 1
t4[0] = t4[0] - t3[0];
t4[1] = t4[1] - t3[1];
t4[2] = t4[2] - t3[2];
t4[3] = t4[3] - t3[3]; // for col 2
twR = *tw++;
twI = *tw++;
// multiply by twiddle factors
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
// R = R * Tr - I * Ti
*p2++ = m0 + m1;
// I = I * Tr + R * Ti
*p2++ = m2 - m3;
// use vertical symmetry
// 0.9988 - 0.0491i <==> -0.0491 - 0.9988i
m0 = t4[0] * twI;
m1 = t4[1] * twR;
m2 = t4[1] * twI;
m3 = t4[0] * twR;
*pMid2++ = m0 - m1;
*pMid2++ = m2 + m3;
twR = *tw++;
twI = *tw++;
m0 = t2[2] * twR;
m1 = t2[3] * twI;
m2 = t2[3] * twR;
m3 = t2[2] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
m0 = t4[2] * twI;
m1 = t4[3] * twR;
m2 = t4[3] * twI;
m3 = t4[2] * twR;
*pMid2++ = m0 - m1;
*pMid2++ = m2 + m3;
}
// first col
arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2u);
// second col
arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2u);
}
void arm_cfft_radix8by4_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
uint32_t L = S->fftLen >> 1;
float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
const float32_t *tw2, *tw3, *tw4;
float32_t * p2 = p1 + L;
float32_t * p3 = p2 + L;
float32_t * p4 = p3 + L;
float32_t t2[4], t3[4], t4[4], twR, twI;
float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;
float32_t m0, m1, m2, m3;
uint32_t l, twMod2, twMod3, twMod4;
pCol1 = p1; // points to real values by default
pCol2 = p2;
pCol3 = p3;
pCol4 = p4;
pEnd1 = p2 - 1; // points to imaginary values by default
pEnd2 = p3 - 1;
pEnd3 = p4 - 1;
pEnd4 = pEnd3 + L;
tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;
L >>= 1;
// do four dot Fourier transform
twMod2 = 2;
twMod3 = 4;
twMod4 = 6;
// TOP
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// Twiddle factors are ones
*p2++ = t2[0];
*p2++ = t2[1];
*p3++ = t3[0];
*p3++ = t3[1];
*p4++ = t4[0];
*p4++ = t4[1];
tw2 += twMod2;
tw3 += twMod3;
tw4 += twMod4;
for (l = (L - 2) >> 1; l > 0; l-- )
{
// TOP
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1 - top
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// BOTTOM
p1ap3_1 = pEnd1[-1] + pEnd3[-1];
p1sp3_1 = pEnd1[-1] - pEnd3[-1];
p1ap3_0 = pEnd1[0] + pEnd3[0];
p1sp3_0 = pEnd1[0] - pEnd3[0];
// col 2
t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1;
t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
// col 3
t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
t3[3] = p1ap3_0 - pEnd2[0] - pEnd4[0];
// col 4
t4[2] = pEnd2[0] - pEnd4[0] - p1sp3_1;
t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
// col 1 - Bottom
*pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0];
*pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];
// COL 2
// read twiddle factors
twR = *tw2++;
twI = *tw2++;
// multiply by twiddle factors
// let Z1 = a + i(b), Z2 = c + i(d)
// => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d)
// Top
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
// use vertical symmetry col 2
// 0.9997 - 0.0245i <==> 0.0245 - 0.9997i
// Bottom
m0 = t2[3] * twI;
m1 = t2[2] * twR;
m2 = t2[2] * twI;
m3 = t2[3] * twR;
*pEnd2-- = m0 - m1;
*pEnd2-- = m2 + m3;
// COL 3
twR = tw3[0];
twI = tw3[1];
tw3 += twMod3;
// Top
m0 = t3[0] * twR;
m1 = t3[1] * twI;
m2 = t3[1] * twR;
m3 = t3[0] * twI;
*p3++ = m0 + m1;
*p3++ = m2 - m3;
// use vertical symmetry col 3
// 0.9988 - 0.0491i <==> -0.9988 - 0.0491i
// Bottom
m0 = -t3[3] * twR;
m1 = t3[2] * twI;
m2 = t3[2] * twR;
m3 = t3[3] * twI;
*pEnd3-- = m0 - m1;
*pEnd3-- = m3 - m2;
// COL 4
twR = tw4[0];
twI = tw4[1];
tw4 += twMod4;
// Top
m0 = t4[0] * twR;
m1 = t4[1] * twI;
m2 = t4[1] * twR;
m3 = t4[0] * twI;
*p4++ = m0 + m1;
*p4++ = m2 - m3;
// use vertical symmetry col 4
// 0.9973 - 0.0736i <==> -0.0736 + 0.9973i
// Bottom
m0 = t4[3] * twI;
m1 = t4[2] * twR;
m2 = t4[2] * twI;
m3 = t4[3] * twR;
*pEnd4-- = m0 - m1;
*pEnd4-- = m2 + m3;
}
//MIDDLE
// Twiddle factors are
// 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1 - Top
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// COL 2
twR = tw2[0];
twI = tw2[1];
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
// COL 3
twR = tw3[0];
twI = tw3[1];
m0 = t3[0] * twR;
m1 = t3[1] * twI;
m2 = t3[1] * twR;
m3 = t3[0] * twI;
*p3++ = m0 + m1;
*p3++ = m2 - m3;
// COL 4
twR = tw4[0];
twI = tw4[1];
m0 = t4[0] * twR;
m1 = t4[1] * twI;
m2 = t4[1] * twR;
m3 = t4[0] * twI;
*p4++ = m0 + m1;
*p4++ = m2 - m3;
// first col
arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4u);
// second col
arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4u);
// third col
arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4u);
// fourth col
arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4u);
}
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the floating-point complex FFT.
* @param[in] *S points to an instance of the floating-point CFFT structure.
* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return none.
*/
void arm_cfft_f32(
const arm_cfft_instance_f32 * S,
float32_t * p1,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen, l;
float32_t invL, * pSrc;
if(ifftFlag == 1u)
{
/* Conjugate input data */
pSrc = p1 + 1;
for(l=0; l<L; l++)
{
*pSrc = -*pSrc;
pSrc += 2;
}
}
switch (L)
{
case 16:
case 128:
case 1024:
arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32 *) S, p1);
break;
case 32:
case 256:
case 2048:
arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32 *) S, p1);
break;
case 64:
case 512:
case 4096:
arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1);
break;
}
if( bitReverseFlag )
arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
if(ifftFlag == 1u)
{
invL = 1.0f/(float32_t)L;
/* Conjugate and scale output data */
pSrc = p1;
for(l=0; l<L; l++)
{
*pSrc++ *= invL ;
*pSrc = -(*pSrc) * invL;
pSrc++;
}
}
}
/**
* @} end of ComplexFFT group
*/

View file

@ -0,0 +1,357 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_q15.c
*
* Description: Combined Radix Decimation in Q15 Frequency CFFT processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
extern void arm_radix4_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_radix4_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_bitreversal_16(
uint16_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
void arm_cfft_radix4by2_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef);
void arm_cfft_radix4by2_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the Q15 complex FFT.
* @param[in] *S points to an instance of the Q15 CFFT structure.
* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return none.
*/
void arm_cfft_q15(
const arm_cfft_instance_q15 * S,
q15_t * p1,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen;
if(ifftFlag == 1u)
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_inverse_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_inverse_q15 ( p1, L, S->pTwiddle );
break;
}
}
else
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_q15 ( p1, L, S->pTwiddle );
break;
}
}
if( bitReverseFlag )
arm_bitreversal_16((uint16_t*)p1,S->bitRevLength,S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_cfft_radix4by2_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef)
{
uint32_t i;
uint32_t n2;
q15_t p0, p1, p2, p3;
#ifndef ARM_MATH_CM0_FAMILY
q31_t T, S, R;
q31_t coeff, out1, out2;
const q15_t *pC = pCoef;
q15_t *pSi = pSrc;
q15_t *pSl = pSrc + fftLen;
#else
uint32_t ia, l;
q15_t xt, yt, cosVal, sinVal;
#endif
n2 = fftLen >> 1;
#ifndef ARM_MATH_CM0_FAMILY
for (i = n2; i > 0; i--)
{
coeff = _SIMD32_OFFSET(pC);
pC += 2;
T = _SIMD32_OFFSET(pSi);
T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
S = _SIMD32_OFFSET(pSl);
S = __SHADD16(S, 0); // this is just a SIMD arithmetic shift right by 1
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSi) = __SHADD16(T, S);
pSi += 2;
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSl) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSl += 2;
}
#else // #ifndef ARM_MATH_CM0_FAMILY
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 1u) - (pSrc[2 * l] >> 1u);
pSrc[2 * i] = ((pSrc[2 * i] >> 1u) + (pSrc[2 * l] >> 1u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 1u) - (pSrc[2 * l + 1] >> 1u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1u) + (pSrc[2 * i + 1] >> 1u)) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
}
#endif // #ifndef ARM_MATH_CM0_FAMILY
// first col
arm_radix4_butterfly_q15( pSrc, n2, (q15_t*)pCoef, 2u);
// second col
arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2u);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
p2 = pSrc[4*i+2];
p3 = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
p2 <<= 1;
p3 <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = p2;
pSrc[4*i+3] = p3;
}
}
void arm_cfft_radix4by2_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef)
{
uint32_t i;
uint32_t n2;
q15_t p0, p1, p2, p3;
#ifndef ARM_MATH_CM0_FAMILY
q31_t T, S, R;
q31_t coeff, out1, out2;
const q15_t *pC = pCoef;
q15_t *pSi = pSrc;
q15_t *pSl = pSrc + fftLen;
#else
uint32_t ia, l;
q15_t xt, yt, cosVal, sinVal;
#endif
n2 = fftLen >> 1;
#ifndef ARM_MATH_CM0_FAMILY
for (i = n2; i > 0; i--)
{
coeff = _SIMD32_OFFSET(pC);
pC += 2;
T = _SIMD32_OFFSET(pSi);
T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
S = _SIMD32_OFFSET(pSl);
S = __SHADD16(S, 0); // this is just a SIMD arithmetic shift right by 1
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSi) = __SHADD16(T, S);
pSi += 2;
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSl) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSl += 2;
}
#else // #ifndef ARM_MATH_CM0_FAMILY
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 1u) - (pSrc[2 * l] >> 1u);
pSrc[2 * i] = ((pSrc[2 * i] >> 1u) + (pSrc[2 * l] >> 1u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 1u) - (pSrc[2 * l + 1] >> 1u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1u) + (pSrc[2 * i + 1] >> 1u)) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
}
#endif // #ifndef ARM_MATH_CM0_FAMILY
// first col
arm_radix4_butterfly_inverse_q15( pSrc, n2, (q15_t*)pCoef, 2u);
// second col
arm_radix4_butterfly_inverse_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2u);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
p2 = pSrc[4*i+2];
p3 = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
p2 <<= 1;
p3 <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = p2;
pSrc[4*i+3] = p3;
}
}

View file

@ -0,0 +1,264 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_q31.c
*
* Description: Combined Radix Decimation in Frequency CFFT fixed point processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
extern void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_bitreversal_32(
uint32_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
void arm_cfft_radix4by2_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef);
void arm_cfft_radix4by2_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point complex FFT in Q31 format.
* @param[in] *S points to an instance of the fixed-point CFFT structure.
* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return none.
*/
void arm_cfft_q31(
const arm_cfft_instance_q31 * S,
q31_t * p1,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen;
if(ifftFlag == 1u)
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_inverse_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_inverse_q31 ( p1, L, S->pTwiddle );
break;
}
}
else
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_q31 ( p1, L, S->pTwiddle );
break;
}
}
if( bitReverseFlag )
arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_cfft_radix4by2_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef)
{
uint32_t i, l;
uint32_t n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
n2 = fftLen >> 1;
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[2*ia];
sinVal = pCoef[2*ia + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 2) - (pSrc[2 * l] >> 2);
pSrc[2 * i] = (pSrc[2 * i] >> 2) + (pSrc[2 * l] >> 2);
yt = (pSrc[2 * i + 1] >> 2) - (pSrc[2 * l + 1] >> 2);
pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2) + (pSrc[2 * i + 1] >> 2);
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0 << 1;
pSrc[2u * l + 1u] = p1 << 1;
}
// first col
arm_radix4_butterfly_q31( pSrc, n2, (q31_t*)pCoef, 2u);
// second col
arm_radix4_butterfly_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2u);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
xt = pSrc[4*i+2];
yt = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
xt <<= 1;
yt <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = xt;
pSrc[4*i+3] = yt;
}
}
void arm_cfft_radix4by2_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef)
{
uint32_t i, l;
uint32_t n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
n2 = fftLen >> 1;
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[2*ia];
sinVal = pCoef[2*ia + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 2) - (pSrc[2 * l] >> 2);
pSrc[2 * i] = (pSrc[2 * i] >> 2) + (pSrc[2 * l] >> 2);
yt = (pSrc[2 * i + 1] >> 2) - (pSrc[2 * l + 1] >> 2);
pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2) + (pSrc[2 * i + 1] >> 2);
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0 << 1;
pSrc[2u * l + 1u] = p1 << 1;
}
// first col
arm_radix4_butterfly_inverse_q31( pSrc, n2, (q31_t*)pCoef, 2u);
// second col
arm_radix4_butterfly_inverse_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2u);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
xt = pSrc[4*i+2];
yt = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
xt <<= 1;
yt <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = xt;
pSrc[4*i+3] = yt;
}
}

View file

@ -0,0 +1,485 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_f32.c
*
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix2_butterfly_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Radix-2 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in] *S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_f32(
const arm_cfft_radix2_instance_f32 * S,
float32_t * pSrc)
{
if(S->ifftFlag == 1u)
{
/* Complex IFFT radix-2 */
arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier, S->onebyfftLen);
}
else
{
/* Complex FFT radix-2 */
arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier);
}
if(S->bitReverseFlag == 1u)
{
/* Bit Reversal */
arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
}
/**
* @} end of ComplexFFT group
*/
/* ----------------------------------------------------------------------
** Internal helper function used by the FFTs
** ------------------------------------------------------------------- */
/*
* @brief Core function for the floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_radix2_butterfly_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier)
{
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
float32_t xt, yt, cosVal, sinVal;
float32_t p0, p1, p2, p3;
float32_t a0, a1;
#ifndef ARM_MATH_CM0_FAMILY
/* Initializations for the first stage */
n2 = fftLen >> 1;
ia = 0;
i = 0;
// loop for groups
for (k = n2; k > 0; k--)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
/* Twiddle coefficients index modifier */
ia += twidCoefModifier;
/* index calculation for the input as, */
/* pSrc[i + 0], pSrc[i + fftLen/1] */
l = i + n2;
/* Butterfly implementation */
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i++;
} // groups loop end
twidCoefModifier <<= 1u;
// loop for stage
for (k = n2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i += n1;
} while( i < fftLen ); // butterfly loop end
j++;
} while( j < n2); // groups loop end
twidCoefModifier <<= 1u;
} // stages loop end
// loop for butterfly
for (i = 0; i < fftLen; i += 2)
{
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
xt = pSrc[2 * i] - pSrc[2 * i + 2];
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * i + 2] = xt;
pSrc[2 * i + 3] = yt;
} // groups loop end
#else
n2 = fftLen;
// loop for stage
for (k = fftLen; k > 1; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i += n1;
} while(i < fftLen);
j++;
} while(j < n2);
twidCoefModifier <<= 1u;
}
#endif // #ifndef ARM_MATH_CM0_FAMILY
}
void arm_radix2_butterfly_inverse_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen)
{
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
float32_t xt, yt, cosVal, sinVal;
float32_t p0, p1, p2, p3;
float32_t a0, a1;
#ifndef ARM_MATH_CM0_FAMILY
n2 = fftLen >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
} // groups loop end
twidCoefModifier <<= 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
i += n1;
} while( i < fftLen ); // butterfly loop end
j++;
} while(j < n2); // groups loop end
twidCoefModifier <<= 1u;
} // stages loop end
// loop for butterfly
for (i = 0; i < fftLen; i += 2)
{
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
xt = pSrc[2 * i] - pSrc[2 * i + 2];
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
p0 = a0 * onebyfftLen;
p2 = xt * onebyfftLen;
p1 = a1 * onebyfftLen;
p3 = yt * onebyfftLen;
pSrc[2 * i] = p0;
pSrc[2 * i + 1] = p1;
pSrc[2 * i + 2] = p2;
pSrc[2 * i + 3] = p3;
} // butterfly loop end
#else
n2 = fftLen;
// loop for stage
for (k = fftLen; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
i += n1;
} while( i < fftLen ); // butterfly loop end
j++;
} while( j < n2 ); // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
p0 = a0 * onebyfftLen;
p2 = xt * onebyfftLen;
p1 = a1 * onebyfftLen;
p3 = yt * onebyfftLen;
pSrc[2 * i] = p0;
pSrc[2u * l] = p2;
pSrc[2 * i + 1] = p1;
pSrc[2u * l + 1u] = p3;
} // butterfly loop end
#endif // #ifndef ARM_MATH_CM0_FAMILY
}

View file

@ -0,0 +1,205 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_f32.c
*
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the floating-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in,out] *S points to an instance of the floating-point CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_f32(
arm_cfft_radix2_instance_f32 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (float32_t *) twiddleCoef;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.000244140625;
break;
case 2048u:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.00048828125;
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.0009765625f;
break;
case 512u:
/* Initializations of structure parameters for 512 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 8u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 8u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.001953125;
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
S->onebyfftLen = 0.00390625f;
break;
case 128u:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32u;
S->bitRevFactor = 32u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
S->onebyfftLen = 0.0078125;
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
S->onebyfftLen = 0.015625f;
break;
case 32u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 128u;
S->bitRevFactor = 128u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
S->onebyfftLen = 0.03125;
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
S->onebyfftLen = 0.0625f;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View file

@ -0,0 +1,189 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q15.c
*
* Description: Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the Q15 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
* @param[in,out] *S points to an instance of the Q15 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q15(
arm_cfft_radix2_instance_q15 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
case 2048u:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
S->twidCoefModifier = 4u;
S->bitRevFactor = 4u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 512u:
/* Initializations of structure parameters for 512 point FFT */
S->twidCoefModifier = 8u;
S->bitRevFactor = 8u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 128u:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32u;
S->bitRevFactor = 32u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 32u:
/* Initializations of structure parameters for 32 point FFT */
S->twidCoefModifier = 128u;
S->bitRevFactor = 128u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View file

@ -0,0 +1,187 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q31.c
*
* Description: Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
*
* @brief Initialization function for the Q31 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
* @param[in,out] *S points to an instance of the Q31 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q31(
arm_cfft_radix2_instance_q31 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of Instance structure depending on the FFT length */
switch (S->fftLen)
{
/* Initializations of structure parameters for 4096 point FFT */
case 4096u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
/* Initializations of structure parameters for 2048 point FFT */
case 2048u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
break;
/* Initializations of structure parameters for 1024 point FFT */
case 1024u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
/* Initializations of structure parameters for 512 point FFT */
case 512u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 8u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 8u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 128u:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32u;
S->bitRevFactor = 32u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 32u:
/* Initializations of structure parameters for 32 point FFT */
S->twidCoefModifier = 128u;
S->bitRevFactor = 128u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View file

@ -0,0 +1,742 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q15.c
*
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q15(
q15_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
* @param[in] *S points to an instance of the fixed-point CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_q15(
const arm_cfft_radix2_instance_q15 * S,
q15_t * pSrc)
{
if(S->ifftFlag == 1u)
{
arm_radix2_butterfly_inverse_q15(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
else
{
arm_radix2_butterfly_q15(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier)
{
#ifndef ARM_MATH_CM0_FAMILY
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
i++;
l++;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2u * l)) = R;
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2u * l)) = R;
} // groups loop end
#else
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = (pSrc[2 * i] >> 1u) - (pSrc[2 * l] >> 1u);
pSrc[2 * i] = ((pSrc[2 * i] >> 1u) + (pSrc[2 * l] >> 1u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 1u) - (pSrc[2 * l + 1] >> 1u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1u) + (pSrc[2 * i + 1] >> 1u)) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
#endif // #ifndef ARM_MATH_CM0_FAMILY
}
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier)
{
#ifndef ARM_MATH_CM0_FAMILY
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
i++;
l++;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2u * l)) = R;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
#else
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = (pSrc[2 * i] >> 1u) - (pSrc[2 * l] >> 1u);
pSrc[2 * i] = ((pSrc[2 * i] >> 1u) + (pSrc[2 * l] >> 1u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 1u) - (pSrc[2 * l + 1] >> 1u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1u) + (pSrc[2 * i + 1] >> 1u)) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // groups loop end
#endif // #ifndef ARM_MATH_CM0_FAMILY
}

View file

@ -0,0 +1,351 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q31.c
*
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix2_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
* @param[in] *S points to an instance of the fixed-point CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_q31(
const arm_cfft_radix2_instance_q31 * S,
q31_t * pSrc)
{
if(S->ifftFlag == 1u)
{
arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
else
{
arm_radix2_butterfly_q31(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_radix2_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier)
{
unsigned i, j, k, l, m;
unsigned n1, n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
l = i + n2;
xt = (pSrc[2 * i] >> 1u) - (pSrc[2 * l] >> 1u);
pSrc[2 * i] = ((pSrc[2 * i] >> 1u) + (pSrc[2 * l] >> 1u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 1u) - (pSrc[2 * l + 1] >> 1u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1u) + (pSrc[2 * i + 1] >> 1u)) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
} // groups loop end
twidCoefModifier <<= 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
i = j;
m = fftLen / n1;
do
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
i += n1;
m--;
} while( m > 0); // butterfly loop end
} // groups loop end
twidCoefModifier <<= 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
i += n1;
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // butterfly loop end
}
void arm_radix2_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier)
{
unsigned i, j, k, l;
unsigned n1, n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
l = i + n2;
xt = (pSrc[2 * i] >> 1u) - (pSrc[2 * l] >> 1u);
pSrc[2 * i] = ((pSrc[2 * i] >> 1u) + (pSrc[2 * l] >> 1u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 1u) - (pSrc[2 * l + 1] >> 1u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1u) + (pSrc[2 * i + 1] >> 1u)) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
i += n1;
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // butterfly loop end
}

View file

@ -0,0 +1,165 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_f32.c
*
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the floating-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in,out] *S points to an instance of the floating-point CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_f32(
arm_cfft_radix4_instance_f32 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (float32_t *) twiddleCoef;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.000244140625;
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.0009765625f;
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
S->onebyfftLen = 0.00390625f;
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
S->onebyfftLen = 0.015625f;
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
S->onebyfftLen = 0.0625f;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View file

@ -0,0 +1,152 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q15.c
*
* Description: Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the Q15 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
* @param[in,out] *S points to an instance of the Q15 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q15(
arm_cfft_radix4_instance_q15 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
S->twidCoefModifier = 4u;
S->bitRevFactor = 4u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View file

@ -0,0 +1,148 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q31.c
*
* Description: Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
*
* @brief Initialization function for the Q31 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
* @param[in,out] *S points to an instance of the Q31 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q31(
arm_cfft_radix4_instance_q31 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of Instance structure depending on the FFT length */
switch (S->fftLen)
{
/* Initializations of structure parameters for 4096 point FFT */
case 4096u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
/* Initializations of structure parameters for 1024 point FFT */
case 1024u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View file

@ -0,0 +1,384 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix8_f32.c
*
* Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupTransforms
*/
/**
* @defgroup Radix8_CFFT_CIFFT Radix-8 Complex FFT Functions
*
* \par
* Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).
* Computational complexity of CFFT reduces drastically when compared to DFT.
* \par
* This set of functions implements CFFT/CIFFT
* for floating-point data types. The functions operates on in-place buffer which uses same buffer for input and output.
* Complex input is stored in input buffer in an interleaved fashion.
*
* \par
* The functions operate on blocks of input and output data and each call to the function processes
* <code>2*fftLen</code> samples through the transform. <code>pSrc</code> points to In-place arrays containing <code>2*fftLen</code> values.
* \par
* The <code>pSrc</code> points to the array of in-place buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.
* <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
*
* \par Lengths supported by the transform:
* \par
* Internally, the function utilize a Radix-8 decimation in frequency(DIF) algorithm
* and the size of the FFT supported are of the lengths [ 64, 512, 4096].
*
*
* \par Algorithm:
*
* <b>Complex Fast Fourier Transform:</b>
* \par
* Input real and imaginary data:
* <pre>
* x(n) = xa + j * ya
* x(n+N/4 ) = xb + j * yb
* x(n+N/2 ) = xc + j * yc
* x(n+3N 4) = xd + j * yd
* </pre>
* where N is length of FFT
* \par
* Output real and imaginary data:
* <pre>
* X(4r) = xa'+ j * ya'
* X(4r+1) = xb'+ j * yb'
* X(4r+2) = xc'+ j * yc'
* X(4r+3) = xd'+ j * yd'
* </pre>
* \par
* Twiddle factors for Radix-8 FFT:
* <pre>
* Wn = co1 + j * (- si1)
* W2n = co2 + j * (- si2)
* W3n = co3 + j * (- si3)
* </pre>
*
* \par
* \image html CFFT.gif "Radix-8 Decimation-in Frequency Complex Fast Fourier Transform"
*
* \par
* Output from Radix-8 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output.
* \par
* <b> Butterfly CFFT equations:</b>
* <pre>
* xa' = xa + xb + xc + xd
* ya' = ya + yb + yc + yd
* xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
* yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
* xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
* yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
* </pre>
*
* \par
* where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for selection of CFFT or CIFFT(Set ifftFlag to calculate CIFFT otherwise calculates CFFT);
* <code>bitReverseFlag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);
* <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.
* <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;
* <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.
* <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;
*
* \par Fixed-Point Behavior
* Care must be taken when using the fixed-point versions of the CFFT/CIFFT function.
* Refer to the function specific documentation below for usage guidelines.
*/
/*
* @brief Core function for the floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_radix8_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier)
{
uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
uint32_t id;
uint32_t n1, n2, j;
float32_t r1, r2, r3, r4, r5, r6, r7, r8;
float32_t t1, t2;
float32_t s1, s2, s3, s4, s5, s6, s7, s8;
float32_t p1, p2, p3, p4;
float32_t co2, co3, co4, co5, co6, co7, co8;
float32_t si2, si3, si4, si5, si6, si7, si8;
const float32_t C81 = 0.70710678118f;
n2 = fftLen;
do
{
n1 = n2;
n2 = n2 >> 3;
i1 = 0;
do
{
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1] = r1 + r2;
pSrc[2 * i5] = r1 - r2;
r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = r1 - s3;
r1 = r1 + s3;
s3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1 + 1] = r1 + r2;
pSrc[2 * i5 + 1] = r1 - r2;
pSrc[2 * i3] = t1 + s3;
pSrc[2 * i7] = t1 - s3;
pSrc[2 * i3 + 1] = t2 - r3;
pSrc[2 * i7 + 1] = t2 + r3;
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
r2 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - r2;
s5 = s5 + r2;
s8 = s7 - s6;
s7 = s7 + s6;
pSrc[2 * i2] = r5 + s7;
pSrc[2 * i8] = r5 - s7;
pSrc[2 * i6] = t1 + s8;
pSrc[2 * i4] = t1 - s8;
pSrc[2 * i2 + 1] = s5 - r7;
pSrc[2 * i8 + 1] = s5 + r7;
pSrc[2 * i6 + 1] = t2 - r8;
pSrc[2 * i4 + 1] = t2 + r8;
i1 += n1;
} while(i1 < fftLen);
if(n2 < 8)
break;
ia1 = 0;
j = 1;
do
{
/* index calculation for the coefficients */
id = ia1 + twidCoefModifier;
ia1 = id;
ia2 = ia1 + id;
ia3 = ia2 + id;
ia4 = ia3 + id;
ia5 = ia4 + id;
ia6 = ia5 + id;
ia7 = ia6 + id;
co2 = pCoef[2 * ia1];
co3 = pCoef[2 * ia2];
co4 = pCoef[2 * ia3];
co5 = pCoef[2 * ia4];
co6 = pCoef[2 * ia5];
co7 = pCoef[2 * ia6];
co8 = pCoef[2 * ia7];
si2 = pCoef[2 * ia1 + 1];
si3 = pCoef[2 * ia2 + 1];
si4 = pCoef[2 * ia3 + 1];
si5 = pCoef[2 * ia4 + 1];
si6 = pCoef[2 * ia5 + 1];
si7 = pCoef[2 * ia6 + 1];
si8 = pCoef[2 * ia7 + 1];
i1 = j;
do
{
/* index calculation for the input */
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1] = r1 + r2;
r2 = r1 - r2;
s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = s1 - s3;
s1 = s1 + s3;
s3 = s2 - s4;
s2 = s2 + s4;
r1 = t1 + s3;
t1 = t1 - s3;
pSrc[2 * i1 + 1] = s1 + s2;
s2 = s1 - s2;
s1 = t2 - r3;
t2 = t2 + r3;
p1 = co5 * r2;
p2 = si5 * s2;
p3 = co5 * s2;
p4 = si5 * r2;
pSrc[2 * i5] = p1 + p2;
pSrc[2 * i5 + 1] = p3 - p4;
p1 = co3 * r1;
p2 = si3 * s1;
p3 = co3 * s1;
p4 = si3 * r1;
pSrc[2 * i3] = p1 + p2;
pSrc[2 * i3 + 1] = p3 - p4;
p1 = co7 * t1;
p2 = si7 * t2;
p3 = co7 * t2;
p4 = si7 * t1;
pSrc[2 * i7] = p1 + p2;
pSrc[2 * i7 + 1] = p3 - p4;
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
s1 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - s1;
s5 = s5 + s1;
s8 = s7 - s6;
s7 = s7 + s6;
r1 = r5 + s7;
r5 = r5 - s7;
r6 = t1 + s8;
t1 = t1 - s8;
s1 = s5 - r7;
s5 = s5 + r7;
s6 = t2 - r8;
t2 = t2 + r8;
p1 = co2 * r1;
p2 = si2 * s1;
p3 = co2 * s1;
p4 = si2 * r1;
pSrc[2 * i2] = p1 + p2;
pSrc[2 * i2 + 1] = p3 - p4;
p1 = co8 * r5;
p2 = si8 * s5;
p3 = co8 * s5;
p4 = si8 * r5;
pSrc[2 * i8] = p1 + p2;
pSrc[2 * i8 + 1] = p3 - p4;
p1 = co6 * r6;
p2 = si6 * s6;
p3 = co6 * s6;
p4 = si6 * r6;
pSrc[2 * i6] = p1 + p2;
pSrc[2 * i6 + 1] = p3 - p4;
p1 = co4 * t1;
p2 = si4 * t2;
p3 = co4 * t2;
p4 = si4 * t1;
pSrc[2 * i4] = p1 + p2;
pSrc[2 * i4 + 1] = p3 - p4;
i1 += n1;
} while(i1 < fftLen);
j++;
} while(j < n2);
twidCoefModifier <<= 3;
} while(n2 > 7);
}
/**
* @} end of Radix8_CFFT_CIFFT group
*/

View file

@ -0,0 +1,461 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_dct4_f32.c
*
* Description: Processing function of DCT4 & IDCT4 F32.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupTransforms
*/
/**
* @defgroup DCT4_IDCT4 DCT Type IV Functions
* Representation of signals by minimum number of values is important for storage and transmission.
* The possibility of large discontinuity between the beginning and end of a period of a signal
* in DFT can be avoided by extending the signal so that it is even-symmetric.
* Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
* spectrum and is very widely used in signal and image coding applications.
* The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
* DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular.
*
* DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
* Reordering of the input data makes the computation of DCT just a problem of
* computing the DFT of a real signal with a few additional operations.
* This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
*
* DCT type-II can be implemented using Fast fourier transform (FFT) internally, as the transform is applied on real values, Real FFT can be used.
* DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
* DCT2 implementation can be described in the following steps:
* - Re-ordering input
* - Calculating Real FFT
* - Multiplication of weights and Real FFT output and getting real part from the product.
*
* This process is explained by the block diagram below:
* \image html DCT4.gif "Discrete Cosine Transform - type-IV"
*
* \par Algorithm:
* The N-point type-IV DCT is defined as a real, linear transformation by the formula:
* \image html DCT4Equation.gif
* where <code>k = 0,1,2,.....N-1</code>
*\par
* Its inverse is defined as follows:
* \image html IDCT4Equation.gif
* where <code>n = 0,1,2,.....N-1</code>
*\par
* The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
* The symmetry of the transform matrix indicates that the fast algorithms for the forward
* and inverse transform computation are identical.
* Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.
*
* \par Lengths supported by the transform:
* As DCT4 internally uses Real FFT, it supports all the lengths supported by arm_rfft_f32().
* The library provides separate functions for Q15, Q31, and floating-point data types.
* \par Instance Structure
* The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
* A separate instance structure must be defined for each transform.
* There are separate instance structure declarations for each of the 3 supported data types.
*
* \par Initialization Functions
* There is also an associated initialization function for each data type.
* The initialization function performs the following operations:
* - Sets the values of the internal structure fields.
* - Initializes Real FFT as its process function is used internally in DCT4, by calling arm_rfft_init_f32().
* \par
* Use of the initialization function is optional.
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
* To place an instance structure into a const data section, the instance structure must be manually initialized.
* Manually initialize the instance structure as follows:
* <pre>
*arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
*arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
*arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
* </pre>
* where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
* \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
* \c pTwiddle points to the twiddle factor table;
* \c pCosFactor points to the cosFactor table;
* \c pRfft points to the real FFT instance;
* \c pCfft points to the complex FFT instance;
* The CFFT and RFFT structures also needs to be initialized, refer to arm_cfft_radix4_f32()
* and arm_rfft_f32() respectively for details regarding static initialization.
*
* \par Fixed-Point Behavior
* Care must be taken when using the fixed-point versions of the DCT4 transform functions.
* In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
* Refer to the function specific documentation below for usage guidelines.
*/
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the floating-point DCT4/IDCT4.
* @param[in] *S points to an instance of the floating-point DCT4/IDCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
*/
void arm_dct4_f32(
const arm_dct4_instance_f32 * S,
float32_t * pState,
float32_t * pInlineBuffer)
{
uint32_t i; /* Loop counter */
float32_t *weights = S->pTwiddle; /* Pointer to the Weights table */
float32_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
float32_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
float32_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as,
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = (uint32_t) S->Nby2 >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = ((uint32_t) S->N - 1u) >> 2u;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ * (float32_t) 0.5;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = ((uint32_t) S->N - 1u) % 0x4u;
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
/* Decrement the loop counter */
i--;
} while(i > 0u);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = (uint32_t) S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = (uint32_t) S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ * (float32_t) 0.5;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* Initializing the loop counter */
i = ((uint32_t) S->N - 1u);
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = (uint32_t) S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = in * S->normalize;
/* Decrement the loop counter */
i--;
} while(i > 0u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,394 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_dct4_q15.c
*
* Description: Processing function of DCT4 & IDCT4 Q15.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the Q15 DCT4/IDCT4.
* @param[in] *S points to an instance of the Q15 DCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
*
* \par Input an output formats:
* Internally inputs are downscaled in the RFFT process function to avoid overflows.
* Number of bits downscaled, depends on the size of the transform.
* The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
*
* \image html dct4FormatsQ15Table.gif
*/
void arm_dct4_q15(
const arm_dct4_instance_q15 * S,
q15_t * pState,
q15_t * pInlineBuffer)
{
uint32_t i; /* Loop counter */
q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */
q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q15_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N);
arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = (uint32_t) S->Nby2 >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.13 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
arm_shift_q15(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = ((uint32_t) S->N - 1u) >> 2u;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = ((uint32_t) S->N - 1u) % 0x4u;
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = (uint32_t) S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = (uint32_t) S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.13 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
arm_shift_q15(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter */
i = ((uint32_t) S->N - 1u);
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = (uint32_t) S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,395 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_dct4_q31.c
*
* Description: Processing function of DCT4 & IDCT4 Q31.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the Q31 DCT4/IDCT4.
* @param[in] *S points to an instance of the Q31 DCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
* \par Input an output formats:
* Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
* as the conversion from DCT2 to DCT4 involves one subtraction.
* Internally inputs are downscaled in the RFFT process function to avoid overflows.
* Number of bits downscaled, depends on the size of the transform.
* The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
*
* \image html dct4FormatsQ31Table.gif
*/
void arm_dct4_q31(
const arm_dct4_instance_q31 * S,
q31_t * pState,
q31_t * pInlineBuffer)
{
uint16_t i; /* Loop counter */
q31_t *weights = S->pTwiddle; /* Pointer to the Weights table */
q31_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
q31_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q31_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_mult_q31(pInlineBuffer, cosFact, pInlineBuffer, S->N);
arm_shift_q31(pInlineBuffer, 1, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = S->Nby2 >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = S->N >> 2u;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.29 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
arm_shift_q31(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = (S->N - 1u) >> 2u;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = (S->N - 1u) % 0x4u;
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = S->N >> 2u;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.29 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
arm_shift_q31(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* Initializing the loop counter */
i = (S->N - 1u);
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of DCT4_IDCT4 group
*/

View file

@ -0,0 +1,329 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_rfft_f32.c
*
* Description: RFFT & RIFFT Floating point process function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
extern void arm_radix4_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier);
extern void arm_radix4_butterfly_inverse_f32(
float32_t * pSrc,
uint16_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/*--------------------------------------------------------------------
* Internal functions prototypes
*--------------------------------------------------------------------*/
void arm_split_rfft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
void arm_split_rifft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the floating-point RFFT/RIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_f32 and will be removed
* in the future.
* @param[in] *S points to an instance of the floating-point RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*/
void arm_rfft_f32(
const arm_rfft_instance_f32 * S,
float32_t * pSrc,
float32_t * pDst)
{
const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft;
/* Calculation of Real IFFT of input */
if(S->ifftFlagR == 1u)
{
/* Real IFFT core process */
arm_split_rifft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex radix-4 IFFT process */
arm_radix4_butterfly_inverse_f32(pDst, S_CFFT->fftLen,
S_CFFT->pTwiddle,
S_CFFT->twidCoefModifier,
S_CFFT->onebyfftLen);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_f32(pDst, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
}
else
{
/* Calculation of RFFT of input */
/* Complex radix-4 FFT process */
arm_radix4_butterfly_f32(pSrc, S_CFFT->fftLen,
S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_f32(pSrc, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
/* Real FFT core process */
arm_split_rfft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rfft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
float32_t outR, outI; /* Temporary variables for output */
float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4u * fftLen) - 1u]; /* temp pointers for output buffer */
float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2u * fftLen) - 1u]; /* temp pointers for input buffer */
/* Init coefficient pointers */
pCoefA = &pATable[modifier * 2u];
pCoefB = &pBTable[modifier * 2u];
i = fftLen - 1u;
while(i > 0u)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
/* read pATable[2 * i] */
CoefA1 = *pCoefA++;
/* pATable[2 * i + 1] */
CoefA2 = *pCoefA;
/* pSrc[2 * i] * pATable[2 * i] */
outR = *pSrc1 * CoefA1;
/* pSrc[2 * i] * CoefA2 */
outI = *pSrc1++ * CoefA2;
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
outR -= (*pSrc1 + *pSrc2) * CoefA2;
/* pSrc[2 * i + 1] * CoefA1 */
outI += *pSrc1++ * CoefA1;
CoefB1 = *pCoefB;
/* pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
outI -= *pSrc2-- * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
outI -= *pSrc2 * CoefA2;
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
outR += *pSrc2-- * CoefB1;
/* write output */
*pDst1++ = outR;
*pDst1++ = outI;
/* write complex conjugate output */
*pDst2-- = -outI;
*pDst2-- = outR;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
i--;
}
pDst[2u * fftLen] = pSrc[0] - pSrc[1];
pDst[(2u * fftLen) + 1u] = 0.0f;
pDst[0] = pSrc[0] + pSrc[1];
pDst[1] = 0.0f;
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rifft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier)
{
float32_t outR, outI; /* Temporary variables for output */
float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2u * fftLen) + 1u];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
while(fftLen > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pSrc[2 * i] * CoefA1 */
outR = *pSrc1 * CoefA1;
/* - pSrc[2 * i] * CoefA2 */
outI = -(*pSrc1++) * CoefA2;
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
outR += (*pSrc1 + *pSrc2) * CoefA2;
/* pSrc[2 * i + 1] * CoefA1 */
outI += (*pSrc1++) * CoefA1;
CoefB1 = *pCoefB;
/* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
outI -= *pSrc2-- * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
outR += *pSrc2 * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
outI += *pSrc2-- * CoefA2;
/* write output */
*pDst++ = outR;
*pDst++ = outI;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
/* Decrement loop count */
fftLen--;
}
}

View file

@ -0,0 +1,353 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_rfft_f32.c
*
* Description: RFFT & RIFFT Floating point process function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void stage_rfft_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut)
{
uint32_t k; /* Loop Counter */
float32_t twR, twI; /* RFFT Twiddle coefficients */
float32_t * pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
float32_t *pA = p; /* increasing pointer */
float32_t *pB = p; /* decreasing pointer */
float32_t xAR, xAI, xBR, xBI; /* temporary variables */
float32_t t1a, t1b; /* temporary variables */
float32_t p0, p1, p2, p3; /* temporary variables */
k = (S->Sint).fftLen - 1;
/* Pack first and last sample of the frequency domain together */
xBR = pB[0];
xBI = pB[1];
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++ ;
twI = *pCoeff++ ;
// U1 = XA(1) + XB(1); % It is real
t1a = xBR + xAR ;
// U2 = XB(1) - XA(1); % It is imaginary
t1b = xBI + xAI ;
// real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
// imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
*pOut++ = 0.5f * ( t1a + t1b );
*pOut++ = 0.5f * ( t1a - t1b );
// XA(1) = 1/2*( U1 - imag(U2) + i*( U1 +imag(U2) ));
pB = p + 2*k;
pA += 2;
do
{
/*
function X = my_split_rfft(X, ifftFlag)
% X is a series of real numbers
L = length(X);
XC = X(1:2:end) +i*X(2:2:end);
XA = fft(XC);
XB = conj(XA([1 end:-1:2]));
TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
for l = 2:L/2
XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
end
XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
X = XA;
*/
xBI = pB[1];
xBR = pB[0];
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++;
twI = *pCoeff++;
t1a = xBR - xAR ;
t1b = xBI + xAI ;
// real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
// imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
p0 = twR * t1a;
p1 = twI * t1a;
p2 = twR * t1b;
p3 = twI * t1b;
*pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
*pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
pA += 2;
pB -= 2;
k--;
} while(k > 0u);
}
/* Prepares data for inverse cfft */
void merge_rfft_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut)
{
uint32_t k; /* Loop Counter */
float32_t twR, twI; /* RFFT Twiddle coefficients */
float32_t *pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
float32_t *pA = p; /* increasing pointer */
float32_t *pB = p; /* decreasing pointer */
float32_t xAR, xAI, xBR, xBI; /* temporary variables */
float32_t t1a, t1b, r, s, t, u; /* temporary variables */
k = (S->Sint).fftLen - 1;
xAR = pA[0];
xAI = pA[1];
pCoeff += 2 ;
*pOut++ = 0.5f * ( xAR + xAI );
*pOut++ = 0.5f * ( xAR - xAI );
pB = p + 2*k ;
pA += 2 ;
while(k > 0u)
{
/* G is half of the frequency complex spectrum */
//for k = 2:N
// Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
xBI = pB[1] ;
xBR = pB[0] ;
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++;
twI = *pCoeff++;
t1a = xAR - xBR ;
t1b = xAI + xBI ;
r = twR * t1a;
s = twI * t1b;
t = twI * t1a;
u = twR * t1b;
// real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
// imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
*pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
*pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
pA += 2;
pB -= 2;
k--;
}
}
/**
* @ingroup groupTransforms
*/
/**
* @defgroup Fast Real FFT Functions
*
* \par
* The CMSIS DSP library includes specialized algorithms for computing the
* FFT of real data sequences. The FFT is defined over complex data but
* in many applications the input is real. Real FFT algorithms take advantage
* of the symmetry properties of the FFT and have a speed advantage over complex
* algorithms of the same length.
* \par
* The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
* \par
* The real length N forward FFT of a sequence is computed using the steps shown below.
* \par
* \image html RFFT.gif "Real Fast Fourier Transform"
* \par
* The real sequence is initially treated as if it were complex to perform a CFFT.
* Later, a processing stage reshapes the data to obtain half of the frequency spectrum
* in complex format. Except the first complex number that contains the two real numbers
* X[0] and X[N/2] all the data is complex. In other words, the first complex sample
* contains two real values packed.
* \par
* The input for the inverse RFFT should keep the same format as the output of the
* forward RFFT. A first processing stage pre-process the data to later perform an
* inverse CFFT.
* \par
* \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
* \par
* The algorithms for floating-point, Q15, and Q31 data are slightly different
* and we describe each algorithm in turn.
* \par Floating-point
* The main functions are <code>arm_rfft_fast_f32()</code>
* and <code>arm_rfft_fast_init_f32()</code>. The older functions
* <code>arm_rfft_f32()</code> and <code>arm_rfft_init_f32()</code> have been
* deprecated but are still documented.
* \par
* The FFT of a real N-point sequence has even symmetry in the frequency
* domain. The second half of the data equals the conjugate of the first half
* flipped in frequency:
* <pre>
*X[0] - real data
*X[1] - complex data
*X[2] - complex data
*...
*X[fftLen/2-1] - complex data
*X[fftLen/2] - real data
*X[fftLen/2+1] - conjugate of X[fftLen/2-1]
*X[fftLen/2+2] - conjugate of X[fftLen/2-2]
*...
*X[fftLen-1] - conjugate of X[1]
* </pre>
* Looking at the data, we see that we can uniquely represent the FFT using only
* <pre>
*N/2+1 samples:
*X[0] - real data
*X[1] - complex data
*X[2] - complex data
*...
*X[fftLen/2-1] - complex data
*X[fftLen/2] - real data
* </pre>
* Looking more closely we see that the first and last samples are real valued.
* They can be packed together and we can thus represent the FFT of an N-point
* real sequence by N/2 complex values:
* <pre>
*X[0],X[N/2] - packed real data: X[0] + jX[N/2]
*X[1] - complex data
*X[2] - complex data
*...
*X[fftLen/2-1] - complex data
* </pre>
* The real FFT functions pack the frequency domain data in this fashion. The
* forward transform outputs the data in this form and the inverse transform
* expects input data in this form. The function always performs the needed
* bitreversal so that the input and output data is always in normal order. The
* functions support lengths of [32, 64, 128, ..., 4096] samples.
* \par
* The forward and inverse real FFT functions apply the standard FFT scaling; no
* scaling on the forward transform and 1/fftLen scaling on the inverse
* transform.
* \par Q15 and Q31
* The real algorithms are defined in a similar manner and utilize N/2 complex
* transforms behind the scenes.
* \par
* The complex transforms used internally include scaling to prevent fixed-point
* overflows. The overall scaling equals 1/(fftLen/2).
* \par
* A separate instance structure must be defined for each transform used but
* twiddle factor and bit reversal tables can be reused.
* \par
* There is also an associated initialization function for each data type.
* The initialization function performs the following operations:
* - Sets the values of the internal structure fields.
* - Initializes twiddle factor table and bit reversal table pointers.
* - Initializes the internal complex FFT data structure.
* \par
* Use of the initialization function is optional.
* However, if the initialization function is used, then the instance structure
* cannot be placed into a const data section. To place an instance structure
* into a const data section, the instance structure should be manually
* initialized as follows:
* <pre>
*arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
*arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
* </pre>
* where <code>fftLenReal</code> is the length of the real transform;
* <code>fftLenBy2</code> length of the internal complex transform.
* <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
* <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
* output (=1).
* <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
* The value is based on the FFT length;
* <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
* <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
* <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
* must also be initialized. Refer to arm_cfft_radix4_f32() for details regarding
* static initialization of the complex FFT instance structure.
*/
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the floating-point real FFT.
* @param[in] *S points to an arm_rfft_fast_instance_f32 structure.
* @param[in] *p points to the input buffer.
* @param[in] *pOut points to the output buffer.
* @param[in] ifftFlag RFFT if flag is 0, RIFFT if flag is 1
* @return none.
*/
void arm_rfft_fast_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut,
uint8_t ifftFlag)
{
arm_cfft_instance_f32 * Sint = &(S->Sint);
Sint->fftLen = S->fftLenRFFT / 2;
/* Calculation of Real FFT */
if(ifftFlag)
{
/* Real FFT compression */
merge_rfft_f32(S, p, pOut);
/* Complex radix-4 IFFT process */
arm_cfft_f32( Sint, pOut, ifftFlag, 1);
}
else
{
/* Calculation of RFFT of input */
arm_cfft_f32( Sint, p, ifftFlag, 1);
/* Real FFT extraction */
stage_rfft_f32(S, p, pOut);
}
}
/**
* @} end of RealFFT group
*/

View file

@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_cfft_init_f32.c
*
* Description: Split Radix Decimation in Frequency CFFT Floating point processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Initialization function for the floating-point real FFT.
* @param[in,out] *S points to an arm_rfft_fast_instance_f32 structure.
* @param[in] fftLen length of the Real Sequence.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>fftLen</code> Specifies length of RFFT/CIFFT process. Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_rfft_fast_init_f32(
arm_rfft_fast_instance_f32 * S,
uint16_t fftLen)
{
arm_cfft_instance_f32 * Sint;
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
Sint = &(S->Sint);
Sint->fftLen = fftLen/2;
S->fftLenRFFT = fftLen;
/* Initializations of structure parameters depending on the FFT length */
switch (Sint->fftLen)
{
case 2048u:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the bit reversal table length */
Sint->bitRevLength = ARMBITREVINDEXTABLE2048_TABLE_LENGTH;
/* Initialise the bit reversal table pointer */
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable2048;
/* Initialise the Twiddle coefficient pointers */
Sint->pTwiddle = (float32_t *) twiddleCoef_2048;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096;
break;
case 1024u:
Sint->bitRevLength = ARMBITREVINDEXTABLE1024_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable1024;
Sint->pTwiddle = (float32_t *) twiddleCoef_1024;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048;
break;
case 512u:
Sint->bitRevLength = ARMBITREVINDEXTABLE_512_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable512;
Sint->pTwiddle = (float32_t *) twiddleCoef_512;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024;
break;
case 256u:
Sint->bitRevLength = ARMBITREVINDEXTABLE_256_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable256;
Sint->pTwiddle = (float32_t *) twiddleCoef_256;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512;
break;
case 128u:
Sint->bitRevLength = ARMBITREVINDEXTABLE_128_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable128;
Sint->pTwiddle = (float32_t *) twiddleCoef_128;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256;
break;
case 64u:
Sint->bitRevLength = ARMBITREVINDEXTABLE__64_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable64;
Sint->pTwiddle = (float32_t *) twiddleCoef_64;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128;
break;
case 32u:
Sint->bitRevLength = ARMBITREVINDEXTABLE__32_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable32;
Sint->pTwiddle = (float32_t *) twiddleCoef_32;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64;
break;
case 16u:
Sint->bitRevLength = ARMBITREVINDEXTABLE__16_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable16;
Sint->pTwiddle = (float32_t *) twiddleCoef_16;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of RealFFT group
*/

View file

@ -0,0 +1,439 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_rfft_q15.c
*
* Description: RFFT & RIFFT Q15 process function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/*--------------------------------------------------------------------
* Internal functions prototypes
--------------------------------------------------------------------*/
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the Q15 RFFT/RIFFT.
* @param[in] *S points to an instance of the Q15 RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*
* \par Input an output formats:
* \par
* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
* Hence the output format is different for different RFFT sizes.
* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
* \par
* \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
* \par
* \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
*/
void arm_rfft_q15(
const arm_rfft_instance_q15 * S,
q15_t * pSrc,
q15_t * pDst)
{
const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
uint32_t i;
uint32_t L2 = S->fftLenReal >> 1;
/* Calculation of RIFFT of input */
if(S->ifftFlagR == 1u)
{
/* Real IFFT core process */
arm_split_rifft_q15(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex IFFT process */
arm_cfft_q15(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
for(i=0;i<S->fftLenReal;i++)
{
pDst[i] = pDst[i] << 1;
}
}
else
{
/* Calculation of RFFT of input */
/* Complex FFT process */
arm_cfft_q15(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
/* Real FFT core process */
arm_split_rfft_q15(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param *pSrc points to the input buffer.
* @param fftLen length of FFT.
* @param *pATable points to the A twiddle Coef buffer.
* @param *pBTable points to the B twiddle Coef buffer.
* @param *pDst points to the output buffer.
* @param modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
* The function implements a Real FFT
*/
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2;
#ifndef ARM_MATH_CM0_FAMILY
q15_t *pD1, *pD2;
#endif
// pSrc[2u * fftLen] = pSrc[0];
// pSrc[(2u * fftLen) + 1u] = pSrc[1];
pCoefA = &pATable[modifier * 2u];
pCoefB = &pBTable[modifier * 2u];
pSrc1 = &pSrc[2];
pSrc2 = &pSrc[(2u * fftLen) - 2u];
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
i = 1u;
pD1 = pDst + 2;
pD2 = pDst + (4u * fftLen) - 2;
for(i = fftLen - 1; i > 0; i--)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
#ifndef ARM_MATH_BIG_ENDIAN
/* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
#else
/* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 16u;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
#else
outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
/* write output */
*pD1++ = (q15_t) outR;
*pD1++ = outI >> 16u;
/* write complex conjugate output */
pD2[0] = (q15_t) outR;
pD2[1] = -(outI >> 16u);
pD2 -= 2;
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
}
pDst[2u * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2u * fftLen) + 1u] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
#else
/* Run the below code for Cortex-M0 */
i = 1u;
while(i < fftLen)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
outR = *pSrc1 * *pCoefA;
outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
outR = outR + (*pSrc2 * *pCoefB);
outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
outI = *pSrc2 * *(pCoefB + 1);
outI = outI - (*(pSrc2 + 1) * *pCoefB);
outI = outI + (*(pSrc1 + 1) * *pCoefA);
outI = outI + (*pSrc1 * *(pCoefA + 1));
/* update input pointers */
pSrc1 += 2u;
pSrc2 -= 2u;
/* write output */
pDst[2u * i] = (q15_t) outR;
pDst[(2u * i) + 1u] = outI >> 16u;
/* write complex conjugate output */
pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 16u);
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
i++;
}
pDst[2u * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2u * fftLen) + 1u] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
* The function implements a Real IFFT
*/
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2;
q15_t *pDst1 = &pDst[0];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
pSrc1 = &pSrc[0];
pSrc2 = &pSrc[2u * fftLen];
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
i = fftLen;
while(i > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
#ifndef ARM_MATH_BIG_ENDIAN
/* pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
#else
/* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] */
outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 16u;
/*
-pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
/* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
#else
outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* write output */
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 16u), 16);
#else
*__SIMD32(pDst1)++ = __PKHBT((outI >> 16u), outR, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
i--;
}
#else
/* Run the below code for Cortex-M0 */
i = fftLen;
while(i > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
outR = *pSrc2 * *pCoefB;
outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
outR = outR + (*pSrc1 * *pCoefA);
outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
/*
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
outI = *(pSrc1 + 1) * *pCoefA;
outI = outI - (*pSrc1 * *(pCoefA + 1));
outI = outI - (*pSrc2 * *(pCoefB + 1));
outI = outI - (*(pSrc2 + 1) * *(pCoefB));
/* update input pointers */
pSrc1 += 2u;
pSrc2 -= 2u;
/* write output */
*pDst1++ = (q15_t) outR;
*pDst1++ = (q15_t) (outI >> 16);
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
i--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}

View file

@ -0,0 +1,296 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
*
* $Date: 19. March 2015
* $Revision: V.1.4.5
*
* Project: CMSIS DSP Library
* Title: arm_rfft_q31.c
*
* Description: RFFT & RIFFT Q31 process function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/*--------------------------------------------------------------------
* Internal functions prototypes
--------------------------------------------------------------------*/
void arm_split_rfft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the Q31 RFFT/RIFFT.
* @param[in] *S points to an instance of the Q31 RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*
* \par Input an output formats:
* \par
* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
* Hence the output format is different for different RFFT sizes.
* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
* \par
* \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
*
* \par
* \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
*/
void arm_rfft_q31(
const arm_rfft_instance_q31 * S,
q31_t * pSrc,
q31_t * pDst)
{
const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
uint32_t i;
uint32_t L2 = S->fftLenReal >> 1;
/* Calculation of RIFFT of input */
if(S->ifftFlagR == 1u)
{
/* Real IFFT core process */
arm_split_rifft_q31(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex IFFT process */
arm_cfft_q31(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
for(i=0;i<S->fftLenReal;i++)
{
pDst[i] = pDst[i] << 1;
}
}
else
{
/* Calculation of RFFT of input */
/* Complex FFT process */
arm_cfft_q31(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
/* Real FFT core process */
arm_split_rfft_q31(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rfft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[(4u * fftLen) - 1u];
q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[(2u * fftLen) - 1u];
/* Init coefficient pointers */
pCoefA = &pATable[modifier * 2u];
pCoefB = &pBTable[modifier * 2u];
i = fftLen - 1u;
while(i > 0u)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pSrc[2 * i] * pATable[2 * i] */
mult_32x32_keep32_R(outR, *pIn1, CoefA1);
/* outI = pIn[2 * i] * pATable[2 * i + 1] */
mult_32x32_keep32_R(outI, *pIn1++, CoefA2);
/* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
multSub_32x32_keep32_R(outR, *pIn1, CoefA2);
/* (pIn[2 * i + 1] * pATable[2 * i] */
multAcc_32x32_keep32_R(outI, *pIn1++, CoefA1);
/* pSrc[2 * n - 2 * i] * pBTable[2 * i] */
multSub_32x32_keep32_R(outR, *pIn2, CoefA2);
CoefB1 = *pCoefB;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
multSub_32x32_keep32_R(outI, *pIn2--, CoefB1);
/* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
multAcc_32x32_keep32_R(outR, *pIn2, CoefB1);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
multSub_32x32_keep32_R(outI, *pIn2--, CoefA2);
/* write output */
*pOut1++ = outR;
*pOut1++ = outI;
/* write complex conjugate output */
*pOut2-- = -outI;
*pOut2-- = outR;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
i--;
}
pDst[2u * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2u * fftLen) + 1u] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier)
{
q31_t outR, outI; /* Temporary variables for output */
q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[(2u * fftLen) + 1u];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
while(fftLen > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pIn[2 * i] * pATable[2 * i] */
mult_32x32_keep32_R(outR, *pIn1, CoefA1);
/* - pIn[2 * i] * pATable[2 * i + 1] */
mult_32x32_keep32_R(outI, *pIn1++, -CoefA2);
/* pIn[2 * i + 1] * pATable[2 * i + 1] */
multAcc_32x32_keep32_R(outR, *pIn1, CoefA2);
/* pIn[2 * i + 1] * pATable[2 * i] */
multAcc_32x32_keep32_R(outI, *pIn1++, CoefA1);
/* pIn[2 * n - 2 * i] * pBTable[2 * i] */
multAcc_32x32_keep32_R(outR, *pIn2, CoefA2);
CoefB1 = *pCoefB;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
multSub_32x32_keep32_R(outI, *pIn2--, CoefB1);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
multAcc_32x32_keep32_R(outR, *pIn2, CoefB1);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
multAcc_32x32_keep32_R(outI, *pIn2--, CoefA2);
/* write output */
*pDst++ = outR;
*pDst++ = outI;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
/* Decrement loop count */
fftLen--;
}
}