00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_biquad_cascade_df1_fast_q15.c 00009 * 00010 * Description: Fast processing function for the 00011 * Q15 Biquad cascade filter. 00012 * 00013 * Target Processor: Cortex-R4/R5 00014 * 00015 * Version 1.0.0 2011/03/08 00016 * Alpha release. 00017 * 00018 * Version 1.0.1 2011/09/30 00019 * Beta release. 00020 * 00021 * Version 2.0.0 2011/12/15 00022 * Final release. 00023 * 00024 * 00025 * -------------------------------------------------------------------- */ 00026 00027 #include "arm_math.h" 00028 00064 void arm_biquad_cascade_df1_fast_q15( 00065 const arm_biquad_casd_df1_inst_q15 * S, 00066 q15_t * pSrc, 00067 q15_t * pDst, 00068 uint32_t blockSize) 00069 { 00070 q15_t *pIn = pSrc; /* Source pointer */ 00071 q15_t *pOut = pDst; /* Destination pointer */ 00072 q31_t in; /* Temporary variable to hold input value */ 00073 q31_t out; /* Temporary variable to hold output value */ 00074 q31_t b0; /* Temporary variable to hold bo value */ 00075 q31_t b1, a1; /* Filter coefficients */ 00076 q31_t state_in, state_out; /* Filter state variables */ 00077 q31_t acc; /* Accumulator */ 00078 int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */ 00079 q15_t *pState = S->pState; /* State pointer */ 00080 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00081 uint32_t sample, stage = S->numStages; /* Stage loop counter */ 00082 00083 00084 do 00085 { 00086 00087 /* Read the b0 and 0 coefficients using SIMD */ 00088 b0 = *__SIMD32(pCoeffs)++; 00089 00090 /* Read the b1 and b2 coefficients using SIMD */ 00091 b1 = *__SIMD32(pCoeffs)++; 00092 00093 /* Read the a1 and a2 coefficients using SIMD */ 00094 a1 = *__SIMD32(pCoeffs)++; 00095 00096 /* Read the input state values from the state buffer: x[n-1], x[n-2] */ 00097 state_in = *__SIMD32(pState)++; 00098 00099 /* Read the output state values from the state buffer: y[n-1], y[n-2] */ 00100 state_out = *__SIMD32(pState)--; 00101 00102 /* Apply loop unrolling and compute 2 output values simultaneously. */ 00103 /* The variables acc ... acc3 hold output values that are being computed: 00104 * 00105 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00106 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00107 */ 00108 sample = blockSize >> 1u; 00109 00110 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00111 ** a second loop below computes the remaining 1 sample. */ 00112 while(sample > 0u) 00113 { 00114 00115 /* Read the input */ 00116 in = *__SIMD32(pIn)++; 00117 00118 /* out = b1 * x[n-1] + b2 * x[n-2] */ 00119 out = __SMUAD(b1, state_in); 00120 00121 /* acc += b0 * x[n] + 0 * 0 */ 00122 acc = __SMLAD(b0, in, out); 00123 00124 /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */ 00125 acc = __SMLAD(a1, state_out, acc); 00126 00127 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00128 #ifdef CCS 00129 out = __SSATA((acc >> shift), 0, 16); 00130 #else 00131 out = __SSAT((acc >> shift), 16); 00132 #endif 00133 00134 /* Every time after the output is computed state should be updated. */ 00135 /* The states should be updated as: */ 00136 /* Xn2 = Xn1 */ 00137 /* Xn1 = Xn */ 00138 /* Yn2 = Yn1 */ 00139 /* Yn1 = acc */ 00140 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00141 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00142 00143 #ifndef ARM_MATH_BIG_ENDIAN 00144 state_in = __PKHBT(in, state_in, 16); 00145 state_out = __PKHBT(out, state_out, 16); 00146 #else 00147 state_in = __PKHBT(state_in >> 16, (in >> 16), 16); 00148 state_out = __PKHBT(state_out >> 16, (out), 16); 00149 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00150 00151 /* out = b0 * x[n] + 0 * 0 */ 00152 out = __SMUADX(b0, in); 00153 00154 /* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */ 00155 acc = __SMLAD(b1, state_in, out); 00156 00157 /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */ 00158 acc = __SMLAD(a1, state_out, acc); 00159 00160 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00161 #ifdef CCS 00162 out = __SSATA((acc >> shift), 0, 16); 00163 #else 00164 out = __SSAT((acc >> shift), 16); 00165 #endif 00166 00167 /* Store the output in the destination buffer. */ 00168 #ifndef ARM_MATH_BIG_ENDIAN 00169 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 00170 #else 00171 *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16); 00172 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00173 00174 /* Every time after the output is computed state should be updated. */ 00175 /* The states should be updated as: */ 00176 /* Xn2 = Xn1 */ 00177 /* Xn1 = Xn */ 00178 /* Yn2 = Yn1 */ 00179 /* Yn1 = acc */ 00180 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00181 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00182 00183 #ifndef ARM_MATH_BIG_ENDIAN 00184 state_in = __PKHBT(in >> 16, state_in, 16); 00185 state_out = __PKHBT(out, state_out, 16); 00186 #else 00187 state_in = __PKHBT(state_in >> 16, in, 16); 00188 state_out = __PKHBT(state_out >> 16, out, 16); 00189 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00190 00191 /* Decrement the loop counter */ 00192 sample--; 00193 00194 } 00195 00196 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00197 ** No loop unrolling is used. */ 00198 00199 if((blockSize & 0x1u) != 0u) 00200 { 00201 /* Read the input */ 00202 in = *pIn++; 00203 00204 /* out = b0 * x[n] + 0 * 0 */ 00205 00206 #ifndef ARM_MATH_BIG_ENDIAN 00207 out = __SMUAD(b0, in); 00208 #else 00209 out = __SMUADX(b0, in); 00210 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00211 00212 /* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */ 00213 acc = __SMLAD(b1, state_in, out); 00214 00215 /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */ 00216 acc = __SMLAD(a1, state_out, acc); 00217 00218 /* The result is converted from 3.29 to 1.31 and then saturation is applied */ 00219 #ifdef CCS 00220 out = __SSATA((acc >> shift), 0, 16); 00221 #else 00222 out = __SSAT((acc >> shift), 16); 00223 #endif 00224 00225 /* Store the output in the destination buffer. */ 00226 *pOut++ = (q15_t) out; 00227 00228 /* Every time after the output is computed state should be updated. */ 00229 /* The states should be updated as: */ 00230 /* Xn2 = Xn1 */ 00231 /* Xn1 = Xn */ 00232 /* Yn2 = Yn1 */ 00233 /* Yn1 = acc */ 00234 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00235 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00236 00237 #ifndef ARM_MATH_BIG_ENDIAN 00238 state_in = __PKHBT(in, state_in, 16); 00239 state_out = __PKHBT(out, state_out, 16); 00240 #else 00241 state_in = __PKHBT(state_in >> 16, in, 16); 00242 state_out = __PKHBT(state_out >> 16, out, 16); 00243 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00244 00245 } 00246 00247 /* The first stage goes from the input buffer to the output buffer. */ 00248 /* Subsequent (numStages - 1) occur in-place in the output buffer */ 00249 pIn = pDst; 00250 00251 /* Reset the output pointer */ 00252 pOut = pDst; 00253 00254 /* Store the updated state variables back into the state array */ 00255 *__SIMD32(pState)++ = state_in; 00256 *__SIMD32(pState)++ = state_out; 00257 00258 /* Decrement the loop counter */ 00259 stage--; 00260 00261 } while(stage > 0u); 00262 } 00263 00264