00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_biquad_cascade_df1_q15.c 00009 * 00010 * Description: Processing function for the 00011 * Q15 Biquad cascade DirectFormI(DF1) filter. 00012 * 00013 * Target Processor: Cortex-R4/R5 00014 * 00015 * Version 1.0.0 2011/03/08 00016 * Alpha release. 00017 * 00018 * Version 1.0.1 2011/09/30 00019 * Beta release. 00020 * 00021 * Version 2.0.0 2011/12/15 00022 * Final release. 00023 * 00024 * -------------------------------------------------------------------- */ 00025 00026 #include "arm_math.h" 00027 00062 void arm_biquad_cascade_df1_q15( 00063 const arm_biquad_casd_df1_inst_q15 * S, 00064 q15_t * pSrc, 00065 q15_t * pDst, 00066 uint32_t blockSize) 00067 { 00068 q15_t *pIn = pSrc; /* Source pointer */ 00069 q15_t *pOut = pDst; /* Destination pointer */ 00070 q31_t in; /* Temporary variable to hold input value */ 00071 q31_t out; /* Temporary variable to hold output value */ 00072 q31_t b0; /* Temporary variable to hold bo value */ 00073 q31_t b1, a1; /* Filter coefficients */ 00074 q31_t state_in, state_out; /* Filter state variables */ 00075 q31_t acc_l, acc_h; 00076 q63_t acc; /* Accumulator */ 00077 int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */ 00078 q15_t *pState = S->pState; /* State pointer */ 00079 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00080 uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */ 00081 int32_t uShift = (32 - lShift); 00082 00083 do 00084 { 00085 /* Read the b0 and 0 coefficients using SIMD */ 00086 b0 = *__SIMD32(pCoeffs)++; 00087 00088 /* Read the b1 and b2 coefficients using SIMD */ 00089 b1 = *__SIMD32(pCoeffs)++; 00090 00091 /* Read the a1 and a2 coefficients using SIMD */ 00092 a1 = *__SIMD32(pCoeffs)++; 00093 00094 /* Read the input state values from the state buffer: x[n-1], x[n-2] */ 00095 state_in = *__SIMD32(pState)++; 00096 00097 /* Read the output state values from the state buffer: y[n-1], y[n-2] */ 00098 state_out = *__SIMD32(pState)--; 00099 00100 /* Apply loop unrolling and compute 2 output values simultaneously. */ 00101 /* The variable acc hold output values that are being computed: 00102 * 00103 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00104 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00105 */ 00106 sample = blockSize >> 1u; 00107 00108 /* First part of the processing with loop unrolling. Compute 2 outputs at a time. 00109 ** a second loop below computes the remaining 1 sample. */ 00110 while(sample > 0u) 00111 { 00112 00113 /* Read the input */ 00114 in = *__SIMD32(pIn)++; 00115 00116 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00117 out = __SMUAD(b1, state_in); 00118 00119 /* out = b0 * x[n] + 0 * 0 */ 00120 acc = __SMLALD(b0, in, out); 00121 00122 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00123 acc = __SMLALD(a1, state_out, acc); 00124 00125 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00126 00127 /* Calc lower part of acc */ 00128 acc_l = acc & 0xffffffff; 00129 00130 /* Calc upper part of acc */ 00131 acc_h = (acc >> 32) & 0xffffffff; 00132 00133 /* Apply shift for lower part of acc and upper part of acc */ 00134 out = (uint32_t)acc_l >> lShift | acc_h << uShift; 00135 00136 /* Saturare output */ 00137 #ifdef CCS 00138 out = __SSATA(out, 0, 16); 00139 #else 00140 out = __SSAT(out, 16); 00141 #endif 00142 /* Every time after the output is computed state should be updated. */ 00143 /* The states should be updated as: */ 00144 /* Xn2 = Xn1 */ 00145 /* Xn1 = Xn */ 00146 /* Yn2 = Yn1 */ 00147 /* Yn1 = acc */ 00148 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00149 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00150 00151 #ifndef ARM_MATH_BIG_ENDIAN 00152 00153 state_in = __PKHBT(in, state_in, 16); 00154 state_out = __PKHBT(out, state_out, 16); 00155 00156 #else 00157 00158 state_in = __PKHBT(state_in >> 16, (in >> 16), 16); 00159 state_out = __PKHBT(state_out >> 16, (out), 16); 00160 00161 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00162 00163 /* out = b0 * x[n] + 0 * 0 */ 00164 out = __SMUADX(b0, in); 00165 00166 /* acc += b1 * x[n-1] + b2 * x[n-2] + out */ 00167 acc = __SMLALD(b1, state_in, out); 00168 00169 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00170 acc = __SMLALD(a1, state_out, acc); 00171 00172 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00173 00174 /* Calc lower part of acc */ 00175 acc_l = acc & 0xffffffff; 00176 00177 /* Calc upper part of acc */ 00178 acc_h = (acc >> 32) & 0xffffffff; 00179 00180 /* Apply shift for lower part of acc and upper part of acc */ 00181 out = (uint32_t)acc_l >> lShift | acc_h << uShift; 00182 00183 /* Saturare output */ 00184 #ifdef CCS 00185 out = __SSATA(out, 0, 16); 00186 #else 00187 out = __SSAT(out, 16); 00188 #endif 00189 00190 #ifndef ARM_MATH_BIG_ENDIAN 00191 00192 /* Store the output in the destination buffer. */ 00193 *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16); 00194 00195 #else 00196 00197 *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16); 00198 00199 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00200 00201 /* Every time after the output is computed state should be updated. */ 00202 /* The states should be updated as: */ 00203 /* Xn2 = Xn1 */ 00204 /* Xn1 = Xn */ 00205 /* Yn2 = Yn1 */ 00206 /* Yn1 = acc */ 00207 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00208 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00209 00210 #ifndef ARM_MATH_BIG_ENDIAN 00211 state_in = __PKHBT(in >> 16, state_in, 16); 00212 state_out = __PKHBT(out, state_out, 16); 00213 00214 #else 00215 00216 state_in = __PKHBT(state_in >> 16, in, 16); 00217 state_out = __PKHBT(state_out >> 16, out, 16); 00218 00219 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00220 /* Decrement the loop counter */ 00221 sample--; 00222 00223 } 00224 00225 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00226 ** No loop unrolling is used. */ 00227 00228 if((blockSize & 0x1u) != 0u) 00229 { 00230 /* Read the input */ 00231 in = *pIn++; 00232 00233 /* out = b0 * x[n] + 0 * 0 */ 00234 #ifndef ARM_MATH_BIG_ENDIAN 00235 out = __SMUAD(b0, in); 00236 00237 #else 00238 00239 out = __SMUADX(b0, in); 00240 00241 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00242 00243 /* acc = b1 * x[n-1] + b2 * x[n-2] + out */ 00244 acc = __SMLALD(b1, state_in, out); 00245 00246 /* acc += a1 * y[n-1] + a2 * y[n-2] */ 00247 acc = __SMLALD(a1, state_out, acc); 00248 00249 /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */ 00250 00251 /* Calc lower part of acc */ 00252 acc_l = acc & 0xffffffff; 00253 00254 /* Calc upper part of acc */ 00255 acc_h = (acc >> 32) & 0xffffffff; 00256 00257 /* Apply shift for lower part of acc and upper part of acc */ 00258 out = (uint32_t)acc_l >> lShift | acc_h << uShift; 00259 00260 /* Saturare output */ 00261 #ifdef CCS 00262 out = __SSATA(out, 0, 16); 00263 #else 00264 out = __SSAT(out, 16); 00265 #endif 00266 00267 /* Store the output in the destination buffer. */ 00268 *pOut++ = (q15_t) out; 00269 00270 /* Every time after the output is computed state should be updated. */ 00271 /* The states should be updated as: */ 00272 /* Xn2 = Xn1 */ 00273 /* Xn1 = Xn */ 00274 /* Yn2 = Yn1 */ 00275 /* Yn1 = acc */ 00276 /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */ 00277 /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */ 00278 00279 #ifndef ARM_MATH_BIG_ENDIAN 00280 00281 state_in = __PKHBT(in, state_in, 16); 00282 state_out = __PKHBT(out, state_out, 16); 00283 00284 #else 00285 00286 state_in = __PKHBT(state_in >> 16, in, 16); 00287 state_out = __PKHBT(state_out >> 16, out, 16); 00288 00289 #endif /* #ifndef ARM_MATH_BIG_ENDIAN */ 00290 } 00291 00292 /* The first stage goes from the input wire to the output wire. */ 00293 /* Subsequent numStages occur in-place in the output wire */ 00294 pIn = pDst; 00295 00296 /* Reset the output pointer */ 00297 pOut = pDst; 00298 00299 /* Store the updated state variables back into the state array */ 00300 *__SIMD32(pState)++ = state_in; 00301 *__SIMD32(pState)++ = state_out; 00302 00303 /* Decrement the loop counter */ 00304 stage--; 00305 00306 } while(stage > 0u); 00307 } 00308 00309