00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_biquad_cascade_df1_32x64_q31.c 00009 * 00010 * Description: High precision Q31 Biquad cascade filter processing function 00011 * 00012 * Target Processor: Cortex-R4/R5 00013 * 00014 * Version 1.0.0 2011/03/08 00015 * Alpha release. 00016 * 00017 * Version 1.0.1 2011/09/30 00018 * Beta release. 00019 * 00020 * Version 2.0.0 2011/12/15 00021 * Final release. 00022 * 00023 * -------------------------------------------------------------------- */ 00024 #include "arm_math.h" 00025 00167 void arm_biquad_cas_df1_32x64_q31( 00168 const arm_biquad_cas_df1_32x64_ins_q31 * S, 00169 q31_t * pSrc, 00170 q31_t * pDst, 00171 uint32_t blockSize) 00172 { 00173 q31_t *pIn = pSrc; /* input pointer initialization */ 00174 q31_t *pOut = pDst; /* output pointer initialization */ 00175 q63_t *pState = S->pState; /* state pointer initialization */ 00176 q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ 00177 q63_t acc; /* accumulator */ 00178 q31_t Xn1, Xn2; /* Input Filter state variables */ 00179 q63_t Yn1, Yn2; /* Output Filter state variables */ 00180 q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00181 q31_t Xn; /* temporary input */ 00182 int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */ 00183 uint32_t sample, stage = S->numStages; /* loop counters */ 00184 q31_t acc_l, acc_h; /* temporary output */ 00185 uint32_t uShift = ((uint32_t) S->postShift + 1u); 00186 uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */ 00187 00188 00189 do 00190 { 00191 /* Reading the coefficients */ 00192 b0 = *pCoeffs++; 00193 b1 = *pCoeffs++; 00194 b2 = *pCoeffs++; 00195 a1 = *pCoeffs++; 00196 a2 = *pCoeffs++; 00197 00198 /* Reading the state values */ 00199 Xn1 = (q31_t)(pState[0]); 00200 Xn2 = (q31_t)(pState[1]); 00201 Yn1 = pState[2]; 00202 Yn2 = pState[3]; 00203 00204 /* Apply loop unrolling and compute 4 output values simultaneously. */ 00205 /* The variable acc hold output value that is being computed and 00206 * stored in the destination buffer 00207 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00208 */ 00209 00210 sample = blockSize >> 2u; 00211 00212 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00213 ** a second loop below computes the remaining 1 to 3 samples. */ 00214 while(sample > 0u) 00215 { 00216 /* Read the input */ 00217 Xn = *pIn++; 00218 00219 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00220 00221 /* acc = b0 * x[n] */ 00222 acc = (q63_t)Xn * b0; 00223 00224 /* acc += b1 * x[n-1] */ 00225 acc += (q63_t)Xn1 * b1; 00226 00227 /* acc += b[2] * x[n-2] */ 00228 acc += (q63_t)Xn2 * b2; 00229 00230 /* acc += a1 * y[n-1] */ 00231 acc += mult32x64(Yn1, a1); 00232 00233 /* acc += a2 * y[n-2] */ 00234 acc += mult32x64(Yn2, a2); 00235 00236 /* The result is converted to 1.63 , Yn2 variable is reused */ 00237 Yn2 = acc << shift; 00238 00239 /* Calc lower part of acc */ 00240 acc_l = acc & 0xffffffff; 00241 00242 /* Calc upper part of acc */ 00243 acc_h = (acc >> 32) & 0xffffffff; 00244 00245 /* Apply shift for lower part of acc and upper part of acc */ 00246 acc_h = (uint32_t)acc_l >> lShift | acc_h << uShift; 00247 00248 /* Store the output in the destination buffer in 1.31 format. */ 00249 *pOut = acc_h; 00250 00251 /* Read the second input into Xn2, to reuse the value */ 00252 Xn2 = *pIn++; 00253 00254 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00255 00256 /* acc += b1 * x[n-1] */ 00257 acc = (q63_t)Xn * b1; 00258 00259 /* acc = b0 * x[n] */ 00260 acc += (q63_t)Xn2* b0; 00261 00262 /* acc += b[2] * x[n-2] */ 00263 acc += (q63_t)Xn1 * b2; 00264 00265 /* acc += a1 * y[n-1] */ 00266 acc += mult32x64(Yn2, a1); 00267 00268 /* acc += a2 * y[n-2] */ 00269 acc += mult32x64(Yn1, a2); 00270 00271 /* The result is converted to 1.63, Yn1 variable is reused */ 00272 Yn1 = acc << shift; 00273 00274 /* Calc lower part of acc */ 00275 acc_l = acc & 0xffffffff; 00276 00277 /* Calc upper part of acc */ 00278 acc_h = (acc >> 32) & 0xffffffff; 00279 00280 /* Apply shift for lower part of acc and upper part of acc */ 00281 acc_h = (uint32_t)acc_l >> lShift | acc_h << uShift; 00282 00283 /* Read the third input into Xn1, to reuse the value */ 00284 Xn1 = *pIn++; 00285 00286 /* The result is converted to 1.31 */ 00287 /* Store the output in the destination buffer. */ 00288 *(pOut + 1u) = acc_h; 00289 00290 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00291 00292 /* acc = b0 * x[n] */ 00293 acc = (q63_t)Xn1 * b0; 00294 00295 /* acc += b1 * x[n-1] */ 00296 acc += (q63_t)Xn2 * b1; 00297 00298 /* acc += b[2] * x[n-2] */ 00299 acc += (q63_t)Xn * b2; 00300 00301 /* acc += a1 * y[n-1] */ 00302 acc += mult32x64(Yn1, a1); 00303 00304 /* acc += a2 * y[n-2] */ 00305 acc += mult32x64(Yn2, a2); 00306 00307 /* The result is converted to 1.63, Yn2 variable is reused */ 00308 Yn2 = acc << shift; 00309 00310 /* Calc lower part of acc */ 00311 acc_l = acc & 0xffffffff; 00312 00313 /* Calc upper part of acc */ 00314 acc_h = (acc >> 32) & 0xffffffff; 00315 00316 /* Apply shift for lower part of acc and upper part of acc */ 00317 acc_h = (uint32_t)acc_l >> lShift | acc_h << uShift; 00318 00319 /* Store the output in the destination buffer in 1.31 format. */ 00320 *(pOut + 2u) = acc_h; 00321 00322 /* Read the fourth input into Xn, to reuse the value */ 00323 Xn = *pIn++; 00324 00325 00326 00327 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00328 /* acc = b0 * x[n] */ 00329 acc = (q63_t)Xn * b0; 00330 00331 /* acc += b1 * x[n-1] */ 00332 acc += (q63_t)Xn1 * b1; 00333 00334 /* acc += b[2] * x[n-2] */ 00335 acc += (q63_t)Xn2 * b2; 00336 00337 /* acc += a1 * y[n-1] */ 00338 acc += mult32x64(Yn2, a1); 00339 00340 /* acc += a2 * y[n-2] */ 00341 acc += mult32x64(Yn1, a2); 00342 00343 /* The result is converted to 1.63, Yn1 variable is reused */ 00344 Yn1 = acc << shift; 00345 00346 /* Calc lower part of acc */ 00347 acc_l = acc & 0xffffffff; 00348 00349 /* Calc upper part of acc */ 00350 acc_h = (acc >> 32) & 0xffffffff; 00351 00352 /* Apply shift for lower part of acc and upper part of acc */ 00353 acc_h = (uint32_t)acc_l >> lShift | acc_h << uShift; 00354 00355 /* Store the output in the destination buffer in 1.31 format. */ 00356 *(pOut + 3u) = acc_h; 00357 00358 /* Every time after the output is computed state should be updated. */ 00359 /* The states should be updated as: */ 00360 /* Xn2 = Xn1 */ 00361 /* Xn1 = Xn */ 00362 /* Yn2 = Yn1 */ 00363 /* Yn1 = acc */ 00364 Xn2 = Xn1; 00365 Xn1 = Xn; 00366 00367 /* update output pointer */ 00368 pOut += 4u; 00369 00370 /* decrement the loop counter */ 00371 sample--; 00372 } 00373 00374 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00375 ** No loop unrolling is used. */ 00376 sample = (blockSize & 0x3u); 00377 00378 while(sample > 0u) 00379 { 00380 /* Read the input */ 00381 Xn = *pIn++; 00382 00383 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00384 00385 /* acc = b0 * x[n] */ 00386 acc = (q63_t)Xn * b0; 00387 /* acc += b1 * x[n-1] */ 00388 acc += (q63_t)Xn1 * b1; 00389 /* acc += b[2] * x[n-2] */ 00390 acc += (q63_t)Xn2 * b2; 00391 /* acc += a1 * y[n-1] */ 00392 acc += mult32x64(Yn1, a1); 00393 /* acc += a2 * y[n-2] */ 00394 acc += mult32x64(Yn2, a2); 00395 00396 /* Every time after the output is computed state should be updated. */ 00397 /* The states should be updated as: */ 00398 /* Xn2 = Xn1 */ 00399 /* Xn1 = Xn */ 00400 /* Yn2 = Yn1 */ 00401 /* Yn1 = acc */ 00402 Xn2 = Xn1; 00403 Xn1 = Xn; 00404 Yn2 = Yn1; 00405 00406 Yn1 = acc << shift; 00407 00408 /* Store the output in the destination buffer in 1.31 format. */ 00409 *pOut++ = (q31_t) (acc >> (32 - shift)); 00410 00411 /* decrement the loop counter */ 00412 sample--; 00413 } 00414 00415 /* The first stage output is given as input to the second stage. */ 00416 pIn = pDst; 00417 00418 /* Reset to destination buffer working pointer */ 00419 pOut = pDst; 00420 00421 /* Store the updated state variables back into the pState array */ 00422 *pState++ = (q63_t)Xn1; 00423 *pState++ = (q63_t)Xn2; 00424 *pState++ = Yn1; 00425 *pState++ = Yn2; 00426 00427 } while(--stage); 00428 } 00429