00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_fir_f32.c 00009 * 00010 * Description: Floating-point FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-R4/R5 00013 * 00014 * Version 1.0.0 2011/03/08 00015 * Alpha release. 00016 * 00017 * Version 1.0.1 2011/09/30 00018 * Beta release. 00019 * 00020 * Version 2.0.0 2011/12/15 00021 * Final release. 00022 * 00023 * -------------------------------------------------------------------- */ 00024 00025 #include "arm_math.h" 00026 00116 void arm_fir_f32( 00117 const arm_fir_instance_f32 * S, 00118 float32_t * pSrc, 00119 float32_t * pDst, 00120 uint32_t blockSize) 00121 { 00122 float32_t *pState = S->pState; /* State pointer */ 00123 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00124 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00125 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00126 float32_t acc0, acc1, acc2, acc3; /* Accumulators */ 00127 float32_t x0, x1, x2, x3, c0; /* Temporary variables to hold state and coefficient values */ 00128 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00129 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00130 00131 00132 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00133 /* pStateCurnt points to the location where the new input data should be written */ 00134 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00135 00136 /* Apply loop unrolling and compute 4 output values simultaneously. 00137 * The variables acc0 ... acc3 hold output values that are being computed: 00138 * 00139 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00140 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00141 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00142 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00143 */ 00144 blkCnt = blockSize >> 2; 00145 00146 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00147 ** a second loop below computes the remaining 1 to 3 samples. */ 00148 while(blkCnt > 0u) 00149 { 00150 /* Copy four new input samples into the state buffer */ 00151 *pStateCurnt++ = *pSrc++; 00152 *pStateCurnt++ = *pSrc++; 00153 *pStateCurnt++ = *pSrc++; 00154 *pStateCurnt++ = *pSrc++; 00155 00156 /* Set all accumulators to zero */ 00157 acc0 = 0.0f; 00158 acc1 = 0.0f; 00159 acc2 = 0.0f; 00160 acc3 = 0.0f; 00161 00162 /* Initialize state pointer */ 00163 px = pState; 00164 00165 /* Initialize coeff pointer */ 00166 pb = (pCoeffs); 00167 00168 /* Read the first three samples from the state buffer: x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 00169 x0 = *px++; 00170 x1 = *px++; 00171 x2 = *px++; 00172 00173 /* Loop unrolling. Process 4 taps at a time. */ 00174 tapCnt = numTaps >> 2u; 00175 00176 /* Loop over the number of taps. Unroll by a factor of 4. 00177 ** Repeat until we've computed numTaps-4 coefficients. */ 00178 while(tapCnt > 0u) 00179 { 00180 /* Read the b[numTaps-1] coefficient */ 00181 c0 = *(pb++); 00182 00183 /* Read x[n-numTaps-3] sample */ 00184 x3 = *(px++); 00185 00186 /* acc0 += b[numTaps-1] * x[n-numTaps] */ 00187 acc0 += x0 * c0; 00188 00189 /* acc1 += b[numTaps-1] * x[n-numTaps-1] */ 00190 acc1 += x1 * c0; 00191 00192 /* acc2 += b[numTaps-1] * x[n-numTaps-2] */ 00193 acc2 += x2 * c0; 00194 00195 /* acc3 += b[numTaps-1] * x[n-numTaps-3] */ 00196 acc3 += x3 * c0; 00197 00198 /* Read the b[numTaps-2] coefficient */ 00199 c0 = *(pb++); 00200 00201 /* Read x[n-numTaps-4] sample */ 00202 x0 = *(px++); 00203 00204 /* Perform the multiply-accumulate */ 00205 acc0 += x1 * c0; 00206 acc1 += x2 * c0; 00207 acc2 += x3 * c0; 00208 acc3 += x0 * c0; 00209 00210 /* Read the b[numTaps-3] coefficient */ 00211 c0 = *(pb++); 00212 00213 /* Read x[n-numTaps-5] sample */ 00214 x1 = *(px++); 00215 00216 /* Perform the multiply-accumulates */ 00217 acc0 += x2 * c0; 00218 acc1 += x3 * c0; 00219 acc2 += x0 * c0; 00220 acc3 += x1 * c0; 00221 00222 /* Read the b[numTaps-4] coefficient */ 00223 c0 = *(pb++); 00224 00225 /* Read x[n-numTaps-6] sample */ 00226 x2 = *(px++); 00227 00228 /* Perform the multiply-accumulates */ 00229 acc0 += x3 * c0; 00230 acc1 += x0 * c0; 00231 acc2 += x1 * c0; 00232 acc3 += x2 * c0; 00233 00234 tapCnt--; 00235 } 00236 00237 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00238 tapCnt = numTaps % 0x4u; 00239 00240 while(tapCnt > 0u) 00241 { 00242 /* Read coefficients */ 00243 c0 = *(pb++); 00244 00245 /* Fetch 1 state variable */ 00246 x3 = *(px++); 00247 00248 /* Perform the multiply-accumulates */ 00249 acc0 += x0 * c0; 00250 acc1 += x1 * c0; 00251 acc2 += x2 * c0; 00252 acc3 += x3 * c0; 00253 00254 /* Reuse the present sample states for next sample */ 00255 x0 = x1; 00256 x1 = x2; 00257 x2 = x3; 00258 00259 /* Decrement the loop counter */ 00260 tapCnt--; 00261 } 00262 00263 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00264 pState = pState + 4; 00265 00266 /* The results in the 4 accumulators, store in the destination buffer. */ 00267 *pDst++ = acc0; 00268 *pDst++ = acc1; 00269 *pDst++ = acc2; 00270 *pDst++ = acc3; 00271 00272 blkCnt--; 00273 } 00274 00275 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00276 ** No loop unrolling is used. */ 00277 blkCnt = blockSize % 0x4u; 00278 00279 while(blkCnt > 0u) 00280 { 00281 /* Copy one sample at a time into state buffer */ 00282 *pStateCurnt++ = *pSrc++; 00283 00284 /* Set the accumulator to zero */ 00285 acc0 = 0.0f; 00286 00287 /* Initialize state pointer */ 00288 px = pState; 00289 00290 /* Initialize Coefficient pointer */ 00291 pb = (pCoeffs); 00292 00293 /* Initialize loop counter */ 00294 i = numTaps; 00295 00296 /* Perform the multiply-accumulates */ 00297 do 00298 { 00299 acc0 += *px++ * *pb++; 00300 i--; 00301 00302 } while(i > 0u); 00303 00304 /* The result is store in the destination buffer. */ 00305 *pDst++ = acc0; 00306 00307 /* Advance state pointer by 1 for the next sample */ 00308 pState = pState + 1; 00309 00310 blkCnt--; 00311 } 00312 00313 /* Processing is complete. 00314 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00315 ** This prepares the state buffer for the next function call. */ 00316 00317 /* Points to the start of the state buffer */ 00318 pStateCurnt = S->pState; 00319 00320 tapCnt = (numTaps - 1u) >> 2u; 00321 00322 /* copy data */ 00323 while(tapCnt > 0u) 00324 { 00325 00326 x1 = *pState++; 00327 x2 = *pState++; 00328 *pStateCurnt++ = x1; 00329 *pStateCurnt++ = x2; 00330 00331 x1 = *pState++; 00332 x2 = *pState++; 00333 *pStateCurnt++ = x1; 00334 *pStateCurnt++ = x2; 00335 00336 00337 /* Decrement the loop counter */ 00338 tapCnt--; 00339 } 00340 00341 /* Calculate remaining number of copies */ 00342 tapCnt = (numTaps - 1u) % 0x4u; 00343 00344 /* Copy the remaining q31_t data */ 00345 while(tapCnt > 0u) 00346 { 00347 *pStateCurnt++ = *pState++; 00348 00349 /* Decrement the loop counter */ 00350 tapCnt--; 00351 } 00352 } 00353