Cortex-R DSP Software Library: arm_fir

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------      
00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 
00003 *      
00004 * $Date:        15. December 2011   
00005 * $Revision:    V2.0.0  
00006 *      
00007 * Project:      Cortex-R DSP Library 
00008 * Title:        arm_fir_f32.c      
00009 *      
00010 * Description:  Floating-point FIR filter processing function.      
00011 *      
00012 * Target Processor:          Cortex-R4/R5
00013 *
00014 * Version 1.0.0 2011/03/08
00015 *     Alpha release.
00016 *
00017 * Version 1.0.1 2011/09/30
00018 *     Beta release.
00019 *
00020 * Version 2.0.0 2011/12/15
00021 *     Final release. 
00022 * 
00023 * -------------------------------------------------------------------- */     
00024      
00025 #include "arm_math.h"     
00026      
00116 void arm_fir_f32(     
00117   const arm_fir_instance_f32 * S,     
00118   float32_t * pSrc,     
00119   float32_t * pDst,     
00120   uint32_t blockSize)     
00121 {     
00122   float32_t *pState = S->pState;                 /* State pointer */     
00123   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */     
00124   float32_t *pStateCurnt;                        /* Points to the current sample of the state */     
00125   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */     
00126   float32_t acc0, acc1, acc2, acc3;              /* Accumulators */     
00127   float32_t x0, x1, x2, x3, c0;                  /* Temporary variables to hold state and coefficient values */     
00128   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */     
00129   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */    
00130       
00131      
00132   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */     
00133   /* pStateCurnt points to the location where the new input data should be written */     
00134   pStateCurnt = &(S->pState[(numTaps - 1u)]);     
00135      
00136   /* Apply loop unrolling and compute 4 output values simultaneously.      
00137    * The variables acc0 ... acc3 hold output values that are being computed:      
00138    *      
00139    *    acc0 =  b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0]      
00140    *    acc1 =  b[numTaps-1] * x[n-numTaps] +   b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1]      
00141    *    acc2 =  b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] +   b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2]      
00142    *    acc3 =  b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps]   +...+ b[0] * x[3]      
00143    */     
00144   blkCnt = blockSize >> 2;     
00145      
00146   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.      
00147    ** a second loop below computes the remaining 1 to 3 samples. */     
00148   while(blkCnt > 0u)     
00149   {     
00150     /* Copy four new input samples into the state buffer */   
00151     *pStateCurnt++ = *pSrc++;     
00152     *pStateCurnt++ = *pSrc++;     
00153     *pStateCurnt++ = *pSrc++;     
00154     *pStateCurnt++ = *pSrc++;     
00155      
00156     /* Set all accumulators to zero */     
00157     acc0 = 0.0f;     
00158     acc1 = 0.0f;     
00159     acc2 = 0.0f;     
00160     acc3 = 0.0f;     
00161      
00162     /* Initialize state pointer */     
00163     px = pState;     
00164      
00165     /* Initialize coeff pointer */     
00166     pb = (pCoeffs);     
00167      
00168     /* Read the first three samples from the state buffer:  x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */     
00169     x0 = *px++;     
00170     x1 = *px++;     
00171     x2 = *px++;     
00172      
00173     /* Loop unrolling.  Process 4 taps at a time. */     
00174     tapCnt = numTaps >> 2u;     
00175      
00176     /* Loop over the number of taps.  Unroll by a factor of 4.      
00177      ** Repeat until we've computed numTaps-4 coefficients. */     
00178     while(tapCnt > 0u)     
00179     {     
00180       /* Read the b[numTaps-1] coefficient */     
00181       c0 = *(pb++);     
00182      
00183       /* Read x[n-numTaps-3] sample */     
00184       x3 = *(px++);     
00185      
00186       /* acc0 +=  b[numTaps-1] * x[n-numTaps] */     
00187       acc0 += x0 * c0;     
00188      
00189       /* acc1 +=  b[numTaps-1] * x[n-numTaps-1] */     
00190       acc1 += x1 * c0;     
00191      
00192       /* acc2 +=  b[numTaps-1] * x[n-numTaps-2] */     
00193       acc2 += x2 * c0;     
00194      
00195       /* acc3 +=  b[numTaps-1] * x[n-numTaps-3] */     
00196       acc3 += x3 * c0;     
00197      
00198       /* Read the b[numTaps-2] coefficient */     
00199       c0 = *(pb++);     
00200      
00201       /* Read x[n-numTaps-4] sample */     
00202       x0 = *(px++);     
00203      
00204       /* Perform the multiply-accumulate */     
00205       acc0 += x1 * c0;     
00206       acc1 += x2 * c0;     
00207       acc2 += x3 * c0;     
00208       acc3 += x0 * c0;     
00209      
00210       /* Read the b[numTaps-3] coefficient */     
00211       c0 = *(pb++);     
00212      
00213       /* Read x[n-numTaps-5] sample */     
00214       x1 = *(px++);     
00215      
00216       /* Perform the multiply-accumulates */     
00217       acc0 += x2 * c0;     
00218       acc1 += x3 * c0;     
00219       acc2 += x0 * c0;     
00220       acc3 += x1 * c0;     
00221      
00222       /* Read the b[numTaps-4] coefficient */     
00223       c0 = *(pb++);     
00224      
00225       /* Read x[n-numTaps-6] sample */     
00226       x2 = *(px++);     
00227      
00228       /* Perform the multiply-accumulates */     
00229       acc0 += x3 * c0;     
00230       acc1 += x0 * c0;     
00231       acc2 += x1 * c0;     
00232       acc3 += x2 * c0;     
00233      
00234       tapCnt--;     
00235     }     
00236      
00237     /* If the filter length is not a multiple of 4, compute the remaining filter taps */     
00238     tapCnt = numTaps % 0x4u;     
00239      
00240     while(tapCnt > 0u)     
00241     {     
00242       /* Read coefficients */     
00243       c0 = *(pb++);     
00244      
00245       /* Fetch 1 state variable */     
00246       x3 = *(px++);     
00247      
00248       /* Perform the multiply-accumulates */     
00249       acc0 += x0 * c0;     
00250       acc1 += x1 * c0;     
00251       acc2 += x2 * c0;     
00252       acc3 += x3 * c0;     
00253      
00254       /* Reuse the present sample states for next sample */     
00255       x0 = x1;     
00256       x1 = x2;     
00257       x2 = x3;     
00258      
00259       /* Decrement the loop counter */     
00260       tapCnt--;     
00261     }     
00262      
00263     /* Advance the state pointer by 4 to process the next group of 4 samples */     
00264     pState = pState + 4;     
00265      
00266     /* The results in the 4 accumulators, store in the destination buffer. */     
00267     *pDst++ = acc0;     
00268     *pDst++ = acc1;     
00269     *pDst++ = acc2;     
00270     *pDst++ = acc3;     
00271      
00272     blkCnt--;     
00273   }     
00274      
00275   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.      
00276    ** No loop unrolling is used. */     
00277   blkCnt = blockSize % 0x4u;     
00278      
00279   while(blkCnt > 0u)     
00280   {     
00281     /* Copy one sample at a time into state buffer */     
00282     *pStateCurnt++ = *pSrc++;     
00283      
00284     /* Set the accumulator to zero */     
00285     acc0 = 0.0f;     
00286      
00287     /* Initialize state pointer */     
00288     px = pState;     
00289      
00290     /* Initialize Coefficient pointer */     
00291     pb = (pCoeffs);     
00292      
00293     /* Initialize loop counter */  
00294     i = numTaps;     
00295      
00296     /* Perform the multiply-accumulates */     
00297     do     
00298     {     
00299       acc0 += *px++ * *pb++;     
00300       i--;     
00301      
00302     } while(i > 0u);     
00303      
00304     /* The result is store in the destination buffer. */     
00305     *pDst++ = acc0;     
00306      
00307     /* Advance state pointer by 1 for the next sample */     
00308     pState = pState + 1;     
00309      
00310     blkCnt--;     
00311   }     
00312      
00313   /* Processing is complete.      
00314    ** Now copy the last numTaps - 1 samples to the satrt of the state buffer.      
00315    ** This prepares the state buffer for the next function call. */     
00316      
00317   /* Points to the start of the state buffer */     
00318   pStateCurnt = S->pState;     
00319      
00320   tapCnt = (numTaps - 1u) >> 2u;     
00321      
00322   /* copy data */     
00323   while(tapCnt > 0u)     
00324   {   
00325      
00326     x1 =  *pState++;    
00327     x2 =  *pState++;   
00328     *pStateCurnt++ = x1;     
00329     *pStateCurnt++ = x2;   
00330        
00331     x1 =  *pState++;    
00332     x2 =  *pState++;   
00333     *pStateCurnt++ = x1;     
00334     *pStateCurnt++ = x2;       
00335    
00336      
00337     /* Decrement the loop counter */     
00338     tapCnt--;     
00339   }     
00340      
00341   /* Calculate remaining number of copies */     
00342   tapCnt = (numTaps - 1u) % 0x4u;     
00343      
00344   /* Copy the remaining q31_t data */     
00345   while(tapCnt > 0u)     
00346   {     
00347     *pStateCurnt++ = *pState++;     
00348      
00349     /* Decrement the loop counter */     
00350     tapCnt--;     
00351   }     
00352 }     
00353