00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_fir_q31.c 00009 * 00010 * Description: Q31 FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-R4/R5 00013 * 00014 * Version 1.0.0 2011/03/08 00015 * Alpha release. 00016 * 00017 * Version 1.0.1 2011/09/30 00018 * Beta release. 00019 * 00020 * Version 2.0.0 2011/12/15 00021 * Final release. 00022 * 00023 * -------------------------------------------------------------------- */ 00024 00025 #include "arm_math.h" 00026 00056 void arm_fir_q31( 00057 const arm_fir_instance_q31 * S, 00058 q31_t * pSrc, 00059 q31_t * pDst, 00060 uint32_t blockSize) 00061 { 00062 q31_t *pState = S->pState; /* State pointer */ 00063 q31_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00064 q31_t *pStateCurnt; /* Points to the current sample of the state */ 00065 q31_t x0, x1, x2; /* Temporary variables to hold state */ 00066 q31_t c0; /* Temporary variable to hold coefficient value */ 00067 q31_t *px; /* Temporary pointer for state */ 00068 q31_t *pb; /* Temporary pointer for coefficient buffer */ 00069 q63_t acc0, acc1, acc2; /* Accumulators */ 00070 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00071 uint32_t i, tapCnt, blkCnt, tapCntN3; /* Loop counters */ 00072 00073 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00074 /* pStateCurnt points to the location where the new input data should be written */ 00075 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00076 00077 /* Apply loop unrolling and compute 4 output values simultaneously. 00078 * The variables acc0 ... acc3 hold output values that are being computed: 00079 * 00080 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00081 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00082 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00083 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00084 */ 00085 blkCnt = blockSize / 3; 00086 blockSize = blockSize - (3*blkCnt); 00087 00088 tapCnt = numTaps / 3; 00089 tapCntN3 = numTaps - (3*tapCnt); 00090 00091 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00092 ** a second loop below computes the remaining 1 to 3 samples. */ 00093 while(blkCnt > 0u) 00094 { 00095 /* Copy four new input samples into the state buffer */ 00096 *pStateCurnt++ = *pSrc++; 00097 *pStateCurnt++ = *pSrc++; 00098 *pStateCurnt++ = *pSrc++; 00099 00100 /* Set all accumulators to zero */ 00101 acc0 = 0; 00102 acc1 = 0; 00103 acc2 = 0; 00104 00105 /* Initialize state pointer */ 00106 px = pState; 00107 00108 /* Initialize coefficient pointer */ 00109 pb = pCoeffs; 00110 00111 /* Read the first three samples from the state buffer: 00112 * x[n-numTaps], x[n-numTaps-1] */ 00113 x0 = *(px++); 00114 x1 = *(px++); 00115 00116 /* Loop unrolling. Process 4 taps at a time. */ 00117 i = tapCnt; 00118 00119 while(i > 0u) 00120 { 00121 /* Read the b[numTaps] coefficient */ 00122 c0 = *pb; 00123 00124 /* Read x[n-numTaps-2] sample */ 00125 x2 = *(px++); 00126 00127 /* Perform the multiply-accumulates */ 00128 acc0 += ((q63_t) x0 * c0); 00129 acc1 += ((q63_t) x1 * c0); 00130 acc2 += ((q63_t) x2 * c0); 00131 00132 c0 = *(pb + 1u); 00133 x0 = *(px++); 00134 00135 /* Perform the multiply-accumulates */ 00136 acc0 += ((q63_t) x1 * c0); 00137 acc1 += ((q63_t) x2 * c0); 00138 acc2 += ((q63_t) x0 * c0); 00139 00140 c0 = *(pb + 2u); 00141 x1 = *(px++); 00142 pb += 3u; 00143 00144 acc0 += ((q63_t) x2 * c0); 00145 acc1 += ((q63_t) x0 * c0); 00146 acc2 += ((q63_t) x1 * c0); 00147 00148 i--; 00149 } 00150 00151 /* If the filter length is not a multiple of 2, compute the remaining filter taps */ 00152 00153 i=tapCntN3; 00154 00155 while(i > 0u) 00156 { 00157 /* Read coefficients */ 00158 c0 = *(pb++); 00159 00160 /* Fetch 1 state variable */ 00161 x2 = *(px++); 00162 00163 /* Perform the multiply-accumulates */ 00164 acc0 += ((q63_t) x0 * c0); 00165 acc1 += ((q63_t) x1 * c0); 00166 acc2 += ((q63_t) x2 * c0); 00167 00168 /* Reuse the present sample states for next sample */ 00169 x0 = x1; 00170 x1 = x2; 00171 00172 /* Decrement the loop counter */ 00173 i--; 00174 } 00175 00176 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00177 pState = pState + 3; 00178 00179 /* The results in the 4 accumulators are in 2.30 format. Convert to 1.31 00180 ** Then store the 4 outputs in the destination buffer. */ 00181 *pDst++ = (q31_t) (acc0 >> 31u); 00182 *pDst++ = (q31_t) (acc1 >> 31u); 00183 *pDst++ = (q31_t) (acc2 >> 31u); 00184 00185 /* Decrement the samples loop counter */ 00186 blkCnt--; 00187 } 00188 00189 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00190 ** No loop unrolling is used. */ 00191 00192 while(blockSize > 0u) 00193 { 00194 /* Copy one sample at a time into state buffer */ 00195 *pStateCurnt++ = *pSrc++; 00196 00197 /* Set the accumulator to zero */ 00198 acc0 = 0; 00199 00200 /* Initialize state pointer */ 00201 px = pState; 00202 00203 /* Initialize Coefficient pointer */ 00204 pb = (pCoeffs); 00205 00206 i = numTaps; 00207 00208 /* Perform the multiply-accumulates */ 00209 do 00210 { 00211 acc0 += (q63_t) * (px++) * (*(pb++)); 00212 i--; 00213 } while(i > 0u); 00214 00215 /* The result is in 2.62 format. Convert to 1.31 00216 ** Then store the output in the destination buffer. */ 00217 *pDst++ = (q31_t) (acc0 >> 31u); 00218 00219 /* Advance state pointer by 1 for the next sample */ 00220 pState = pState + 1; 00221 00222 /* Decrement the samples loop counter */ 00223 blockSize--; 00224 } 00225 00226 /* Processing is complete. 00227 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00228 ** This prepares the state buffer for the next function call. */ 00229 00230 /* Points to the start of the state buffer */ 00231 pStateCurnt = S->pState; 00232 00233 tapCnt = (numTaps - 1u) >> 2u; 00234 00235 /* copy data */ 00236 while(tapCnt > 0u) 00237 { 00238 *pStateCurnt++ = *pState++; 00239 *pStateCurnt++ = *pState++; 00240 *pStateCurnt++ = *pState++; 00241 *pStateCurnt++ = *pState++; 00242 00243 /* Decrement the loop counter */ 00244 tapCnt--; 00245 } 00246 00247 /* Calculate remaining number of copies */ 00248 tapCnt = (numTaps - 1u) % 0x4u; 00249 00250 /* Copy the remaining q31_t data */ 00251 while(tapCnt > 0u) 00252 { 00253 *pStateCurnt++ = *pState++; 00254 00255 /* Decrement the loop counter */ 00256 tapCnt--; 00257 } 00258 00259 } 00260