00001 /*----------------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_fir_interpolate_q15.c 00009 * 00010 * Description: Q15 FIR interpolation. 00011 * 00012 * Target Processor: Cortex-R4/R5 00013 * 00014 * Version 1.0.0 2011/03/08 00015 * Alpha release. 00016 * 00017 * Version 1.0.1 2011/09/30 00018 * Beta release. 00019 * 00020 * Version 2.0.0 2011/12/15 00021 * Final release. 00022 * 00023 * ---------------------------------------------------------------------------*/ 00024 #include "arm_math.h" 00025 00054 void arm_fir_interpolate_q15( 00055 const arm_fir_interpolate_instance_q15 * S, 00056 q15_t * pSrc, 00057 q15_t * pDst, 00058 uint32_t blockSize) 00059 { 00060 q15_t *pState = S->pState; /* State pointer */ 00061 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00062 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00063 q15_t *ptr1, *ptr2; /* Temporary pointers for state and coefficient buffers */ 00064 q63_t sum0; /* Accumulators */ 00065 q15_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00066 uint32_t i, blkCnt, j, tapCnt; /* Loop counters */ 00067 uint16_t phaseLen = S->phaseLength; /* Length of each polyphase filter component */ 00068 uint32_t blkCntN2; 00069 q63_t acc0, acc1; 00070 q15_t x1; 00071 00072 /* S->pState buffer contains previous frame (phaseLen - 1) samples */ 00073 /* pStateCurnt points to the location where the new input data should be written */ 00074 pStateCurnt = S->pState + ((q31_t) phaseLen - 1); 00075 00076 /* Initialise blkCnt */ 00077 blkCnt = blockSize / 2; 00078 blkCntN2 = blockSize - (2*blkCnt); 00079 00080 /* Samples loop unrolled by 2 */ 00081 while(blkCnt > 0u) 00082 { 00083 /* Copy new input sample into the state buffer */ 00084 *pStateCurnt++ = *pSrc++; 00085 *pStateCurnt++ = *pSrc++; 00086 00087 /* Address modifier index of coefficient buffer */ 00088 j = 1u; 00089 00090 /* Loop over the Interpolation factor. */ 00091 i = (S->L); 00092 00093 while(i > 0u) 00094 { 00095 /* Set accumulator to zero */ 00096 acc0 = 0; 00097 acc1 = 0; 00098 00099 /* Initialize state pointer */ 00100 ptr1 = pState; 00101 00102 /* Initialize coefficient pointer */ 00103 ptr2 = pCoeffs + (S->L - j); 00104 00105 /* Loop over the polyPhase length. Unroll by a factor of 4. 00106 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00107 tapCnt = phaseLen >> 2u; 00108 00109 x0 = *(ptr1++); 00110 00111 while(tapCnt > 0u) 00112 { 00113 00114 /* Read the input sample */ 00115 x1 = *(ptr1++); 00116 00117 /* Read the coefficient */ 00118 c0 = *(ptr2); 00119 00120 /* Perform the multiply-accumulate */ 00121 acc0 += (q63_t)x0 * c0; 00122 acc1 += (q63_t)x1 * c0; 00123 00124 00125 /* Read the coefficient */ 00126 c0 = *(ptr2 + S->L); 00127 00128 /* Read the input sample */ 00129 x0 = *(ptr1++); 00130 00131 /* Perform the multiply-accumulate */ 00132 acc0 += (q63_t)x1 * c0; 00133 acc1 += (q63_t)x0 * c0; 00134 00135 00136 /* Read the coefficient */ 00137 c0 = *(ptr2 + S->L * 2); 00138 00139 /* Read the input sample */ 00140 x1 = *(ptr1++); 00141 00142 /* Perform the multiply-accumulate */ 00143 acc0 += (q63_t)x0 * c0; 00144 acc1 += (q63_t)x1 * c0; 00145 00146 /* Read the coefficient */ 00147 c0 = *(ptr2 + S->L * 3); 00148 00149 /* Read the input sample */ 00150 x0 = *(ptr1++); 00151 00152 /* Perform the multiply-accumulate */ 00153 acc0 += (q63_t)x1 * c0; 00154 acc1 += (q63_t)x0 * c0; 00155 00156 00157 /* Upsampling is done by stuffing L-1 zeros between each sample. 00158 * So instead of multiplying zeros with coefficients, 00159 * Increment the coefficient pointer by interpolation factor times. */ 00160 ptr2 += 4 * S->L; 00161 00162 /* Decrement the loop counter */ 00163 tapCnt--; 00164 } 00165 00166 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00167 tapCnt = phaseLen % 0x4u; 00168 00169 while(tapCnt > 0u) 00170 { 00171 00172 /* Read the input sample */ 00173 x1 = *(ptr1++); 00174 00175 /* Read the coefficient */ 00176 c0 = *(ptr2); 00177 00178 /* Perform the multiply-accumulate */ 00179 acc0 += (q63_t)x0 * c0; 00180 acc1 += (q63_t)x1 * c0; 00181 00182 /* Increment the coefficient pointer by interpolation factor times. */ 00183 ptr2 += S->L; 00184 00185 /* update states for next sample processing */ 00186 x0 = x1; 00187 00188 /* Decrement the loop counter */ 00189 tapCnt--; 00190 } 00191 00192 /* The result is in the accumulator, store in the destination buffer. */ 00193 #ifdef CCS 00194 *pDst = (q15_t) __SSATA(acc0, 15, 16); 00195 *(pDst + S->L) = (q15_t) __SSATA(acc1, 15, 16); 00196 #else 00197 *pDst = (q15_t) (__SSAT((acc0 >> 15), 16)); 00198 *(pDst + S->L) = (q15_t) (__SSAT((acc1 >> 15), 16)); 00199 #endif 00200 pDst++; 00201 00202 /* Increment the address modifier index of coefficient buffer */ 00203 j++; 00204 00205 /* Decrement the loop counter */ 00206 i--; 00207 } 00208 00209 /* Advance the state pointer by 1 00210 * to process the next group of interpolation factor number samples */ 00211 pState = pState + 2; 00212 00213 pDst += S->L; 00214 00215 /* Decrement the loop counter */ 00216 blkCnt--; 00217 } 00218 00219 /* If the blockSize is not a multiple of 2, compute any remaining output samples here. 00220 ** No loop unrolling is used. */ 00221 blkCnt = blkCntN2; 00222 00223 /* Loop over the blockSize. */ 00224 while(blkCnt > 0u) 00225 { 00226 /* Copy new input sample into the state buffer */ 00227 *pStateCurnt++ = *pSrc++; 00228 00229 /* Address modifier index of coefficient buffer */ 00230 j = 1u; 00231 00232 /* Loop over the Interpolation factor. */ 00233 i = S->L; 00234 while(i > 0u) 00235 { 00236 /* Set accumulator to zero */ 00237 sum0 = 0; 00238 00239 /* Initialize state pointer */ 00240 ptr1 = pState; 00241 00242 /* Initialize coefficient pointer */ 00243 ptr2 = pCoeffs + (S->L - j); 00244 00245 /* Loop over the polyPhase length. Unroll by a factor of 4. 00246 ** Repeat until we've computed numTaps-(4*S->L) coefficients. */ 00247 tapCnt = phaseLen >> 2; 00248 while(tapCnt > 0u) 00249 { 00250 00251 /* Read the coefficient */ 00252 c0 = *(ptr2); 00253 00254 /* Upsampling is done by stuffing L-1 zeros between each sample. 00255 * So instead of multiplying zeros with coefficients, 00256 * Increment the coefficient pointer by interpolation factor times. */ 00257 ptr2 += S->L; 00258 00259 /* Read the input sample */ 00260 x0 = *(ptr1++); 00261 00262 /* Perform the multiply-accumulate */ 00263 sum0 += (q63_t) x0 *c0; 00264 00265 /* Read the coefficient */ 00266 c0 = *(ptr2); 00267 00268 /* Increment the coefficient pointer by interpolation factor times. */ 00269 ptr2 += S->L; 00270 00271 /* Read the input sample */ 00272 x0 = *(ptr1++); 00273 00274 /* Perform the multiply-accumulate */ 00275 sum0 += (q63_t) x0 *c0; 00276 00277 /* Read the coefficient */ 00278 c0 = *(ptr2); 00279 00280 /* Increment the coefficient pointer by interpolation factor times. */ 00281 ptr2 += S->L; 00282 00283 /* Read the input sample */ 00284 x0 = *(ptr1++); 00285 00286 /* Perform the multiply-accumulate */ 00287 sum0 += (q63_t) x0 *c0; 00288 00289 /* Read the coefficient */ 00290 c0 = *(ptr2); 00291 00292 /* Increment the coefficient pointer by interpolation factor times. */ 00293 ptr2 += S->L; 00294 00295 /* Read the input sample */ 00296 x0 = *(ptr1++); 00297 00298 /* Perform the multiply-accumulate */ 00299 sum0 += (q63_t) x0 *c0; 00300 00301 /* Decrement the loop counter */ 00302 tapCnt--; 00303 } 00304 00305 /* If the polyPhase length is not a multiple of 4, compute the remaining filter taps */ 00306 tapCnt = phaseLen & 0x3u; 00307 00308 while(tapCnt > 0u) 00309 { 00310 /* Read the coefficient */ 00311 c0 = *(ptr2); 00312 00313 /* Increment the coefficient pointer by interpolation factor times. */ 00314 ptr2 += S->L; 00315 00316 /* Read the input sample */ 00317 x0 = *(ptr1++); 00318 00319 /* Perform the multiply-accumulate */ 00320 sum0 += (q63_t) x0 *c0; 00321 00322 /* Decrement the loop counter */ 00323 tapCnt--; 00324 } 00325 00326 /* The result is in the accumulator, store in the destination buffer. */ 00327 #ifdef CCS 00328 00329 *pDst++ = (q15_t) (__SSATA(sum0, 15, 16)); 00330 00331 #else 00332 00333 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16)); 00334 00335 #endif /* Increment the address modifier index of coefficient buffer */ 00336 j++; 00337 00338 /* Decrement the loop counter */ 00339 i--; 00340 } 00341 00342 /* Advance the state pointer by 1 00343 * to process the next group of interpolation factor number samples */ 00344 pState = pState + 1; 00345 00346 /* Decrement the loop counter */ 00347 blkCnt--; 00348 } 00349 00350 /* Processing is complete. 00351 ** Now copy the last phaseLen - 1 samples to the satrt of the state buffer. 00352 ** This prepares the state buffer for the next function call. */ 00353 00354 /* Points to the start of the state buffer */ 00355 pStateCurnt = S->pState; 00356 00357 tapCnt = (phaseLen - 1u) >> 2u; 00358 00359 /* copy data */ 00360 while(tapCnt > 0u) 00361 { 00362 *pStateCurnt++ = *pState++; 00363 *pStateCurnt++ = *pState++; 00364 *pStateCurnt++ = *pState++; 00365 *pStateCurnt++ = *pState++; 00366 00367 /* Decrement the loop counter */ 00368 tapCnt--; 00369 } 00370 00371 tapCnt = (phaseLen - 1u) % 0x04u; 00372 00373 /* copy data */ 00374 while(tapCnt > 0u) 00375 { 00376 *pStateCurnt++ = *pState++; 00377 00378 /* Decrement the loop counter */ 00379 tapCnt--; 00380 } 00381 00382 } 00383 00384