00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_biquad_cascade_df1_q31.c 00009 * 00010 * Description: Processing function for the 00011 * Q31 Biquad cascade filter 00012 * 00013 * Target Processor: Cortex-R4/R5 00014 * 00015 * Version 1.0.0 2011/03/08 00016 * Alpha release. 00017 * 00018 * Version 1.0.1 2011/09/30 00019 * Beta release. 00020 * 00021 * Version 2.0.0 2011/12/15 00022 * Final release. 00023 * 00024 * -------------------------------------------------------------------- */ 00025 00026 #include "arm_math.h" 00027 00058 void arm_biquad_cascade_df1_q31( 00059 const arm_biquad_casd_df1_inst_q31 * S, 00060 q31_t * pSrc, 00061 q31_t * pDst, 00062 uint32_t blockSize) 00063 { 00064 q63_t acc; /* accumulator */ 00065 uint32_t uShift = ((uint32_t) S->postShift + 1u); 00066 uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */ 00067 q31_t *pIn = pSrc; /* input pointer initialization */ 00068 q31_t *pOut = pDst; /* output pointer initialization */ 00069 q31_t *pState = S->pState; /* pState pointer initialization */ 00070 q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */ 00071 q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */ 00072 q31_t b0, b1, b2, a1, a2; /* Filter coefficients */ 00073 q31_t Xn; 00074 q31_t acc_l, acc_h; /* temporary input */ 00075 uint32_t sample, stage = S->numStages; /* loop counters */ 00076 00077 do 00078 { 00079 /* Reading the coefficients */ 00080 b0 = *pCoeffs++; 00081 b1 = *pCoeffs++; 00082 b2 = *pCoeffs++; 00083 a1 = *pCoeffs++; 00084 a2 = *pCoeffs++; 00085 00086 /* Reading the state values */ 00087 Xn1 = pState[0]; 00088 Xn2 = pState[1]; 00089 Yn1 = pState[2]; 00090 Yn2 = pState[3]; 00091 00092 /* Apply loop unrolling and compute 4 output values simultaneously. */ 00093 /* The variable acc hold output values that are being computed: 00094 * 00095 * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] 00096 */ 00097 00098 sample = blockSize >> 2u; 00099 00100 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00101 ** a second loop below computes the remaining 1 to 3 samples. */ 00102 while(sample > 0u) 00103 { 00104 /* Read the input */ 00105 Xn = *pIn; 00106 00107 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00108 00109 /* acc = b0 * x[n] */ 00110 acc = (q63_t) b1 * Xn1; 00111 /* acc += b1 * x[n-1] */ 00112 acc += (q63_t) b0 *Xn; 00113 /* acc += b[2] * x[n-2] */ 00114 acc += (q63_t) b2 *Xn2; 00115 /* acc += a1 * y[n-1] */ 00116 acc += (q63_t) a1 *Yn1; 00117 /* acc += a2 * y[n-2] */ 00118 acc += (q63_t) a2 *Yn2; 00119 00120 /* The result is converted to 1.31 , Yn2 variable is reused */ 00121 00122 /* Calc lower part of acc */ 00123 acc_l = acc & 0xffffffff; 00124 00125 /* Calc upper part of acc */ 00126 acc_h = (acc >> 32) & 0xffffffff; 00127 00128 /* Read the second input */ 00129 Xn2 = *(pIn + 1u); 00130 00131 /* Apply shift for lower part of acc and upper part of acc */ 00132 Yn2 = (uint32_t)acc_l >> lShift | acc_h << uShift; 00133 00134 /* Store the output in the destination buffer. */ 00135 *pOut = Yn2; 00136 00137 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00138 00139 /* acc = b0 * x[n] */ 00140 acc = (q63_t) b0 *Xn2; 00141 /* acc += b1 * x[n-1] */ 00142 acc += (q63_t) b1 *Xn; 00143 /* acc += b[2] * x[n-2] */ 00144 acc += (q63_t) b2 *Xn1; 00145 /* acc += a1 * y[n-1] */ 00146 acc += (q63_t) a1 *Yn2; 00147 /* acc += a2 * y[n-2] */ 00148 acc += (q63_t) a2 *Yn1; 00149 00150 /* The result is converted to 1.31, Yn1 variable is reused */ 00151 00152 /* Calc lower part of acc */ 00153 acc_l = acc & 0xffffffff; 00154 00155 /* Calc upper part of acc */ 00156 acc_h = (acc >> 32) & 0xffffffff; 00157 00158 /* Read the third input */ 00159 Xn1 = *(pIn + 2u); 00160 00161 /* Apply shift for lower part of acc and upper part of acc */ 00162 Yn1 = (uint32_t)acc_l >> lShift | acc_h << uShift; 00163 00164 /* Store the output in the destination buffer. */ 00165 *(pOut + 1u) = Yn1; 00166 00167 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00168 00169 /* acc = b0 * x[n] */ 00170 acc = (q63_t) b0 *Xn1; 00171 /* acc += b1 * x[n-1] */ 00172 acc += (q63_t) b1 *Xn2; 00173 /* acc += b[2] * x[n-2] */ 00174 acc += (q63_t) b2 *Xn; 00175 /* acc += a1 * y[n-1] */ 00176 acc += (q63_t) a1 *Yn1; 00177 /* acc += a2 * y[n-2] */ 00178 acc += (q63_t) a2 *Yn2; 00179 00180 00181 /* The result is converted to 1.31, Yn2 variable is reused */ 00182 00183 /* Calc lower part of acc */ 00184 acc_l = acc & 0xffffffff; 00185 00186 /* Calc upper part of acc */ 00187 acc_h = (acc >> 32) & 0xffffffff; 00188 00189 /* Read the forth input */ 00190 Xn = *(pIn + 3u); 00191 00192 /* Apply shift for lower part of acc and upper part of acc */ 00193 Yn2 = (uint32_t)acc_l >> lShift | acc_h << uShift; 00194 00195 /* Store the output in the destination buffer. */ 00196 *(pOut + 2u) = Yn2; 00197 00198 pIn += 4u; 00199 00200 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00201 00202 /* acc = b0 * x[n] */ 00203 acc = (q63_t) b0 *Xn; 00204 /* acc += b1 * x[n-1] */ 00205 acc += (q63_t) b1 *Xn1; 00206 /* acc += b[2] * x[n-2] */ 00207 acc += (q63_t) b2 *Xn2; 00208 /* acc += a1 * y[n-1] */ 00209 acc += (q63_t) a1 *Yn2; 00210 /* acc += a2 * y[n-2] */ 00211 acc += (q63_t) a2 *Yn1; 00212 00213 /* The result is converted to 1.31, Yn1 variable is reused */ 00214 00215 /* Calc lower part of acc */ 00216 acc_l = acc & 0xffffffff; 00217 00218 /* Calc upper part of acc */ 00219 acc_h = (acc >> 32) & 0xffffffff; 00220 00221 /* Every time after the output is computed state should be updated. */ 00222 /* The states should be updated as: */ 00223 /* Xn2 = Xn1 */ 00224 /* Xn1 = Xn */ 00225 /* Yn2 = Yn1 */ 00226 /* Yn1 = acc */ 00227 Xn2 = Xn1; 00228 Xn1 = Xn; 00229 00230 /* Apply shift for lower part of acc and upper part of acc */ 00231 Yn1 = (uint32_t)acc_l >> lShift | acc_h << uShift; 00232 00233 /* Store the output in the destination buffer. */ 00234 *(pOut + 3u) = Yn1; 00235 00236 pOut += 4u; 00237 00238 /* decrement the loop counter */ 00239 sample--; 00240 } 00241 00242 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00243 ** No loop unrolling is used. */ 00244 sample = (blockSize & 0x3u); 00245 00246 while(sample > 0u) 00247 { 00248 /* Read the input */ 00249 Xn = *pIn++; 00250 00251 /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */ 00252 00253 /* acc = b0 * x[n] */ 00254 acc = (q63_t) b0 *Xn; 00255 /* acc += b1 * x[n-1] */ 00256 acc += (q63_t) b1 *Xn1; 00257 /* acc += b[2] * x[n-2] */ 00258 acc += (q63_t) b2 *Xn2; 00259 /* acc += a1 * y[n-1] */ 00260 acc += (q63_t) a1 *Yn1; 00261 /* acc += a2 * y[n-2] */ 00262 acc += (q63_t) a2 *Yn2; 00263 00264 /* The result is converted to 1.31 */ 00265 acc = acc >> lShift; 00266 00267 /* Every time after the output is computed state should be updated. */ 00268 /* The states should be updated as: */ 00269 /* Xn2 = Xn1 */ 00270 /* Xn1 = Xn */ 00271 /* Yn2 = Yn1 */ 00272 /* Yn1 = acc */ 00273 Xn2 = Xn1; 00274 Xn1 = Xn; 00275 Yn2 = Yn1; 00276 Yn1 = (q31_t) acc; 00277 00278 /* Store the output in the destination buffer. */ 00279 *pOut++ = (q31_t) acc; 00280 00281 /* decrement the loop counter */ 00282 sample--; 00283 } 00284 00285 /* The first stage goes from the input buffer to the output buffer. */ 00286 /* Subsequent stages occur in-place in the output buffer */ 00287 pIn = pDst; 00288 00289 /* Reset to destination pointer */ 00290 pOut = pDst; 00291 00292 /* Store the updated state variables back into the pState array */ 00293 *pState++ = Xn1; 00294 *pState++ = Xn2; 00295 *pState++ = Yn1; 00296 *pState++ = Yn2; 00297 00298 } while(--stage); 00299 } 00300