00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "arm_math.h"
00026
00053 void arm_fir_q7(
00054 const arm_fir_instance_q7 * S,
00055 q7_t * pSrc,
00056 q7_t * pDst,
00057 uint32_t blockSize)
00058 {
00059 uint32_t numTaps = S->numTaps;
00060 uint32_t tapCnt, blkCnt, i;
00061 q7_t *pState = S->pState;
00062 q7_t *pCoeffs = S->pCoeffs;
00063 q7_t *px7, *pb7;
00064 q7_t *pStateCurnt;
00065 q31_t x20, b20, x31, b31, xin1, xin2;
00066 q31_t acc0, acc1, acc2, acc3;
00067 q31_t x0;
00068 q7_t x1, x2, x3, x4, c0;
00069
00070
00071
00072 pStateCurnt = S->pState + (numTaps - 1u);
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082 blkCnt = blockSize >> 2;
00083
00084
00085
00086 while(blkCnt > 0u)
00087 {
00088
00089 *pStateCurnt++ = *pSrc++;
00090 *pStateCurnt++ = *pSrc++;
00091 *pStateCurnt++ = *pSrc++;
00092 *pStateCurnt++ = *pSrc++;
00093
00094
00095 acc0 = 0;
00096 acc1 = 0;
00097 acc2 = 0;
00098 acc3 = 0;
00099
00100
00101
00102 px7 = pState;
00103
00104
00105
00106 pb7 = pCoeffs;
00107
00108
00109
00110 xin1 = *__SIMD32(px7)++;
00111
00112
00113 tapCnt = numTaps >> 2u;
00114
00115
00116 i = tapCnt;
00117
00118
00119
00120 while(i > 0u)
00121 {
00122
00123
00124 xin2 = *__SIMD32(px7)++;
00125
00126
00127 b20 = *__SIMD32(pb7)++;;
00128
00129 #ifdef CCS
00130
00131
00132 x20 = __SXTB16(xin1, 0);
00133
00134
00135 x31 = __SXTB16(xin1, 8);
00136
00137
00138 b31 = __SXTB16(b20, 8);
00139
00140
00141 b20 = __SXTB16(b20, 0);
00142
00143 #else
00144
00145
00146 x20 = __SXTB16(xin1);
00147
00148
00149 x31 = __SXTB16(__ROR(xin1, 8));
00150
00151
00152 b31 = __SXTB16(__ROR(b20, 8));
00153
00154
00155 b20 = __SXTB16(b20);
00156
00157 #endif
00158 acc0 = __SMLAD(x20, b20, acc0);
00159
00160
00161 acc0 = __SMLAD(x31, b31, acc0);
00162
00163
00164
00165
00166
00167 #ifndef ARM_MATH_BIG_ENDIAN
00168 x0 = xin1 >> 8;
00169 x0 = (x0 & 0x00FFFFFF)|((xin2 & 0xFF) << 24);
00170 #else
00171 x0 = xin1 << 8;
00172 x0 = (x0 & 0xFFFFFF00)|((xin2 & 0xFF000000) >> 24);
00173 #endif
00174
00175
00176 #ifdef CCS
00177
00178 x20 = __SXTB16(x0, 0);
00179 x31 = __SXTB16(x0, 8);
00180
00181 #else
00182
00183 x20 = __SXTB16(x0);
00184 x31 = __SXTB16(__ROR(x0, 8));
00185
00186 #endif
00187
00188 acc1 = __SMLAD(x20, b20, acc1);
00189 acc1 = __SMLAD(x31, b31, acc1);
00190
00191
00192
00193
00194 #ifndef ARM_MATH_BIG_ENDIAN
00195 x0 = xin1 >> 16;
00196 x0 = (x0 & 0x0000FFFF)|((xin2 & 0xFFFF) << 16);
00197 #else
00198 x0 = xin1 << 16;
00199 x0 = (x0 & 0xFFFF0000)|((xin2 & 0xFFFF0000) >> 16);
00200
00201 #endif
00202
00203
00204 #ifdef CCS
00205
00206 x20 = __SXTB16(x0, 0);
00207 x31 = __SXTB16(x0, 8);
00208
00209 #else
00210
00211 x20 = __SXTB16(x0);
00212 x31 = __SXTB16(__ROR(x0, 8));
00213
00214 #endif
00215
00216
00217 acc2 = __SMLAD(x20, b20, acc2);
00218 acc2 = __SMLAD(x31, b31, acc2);
00219
00220
00221
00222
00223 #ifndef ARM_MATH_BIG_ENDIAN
00224 x0 = xin1>>24;
00225 x0 = (x0& 0x000000FF)|((xin2 & 0xFFFFFF) << 8);
00226 #else
00227 x0 = xin1 << 24;
00228 x0 = (x0 & 0xFF000000)|((xin2 & 0xFFFFFF00) >> 8);
00229 #endif
00230
00231
00232 #ifdef CCS
00233
00234 x20 = __SXTB16(x0, 0);
00235 x31 = __SXTB16(x0, 8);
00236
00237 #else
00238
00239 x20 = __SXTB16(x0);
00240 x31 = __SXTB16(__ROR(x0, 8));
00241
00242 #endif
00243
00244
00245 acc3=__SMLAD(x20, b20, acc3);
00246 acc3=__SMLAD(x31, b31, acc3);
00247
00248
00249 xin1 = xin2;
00250
00251
00252
00253 i--;
00254 }
00255
00256
00257 px7 = px7 - 4u;
00258
00259
00260 x1 = (*px7++);
00261 x2 = (*px7++);
00262 x3 = (*px7++);
00263
00264
00265
00266 i = numTaps - (tapCnt * 4u);
00267 while(i > 0u)
00268 {
00269
00270 c0 = (*pb7++);
00271
00272
00273 x4 = (*px7++);
00274
00275
00276 acc0 += ((q15_t) x1 * c0);
00277 acc1 += ((q15_t) x2 * c0);
00278 acc2 += ((q15_t) x3 * c0);
00279 acc3 += ((q15_t) x4 * c0);
00280
00281
00282 x1 = x2;
00283 x2 = x3;
00284 x3 = x4;
00285
00286
00287 i--;
00288 }
00289
00290 #ifdef CCS
00291
00292 acc0 = __SSATA(acc0, 7u, 8);
00293
00294 *pDst++ = acc0;
00295 acc1 = __SSATA(acc1, 7u, 8);
00296 *pDst++ = acc1;
00297 acc2 = __SSATA(acc2, 7u, 8);
00298 *pDst++ = acc2;
00299 acc3 = __SSATA(acc3, 7u, 8);
00300 *pDst++ = acc3;
00301 #else
00302
00303 acc0 = __SSAT((acc0 >> 7u), 8);
00304
00305 *pDst++ = acc0;
00306 acc1 = __SSAT((acc1 >> 7u), 8);
00307 *pDst++ = acc1;
00308 acc2 = __SSAT((acc2 >> 7u), 8);
00309 *pDst++ = acc2;
00310 acc3 = __SSAT((acc3 >> 7u), 8);
00311 *pDst++ = acc3;
00312 #endif
00313
00314
00315 pState = pState + 4;
00316
00317
00318 blkCnt--;
00319 }
00320
00321
00322
00323 blkCnt = blockSize % 4u;
00324
00325 while(blkCnt > 0u)
00326 {
00327
00328 *pStateCurnt++ = *pSrc++;
00329
00330
00331 acc0 = 0;
00332
00333
00334 px7 = pState;
00335
00336
00337 pb7 = (pCoeffs);
00338
00339 tapCnt = numTaps;
00340
00341
00342 do
00343 {
00344 acc0 += (q15_t) (*px7++) * (*(pb7++));
00345
00346
00347 tapCnt--;
00348
00349 } while(tapCnt > 0u);
00350
00351
00352
00353 #ifdef CCS
00354 *pDst++ = __SSATA(acc0, 7u, 8);
00355 #else
00356 *pDst++ = __SSAT((acc0 >> 7u), 8);
00357 #endif
00358
00359
00360 pState = pState + 1u;
00361
00362
00363 blkCnt--;
00364 }
00365
00366
00367
00368
00369
00370
00371 pStateCurnt = S->pState;
00372
00373 tapCnt = (numTaps - 1u) >> 3u;
00374
00375
00376 while(tapCnt > 0u)
00377 {
00378
00379 *pStateCurnt++ = *pState++;
00380 *pStateCurnt++ = *pState++;
00381 *pStateCurnt++ = *pState++;
00382 *pStateCurnt++ = *pState++;
00383
00384 *pStateCurnt++ = *pState++;
00385 *pStateCurnt++ = *pState++;
00386 *pStateCurnt++ = *pState++;
00387 *pStateCurnt++ = *pState++;
00388
00389
00390 tapCnt--;
00391 }
00392
00393
00394 tapCnt = (numTaps - 1u) % 8u;
00395
00396
00397 while(tapCnt > 0u)
00398 {
00399 *pStateCurnt++ = *pState++;
00400
00401
00402 tapCnt--;
00403 }
00404
00405 }
00406