00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "arm_math.h"
00026
00064 #ifndef UNALIGNED_SUPPORT_DISABLE
00065
00066 void arm_fir_fast_q15(
00067 const arm_fir_instance_q15 * S,
00068 q15_t * pSrc,
00069 q15_t * pDst,
00070 uint32_t blockSize)
00071 {
00072 q15_t *pState = S->pState;
00073 q15_t *pCoeffs = S->pCoeffs;
00074 q15_t *pStateCurnt;
00075 q15_t *px1;
00076 q15_t *pb;
00077 q31_t x0, x1, x2, x3, c0;
00078 q31_t acc0, acc1, acc2, acc3;
00079 uint32_t numTaps = S->numTaps;
00080 uint32_t tapCnt, blkCnt;
00081
00082
00083
00084
00085 pStateCurnt = &(S->pState[(numTaps - 1u)]);
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096 blkCnt = blockSize >> 2;
00097
00098
00099
00100 while(blkCnt > 0u)
00101 {
00102
00103
00104 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
00105 *__SIMD32(pStateCurnt)++ = *__SIMD32(pSrc)++;
00106
00107
00108 acc0 = 0;
00109 acc1 = 0;
00110 acc2 = 0;
00111 acc3 = 0;
00112
00113
00114 px1 = pState;
00115
00116
00117 pb = pCoeffs;
00118
00119
00120 x0 = _SIMD32_OFFSET(px1);
00121
00122
00123 x1 = _SIMD32_OFFSET(px1 + 1u);
00124
00125 px1 += 2u;
00126
00127
00128
00129 tapCnt = numTaps >> 2;
00130
00131 while(tapCnt > 0u)
00132 {
00133
00134 c0 = *__SIMD32(pb)++;
00135
00136
00137 acc0 = __SMLAD(x0, c0, acc0);
00138
00139
00140 acc1 = __SMLAD(x1, c0, acc1);
00141
00142
00143 x2 = _SIMD32_OFFSET(px1);
00144
00145
00146 x3 = _SIMD32_OFFSET(px1 + 1u);
00147
00148
00149 acc2 = __SMLAD(x2, c0, acc2);
00150
00151
00152 acc3 = __SMLAD(x3, c0, acc3);
00153
00154
00155 c0 = *__SIMD32(pb)++;
00156
00157
00158 acc0 = __SMLAD(x2, c0, acc0);
00159
00160
00161 acc1 = __SMLAD(x3, c0, acc1);
00162
00163
00164 x0 = _SIMD32_OFFSET(px1 + 2u);
00165
00166
00167 x1 = _SIMD32_OFFSET(px1 + 3u);
00168
00169
00170 acc2 = __SMLAD(x0, c0, acc2);
00171
00172
00173 acc3 = __SMLAD(x1, c0, acc3);
00174
00175 px1 += 4u;
00176
00177 tapCnt--;
00178
00179 }
00180
00181
00182
00183
00184 if((numTaps & 0x3u) != 0u)
00185 {
00186
00187 c0 = *__SIMD32(pb)++;
00188
00189
00190 x2 = _SIMD32_OFFSET(px1);
00191
00192 x3 = _SIMD32_OFFSET(px1 + 1u);
00193
00194
00195 acc0 = __SMLAD(x0, c0, acc0);
00196
00197 px1 += 2u;
00198
00199 acc1 = __SMLAD(x1, c0, acc1);
00200 acc2 = __SMLAD(x2, c0, acc2);
00201 acc3 = __SMLAD(x3, c0, acc3);
00202 }
00203
00204
00205
00206
00207 #ifndef ARM_MATH_BIG_ENDIAN
00208
00209 *__SIMD32(pDst)++ =
00210 #ifdef CCS
00211 __PKHBT(__SSATA(acc0, 15, 16), __SSATA(acc1, 15, 16), 16);
00212 *__SIMD32(pDst)++ =
00213 __PKHBT(__SSATA(acc2, 15, 16), __SSATA(acc3, 15, 16), 16);
00214 #else
00215 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
00216 *__SIMD32(pDst)++ =
00217 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
00218 #endif
00219
00220 #else
00221
00222 *__SIMD32(pDst)++ =
00223 #ifdef CCS
00224 __PKHBT(__SSATA(acc1, 15, 16), __SSATA(acc0, 15, 16), 16);
00225 *__SIMD32(pDst)++ =
00226 __PKHBT(__SSATA(acc3, 15, 16), __SSATA(acc2, 15, 16), 16);
00227 #else
00228
00229 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
00230 *__SIMD32(pDst)++ =
00231 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
00232
00233 #endif
00234
00235 #endif
00236
00237
00238
00239 pState = pState + 4;
00240
00241
00242 blkCnt--;
00243 }
00244
00245
00246
00247 blkCnt = blockSize % 0x4u;
00248 while(blkCnt > 0u)
00249 {
00250
00251 *pStateCurnt++ = *pSrc++;
00252
00253
00254 acc0 = 0;
00255
00256
00257 px1 = pState;
00258
00259
00260 pb = pCoeffs;
00261
00262 tapCnt = numTaps >> 1;
00263
00264 do
00265 {
00266
00267 c0 = *__SIMD32(pb)++;
00268 x0 = *__SIMD32(px1)++;
00269
00270 acc0 = __SMLAD(x0, c0, acc0);
00271 tapCnt--;
00272 }
00273 while(tapCnt > 0u);
00274
00275
00276
00277 #ifdef CCS
00278 *pDst++ = (q15_t) (__SSATA(acc0, 15, 16));
00279 #else
00280 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00281
00282 #endif
00283
00284 pState = pState + 1;
00285
00286
00287 blkCnt--;
00288 }
00289
00290
00291
00292
00293
00294
00295 pStateCurnt = S->pState;
00296
00297
00298 tapCnt = (numTaps - 1u) >> 3;
00299
00300 while(tapCnt > 0u)
00301 {
00302
00303
00304 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00305 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00306 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00307 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00308
00309 tapCnt--;
00310
00311 }
00312
00313
00314 tapCnt = (numTaps - 1u) % 8u;
00315
00316
00317 while(tapCnt > 0u)
00318 {
00319 *pStateCurnt++ = *pState++;
00320
00321
00322 tapCnt--;
00323 }
00324 }
00325
00326 #else
00327
00328
00329 void arm_fir_fast_q15(
00330 const arm_fir_instance_q15 * S,
00331 q15_t * pSrc,
00332 q15_t * pDst,
00333 uint32_t blockSize)
00334 {
00335 q15_t *pState = S->pState;
00336 q15_t *pCoeffs = S->pCoeffs;
00337 q15_t *pStateCurnt;
00338 q31_t acc0, acc1, acc2, acc3;
00339 q15_t *pb;
00340 q15_t *px;
00341 q31_t x0, x1, x2, c0;
00342 uint32_t numTaps = S->numTaps;
00343 uint32_t tapCnt, blkCnt;
00344
00345
00346
00347
00348 pStateCurnt = &(S->pState[(numTaps - 1u)]);
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359 blkCnt = blockSize >> 2;
00360
00361
00362
00363 while(blkCnt > 0u)
00364 {
00365
00366
00367 *pStateCurnt++ = *pSrc++;
00368 *pStateCurnt++ = *pSrc++;
00369 *pStateCurnt++ = *pSrc++;
00370 *pStateCurnt++ = *pSrc++;
00371
00372
00373
00374 acc0 = 0;
00375 acc1 = 0;
00376 acc2 = 0;
00377 acc3 = 0;
00378
00379
00380 px = pState;
00381
00382
00383 pb = pCoeffs;
00384
00385
00386 x0 = *__SIMD32(px)++;
00387
00388
00389 x2 = *__SIMD32(px)++;
00390
00391
00392
00393 tapCnt = numTaps >> 2;
00394
00395 while(tapCnt > 0)
00396 {
00397
00398 c0 = *__SIMD32(pb)++;
00399
00400
00401 acc0 = __SMLAD(x0, c0, acc0);
00402
00403
00404 acc2 = __SMLAD(x2, c0, acc2);
00405
00406
00407 #ifndef ARM_MATH_BIG_ENDIAN
00408 x1 = __PKHBT(x2, x0, 0);
00409 #else
00410 x1 = __PKHBT(x0, x2, 0);
00411 #endif
00412
00413
00414 x0 = _SIMD32_OFFSET(px);
00415
00416
00417 acc1 = __SMLADX(x1, c0, acc1);
00418
00419
00420 #ifndef ARM_MATH_BIG_ENDIAN
00421 x1 = __PKHBT(x0, x2, 0);
00422 #else
00423 x1 = __PKHBT(x2, x0, 0);
00424 #endif
00425
00426
00427 acc3 = __SMLADX(x1, c0, acc3);
00428
00429
00430 c0 = *__SIMD32(pb)++;
00431
00432
00433 acc0 = __SMLAD(x2, c0, acc0);
00434
00435
00436 x2 = _SIMD32_OFFSET(px + 2u);
00437
00438
00439 acc2 = __SMLAD(x0, c0, acc2);
00440
00441
00442 acc1 = __SMLADX(x1, c0, acc1);
00443
00444
00445 #ifndef ARM_MATH_BIG_ENDIAN
00446 x1 = __PKHBT(x2, x0, 0);
00447 #else
00448 x1 = __PKHBT(x0, x2, 0);
00449 #endif
00450
00451
00452 acc3 = __SMLADX(x1, c0, acc3);
00453
00454
00455 px += 4u;
00456
00457
00458 tapCnt--;
00459
00460 }
00461
00462
00463
00464 if((numTaps & 0x3u) != 0u)
00465 {
00466
00467
00468 c0 = *__SIMD32(pb)++;
00469
00470
00471 acc0 = __SMLAD(x0, c0, acc0);
00472 acc2 = __SMLAD(x2, c0, acc2);
00473
00474
00475 #ifndef ARM_MATH_BIG_ENDIAN
00476 x1 = __PKHBT(x2, x0, 0);
00477 #else
00478 x1 = __PKHBT(x0, x2, 0);
00479 #endif
00480
00481
00482 x0 = *__SIMD32(px);
00483
00484
00485 acc1 = __SMLADX(x1, c0, acc1);
00486
00487
00488 #ifndef ARM_MATH_BIG_ENDIAN
00489 x1 = __PKHBT(x0, x2, 0);
00490 #else
00491 x1 = __PKHBT(x2, x0, 0);
00492 #endif
00493
00494
00495 acc3 = __SMLADX(x1, c0, acc3);
00496 }
00497
00498
00499
00500
00501 #ifndef ARM_MATH_BIG_ENDIAN
00502
00503 *__SIMD32(pDst)++ =
00504 #ifdef CCS
00505 __PKHBT(__SSATA(acc0, 15, 16), __SSATA(acc1, 15, 16), 16);
00506
00507 *__SIMD32(pDst)++ =
00508 __PKHBT(__SSATA(acc2, 15, 16), __SSATA(acc3, 15, 16), 16);
00509 #else
00510 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
00511
00512 *__SIMD32(pDst)++ =
00513 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
00514 #endif
00515
00516 #else
00517
00518 *__SIMD32(pDst)++ =
00519 #ifdef CCS
00520 __PKHBT(__SSATA(acc1, 15, 16), __SSATA(acc0, 15, 16), 16);
00521
00522 *__SIMD32(pDst)++ =
00523 __PKHBT(__SSATA(acc3, 15, 16), __SSATA(acc2, 15, 16), 16);
00524 #else
00525 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
00526
00527 *__SIMD32(pDst)++ =
00528 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
00529
00530 #endif
00531
00532 #endif
00533
00534
00535 pState = pState + 4;
00536
00537
00538 blkCnt--;
00539 }
00540
00541
00542
00543 blkCnt = blockSize % 0x4u;
00544 while(blkCnt > 0u)
00545 {
00546
00547 *pStateCurnt++ = *pSrc++;
00548
00549
00550 acc0 = 0;
00551
00552
00553 px = pState;
00554 pb = pCoeffs;
00555
00556 tapCnt = numTaps >> 1u;
00557
00558 do
00559 {
00560
00561 x0 = *__SIMD32(px)++;
00562 c0 = *__SIMD32(pb)++;
00563
00564 acc0 = __SMLAD(x0, c0, acc0);
00565 tapCnt--;
00566 }
00567 while(tapCnt > 0u);
00568
00569
00570
00571 #ifdef CCS
00572 *pDst++ = (q15_t) (__SSATA(acc0, 15, 16));
00573 #else
00574 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00575 #endif
00576
00577 pState = pState + 1u;
00578
00579
00580 blkCnt--;
00581 }
00582
00583
00584
00585
00586
00587
00588 pStateCurnt = S->pState;
00589
00590
00591 tapCnt = (numTaps - 1u) >> 2;
00592
00593 while(tapCnt > 0u)
00594 {
00595 *pStateCurnt++ = *pState++;
00596 *pStateCurnt++ = *pState++;
00597 *pStateCurnt++ = *pState++;
00598 *pStateCurnt++ = *pState++;
00599
00600 tapCnt--;
00601
00602 }
00603
00604
00605 tapCnt = (numTaps - 1u) % 0x4u;
00606
00607
00608 while(tapCnt > 0u)
00609 {
00610 *pStateCurnt++ = *pState++;
00611
00612
00613 tapCnt--;
00614 }
00615 }
00616
00617
00618 #endif
00619
00620
00621