00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00066 #ifndef UNALIGNED_SUPPORT_DISABLE
00067
00068 void arm_fir_decimate_fast_q15(
00069 const arm_fir_decimate_instance_q15 * S,
00070 q15_t * pSrc,
00071 q15_t * pDst,
00072 uint32_t blockSize)
00073 {
00074 q15_t *pState = S->pState;
00075 q15_t *pCoeffs = S->pCoeffs;
00076 q15_t *pStateCurnt;
00077 q15_t *px;
00078 q15_t *pb;
00079 q31_t x0, x1, c0, c1;
00080 q63_t sum0;
00081 q31_t acc0, acc1;
00082 q15_t *px0, *px1;
00083 uint32_t blkCntN3;
00084 uint32_t numTaps = S->numTaps;
00085 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;
00086
00087
00088
00089
00090 pStateCurnt = S->pState + (numTaps - 1u);
00091
00092
00093
00094 blkCnt = outBlockSize / 2;
00095 blkCntN3 = outBlockSize - (2*blkCnt);
00096
00097
00098 while(blkCnt > 0u)
00099 {
00100
00101 i = 2 * S->M;
00102
00103 do
00104 {
00105 *pStateCurnt++ = *pSrc++;
00106
00107 } while(--i);
00108
00109
00110 acc0 = 0;
00111 acc1 = 0;
00112
00113
00114 px0 = pState;
00115
00116 px1 = pState + S->M;
00117
00118
00119
00120 pb = pCoeffs;
00121
00122
00123 tapCnt = numTaps >> 2;
00124
00125
00126
00127 while(tapCnt > 0u)
00128 {
00129
00130 c0 = *__SIMD32(pb)++;
00131
00132
00133 x0 = *__SIMD32(px0)++;
00134
00135 x1 = *__SIMD32(px1)++;
00136
00137
00138 acc0 = __SMLAD(x0, c0, acc0);
00139
00140 acc1 = __SMLAD(x1, c0, acc1);
00141
00142
00143 c0 = *__SIMD32(pb)++;
00144
00145
00146 x0 = *__SIMD32(px0)++;
00147
00148 x1 = *__SIMD32(px1)++;
00149
00150
00151 acc0 = __SMLAD(x0, c0, acc0);
00152
00153 acc1 = __SMLAD(x1, c0, acc1);
00154
00155
00156 tapCnt--;
00157 }
00158
00159
00160 tapCnt = numTaps % 0x4u;
00161
00162 while(tapCnt > 0u)
00163 {
00164
00165 c0 = *pb++;
00166
00167
00168 x0 = *px0++;
00169
00170 x1 = *px1++;
00171
00172
00173 acc0 = __SMLAD(x0, c0, acc0);
00174 acc1 = __SMLAD(x1, c0, acc1);
00175
00176
00177 tapCnt--;
00178 }
00179
00180
00181
00182 pState = pState + S->M * 2;
00183
00184
00185
00186 #ifdef CCS
00187 *pDst++ = (q15_t) (__SSATA(acc0, 15, 16));
00188 *pDst++ = (q15_t) (__SSATA(acc1, 15, 16));
00189 #else
00190 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00191 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
00192 #endif
00193
00194 blkCnt--;
00195 }
00196
00197
00198
00199 while(blkCntN3 > 0u)
00200 {
00201
00202 i = S->M;
00203
00204 do
00205 {
00206 *pStateCurnt++ = *pSrc++;
00207
00208 } while(--i);
00209
00210
00211 sum0 = 0;
00212
00213
00214 px = pState;
00215
00216
00217 pb = pCoeffs;
00218
00219
00220 tapCnt = numTaps >> 2;
00221
00222
00223
00224 while(tapCnt > 0u)
00225 {
00226
00227 c0 = *__SIMD32(pb)++;
00228
00229
00230 x0 = *__SIMD32(px)++;
00231
00232
00233 c1 = *__SIMD32(pb)++;
00234
00235
00236 sum0 = __SMLAD(x0, c0, sum0);
00237
00238
00239 x0 = *__SIMD32(px)++;
00240
00241
00242 sum0 = __SMLAD(x0, c1, sum0);
00243
00244
00245 tapCnt--;
00246 }
00247
00248
00249 tapCnt = numTaps % 0x4u;
00250
00251 while(tapCnt > 0u)
00252 {
00253
00254 c0 = *pb++;
00255
00256
00257 x0 = *px++;
00258
00259
00260 sum0 = __SMLAD(x0, c0, sum0);
00261
00262
00263 tapCnt--;
00264 }
00265
00266
00267
00268 pState = pState + S->M;
00269
00270
00271
00272 #ifdef CCS
00273 *pDst++ = (q15_t) (__SSATA(sum0, 15, 16));
00274 #else
00275 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
00276 #endif
00277
00278 blkCntN3--;
00279 }
00280
00281
00282
00283
00284
00285
00286 pStateCurnt = S->pState;
00287
00288 i = (numTaps - 1u) >> 2u;
00289
00290
00291 while(i > 0u)
00292 {
00293 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00294 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00295
00296
00297 i--;
00298 }
00299
00300 i = (numTaps - 1u) % 0x04u;
00301
00302
00303 while(i > 0u)
00304 {
00305 *pStateCurnt++ = *pState++;
00306
00307
00308 i--;
00309 }
00310 }
00311
00312 #else
00313
00314
00315 void arm_fir_decimate_fast_q15(
00316 const arm_fir_decimate_instance_q15 * S,
00317 q15_t * pSrc,
00318 q15_t * pDst,
00319 uint32_t blockSize)
00320 {
00321 q15_t *pState = S->pState;
00322 q15_t *pCoeffs = S->pCoeffs;
00323 q15_t *pStateCurnt;
00324 q15_t *px;
00325 q15_t *pb;
00326 q15_t x0, x1, c0;
00327 q31_t sum0;
00328 q31_t acc0, acc1;
00329 q15_t *px0, *px1;
00330 uint32_t blkCntN3;
00331 uint32_t numTaps = S->numTaps;
00332 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;
00333
00334
00335
00336
00337 pStateCurnt = S->pState + (numTaps - 1u);
00338
00339
00340
00341 blkCnt = outBlockSize / 2;
00342 blkCntN3 = outBlockSize - (2*blkCnt);
00343
00344 while(blkCnt > 0u)
00345 {
00346
00347 i = 2 * S->M;
00348
00349 do
00350 {
00351 *pStateCurnt++ = *pSrc++;
00352
00353 } while(--i);
00354
00355
00356 acc0 = 0;
00357 acc1 = 0;
00358
00359
00360 px0 = pState;
00361
00362 px1 = pState + S->M;
00363
00364
00365
00366 pb = pCoeffs;
00367
00368
00369 tapCnt = numTaps >> 2;
00370
00371
00372
00373 while(tapCnt > 0u)
00374 {
00375
00376 c0 = *pb++;
00377
00378
00379 x0 = *px0++;
00380 x1 = *px1++;
00381
00382
00383 acc0 += x0 * c0;
00384 acc1 += x1 * c0;
00385
00386
00387 c0 = *pb++;
00388
00389
00390 x0 = *px0++;
00391 x1 = *px1++;
00392
00393
00394 acc0 += x0 * c0;
00395 acc1 += x1 * c0;
00396
00397
00398 c0 = *pb++;
00399
00400
00401 x0 = *px0++;
00402 x1 = *px1++;
00403
00404
00405 acc0 += x0 * c0;
00406 acc1 += x1 * c0;
00407
00408
00409 c0 = *pb++;
00410
00411
00412 x0 = *px0++;
00413 x1 = *px1++;
00414
00415
00416 acc0 += x0 * c0;
00417 acc1 += x1 * c0;
00418
00419
00420 tapCnt--;
00421 }
00422
00423
00424 tapCnt = numTaps % 0x4u;
00425
00426 while(tapCnt > 0u)
00427 {
00428
00429 c0 = *pb++;
00430
00431
00432 x0 = *px0++;
00433 x1 = *px1++;
00434
00435
00436 acc0 += x0 * c0;
00437 acc1 += x1 * c0;
00438
00439
00440 tapCnt--;
00441 }
00442
00443
00444
00445 pState = pState + S->M * 2;
00446
00447
00448
00449
00450 #ifdef CCS
00451 *pDst++ = (q15_t) (__SSATA(acc0, 15, 16));
00452 *pDst++ = (q15_t) (__SSATA(acc1, 15, 16));
00453 #else
00454 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00455 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
00456 #endif
00457
00458
00459 blkCnt--;
00460 }
00461
00462 while(blkCntN3 > 0u)
00463 {
00464
00465 i = S->M;
00466
00467 do
00468 {
00469 *pStateCurnt++ = *pSrc++;
00470
00471 } while(--i);
00472
00473
00474 sum0 = 0;
00475
00476
00477 px = pState;
00478
00479
00480 pb = pCoeffs;
00481
00482
00483 tapCnt = numTaps >> 2;
00484
00485
00486
00487 while(tapCnt > 0u)
00488 {
00489
00490 c0 = *pb++;
00491
00492
00493 x0 = *px++;
00494
00495
00496 sum0 += x0 * c0;
00497
00498
00499 c0 = *pb++;
00500
00501
00502 x0 = *px++;
00503
00504
00505 sum0 += x0 * c0;
00506
00507
00508 c0 = *pb++;
00509
00510
00511 x0 = *px++;
00512
00513
00514 sum0 += x0 * c0;
00515
00516
00517 c0 = *pb++;
00518
00519
00520 x0 = *px++;
00521
00522
00523 sum0 += x0 * c0;
00524
00525
00526 tapCnt--;
00527 }
00528
00529
00530 tapCnt = numTaps % 0x4u;
00531
00532 while(tapCnt > 0u)
00533 {
00534
00535 c0 = *pb++;
00536
00537
00538 x0 = *px++;
00539
00540
00541 sum0 += x0 * c0;
00542
00543
00544 tapCnt--;
00545 }
00546
00547
00548
00549 pState = pState + S->M;
00550
00551
00552
00553 #ifdef CCS
00554 *pDst++ = (q15_t) (__SSATA(sum0, 15, 16));
00555 #else
00556 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
00557 #endif
00558
00559 blkCntN3--;
00560 }
00561
00562
00563
00564
00565
00566
00567 pStateCurnt = S->pState;
00568
00569 i = (numTaps - 1u) >> 2u;
00570
00571
00572 while(i > 0u)
00573 {
00574 *pStateCurnt++ = *pState++;
00575 *pStateCurnt++ = *pState++;
00576 *pStateCurnt++ = *pState++;
00577 *pStateCurnt++ = *pState++;
00578
00579
00580 i--;
00581 }
00582
00583 i = (numTaps - 1u) % 0x04u;
00584
00585
00586 while(i > 0u)
00587 {
00588 *pStateCurnt++ = *pState++;
00589
00590
00591 i--;
00592 }
00593 }
00594
00595
00596 #endif
00597