00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00065 #ifndef UNALIGNED_SUPPORT_DISABLE
00066
00067 void arm_fir_decimate_q15(
00068 const arm_fir_decimate_instance_q15 * S,
00069 q15_t * pSrc,
00070 q15_t * pDst,
00071 uint32_t blockSize)
00072 {
00073 q15_t *pState = S->pState;
00074 q15_t *pCoeffs = S->pCoeffs;
00075 q15_t *pStateCurnt;
00076 q15_t *px;
00077 q15_t *pb;
00078 q31_t x0, x1, c0, c1;
00079 q63_t sum0;
00080 q63_t acc0, acc1;
00081 q15_t *px0, *px1;
00082 uint32_t blkCntN3;
00083 uint32_t numTaps = S->numTaps;
00084 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;
00085
00086
00087
00088
00089 pStateCurnt = S->pState + (numTaps - 1u);
00090
00091
00092
00093 blkCnt = outBlockSize / 2;
00094 blkCntN3 = outBlockSize - (2*blkCnt);
00095
00096
00097 while(blkCnt > 0u)
00098 {
00099
00100 i = 2 * S->M;
00101
00102 do
00103 {
00104 *pStateCurnt++ = *pSrc++;
00105
00106 } while(--i);
00107
00108
00109 acc0 = 0;
00110 acc1 = 0;
00111
00112
00113 px0 = pState;
00114
00115 px1 = pState + S->M;
00116
00117
00118
00119 pb = pCoeffs;
00120
00121
00122 tapCnt = numTaps >> 2;
00123
00124
00125
00126 while(tapCnt > 0u)
00127 {
00128
00129 c0 = *__SIMD32(pb)++;
00130
00131
00132 x0 = *__SIMD32(px0)++;
00133
00134 x1 = *__SIMD32(px1)++;
00135
00136
00137 acc0 = __SMLALD(x0, c0, acc0);
00138
00139 acc1 = __SMLALD(x1, c0, acc1);
00140
00141
00142 c0 = *__SIMD32(pb)++;
00143
00144
00145 x0 = *__SIMD32(px0)++;
00146
00147 x1 = *__SIMD32(px1)++;
00148
00149
00150 acc0 = __SMLALD(x0, c0, acc0);
00151
00152 acc1 = __SMLALD(x1, c0, acc1);
00153
00154
00155 tapCnt--;
00156 }
00157
00158
00159 tapCnt = numTaps % 0x4u;
00160
00161 while(tapCnt > 0u)
00162 {
00163
00164 c0 = *pb++;
00165
00166
00167 x0 = *px0++;
00168
00169 x1 = *px1++;
00170
00171
00172 acc0 = __SMLALD(x0, c0, acc0);
00173 acc1 = __SMLALD(x1, c0, acc1);
00174
00175
00176 tapCnt--;
00177 }
00178
00179
00180
00181 pState = pState + S->M * 2;
00182
00183
00184
00185 #ifdef CCS
00186 *pDst++ = (q15_t) (__SSATA(acc0, 15, 16));
00187 *pDst++ = (q15_t) (__SSATA(acc1, 15, 16));
00188 #else
00189 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00190 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
00191 #endif
00192
00193 blkCnt--;
00194 }
00195
00196
00197
00198 while(blkCntN3 > 0u)
00199 {
00200
00201 i = S->M;
00202
00203 do
00204 {
00205 *pStateCurnt++ = *pSrc++;
00206
00207 } while(--i);
00208
00209
00210 sum0 = 0;
00211
00212
00213 px = pState;
00214
00215
00216 pb = pCoeffs;
00217
00218
00219 tapCnt = numTaps >> 2;
00220
00221
00222
00223 while(tapCnt > 0u)
00224 {
00225
00226 c0 = *__SIMD32(pb)++;
00227
00228
00229 x0 = *__SIMD32(px)++;
00230
00231
00232 c1 = *__SIMD32(pb)++;
00233
00234
00235 sum0 = __SMLALD(x0, c0, sum0);
00236
00237
00238 x0 = *__SIMD32(px)++;
00239
00240
00241 sum0 = __SMLALD(x0, c1, sum0);
00242
00243
00244 tapCnt--;
00245 }
00246
00247
00248 tapCnt = numTaps % 0x4u;
00249
00250 while(tapCnt > 0u)
00251 {
00252
00253 c0 = *pb++;
00254
00255
00256 x0 = *px++;
00257
00258
00259 sum0 = __SMLALD(x0, c0, sum0);
00260
00261
00262 tapCnt--;
00263 }
00264
00265
00266
00267 pState = pState + S->M;
00268
00269
00270
00271 #ifdef CCS
00272 *pDst++ = (q15_t) (__SSATA(sum0, 15, 16));
00273 #else
00274 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
00275 #endif
00276
00277 blkCntN3--;
00278 }
00279
00280
00281
00282
00283
00284
00285 pStateCurnt = S->pState;
00286
00287 i = (numTaps - 1u) >> 2u;
00288
00289
00290 while(i > 0u)
00291 {
00292 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00293 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++;
00294
00295
00296 i--;
00297 }
00298
00299 i = (numTaps - 1u) % 0x04u;
00300
00301
00302 while(i > 0u)
00303 {
00304 *pStateCurnt++ = *pState++;
00305
00306
00307 i--;
00308 }
00309 }
00310
00311 #else
00312
00313
00314 void arm_fir_decimate_q15(
00315 const arm_fir_decimate_instance_q15 * S,
00316 q15_t * pSrc,
00317 q15_t * pDst,
00318 uint32_t blockSize)
00319 {
00320 q15_t *pState = S->pState;
00321 q15_t *pCoeffs = S->pCoeffs;
00322 q15_t *pStateCurnt;
00323 q15_t *px;
00324 q15_t *pb;
00325 q15_t x0, x1, c0;
00326 q63_t sum0;
00327 q63_t acc0, acc1;
00328 q15_t *px0, *px1;
00329 uint32_t blkCntN3;
00330 uint32_t numTaps = S->numTaps;
00331 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M;
00332
00333
00334
00335
00336 pStateCurnt = S->pState + (numTaps - 1u);
00337
00338
00339
00340 blkCnt = outBlockSize / 2;
00341 blkCntN3 = outBlockSize - (2*blkCnt);
00342
00343 while(blkCnt > 0u)
00344 {
00345
00346 i = 2 * S->M;
00347
00348 do
00349 {
00350 *pStateCurnt++ = *pSrc++;
00351
00352 } while(--i);
00353
00354
00355 acc0 = 0;
00356 acc1 = 0;
00357
00358
00359 px0 = pState;
00360
00361 px1 = pState + S->M;
00362
00363
00364
00365 pb = pCoeffs;
00366
00367
00368 tapCnt = numTaps >> 2;
00369
00370
00371
00372 while(tapCnt > 0u)
00373 {
00374
00375 c0 = *pb++;
00376
00377
00378 x0 = *px0++;
00379 x1 = *px1++;
00380
00381
00382 acc0 += x0 * c0;
00383 acc1 += x1 * c0;
00384
00385
00386 c0 = *pb++;
00387
00388
00389 x0 = *px0++;
00390 x1 = *px1++;
00391
00392
00393 acc0 += x0 * c0;
00394 acc1 += x1 * c0;
00395
00396
00397 c0 = *pb++;
00398
00399
00400 x0 = *px0++;
00401 x1 = *px1++;
00402
00403
00404 acc0 += x0 * c0;
00405 acc1 += x1 * c0;
00406
00407
00408 c0 = *pb++;
00409
00410
00411 x0 = *px0++;
00412 x1 = *px1++;
00413
00414
00415 acc0 += x0 * c0;
00416 acc1 += x1 * c0;
00417
00418
00419 tapCnt--;
00420 }
00421
00422
00423 tapCnt = numTaps % 0x4u;
00424
00425 while(tapCnt > 0u)
00426 {
00427
00428 c0 = *pb++;
00429
00430
00431 x0 = *px0++;
00432 x1 = *px1++;
00433
00434
00435 acc0 += x0 * c0;
00436 acc1 += x1 * c0;
00437
00438
00439 tapCnt--;
00440 }
00441
00442
00443
00444 pState = pState + S->M * 2;
00445
00446
00447
00448
00449 #ifdef CCS
00450 *pDst++ = (q15_t) (__SSATA(acc0, 15, 16));
00451 *pDst++ = (q15_t) (__SSATA(acc1, 15, 16));
00452 #else
00453 *pDst++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00454 *pDst++ = (q15_t) (__SSAT((acc1 >> 15), 16));
00455 #endif
00456
00457
00458 blkCnt--;
00459 }
00460
00461 while(blkCntN3 > 0u)
00462 {
00463
00464 i = S->M;
00465
00466 do
00467 {
00468 *pStateCurnt++ = *pSrc++;
00469
00470 } while(--i);
00471
00472
00473 sum0 = 0;
00474
00475
00476 px = pState;
00477
00478
00479 pb = pCoeffs;
00480
00481
00482 tapCnt = numTaps >> 2;
00483
00484
00485
00486 while(tapCnt > 0u)
00487 {
00488
00489 c0 = *pb++;
00490
00491
00492 x0 = *px++;
00493
00494
00495 sum0 += x0 * c0;
00496
00497
00498 c0 = *pb++;
00499
00500
00501 x0 = *px++;
00502
00503
00504 sum0 += x0 * c0;
00505
00506
00507 c0 = *pb++;
00508
00509
00510 x0 = *px++;
00511
00512
00513 sum0 += x0 * c0;
00514
00515
00516 c0 = *pb++;
00517
00518
00519 x0 = *px++;
00520
00521
00522 sum0 += x0 * c0;
00523
00524
00525 tapCnt--;
00526 }
00527
00528
00529 tapCnt = numTaps % 0x4u;
00530
00531 while(tapCnt > 0u)
00532 {
00533
00534 c0 = *pb++;
00535
00536
00537 x0 = *px++;
00538
00539
00540 sum0 += x0 * c0;
00541
00542
00543 tapCnt--;
00544 }
00545
00546
00547
00548 pState = pState + S->M;
00549
00550
00551
00552 #ifdef CCS
00553 *pDst++ = (q15_t) (__SSATA(sum0, 15, 16));
00554 #else
00555 *pDst++ = (q15_t) (__SSAT((sum0 >> 15), 16));
00556 #endif
00557
00558 blkCntN3--;
00559 }
00560
00561
00562
00563
00564
00565
00566 pStateCurnt = S->pState;
00567
00568 i = (numTaps - 1u) >> 2u;
00569
00570
00571 while(i > 0u)
00572 {
00573 *pStateCurnt++ = *pState++;
00574 *pStateCurnt++ = *pState++;
00575 *pStateCurnt++ = *pState++;
00576 *pStateCurnt++ = *pState++;
00577
00578
00579 i--;
00580 }
00581
00582 i = (numTaps - 1u) % 0x04u;
00583
00584
00585 while(i > 0u)
00586 {
00587 *pStateCurnt++ = *pState++;
00588
00589
00590 i--;
00591 }
00592 }
00593
00594
00595 #endif
00596