00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00052 void arm_fir_sparse_q15(
00053 arm_fir_sparse_instance_q15 * S,
00054 q15_t * pSrc,
00055 q15_t * pDst,
00056 q15_t * pScratchIn,
00057 q31_t * pScratchOut,
00058 uint32_t blockSize)
00059 {
00060
00061 q15_t *pState = S->pState;
00062 q15_t *pIn = pSrc;
00063 q15_t *pOut = pDst;
00064 q15_t *pCoeffs = S->pCoeffs;
00065 q15_t *px;
00066 q15_t *pb = pScratchIn;
00067 q15_t *py = pState;
00068 int32_t *pTapDelay = S->pTapDelay;
00069 uint32_t delaySize = S->maxDelay + blockSize;
00070 uint16_t numTaps = S->numTaps;
00071 int32_t readIndex;
00072 uint32_t tapCnt, blkCnt;
00073 q15_t coeff = *pCoeffs++;
00074 q31_t *pScr2 = pScratchOut;
00075 q31_t in1, in2;
00076 q15_t x0, x1;
00077 q31_t y0, y1;
00078
00079
00080
00081
00082 arm_circularWrite_q15(py, delaySize, &S->stateIndex, 1, pIn, 1, blockSize);
00083
00084
00085 tapCnt = numTaps;
00086
00087
00088 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00089
00090
00091 if(readIndex < 0)
00092 {
00093 readIndex += (int32_t) delaySize;
00094 }
00095
00096
00097 py = pState;
00098
00099
00100 arm_circularRead_q15(py, delaySize, &readIndex, 1,
00101 pb, pb, blockSize, 1, blockSize);
00102
00103
00104 px = pb;
00105
00106
00107 pScratchOut = pScr2;
00108
00109
00110
00111 blkCnt = blockSize >> 2;
00112
00113 while(blkCnt > 0u)
00114 {
00115
00116
00117
00118 x0 = *px;
00119 x1 = *(px + 1u);
00120
00121
00122 y0 = (q31_t)x0 * coeff;
00123 y1 = (q31_t)x1 * coeff;
00124
00125
00126 *pScratchOut++ = y0;
00127 *pScratchOut++ = y1;
00128
00129
00130 x0 = *(px + 2u);
00131 x1 = *(px + 3u);
00132
00133
00134 y0 = (q31_t)x0 * coeff;
00135 y1 = (q31_t)x1 * coeff;
00136
00137
00138 *pScratchOut++ = y0;
00139 px += 4u;
00140 *pScratchOut++ = y1;
00141
00142
00143
00144 blkCnt--;
00145 }
00146
00147
00148
00149 blkCnt = blockSize % 0x4u;
00150
00151 while(blkCnt > 0u)
00152 {
00153
00154 *pScratchOut++ = ((q31_t) * px++ * coeff);
00155
00156
00157 blkCnt--;
00158 }
00159
00160
00161
00162 coeff = *pCoeffs++;
00163
00164
00165 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00166
00167
00168 if(readIndex < 0)
00169 {
00170 readIndex += (int32_t) delaySize;
00171 }
00172
00173
00174 tapCnt = (uint32_t) numTaps - 1u;
00175
00176 while(tapCnt > 0u)
00177 {
00178
00179 py = pState;
00180
00181
00182 arm_circularRead_q15(py, delaySize, &readIndex, 1,
00183 pb, pb, blockSize, 1, blockSize);
00184
00185
00186 px = pb;
00187
00188
00189 pScratchOut = pScr2;
00190
00191
00192
00193 blkCnt = blockSize >> 2;
00194
00195 while(blkCnt > 0u)
00196 {
00197
00198 x0 = *px;
00199 x1 = *(px + 1u);
00200
00201
00202 y0 = *pScratchOut;
00203 y1 = *(pScratchOut + 1u);
00204
00205
00206 y0 = (q31_t)x0 * coeff + y0;
00207 y1 = (q31_t)x1 * coeff + y1;
00208
00209
00210 *pScratchOut = y0;
00211 *(pScratchOut + 1u) = y1;
00212
00213
00214 x0 = *(px + 2u);
00215 x1 = *(px + 3u);
00216
00217 y0 = *(pScratchOut + 2u);
00218 y1 = *(pScratchOut + 3u);
00219
00220
00221 y0 = (q31_t)x0 * coeff + y0;
00222 y1 = (q31_t)x1 * coeff + y1;
00223
00224
00225 *(pScratchOut + 2u) = y0;
00226 *(pScratchOut + 3u) = y1;
00227
00228
00229 px += 4u;
00230 pScratchOut += 4u;
00231
00232
00233
00234 blkCnt--;
00235 }
00236
00237
00238
00239 blkCnt = blockSize % 0x4u;
00240
00241 while(blkCnt > 0u)
00242 {
00243
00244
00245 *pScratchOut++ += (q31_t) * px++ * coeff;
00246
00247
00248 blkCnt--;
00249 }
00250
00251
00252
00253 coeff = *pCoeffs++;
00254
00255
00256 readIndex = (S->stateIndex - blockSize) - *pTapDelay++;
00257
00258
00259 if(readIndex < 0)
00260 {
00261 readIndex += (int32_t) delaySize;
00262 }
00263
00264
00265 tapCnt--;
00266 }
00267
00268
00269
00270
00271 blkCnt = blockSize >> 2;
00272
00273 while(blkCnt > 0u)
00274 {
00275
00276 in1 = *pScr2++;
00277 in2 = *pScr2++;
00278
00279 #ifndef ARM_MATH_BIG_ENDIAN
00280
00281 *__SIMD32(pOut)++ =
00282 #ifdef CCS
00283 __PKHBT((q15_t) __SSATA(in1, 15, 16), (q15_t) __SSATA(in2, 15, 16), 16);
00284 #else
00285 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16), 16);
00286 #endif // #ifdef CCS
00287
00288 #else
00289 *__SIMD32(pOut)++ =
00290 #ifdef CCS
00291 __PKHBT((q15_t) __SSATA(in2, 15, 16), (q15_t) __SSATA(in1, 15, 16), 16);
00292 #else
00293 __PKHBT((q15_t) __SSAT(in2 >> 15, 16), (q15_t) __SSAT(in1 >> 15, 16), 16);
00294 #endif // #ifdef CCS
00295
00296 #endif
00297
00298
00299 in1 = *pScr2++;
00300 in2 = *pScr2++;
00301
00302 #ifndef ARM_MATH_BIG_ENDIAN
00303
00304 *__SIMD32(pOut)++ =
00305 #ifdef CCS
00306 __PKHBT((q15_t) __SSATA(in1, 15, 16), (q15_t) __SSATA(in2, 15, 16), 16);
00307 #else
00308 __PKHBT((q15_t) __SSAT(in1 >> 15, 16), (q15_t) __SSAT(in2 >> 15, 16), 16);
00309 #endif // #ifdef CCS
00310
00311 #else
00312
00313 *__SIMD32(pOut)++ =
00314 #ifdef CCS
00315 __PKHBT((q15_t) __SSATA(in2, 15, 16), (q15_t) __SSATA(in1, 15, 16), 16);
00316 #else
00317 __PKHBT((q15_t) __SSAT(in2 >> 15, 16), (q15_t) __SSAT(in1 >> 15, 16), 16);
00318 #endif // #ifdef CCS
00319
00320
00321
00322 #endif
00323
00324 blkCnt--;
00325
00326 }
00327
00328
00329
00330 blkCnt = blockSize % 0x4u;
00331
00332 while(blkCnt > 0u)
00333 {
00334 #ifdef CCS
00335 *pOut++ = (q15_t) __SSATA(*pScr2++, 15, 16);
00336 #else
00337 *pOut++ = (q15_t) __SSAT(*pScr2++ >> 15, 16);
00338 #endif
00339
00340 blkCnt--;
00341 }
00342 }
00343