00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00026
00057 void arm_fir_sparse_q7(
00058 arm_fir_sparse_instance_q7 * S,
00059 q7_t * pSrc,
00060 q7_t * pDst,
00061 q7_t * pScratchIn,
00062 q31_t * pScratchOut,
00063 uint32_t blockSize)
00064 {
00065
00066 q7_t *pState = S->pState;
00067 q7_t *pCoeffs = S->pCoeffs;
00068 q7_t *px;
00069 q7_t *py = pState;
00070 q7_t *pb = pScratchIn;
00071 q7_t *pOut = pDst;
00072 int32_t *pTapDelay = S->pTapDelay;
00073 uint32_t delaySize = S->maxDelay + blockSize;
00074 uint16_t numTaps = S->numTaps;
00075 int32_t readIndex;
00076 uint32_t tapCnt, blkCnt;
00077 q7_t coeff = *pCoeffs++;
00078 q31_t *pScr2 = pScratchOut;
00079 q31_t in;
00080 q7_t in1, in2, in3, in4;
00081 q7_t x0, x1;
00082 q31_t y0, y1;
00083
00084
00085
00086
00087 arm_circularWrite_q7(py, (int32_t) delaySize, &S->stateIndex, 1, pSrc, 1,
00088 blockSize);
00089
00090
00091 tapCnt = numTaps;
00092
00093
00094 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
00095
00096
00097 if(readIndex < 0)
00098 {
00099 readIndex += (int32_t) delaySize;
00100 }
00101
00102
00103 py = pState;
00104
00105
00106 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb,
00107 (int32_t) blockSize, 1, blockSize);
00108
00109
00110 px = pb;
00111
00112
00113 pScratchOut = pScr2;
00114
00115
00116
00117 blkCnt = blockSize >> 2;
00118
00119 while(blkCnt > 0u)
00120 {
00121
00122
00123 x0 = *px;
00124 x1 = *(px + 1u);
00125
00126
00127 y0 = (q31_t)x0 * coeff;
00128 y1 = (q31_t)x1 * coeff;
00129
00130
00131 *pScratchOut++ = y0;
00132 *pScratchOut++ = y1;
00133
00134
00135 x0 = *(px + 2u);
00136 x1 = *(px + 3u);
00137
00138
00139 y0 = (q31_t)x0 * coeff;
00140 y1 = (q31_t)x1 * coeff;
00141
00142
00143 *pScratchOut++ = y0;
00144 px += 4u;
00145 *pScratchOut++ = y1;
00146
00147
00148
00149 blkCnt--;
00150 }
00151
00152
00153
00154 blkCnt = blockSize % 0x4u;
00155
00156 while(blkCnt > 0u)
00157 {
00158
00159 *pScratchOut++ = ((q31_t) * px++ * coeff);
00160
00161
00162 blkCnt--;
00163 }
00164
00165
00166
00167 coeff = *pCoeffs++;
00168
00169
00170 readIndex = ((int32_t) S->stateIndex - (int32_t) blockSize) - *pTapDelay++;
00171
00172
00173 if(readIndex < 0)
00174 {
00175 readIndex += (int32_t) delaySize;
00176 }
00177
00178
00179 tapCnt = (uint32_t) numTaps - 1u;
00180
00181 while(tapCnt > 0u)
00182 {
00183
00184 py = pState;
00185
00186
00187 arm_circularRead_q7(py, (int32_t) delaySize, &readIndex, 1, pb, pb,
00188 (int32_t) blockSize, 1, blockSize);
00189
00190
00191 px = pb;
00192
00193
00194 pScratchOut = pScr2;
00195
00196
00197
00198 blkCnt = blockSize >> 2;
00199
00200 while(blkCnt > 0u)
00201 {
00202
00203 x0 = *px;
00204 x1 = *(px + 1u);
00205
00206
00207 y0 = *pScratchOut;
00208 y1 = *(pScratchOut + 1u);
00209
00210
00211 y0 = (q31_t)x0 * coeff + y0;
00212 y1 = (q31_t)x1 * coeff + y1;
00213
00214
00215 *pScratchOut = y0;
00216 *(pScratchOut + 1u) = y1;
00217
00218
00219 x0 = *(px + 2u);
00220 x1 = *(px + 3u);
00221
00222 y0 = *(pScratchOut + 2u);
00223 y1 = *(pScratchOut + 3u);
00224
00225
00226 y0 = (q31_t)x0 * coeff + y0;
00227 y1 = (q31_t)x1 * coeff + y1;
00228
00229
00230 *(pScratchOut + 2u) = y0;
00231 *(pScratchOut + 3u) = y1;
00232
00233
00234 px += 4u;
00235 pScratchOut += 4u;
00236
00237
00238
00239 blkCnt--;
00240 }
00241
00242
00243
00244 blkCnt = blockSize % 0x4u;
00245
00246 while(blkCnt > 0u)
00247 {
00248
00249 in = *pScratchOut + ((q31_t) * px++ * coeff);
00250 *pScratchOut++ = in;
00251
00252
00253 blkCnt--;
00254 }
00255
00256
00257
00258 coeff = *pCoeffs++;
00259
00260
00261 readIndex = ((int32_t) S->stateIndex -
00262 (int32_t) blockSize) - *pTapDelay++;
00263
00264
00265 if(readIndex < 0)
00266 {
00267 readIndex += (int32_t) delaySize;
00268 }
00269
00270
00271 tapCnt--;
00272 }
00273
00274
00275
00276
00277 blkCnt = blockSize >> 2;
00278
00279 while(blkCnt > 0u)
00280 {
00281 #ifdef CCS
00282 in1 = (q7_t) __SSATA(*pScr2++, 7, 8);
00283 in2 = (q7_t) __SSATA(*pScr2++, 7, 8);
00284 in3 = (q7_t) __SSATA(*pScr2++, 7, 8);
00285 in4 = (q7_t) __SSATA(*pScr2++, 7, 8);
00286 #else
00287 in1 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00288 in2 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00289 in3 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00290 in4 = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00291
00292 #endif
00293 *__SIMD32(pOut)++ = __PACKq7(in1, in2, in3, in4);
00294
00295
00296 blkCnt--;
00297 }
00298
00299
00300
00301 blkCnt = blockSize % 0x4u;
00302
00303 while(blkCnt > 0u)
00304 {
00305 #ifdef CCS
00306 *pOut++ = (q7_t) __SSATA(*pScr2++, 7, 8);
00307 #else
00308 *pOut++ = (q7_t) __SSAT(*pScr2++ >> 7, 8);
00309 #endif
00310
00311
00312 blkCnt--;
00313 }
00314 }
00315