00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00070 void arm_correlate_fast_q15( /* Correlates two q15_t sequences using a caller-supplied scratch buffer. */
00071 q15_t * pSrcA, /* first input sequence */
00072 uint32_t srcALen, /* number of samples in pSrcA */
00073 q15_t * pSrcB, /* second input sequence */
00074 uint32_t srcBLen, /* number of samples in pSrcB */
00075 q15_t * pDst, /* output buffer; the overview below says which elements get written */
00076 q15_t * pScratch) /* work buffer; (longer length) + 2 * (shorter length - 1) samples are stored in it */
00077 {
00078 q15_t *pIn1; /* the longer input (copied into scratch) */
00079 q15_t *pIn2; /* the shorter input (slides across scratch) */
00080 q31_t acc0, acc1, acc2, acc3; /* running sums for four consecutive outputs */
00081 q15_t *py; /* saved start of the shorter input, used to rewind pIn2 */
00082 q31_t x1, x2, x3; /* packed pairs of scratch samples; x3 is repacked from x1 and x2 */
00083 uint32_t j, blkCnt, outBlockSize; /* general temporary, output loop counter, 2 * srcALen - 1 */
00084 int32_t inc = 1; /* pOut step: +1, or -1 when the inputs are swapped */
00085 uint32_t tapCnt; /* inner loop counter over the shorter input */
00086 q31_t y1, y2; /* packed pairs of samples from the shorter input */
00087 q15_t *pScr; /* moving pointer into the scratch buffer */
00088 q15_t *pOut = pDst; /* next output location */
00089 /* Overview (derived from the code below): */
00090 /* - The longer input is copied into pScratch between two runs of */
00091 /*   (shorter length - 1) zeros. */
00092 /* - The shorter input is slid across that buffer one sample at a time; */
00093 /*   each output is the sum of products of the shorter input with the */
00094 /*   scratch window at that position. */
00095 /* - Outputs are computed four at a time in acc0..acc3, then one at a time */
00096 /*   for the leftover (srcALen + srcBLen - 1) % 4 positions. */
00097 /* - Each sum is shifted right by 15 and passed through a 16-bit __SSAT */
00098 /*   (presumably signed saturation) before it is stored. */
00099 /* - srcALen >= srcBLen: the first (srcALen - srcBLen) elements of pDst are */
00100 /*   skipped and the rest are written in increasing order. */
00101 /* - srcALen < srcBLen: roles are swapped and results are written in */
00102 /*   decreasing order, starting at pDst[srcALen + srcBLen - 2]. */
00103 /* NOTE(review): skipped pDst elements are never written here; presumably the caller pre-zeroes pDst - confirm. */
00104 if(srcALen >= srcBLen)
00105 {
00106 /* pSrcA is at least as long: it goes into scratch and pSrcB slides over it. */
00107 pIn1 = (pSrcA);
00108
00109
00110 pIn2 = (pSrcB);
00111
00112 /* 2 * srcALen - 1; only used to derive the offset j just below. */
00113 outBlockSize = (2u * srcALen) - 1u;
00114
00115 /* j = (2 * srcALen - 1) - (srcALen + srcBLen - 1) = srcALen - srcBLen, */
00116 /* i.e. the number of leading pDst elements that are skipped. */
00117
00118
00119 j = outBlockSize - (srcALen + (srcBLen - 1u));
00120
00121 /* Begin writing after the skipped elements. */
00122 pOut += j;
00123
00124 }
00125 else
00126 {
00127 /* pSrcB is longer: swap roles so that pIn1 is the longer sequence. */
00128 pIn1 = (pSrcB);
00129
00130
00131 pIn2 = (pSrcA);
00132
00133 /* Swap the lengths as well; from here on srcALen is the longer length. */
00134 j = srcBLen;
00135 srcBLen = srcALen;
00136 srcALen = j;
00137
00138 /* Start at the last result slot, pDst[srcALen + srcBLen - 2], ... */
00139
00140 pOut = pDst + ((srcALen + srcBLen) - 2u);
00141
00142 /* ... and step backwards so the results are stored in reverse order. */
00143 inc = -1;
00144
00145 }
00146 /* Scratch layout built below: [srcBLen - 1 zeros][longer input][srcBLen - 1 zeros]. */
00147 pScr = pScratch;
00148 /* Leading zeros. */
00149 /* NOTE(review): srcBLen == 0 would make (srcBLen - 1u) wrap around; nothing here guards against it. */
00150 arm_fill_q15(0, pScr, (srcBLen - 1u));
00151
00152 /* Skip past the leading zeros. */
00153 pScr += (srcBLen - 1u);
00154
00155 #ifndef UNALIGNED_SUPPORT_DISABLE
00156
00157 /* Copy the longer input into scratch with the library copy routine. */
00158 arm_copy_q15(pIn1, pScr, srcALen);
00159
00160 /* Skip past the copied samples (pIn1 itself is not advanced on this path). */
00161 pScr += srcALen;
00162
00163 #else
00164
00165 /* Manual copy, four samples per iteration. */
00166 j = srcALen >> 2u;
00167
00168
00169
00170 while(j > 0u)
00171 {
00172
00173 *pScr++ = *pIn1++;
00174 *pScr++ = *pIn1++;
00175 *pScr++ = *pIn1++;
00176 *pScr++ = *pIn1++;
00177
00178
00179 j--;
00180 }
00181
00182 /* Copy the last srcALen % 4 samples one at a time. */
00183
00184 j = srcALen % 0x4u;
00185
00186 while(j > 0u)
00187 {
00188
00189 *pScr++ = *pIn1++;
00190
00191
00192 j--;
00193 }
00194
00195 #endif
00196
00197 #ifndef UNALIGNED_SUPPORT_DISABLE
00198
00199 /* Trailing zeros after the copied samples. */
00200 arm_fill_q15(0, pScr, (srcBLen - 1u));
00201
00202 /* pScr now points just past everything written to scratch. */
00203 pScr += (srcBLen - 1u);
00204
00205 #else
00206
00207 /* Manual zero fill of the trailing padding, four samples per iteration. */
00208 j = (srcBLen - 1u) >> 2u;
00209
00210
00211
00212 while(j > 0u)
00213 {
00214
00215 *pScr++ = 0;
00216 *pScr++ = 0;
00217 *pScr++ = 0;
00218 *pScr++ = 0;
00219
00220
00221 j--;
00222 }
00223
00224 /* Last (srcBLen - 1) % 4 zeros. */
00225
00226 j = (srcBLen - 1u) % 0x4u;
00227
00228 while(j > 0u)
00229 {
00230
00231 *pScr++ = 0;
00232
00233
00234 j--;
00235 }
00236
00237 #endif
00238
00239 /* Remember the start of the shorter input so pIn2 can be rewound after each output group. */
00240 py = pIn2;
00241
00242 /* There are srcALen + srcBLen - 1 outputs in total; this is the number of groups of four. */
00243
00244 blkCnt = (srcALen + srcBLen - 1u) >> 2;
00245
00246 while(blkCnt > 0)
00247 {
00248 /* Window start for this group (pScratch itself is advanced at the end of each pass). */
00249 pScr = pScratch;
00250
00251 /* Clear the four sums. */
00252 acc0 = 0;
00253 acc1 = 0;
00254 acc2 = 0;
00255 acc3 = 0;
00256
00257 /* Preload two packed reads from the window. Presumably each is a 32-bit read of two samples that also advances pScr by two samples - the macro is defined outside this file, TODO confirm. */
00258 x1 = *__SIMD32(pScr)++;
00259
00260 /* NOTE(review): unlike the copy/fill code above, the packed reads in the loops below are not switched off by UNALIGNED_SUPPORT_DISABLE - confirm they are safe on targets that define it. */
00261 x2 = *__SIMD32(pScr)++;
00262 /* Number of full groups of four taps in the shorter input. */
00263 tapCnt = (srcBLen) >> 2u;
00264
00265 while(tapCnt > 0u)
00266 {
00267 /* The intrinsics used here are defined elsewhere; the comments assume the usual ARM DSP meanings (__SMLAD: dual 16x16 multiply-accumulate, __SMLADX: same with halves exchanged, __PKHBT: halfword pack) - TODO confirm. */
00268 /* Packed reads of taps 0-1 and taps 2-3 of the current group. */
00269 y1 = _SIMD32_OFFSET(pIn2);
00270 y2 = _SIMD32_OFFSET(pIn2 + 2u);
00271 /* acc0 (window offset 0): first scratch pair with taps 0-1. */
00272 acc0 = __SMLAD(x1, y1, acc0);
00273 /* acc2 (window offset 2): second scratch pair with taps 0-1. */
00274 acc2 = __SMLAD(x2, y1, acc2);
00275 /* x3 is packed from one half of x1 and one half of x2; the operand order depends on endianness. */
00276 #ifndef ARM_MATH_BIG_ENDIAN
00277 x3 = __PKHBT(x2, x1, 0);
00278 #else
00279 x3 = __PKHBT(x1, x2, 0);
00280 #endif
00281 /* acc1 (window offset 1): repacked pair with taps 0-1, using the exchanging form. */
00282 acc1 = __SMLADX(x3, y1, acc1);
00283 /* Next packed pair from scratch; pScr is already past the two preloaded pairs. */
00284 x1 = _SIMD32_OFFSET(pScr);
00285 /* acc0: taps 2-3. */
00286 acc0 = __SMLAD(x2, y2, acc0);
00287 /* acc2: taps 2-3. */
00288 acc2 = __SMLAD(x1, y2, acc2);
00289 /* Repack from x2 and the freshly loaded x1. */
00290 #ifndef ARM_MATH_BIG_ENDIAN
00291 x3 = __PKHBT(x1, x2, 0);
00292 #else
00293 x3 = __PKHBT(x2, x1, 0);
00294 #endif
00295 /* acc3 (window offset 3): taps 0-1. */
00296 acc3 = __SMLADX(x3, y1, acc3);
00297 /* acc1: taps 2-3. */
00298 acc1 = __SMLADX(x3, y2, acc1);
00299 /* NOTE(review): in the last group this load can reach one sample past the data written to scratch (e.g. lengths 5 and 4); that sample looks unused - confirm the scratch sizing. */
00300 x2 = _SIMD32_OFFSET(pScr + 2u);
00301 /* Repack from the new x1 and the new x2. */
00302 #ifndef ARM_MATH_BIG_ENDIAN
00303 x3 = __PKHBT(x2, x1, 0);
00304 #else
00305 x3 = __PKHBT(x1, x2, 0);
00306 #endif
00307 /* acc3: taps 2-3. */
00308 acc3 = __SMLADX(x3, y2, acc3);
00309 /* Advance both sequences by four samples. */
00310 pIn2 += 4u;
00311
00312 pScr += 4u;
00313
00314
00315
00316 tapCnt--;
00317 }
00318
00319 /* pScr is four samples ahead of the current window position because of */
00320 /* the x1/x2 read-ahead (assuming the preloads advanced it); step it back */
00321 /* before the scalar tail. */
00322 pScr -= 4u;
00323
00324 /* Remaining srcBLen % 4 taps, handled one at a time. */
00325
00326 tapCnt = (srcBLen) & 3u;
00327
00328 while(tapCnt > 0u)
00329 {
00330 /* One tap multiplied with four adjacent scratch samples, one product per sum. */
00331
00332 acc0 += (*pScr++ * *pIn2);
00333 acc1 += (*pScr++ * *pIn2);
00334 acc2 += (*pScr++ * *pIn2);
00335 acc3 += (*pScr++ * *pIn2++);
00336 /* Four increments minus three: pScr moves forward one sample per tap. */
00337 pScr -= 3u;
00338
00339
00340 tapCnt--;
00341 }
00342
00343 blkCnt--;
00344
00345 /* Store the four results, stepping pOut by inc after each one. */
00346
00347 #ifdef CCS
00348 *pOut = (__SSATA(acc0, 15u, 16));
00349 pOut += inc;
00350 *pOut = (__SSATA(acc1, 15u, 16));
00351 pOut += inc;
00352 *pOut = (__SSATA(acc2, 15u, 16));
00353 pOut += inc;
00354 *pOut = (__SSATA(acc3, 15u, 16));
00355 pOut += inc;
00356 #else
00357 *pOut = (__SSAT(acc0 >> 15u, 16));
00358 pOut += inc;
00359 *pOut = (__SSAT(acc1 >> 15u, 16));
00360 pOut += inc;
00361 *pOut = (__SSAT(acc2 >> 15u, 16));
00362 pOut += inc;
00363 *pOut = (__SSAT(acc3 >> 15u, 16));
00364 pOut += inc;
00365
00366 #endif
00367
00368 /* Rewind the shorter input and slide the window start by four samples. */
00369 pIn2 = py;
00370
00371 pScratch += 4u;
00372
00373 }
00374
00375 /* Leftover (srcALen + srcBLen - 1) % 4 outputs, one per pass. */
00376 blkCnt = (srcALen + srcBLen - 1u) & 0x3;
00377
00378
00379 while(blkCnt > 0)
00380 {
00381 /* Window start for this output. */
00382 pScr = pScratch;
00383
00384 /* Single sum for this output. */
00385 acc0 = 0;
00386 /* Taps are consumed in pairs with packed reads. */
00387 tapCnt = (srcBLen) >> 1u;
00388
00389 while(tapCnt > 0u)
00390 {
00391
00392 /* Packed pair from the scratch window. */
00393 x1 = *__SIMD32(pScr)++;
00394
00395 /* Packed pair from the shorter input. */
00396 y1 = *__SIMD32(pIn2)++;
00397
00398 acc0 = __SMLAD(x1, y1, acc0);
00399
00400
00401 tapCnt--;
00402 }
00403 /* One extra scalar tap when srcBLen is odd. */
00404 tapCnt = (srcBLen) & 1u;
00405
00406
00407 while(tapCnt > 0u)
00408 {
00409
00410
00411 acc0 += (*pScr++ * *pIn2++);
00412
00413
00414 tapCnt--;
00415 }
00416
00417 blkCnt--;
00418
00419 /* Store the result, then step pOut by inc. */
00420 #ifdef CCS
00421 *pOut = (q15_t) (__SSATA((acc0), 15, 16));
00422 #else
00423 *pOut = (q15_t) (__SSAT((acc0 >> 15), 16));
00424 #endif
00425 pOut += inc;
00426
00427 /* Rewind the shorter input and slide the window start by one sample. */
00428 pIn2 = py;
00429
00430 pScratch += 1u;
00431
00432 }
00433 }
00434