00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "arm_math.h"
00026
/**
 * Correlation of two Q15 sequences, computed through a caller-supplied
 * scratch buffer.
 *
 * The input with more samples is copied into pScratch with
 * (shorter length - 1) zeros placed before and after it. Each output sample
 * is the sum of products of the shorter input with a window of that padded
 * copy; the sum is shifted right by 15 bits and saturated to 16 bits before
 * being stored.
 *
 * @param pSrcA     first input sequence
 * @param srcALen   number of samples in pSrcA
 * @param pSrcB     second input sequence
 * @param srcBLen   number of samples in pSrcB
 * @param pDst      output buffer. (srcALen + srcBLen - 1) results are stored.
 *                  When srcALen >= srcBLen they start at
 *                  pDst[srcALen - srcBLen] and ascend, so the elements in
 *                  front of that position are not written by this function.
 *                  Otherwise they start at pDst[srcALen + srcBLen - 2] and
 *                  descend.
 * @param pScratch  work buffer. (longer length + 2 * (shorter length - 1))
 *                  q15_t elements are written to it.
 *
 * NOTE(review): the lengths are not validated. (srcBLen - 1u) is evaluated in
 * unsigned arithmetic, so a zero-length shorter input wraps to a very large
 * count.
 * NOTE(review): the look-ahead load at the end of the four-tap loop appears
 * able to touch one element past the written part of pScratch when both the
 * output count and the shorter length are multiples of four; the loaded half
 * does not seem to be used. Verify before sizing the buffer exactly.
 */
00068 void arm_correlate_q15(
00069 q15_t * pSrcA,
00070 uint32_t srcALen,
00071 q15_t * pSrcB,
00072 uint32_t srcBLen,
00073 q15_t * pDst,
00074 q15_t * pScratch)
00075 {
00076 q15_t *pIn1;                     /* input with more samples */
00077 q15_t *pIn2;                     /* read cursor in the input with fewer samples */
00078 q63_t acc0, acc1, acc2, acc3;    /* q63_t accumulators, one per output sample in flight */
00079 q15_t *py;                       /* start of the shorter input, used to rewind pIn2 */
00080 q31_t x1, x2, x3;                /* packed pairs of scratch-buffer samples */
00081 uint32_t j, blkCnt, outBlockSize;        /* loop counters and temporaries */
00082 int32_t inc = 1;                 /* step applied to pOut after each store: +1 or -1 */
00083 uint32_t tapCnt;                 /* inner-loop counter over the shorter input */
00084 q31_t y1, y2;                    /* packed pairs of shorter-input samples */
00085 q15_t *pScr;                     /* cursor in the scratch buffer */
00086 q15_t *pOut = pDst;              /* output cursor */
00087
00088 /* Both branches below make pIn1 address the input with more samples and */
00089 /* pIn2 the one with fewer, so that from here on srcALen is the larger */
00090 /* length and srcBLen the smaller one. */
00091
00092 /* When the roles of A and B have to be exchanged, the results are stored */
00093 /* from the end of the output range towards pDst[0] (inc = -1). */
00094
00095
00096
00097
00098
00099
00100
00101
00102 if(srcALen >= srcBLen)
00103 {
00104 /* A already has at least as many samples: it takes the pIn1 role. */
00105 pIn1 = (pSrcA);
00106
00107 /* B takes the pIn2 role. */
00108 pIn2 = (pSrcB);
00109
00110 /* Output length if both inputs had srcALen samples. */
00111 outBlockSize = (2u * srcALen) - 1u;
00112
00113 /* (srcALen + srcBLen - 1) samples are actually produced; j is the */
00114 /* difference between the two counts, i.e. srcALen - srcBLen. */
00115
00116
00117 j = outBlockSize - (srcALen + (srcBLen - 1u));
00118
00119 /* Skip the first j elements of pDst; they are left unwritten here. */
00120 pOut += j;
00121
00122 }
00123 else
00124 {
00125 /* B has more samples: it takes the pIn1 role. */
00126 pIn1 = (pSrcB);
00127
00128 /* A has fewer samples: it takes the pIn2 role. */
00129 pIn2 = (pSrcA);
00130
00131 /* Exchange the two lengths, using j as the temporary. */
00132 j = srcBLen;
00133 srcBLen = srcALen;
00134 srcALen = j;
00135
00136 /* Start at the last of the (srcALen + srcBLen - 1) output positions */
00137 /* and walk backwards. */
00138 pOut = pDst + ((srcALen + srcBLen) - 2u);
00139
00140 /* Results are stored in descending address order. */
00141 inc = -1;
00142
00143 }
00144 /* Build the zero-padded copy of the longer input in the scratch buffer. */
00145 pScr = pScratch;
00146
00147 /* Leading pad: (srcBLen - 1) zeros. */
00148 arm_fill_q15(0, pScr, (srcBLen - 1u));
00149
00150 /* Step past the leading pad. */
00151 pScr += (srcBLen - 1u);
00152
00153 #ifdef UNALIGNED_SUPPORT_ENABLE
00154
00155 /* Copy the srcALen samples of the longer input behind the pad. */
00156 arm_copy_q15(pIn1, pScr, srcALen);
00157
00158 /* Step past the copied samples. */
00159
00160 pScr += srcALen;
00161
00162 #else
00163
00164 /* Same copy done by hand, four samples per iteration. */
00165 j = srcALen >> 2u;
00166
00167
00168
00169 while(j > 0u)
00170 {
00171
00172 *pScr++ = *pIn1++;
00173 *pScr++ = *pIn1++;
00174 *pScr++ = *pIn1++;
00175 *pScr++ = *pIn1++;
00176
00177
00178 j--;
00179 }
00180
00181 /* Copy the 0..3 samples left over after the groups of four. */
00182
00183 j = srcALen % 0x4u;
00184
00185 while(j > 0u)
00186 {
00187
00188 *pScr++ = *pIn1++;
00189
00190
00191 j--;
00192 }
00193
00194 #endif
00195
00196 #ifdef UNALIGNED_SUPPORT_ENABLE
00197
00198 /* Trailing pad: another (srcBLen - 1) zeros after the copied samples. */
00199 arm_fill_q15(0, pScr, (srcBLen - 1u));
00200
00201 /* Step past the trailing pad. */
00202 pScr += (srcBLen - 1u);
00203
00204 #else
00205
00206 /* Same trailing pad written by hand, four zeros per iteration. */
00207 j = (srcBLen - 1u) >> 2u;
00208
00209
00210
00211 while(j > 0u)
00212 {
00213
00214 *pScr++ = 0;
00215 *pScr++ = 0;
00216 *pScr++ = 0;
00217 *pScr++ = 0;
00218
00219
00220 j--;
00221 }
00222
00223 /* Write the 0..3 zeros left over after the groups of four. */
00224
00225 j = (srcBLen - 1u) % 0x4u;
00226
00227 while(j > 0u)
00228 {
00229
00230 *pScr++ = 0;
00231
00232
00233 j--;
00234 }
00235
00236 #endif
00237
00238 /* Remember the start of the shorter input so pIn2 can be rewound. */
00239 py = pIn2;
00240
00241 /* Number of passes that each produce four output samples. */
00242
00243 blkCnt = (srcALen + srcBLen - 1u) >> 2;
00244
00245 while(blkCnt > 0)
00246 {
00247 /* Restart at the current window origin in the scratch buffer. */
00248 pScr = pScratch;
00249
00250 /* Clear the four accumulators. */
00251 acc0 = 0;
00252 acc1 = 0;
00253 acc2 = 0;
00254 acc3 = 0;
00255
00256 /* Load the first pair of scratch samples and advance pScr (__SIMD32 is defined outside this file; presumably a 32-bit access covering two q15 values). */
00257 x1 = *__SIMD32(pScr)++;
00258
00259 /* Load the next pair of scratch samples. */
00260 x2 = *__SIMD32(pScr)++;
00261 /* Number of groups of four taps in the shorter input. */
00262 tapCnt = (srcBLen) >> 2u;
00263
00264 while(tapCnt > 0u)
00265 {
00266 /* Two pairs of shorter-input samples, read at pIn2 and pIn2 + 2. */
00267
00268 y1 = _SIMD32_OFFSET(pIn2);
00269 y2 = _SIMD32_OFFSET(pIn2 + 2u);
00270 /* acc0: scratch pair x1 against y1 (presumably a dual 16x16 multiply-accumulate). */
00271 acc0 = __SMLALD(x1, y1, acc0);
00272 /* acc2: scratch pair x2 against y1. */
00273 acc2 = __SMLALD(x2, y1, acc2);
00274 /* Build x3 from one halfword of x1 and one of x2; operand order depends on endianness. */
00275 #ifndef ARM_MATH_BIG_ENDIAN
00276 x3 = __PKHBT(x2, x1, 0);
00277 #else
00278 x3 = __PKHBT(x1, x2, 0);
00279 #endif
00280 /* acc1: packed pair x3 against y1, using the X variant of the multiply-accumulate. */
00281 acc1 = __SMLALDX(x3, y1, acc1);
00282 /* Fetch the scratch pair at the current pScr position into x1. */
00283 x1 = _SIMD32_OFFSET(pScr);
00284 /* acc0: scratch pair x2 against the second tap pair y2. */
00285 acc0 = __SMLALD(x2, y2, acc0);
00286 /* acc2: freshly loaded pair x1 against y2. */
00287 acc2 = __SMLALD(x1, y2, acc2);
00288 /* Re-pack x3 from the new x1 and the current x2. */
00289 #ifndef ARM_MATH_BIG_ENDIAN
00290 x3 = __PKHBT(x1, x2, 0);
00291 #else
00292 x3 = __PKHBT(x2, x1, 0);
00293 #endif
00294 /* acc3: packed pair x3 against y1. */
00295 acc3 = __SMLALDX(x3, y1, acc3);
00296 /* acc1: the same packed pair against y2. */
00297 acc1 = __SMLALDX(x3, y2, acc1);
00298 /* Fetch the scratch pair at pScr + 2 into x2. */
00299 x2 = _SIMD32_OFFSET(pScr + 2u);
00300 /* Re-pack x3 from the new x2 and the current x1. */
00301 #ifndef ARM_MATH_BIG_ENDIAN
00302 x3 = __PKHBT(x2, x1, 0);
00303 #else
00304 x3 = __PKHBT(x1, x2, 0);
00305 #endif
00306 /* acc3: packed pair x3 against y2. */
00307 acc3 = __SMLALDX(x3, y2, acc3);
00308 /* Advance four taps in the shorter input. */
00309 pIn2 += 4u;
00310 /* Advance four samples in the scratch buffer. */
00311 pScr += 4u;
00312
00313
00314 /* One group of four taps done. */
00315 tapCnt--;
00316 }
00317
00318 /* pScr runs ahead of the tap position because of the two loads done */
00319 /* before the loop; step back four samples so the scalar tail lines up. */
00320
00321 pScr -= 4u;
00322
00323 /* Handle the (srcBLen & 3) taps left over, one tap at a time. */
00324
00325 tapCnt = (srcBLen) & 3u;
00326
00327 while(tapCnt > 0u)
00328 {
00329 /* One tap feeds all four outputs: the same pIn2 sample is multiplied */
00330 /* with four consecutive scratch samples; pIn2 advances on the last one. */
00331 acc0 += (*pScr++ * *pIn2);
00332 acc1 += (*pScr++ * *pIn2);
00333 acc2 += (*pScr++ * *pIn2);
00334 acc3 += (*pScr++ * *pIn2++);
00335 /* Net advance of one scratch sample per tap. */
00336 pScr -= 3u;
00337
00338
00339 tapCnt--;
00340 }
00341
00342 blkCnt--;
00343
00344 /* Store the four results, each followed by a step of inc. The non-CCS path */
00345 /* shifts right by 15 and saturates to 16 bits; CCS builds pass the shift to __SSATA. */
00346 #ifdef CCS
00347 *pOut = (__SSATA(acc0, 15u, 16));
00348 pOut += inc;
00349 *pOut = (__SSATA(acc1, 15u, 16));
00350 pOut += inc;
00351 *pOut = (__SSATA(acc2, 15u, 16));
00352 pOut += inc;
00353 *pOut = (__SSATA(acc3, 15u, 16));
00354 pOut += inc;
00355 #else
00356 *pOut = (__SSAT(acc0 >> 15u, 16));
00357 pOut += inc;
00358 *pOut = (__SSAT(acc1 >> 15u, 16));
00359 pOut += inc;
00360 *pOut = (__SSAT(acc2 >> 15u, 16));
00361 pOut += inc;
00362 *pOut = (__SSAT(acc3 >> 15u, 16));
00363 pOut += inc;
00364
00365 #endif
00366
00367 /* Rewind the shorter input for the next pass. */
00368 pIn2 = py;
00369 /* Slide the window origin past the four outputs just produced. */
00370 pScratch += 4u;
00371
00372 }
00373
00374 /* Output samples left over after the groups of four (0..3 of them). */
00375 blkCnt = (srcALen + srcBLen - 1u) & 0x3;
00376
00377 /* Each pass of this loop produces a single output sample. */
00378 while(blkCnt > 0)
00379 {
00380 /* Restart at the current window origin in the scratch buffer. */
00381 pScr = pScratch;
00382
00383 /* Single accumulator for this output. */
00384 acc0 = 0;
00385 /* Taps are consumed two at a time first. */
00386 tapCnt = (srcBLen) >> 1u;
00387
00388 while(tapCnt > 0u)
00389 {
00390
00391 /* Pair of scratch samples; pScr advances past them. */
00392 x1 = *__SIMD32(pScr)++;
00393
00394 /* Matching pair of shorter-input samples; pIn2 advances past them. */
00395 y1 = *__SIMD32(pIn2)++;
00396 /* Accumulate both products. */
00397 acc0 = __SMLALD(x1, y1, acc0);
00398
00399
00400 tapCnt--;
00401 }
00402 /* One more tap remains when srcBLen is odd. */
00403 tapCnt = (srcBLen) & 1u;
00404
00405
00406 while(tapCnt > 0u)
00407 {
00408
00409 /* Scalar multiply-accumulate for the odd tap. */
00410 acc0 += (*pScr++ * *pIn2++);
00411
00412
00413 tapCnt--;
00414 }
00415
00416 blkCnt--;
00417
00418 /* Store the result with the same scaling and saturation as the four-output path. */
00419 #ifdef CCS
00420 *pOut = (q15_t) (__SSATA(acc0, 15, 16));
00421 #else
00422 *pOut = (q15_t) (__SSAT((acc0 >> 15), 16));
00423 #endif
00424 /* Move to the next output position (forwards or backwards). */
00425 pOut += inc;
00426
00427 /* Rewind the shorter input for the next pass. */
00428 pIn2 = py;
00429 /* Slide the window origin by one sample. */
00430 pScratch += 1u;
00431
00432 }
00433
00434
00435 }
00436