00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00070 void arm_conv_fast_q15(
00071 q15_t * pSrcA,
00072 uint32_t srcALen,
00073 q15_t * pSrcB,
00074 uint32_t srcBLen,
00075 q15_t * pDst,
00076 q15_t * pScratch1,
00077 q15_t *pScratch2)
00078 {
00079 q31_t acc0, acc1, acc2, acc3;
00080 q31_t x1, x2, x3;
00081 q31_t y1, y2;
00082 q15_t *pOut = pDst;
00083 q15_t *pScr1 = pScratch1;
00084 q15_t *pScr2 = pScratch2;
00085 q15_t *pIn1;
00086 q15_t *pIn2;
00087 q15_t *px;
00088 q15_t *py;
00089 uint32_t j, k, blkCnt;
00090 uint32_t tapCnt;
00091
00092
00093
00094
00095 if(srcALen >= srcBLen)
00096 {
00097
00098 pIn1 = pSrcA;
00099
00100
00101 pIn2 = pSrcB;
00102 }
00103 else
00104 {
00105
00106 pIn1 = pSrcB;
00107
00108
00109 pIn2 = pSrcA;
00110
00111
00112 j = srcBLen;
00113 srcBLen = srcALen;
00114 srcALen = j;
00115 }
00116
00117
00118 pScr2 = pScratch2 + srcBLen - 1;
00119
00120
00121 px = pIn2;
00122
00123
00124 k = srcBLen >> 2u;
00125
00126
00127
00128
00129
00130 while(k > 0u)
00131 {
00132
00133 *pScr2-- = *px++;
00134 *pScr2-- = *px++;
00135 *pScr2-- = *px++;
00136 *pScr2-- = *px++;
00137
00138
00139 k--;
00140 }
00141
00142
00143
00144 k = srcBLen % 0x4u;
00145
00146 while(k > 0u)
00147 {
00148
00149 *pScr2-- = *px++;
00150
00151
00152 k--;
00153 }
00154
00155
00156 pScr1 = pScratch1;
00157
00158
00159
00160 arm_fill_q15(0, pScr1, (srcBLen - 1u));
00161
00162
00163 pScr1 += (srcBLen - 1u);
00164
00165
00166
00167 #ifndef UNALIGNED_SUPPORT_DISABLE
00168
00169
00170 arm_copy_q15(pIn1, pScr1, srcALen);
00171
00172
00173 pScr1 += srcALen;
00174
00175 #else
00176
00177
00178 k = srcALen >> 2u;
00179
00180
00181
00182 while(k > 0u)
00183 {
00184
00185 *pScr1++ = *pIn1++;
00186 *pScr1++ = *pIn1++;
00187 *pScr1++ = *pIn1++;
00188 *pScr1++ = *pIn1++;
00189
00190
00191 k--;
00192 }
00193
00194
00195
00196 k = srcALen % 0x4u;
00197
00198 while(k > 0u)
00199 {
00200
00201 *pScr1++ = *pIn1++;
00202
00203
00204 k--;
00205 }
00206
00207 #endif
00208
00209
00210 #ifndef UNALIGNED_SUPPORT_DISABLE
00211
00212
00213 arm_fill_q15(0, pScr1, (srcBLen - 1u));
00214
00215
00216 pScr1 += (srcBLen - 1u);
00217
00218 #else
00219
00220
00221 k = (srcBLen - 1u) >> 2u;
00222
00223
00224
00225 while(k > 0u)
00226 {
00227
00228 *pScr1++ = 0;
00229 *pScr1++ = 0;
00230 *pScr1++ = 0;
00231 *pScr1++ = 0;
00232
00233
00234 k--;
00235 }
00236
00237
00238
00239 k = (srcBLen - 1u) % 0x4u;
00240
00241 while(k > 0u)
00242 {
00243
00244 *pScr1++ = 0;
00245
00246
00247 k--;
00248 }
00249
00250 #endif
00251
00252
00253 py = pScratch2;
00254
00255
00256
00257 pIn2 = py;
00258
00259
00260
00261
00262
00263 blkCnt = (srcALen + srcBLen - 1u) >> 2;
00264
00265 while(blkCnt > 0)
00266 {
00267
00268 pScr1 = pScratch1;
00269
00270
00271 acc0 = 0;
00272 acc1 = 0;
00273 acc2 = 0;
00274 acc3 = 0;
00275
00276
00277 x1 = *__SIMD32(pScr1)++;
00278
00279
00280 x2 = *__SIMD32(pScr1)++;
00281
00282 tapCnt = (srcBLen) >> 2u;
00283
00284 while(tapCnt > 0u)
00285 {
00286
00287
00288 y1 = _SIMD32_OFFSET(pIn2);
00289 y2 = _SIMD32_OFFSET(pIn2 + 2u);
00290
00291
00292 acc0 = __SMLAD(x1, y1, acc0);
00293 acc2 = __SMLAD(x2, y1, acc2);
00294
00295
00296 #ifndef ARM_MATH_BIG_ENDIAN
00297 x3 = __PKHBT(x2, x1, 0);
00298 #else
00299 x3 = __PKHBT(x1, x2, 0);
00300 #endif
00301
00302
00303 acc1 = __SMLADX(x3, y1, acc1);
00304
00305
00306 x1 = _SIMD32_OFFSET(pScr1);
00307
00308
00309 acc0 = __SMLAD(x2, y2, acc0);
00310 acc2 = __SMLAD(x1, y2, acc2);
00311
00312
00313 #ifndef ARM_MATH_BIG_ENDIAN
00314 x3 = __PKHBT(x1, x2, 0);
00315 #else
00316 x3 = __PKHBT(x2, x1, 0);
00317 #endif
00318
00319 acc3 = __SMLADX(x3, y1, acc3);
00320 acc1 = __SMLADX(x3, y2, acc1);
00321
00322 x2 = _SIMD32_OFFSET(pScr1 + 2u);
00323
00324 #ifndef ARM_MATH_BIG_ENDIAN
00325 x3 = __PKHBT(x2, x1, 0);
00326 #else
00327 x3 = __PKHBT(x1, x2, 0);
00328 #endif
00329
00330 acc3 = __SMLADX(x3, y2, acc3);
00331
00332
00333 pIn2 += 4u;
00334 pScr1 += 4u;
00335
00336
00337
00338 tapCnt--;
00339 }
00340
00341
00342 pScr1 -= 4u;
00343
00344
00345 tapCnt = (srcBLen) & 3u;
00346
00347 while(tapCnt > 0u)
00348 {
00349
00350
00351 acc0 += (*pScr1++ * *pIn2);
00352 acc1 += (*pScr1++ * *pIn2);
00353 acc2 += (*pScr1++ * *pIn2);
00354 acc3 += (*pScr1++ * *pIn2++);
00355
00356 pScr1 -= 3u;
00357
00358
00359 tapCnt--;
00360 }
00361
00362 blkCnt--;
00363
00364
00365
00366
00367 #ifndef ARM_MATH_BIG_ENDIAN
00368
00369 *__SIMD32(pOut)++ =
00370 #ifdef CCS
00371 __PKHBT(__SSATA(acc0, 15, 16), __SSATA(acc1, 15, 16), 16);
00372
00373 *__SIMD32(pOut)++ =
00374 __PKHBT(__SSATA(acc2, 15, 16), __SSATA(acc3, 15, 16), 16);
00375 #else
00376 __PKHBT(__SSAT((acc0 >> 15), 16), __SSAT((acc1 >> 15), 16), 16);
00377
00378 *__SIMD32(pOut)++ =
00379 __PKHBT(__SSAT((acc2 >> 15), 16), __SSAT((acc3 >> 15), 16), 16);
00380 #endif
00381
00382 #else
00383
00384 *__SIMD32(pOut)++ =
00385 #ifdef CCS
00386 __PKHBT(__SSATA(acc1, 15, 16), __SSATA(acc0, 15, 16), 16);
00387
00388 *__SIMD32(pOut)++ =
00389 __PKHBT(__SSATA(acc3, 15, 16), __SSATA(acc2, 15, 16), 16);
00390 #else
00391 __PKHBT(__SSAT((acc1 >> 15), 16), __SSAT((acc0 >> 15), 16), 16);
00392
00393 *__SIMD32(pOut)++ =
00394 __PKHBT(__SSAT((acc3 >> 15), 16), __SSAT((acc2 >> 15), 16), 16);
00395
00396 #endif
00397
00398 #endif
00399
00400
00401 pIn2 = py;
00402
00403 pScratch1 += 4u;
00404
00405 }
00406
00407
00408 blkCnt = (srcALen + srcBLen - 1u) & 0x3;
00409
00410
00411 while(blkCnt > 0)
00412 {
00413
00414 pScr1 = pScratch1;
00415
00416
00417 acc0 = 0;
00418
00419 tapCnt = (srcBLen) >> 1u;
00420
00421 while(tapCnt > 0u)
00422 {
00423
00424
00425 x1 = *__SIMD32(pScr1)++;
00426
00427
00428 y1 = *__SIMD32(pIn2)++;
00429
00430 acc0 = __SMLAD(x1, y1, acc0);
00431
00432
00433 tapCnt--;
00434 }
00435
00436 tapCnt = (srcBLen) & 1u;
00437
00438
00439 while(tapCnt > 0u)
00440 {
00441
00442
00443 acc0 += (*pScr1++ * *pIn2++);
00444
00445
00446 tapCnt--;
00447 }
00448
00449 blkCnt--;
00450
00451
00452
00453 #ifdef CCS
00454 *pOut++ = (q15_t) (__SSATA(acc0, 15, 16));
00455 #else
00456 *pOut++ = (q15_t) (__SSAT((acc0 >> 15), 16));
00457 #endif
00458
00459
00460
00461 pIn2 = py;
00462
00463 pScratch1 += 1u;
00464
00465 }
00466
00467 }
00468