Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00076 arm_status arm_mat_mult_fast_q15(
00077 const arm_matrix_instance_q15 * pSrcA,
00078 const arm_matrix_instance_q15 * pSrcB,
00079 arm_matrix_instance_q15 * pDst,
00080 q15_t * pScratch)
00081 {
00082 q31_t sum;
00083 q31_t in;
00084 q15_t *pSrcBT = pScratch;
00085 q15_t *pInA = pSrcA->pData;
00086 q15_t *pInB = pSrcB->pData;
00087 q15_t *px;
00088 uint16_t numRowsA = pSrcA->numRows;
00089 uint16_t numColsB = pSrcB->numCols;
00090 uint16_t numColsA = pSrcA->numCols;
00091 uint16_t numRowsB = pSrcB->numRows;
00092 uint16_t col, i = 0u, row = numRowsB, colCnt;
00093 arm_status status;
00094
00095 #ifdef UNALIGNED_SUPPORT_DISABLE
00096
00097 q15_t inA1, inA2, inB1, inB2;
00098
00099 #else
00100
00101 q31_t inA1, inA2, inB1, inB2;
00102
00103 #endif // #ifdef UNALIGNED_SUPPORT_DISABLE
00104
00105 #ifdef ARM_MATH_MATRIX_CHECK
00106
00107 if((pSrcA->numCols != pSrcB->numRows) ||
00108 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
00109 {
00110
00111 status = ARM_MATH_SIZE_MISMATCH;
00112 }
00113 else
00114 #endif
00115 {
00116
00117 do
00118 {
00119
00120 col = numColsB >> 2;
00121
00122
00123 px = pSrcBT + i;
00124
00125
00126
00127 while(col > 0u)
00128 {
00129
00130 in = *__SIMD32(pInB)++;
00131
00132
00133 #ifndef ARM_MATH_BIG_ENDIAN
00134
00135 *px = (q15_t) in;
00136
00137 #else
00138
00139 *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
00140
00141 #endif
00142
00143
00144 px += numRowsB;
00145
00146
00147 #ifndef ARM_MATH_BIG_ENDIAN
00148
00149 *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
00150
00151 #else
00152
00153 *px = (q15_t) in;
00154
00155 #endif
00156
00157
00158 px += numRowsB;
00159
00160
00161 in = *__SIMD32(pInB)++;
00162
00163
00164 #ifndef ARM_MATH_BIG_ENDIAN
00165
00166 *px = (q15_t) in;
00167
00168 #else
00169
00170 *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
00171
00172 #endif
00173
00174
00175 px += numRowsB;
00176
00177
00178
00179 #ifndef ARM_MATH_BIG_ENDIAN
00180
00181 *px = (q15_t) ((in & (q31_t) 0xffff0000) >> 16);
00182
00183 #else
00184
00185 *px = (q15_t) in;
00186
00187 #endif
00188
00189
00190 px += numRowsB;
00191
00192
00193 col--;
00194 }
00195
00196
00197
00198 col = numColsB % 0x4u;
00199
00200 while(col > 0u)
00201 {
00202
00203 *px = *pInB++;
00204
00205
00206 px += numRowsB;
00207
00208
00209 col--;
00210 }
00211
00212 i++;
00213
00214
00215 row--;
00216
00217 } while(row > 0u);
00218
00219
00220 row = numRowsA;
00221 i = 0u;
00222 px = pDst->pData;
00223
00224
00225
00226 do
00227 {
00228
00229 col = numColsB;
00230
00231
00232
00233 pInB = pSrcBT;
00234
00235
00236 do
00237 {
00238
00239 sum = 0;
00240
00241
00242 colCnt = numColsA >> 2;
00243
00244
00245 pInA = pSrcA->pData + i;
00246
00247
00248 while(colCnt > 0u)
00249 {
00250
00251 #ifdef UNALIGNED_SUPPORT_DISABLE
00252
00253 inA1 = *pInA++;
00254 inB1 = *pInB++;
00255 inA2 = *pInA++;
00256 sum += inA1 * inB1;
00257 inB2 = *pInB++;
00258
00259 inA1 = *pInA++;
00260 inB1 = *pInB++;
00261 sum += inA2 * inB2;
00262 inA2 = *pInA++;
00263 inB2 = *pInB++;
00264
00265 sum += inA1 * inB1;
00266 sum += inA2 * inB2;
00267
00268 #else
00269
00270 inA1 = *__SIMD32(pInA)++;
00271 inB1 = *__SIMD32(pInB)++;
00272 inA2 = *__SIMD32(pInA)++;
00273 inB2 = *__SIMD32(pInB)++;
00274
00275 sum = __SMLAD(inA1, inB1, sum);
00276 sum = __SMLAD(inA2, inB2, sum);
00277
00278 #endif // #ifdef UNALIGNED_SUPPORT_DISABLE
00279
00280
00281 colCnt--;
00282 }
00283
00284
00285 colCnt = numColsA % 0x4u;
00286
00287 while(colCnt > 0u)
00288 {
00289
00290 sum += (q31_t) (*pInA++) * (*pInB++);
00291
00292 colCnt--;
00293 }
00294
00295
00296 *px = (q15_t) (sum >> 15);
00297 px++;
00298
00299
00300 col--;
00301
00302 } while(col > 0u);
00303
00304 i = i + numColsA;
00305
00306
00307 row--;
00308
00309 } while(row > 0u);
00310
00311
00312 status = ARM_MATH_SUCCESS;
00313 }
00314
00315
00316 return (status);
00317 }
00318