Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00066 arm_status arm_mat_mult_fast_q31(
00067 const arm_matrix_instance_q31 * pSrcA,
00068 const arm_matrix_instance_q31 * pSrcB,
00069 arm_matrix_instance_q31 * pDst)
00070 {
00071 q31_t *pIn1 = pSrcA->pData;
00072 q31_t *pIn2 = pSrcB->pData;
00073 q31_t *pInA = pSrcA->pData;
00074
00075 q31_t *pOut = pDst->pData;
00076 q31_t *px;
00077 q31_t sum;
00078 uint16_t numRowsA = pSrcA->numRows;
00079 uint16_t numColsB = pSrcB->numCols;
00080 uint16_t numColsA = pSrcA->numCols;
00081 uint16_t col, i = 0u, j, row = numRowsA, colCnt;
00082 arm_status status;
00083 q31_t inA1, inA2, inA3, inA4, inB1, inB2, inB3, inB4;
00084
00085 #ifdef ARM_MATH_MATRIX_CHECK
00086
00087 if((pSrcA->numCols != pSrcB->numRows) ||
00088 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols))
00089 {
00090
00091 status = ARM_MATH_SIZE_MISMATCH;
00092 }
00093 else
00094 #endif
00095 {
00096
00097
00098 do
00099 {
00100
00101 px = pOut + i;
00102
00103
00104 col = numColsB;
00105
00106
00107
00108 pIn2 = pSrcB->pData;
00109
00110 j = 0u;
00111
00112
00113 do
00114 {
00115
00116 sum = 0;
00117
00118
00119 pIn1 = pInA;
00120
00121
00122 colCnt = numColsA >> 2;
00123
00124
00125
00126 while(colCnt > 0u)
00127 {
00128
00129
00130 inB1 = *pIn2;
00131 pIn2 += numColsB;
00132
00133 inA1 = pIn1[0];
00134 inA2 = pIn1[1];
00135
00136 inB2 = *pIn2;
00137 pIn2 += numColsB;
00138
00139 inB3 = *pIn2;
00140 pIn2 += numColsB;
00141
00142 sum = (q31_t) ((((q63_t) sum << 32) +
00143 ((q63_t) inA1 * inB1)) >> 32);
00144 sum = (q31_t) ((((q63_t) sum << 32) +
00145 ((q63_t) inA2 * inB2)) >> 32);
00146
00147 inA3 = pIn1[2];
00148 inA4 = pIn1[3];
00149
00150 inB4 = *pIn2;
00151 pIn2 += numColsB;
00152
00153 sum = (q31_t) ((((q63_t) sum << 32) +
00154 ((q63_t) inA3 * inB3)) >> 32);
00155 sum = (q31_t) ((((q63_t) sum << 32) +
00156 ((q63_t) inA4 * inB4)) >> 32);
00157
00158 pIn1 += 4u;
00159
00160
00161 colCnt--;
00162 }
00163
00164
00165
00166 colCnt = numColsA % 0x4u;
00167
00168 while(colCnt > 0u)
00169 {
00170
00171 inA1 = *pIn2;
00172 inB1 = *pIn1++;
00173 pIn2 += numColsB;
00174
00175 sum = (q31_t) ((((q63_t) sum << 32) +
00176 ((q63_t)inA1 * inB1)) >> 32);
00177
00178
00179 colCnt--;
00180 }
00181
00182
00183 *px++ = sum << 1;
00184
00185
00186 j++;
00187 pIn2 = pSrcB->pData + j;
00188
00189
00190 col--;
00191
00192 } while(col > 0u);
00193
00194
00195 i = i + numColsB;
00196 pInA = pInA + numColsA;
00197
00198
00199 row--;
00200
00201 } while(row > 0u);
00202
00203
00204 status = ARM_MATH_SUCCESS;
00205 }
00206
00207 return (status);
00208 }
00209