Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "arm_math.h"
00025
00053 void arm_mult_q7(
00054 q7_t * pSrcA,
00055 q7_t * pSrcB,
00056 q7_t * pDst,
00057 uint32_t blockSize)
00058 {
00059 uint32_t blkCnt;
00060 q7_t inA1, inA2, inA3, inA4;
00061 q7_t inB1, inB2, inB3, inB4;
00062 q15_t out1, out2, out3, out4;
00063
00064
00065 blkCnt = blockSize >> 3u;
00066
00067
00068
00069 while(blkCnt > 0u)
00070 {
00071
00072
00073
00074 inA1 = *pSrcA;
00075
00076 inB1 = *pSrcB;
00077
00078 inA2 = *(pSrcA + 1);
00079
00080 inB2 = *(pSrcB + 1);
00081
00082
00083 out1 = (q15_t)((q7_t) (inA1) * (inB1));
00084
00085
00086 inA3 = *(pSrcA + 2);
00087
00088 inB3 = *(pSrcB + 2);
00089
00090
00091 out2 = (q15_t)((q7_t) (inA2) * (inB2));
00092
00093
00094 inA4 = *(pSrcA + 3);
00095
00096
00097 out1 = out1 >> 7u;
00098
00099
00100 inB4 = *(pSrcB + 3);
00101
00102
00103 out3 = (q15_t)((q7_t) (inA3) * (inB3));
00104 out4 = (q15_t)((q7_t) (inA4) * (inB4));
00105
00106
00107 out2 = out2 >> 7u;
00108
00109
00110 inA1 = *(pSrcA + 4);
00111
00112
00113 out3 = out3 >> 7u;
00114
00115
00116 inB1 = *(pSrcB + 4);
00117
00118
00119 out4 = out4 >> 7u;
00120
00121
00122 #ifdef CCS
00123
00124 inA3 = (q7_t) __SSATA(out1, 0, 8);
00125 inA4 = (q7_t) __SSATA(out2, 0, 8);
00126 inB3 = (q7_t) __SSATA(out3, 0, 8);
00127 inB4 = (q7_t) __SSATA(out4, 0, 8);
00128
00129 #else
00130
00131 inA3 = (q7_t) __SSAT(out1, 8);
00132 inA4 = (q7_t) __SSAT(out2, 8);
00133 inB3 = (q7_t) __SSAT(out3, 8);
00134 inB4 = (q7_t) __SSAT(out4, 8);
00135
00136 #endif // #ifdef CCS
00137
00138
00139 *__SIMD32(pDst)++ = __PACKq7(inA3, inA4, inB3, inB4);
00140
00141
00142 inA2 = *(pSrcA + 5);
00143
00144 inB2 = *(pSrcB + 5);
00145
00146
00147 out1 = (q15_t)((q7_t) (inA1) * (inB1));
00148
00149
00150 inA3 = *(pSrcA + 6);
00151
00152 inB3 = *(pSrcB + 6);
00153
00154
00155 out2 = (q15_t)((q7_t) (inA2) * (inB2));
00156
00157
00158 inA4 = *(pSrcA + 7);
00159
00160
00161 out1 = out1 >> 7u;
00162
00163
00164 inB4 = *(pSrcB + 7);
00165
00166
00167 out3 = (q15_t)((q7_t) (inA3) * (inB3));
00168 out4 = (q15_t)((q7_t) (inA4) * (inB4));
00169
00170
00171 out2 = out2 >> 7u;
00172 out3 = out3 >> 7u;
00173 out4 = out4 >> 7u;
00174
00175
00176 #ifdef CCS
00177
00178 inA1 = (q7_t) __SSATA(out1, 0, 8);
00179 inA2 = (q7_t) __SSATA(out2, 0, 8);
00180 inA3 = (q7_t) __SSATA(out3, 0, 8);
00181 inA4 = (q7_t) __SSATA(out4, 0, 8);
00182
00183 #else
00184
00185 inA1 = (q7_t) __SSAT(out1, 8);
00186 inA2 = (q7_t) __SSAT(out2, 8);
00187 inA3 = (q7_t) __SSAT(out3, 8);
00188 inA4 = (q7_t) __SSAT(out4, 8);
00189
00190 #endif // #ifdef CCS
00191
00192
00193 *__SIMD32(pDst)++ = __PACKq7(inA1, inA2, inA3, inA4);
00194
00195
00196 pSrcA += 8u;
00197
00198 pSrcB += 8u;
00199
00200
00201 blkCnt--;
00202 }
00203
00204
00205
00206 blkCnt = blockSize % 0x8u;
00207
00208 while(blkCnt > 0u)
00209 {
00210
00211
00212 *pDst++ = (q7_t) (((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7);
00213
00214
00215 blkCnt--;
00216 }
00217 }
00218