00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "arm_math.h"
00026
00056 arm_status arm_mat_scale_q15(
00057 const arm_matrix_instance_q15 * pSrc,
00058 q15_t scaleFract,
00059 int32_t shift,
00060 arm_matrix_instance_q15 * pDst)
00061 {
00062 q15_t *pIn = pSrc->pData;
00063 q15_t *pOut = pDst->pData;
00064 uint32_t numSamples;
00065 int32_t totShift = 15 - shift;
00066 uint32_t blkCnt;
00067 arm_status status;
00068 q15_t in1, in2, in3, in4;
00069 q31_t out1, out2, out3 ,out4;
00070 q31_t inA1, inA2;
00071
00072 #ifdef ARM_MATH_MATRIX_CHECK
00073
00074 if((pSrc->numRows != pDst->numRows) || (pSrc->numCols != pDst->numCols))
00075 {
00076
00077 status = ARM_MATH_SIZE_MISMATCH;
00078 }
00079 else
00080 #endif
00081 {
00082
00083 numSamples = (uint32_t) pSrc->numRows * pSrc->numCols;
00084
00085
00086 blkCnt = numSamples >> 3u;
00087
00088
00089
00090 while(blkCnt > 0u)
00091 {
00092
00093
00094
00095 inA1 = _SIMD32_OFFSET(pIn);
00096 inA2 = _SIMD32_OFFSET(pIn + 2);
00097
00098
00099
00100
00101 out1 = (q31_t)((q15_t)(inA1 >> 16) * scaleFract);
00102 out2 = (q31_t)((q15_t)inA1 * scaleFract);
00103 out3 = (q31_t)((q15_t)(inA2 >> 16) * scaleFract);
00104 out4 = (q31_t)((q15_t)inA2 * scaleFract);
00105
00106 out1 = out1 >> totShift;
00107 inA1 = _SIMD32_OFFSET(pIn + 4);
00108 out2 = out2 >> totShift;
00109 inA2 = _SIMD32_OFFSET(pIn + 6);
00110 out3 = out3 >> totShift;
00111 out4 = out4 >> totShift;
00112
00113 #ifdef CCS
00114 in1 = (q15_t)(__SSATA(out1, 0, 16));
00115 in2 = (q15_t)(__SSATA(out2, 0, 16));
00116 in3 = (q15_t)(__SSATA(out3, 0, 16));
00117 in4 = (q15_t)(__SSATA(out4, 0, 16));
00118 #else
00119 in1 = (q15_t)(__SSAT(out1, 16));
00120 in2 = (q15_t)(__SSAT(out2, 16));
00121 in3 = (q15_t)(__SSAT(out3, 16));
00122 in4 = (q15_t)(__SSAT(out4, 16));
00123 #endif
00124
00125
00126
00127
00128
00129 _SIMD32_OFFSET(pOut) = __PKHBT(in2, in1,16);
00130 _SIMD32_OFFSET(pOut + 2) = __PKHBT(in4, in3,16);
00131
00132
00133
00134
00135
00136
00137 out1 = (q31_t)((q15_t)(inA1 >> 16) * scaleFract);
00138 out2 = (q31_t)((q15_t)inA1 * scaleFract);
00139 out3 = (q31_t)((q15_t)(inA2 >> 16) * scaleFract);
00140 out4 = (q31_t)((q15_t)inA2 * scaleFract);
00141
00142 out1 = out1 >> totShift;
00143 out2 = out2 >> totShift;
00144 out3 = out3 >> totShift;
00145 out4 = out4 >> totShift;
00146
00147 #ifdef CCS
00148 in1 = (q15_t)(__SSATA(out1, 0, 16));
00149 in2 = (q15_t)(__SSATA(out2, 0, 16));
00150 in3 = (q15_t)(__SSATA(out3, 0, 16));
00151 in4 = (q15_t)(__SSATA(out4, 0, 16));
00152 #else
00153 in1 = (q15_t)(__SSAT(out1, 16));
00154 in2 = (q15_t)(__SSAT(out2, 16));
00155 in3 = (q15_t)(__SSAT(out3, 16));
00156 in4 = (q15_t)(__SSAT(out4, 16));
00157 #endif
00158
00159
00160
00161
00162
00163 _SIMD32_OFFSET(pOut + 4) = __PKHBT(in2, in1,16);
00164 _SIMD32_OFFSET(pOut + 6) = __PKHBT(in4, in3,16);
00165
00166 pIn += 8u;
00167 pOut += 8u;
00168
00169
00170 blkCnt--;
00171 }
00172
00173
00174
00175 blkCnt = numSamples % 0x8u;
00176
00177 while(blkCnt > 0u)
00178 {
00179
00180
00181 #ifdef CCS
00182 *pOut++ =
00183 (q15_t) (__SSATA(((q31_t) (*pIn++) * scaleFract) >> totShift, 0, 16));
00184 #else
00185 *pOut++ =
00186 (q15_t) (__SSAT(((q31_t) (*pIn++) * scaleFract) >> totShift, 16));
00187 #endif // #ifdef CCS
00188
00189
00190 blkCnt--;
00191 }
00192
00193
00194 status = ARM_MATH_SUCCESS;
00195 }
00196
00197
00198 return (status);
00199 }
00200