00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_std_q15.c 00009 * 00010 * Description: Standard deviation of an array of Q15 type. 00011 * 00012 * Target Processor: Cortex-R4/R5 00013 * 00014 * Version 1.0.0 2011/03/08 00015 * Alpha release. 00016 * 00017 * Version 1.0.1 2011/09/30 00018 * Beta release. 00019 * 00020 * Version 2.0.0 2011/12/15 00021 * Final release. 00022 * 00023 * -------------------------------------------------------------------- */ 00024 #include "arm_math.h" 00025 00060 void arm_std_q15( 00061 q15_t * pSrc, 00062 uint32_t blockSize, 00063 q15_t * pResult) 00064 { 00065 q63_t sum = 0; /* Accumulator */ 00066 q31_t meanOfSquares, squareOfMean; /* square of mean and mean of square */ 00067 q15_t mean; /* mean */ 00068 q31_t in1, in2, in3, in4; /* temporary variables to hold input value */ 00069 q15_t in; /* temporary input value */ 00070 uint32_t blkCnt; /* loop counter */ 00071 q15_t t; /* Temporary variable */ 00072 q63_t sumSquare = 0; 00073 q31_t one = 0x7FFF7FFF; 00074 00075 /*loop Unrolling */ 00076 blkCnt = blockSize >> 3u; 00077 00078 /* First part of the processing with loop unrolling. Compute 8 outputs at a time. 00079 ** a second loop below computes the remaining 1 to 7 samples. */ 00080 while(blkCnt > 0u) 00081 { 00082 /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ 00083 /* Compute Sum of squares of the input samples 00084 * and then store the result in a temporary variable, sum. */ 00085 /* read two samples from source buffer */ 00086 in1 = _SIMD32_OFFSET(pSrc); 00087 in2 = _SIMD32_OFFSET(pSrc + 2); 00088 00089 /* calculate square and accumulate to accumulator */ 00090 sumSquare = __SMLALD(in1, in1, sumSquare); 00091 /* calculate sum of inputs by multiplying input with 1 and accumulate to accumulator */ 00092 sum = __SMLALD(in1, one, sum); 00093 00094 /* calculate square and accumulate to accumulator */ 00095 sumSquare = __SMLALD(in2, in2, sumSquare); 00096 00097 /* read two samples from source buffer */ 00098 in3 = _SIMD32_OFFSET(pSrc + 4); 00099 00100 /* calculate sum of inputs by multiplying input with 1 and accumulate to accumulator */ 00101 sum = __SMLALD(in2, one, sum); 00102 00103 /* read two samples from source buffer */ 00104 in4 = _SIMD32_OFFSET(pSrc + 6); 00105 00106 /* calculate square and accumulate to accumulator */ 00107 sumSquare = __SMLALD(in3, in3, sumSquare); 00108 00109 /* calculate sum of inputs by multiplying input with 1 and accumulate to accumulator */ 00110 sum = __SMLALD(in3, one, sum); 00111 00112 /* calculate square and accumulate to accumulator */ 00113 sumSquare = __SMLALD(in4, in4, sumSquare); 00114 00115 /* calculate sum of inputs by multiplying input with 1 and accumulate to accumulator */ 00116 sum = __SMLALD(in4, one, sum); 00117 00118 /* update pointer to process next samples */ 00119 pSrc += 8u; 00120 00121 /* Decrement the loop counter */ 00122 blkCnt--; 00123 } 00124 00125 /* If the blockSize is not a multiple of 8, compute any remaining output samples here. 00126 ** No loop unrolling is used. */ 00127 blkCnt = blockSize % 0x8u; 00128 00129 while(blkCnt > 0u) 00130 { 00131 /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */ 00132 /* Compute Sum of squares of the input samples 00133 * and then store the result in a temporary variable, sum. */ 00134 in = *pSrc++; 00135 sumSquare = __SMLALD(in, in, sumSquare); 00136 sum = __SMLALD(in, one, sum); 00137 00138 /* Decrement the loop counter */ 00139 blkCnt--; 00140 } 00141 00142 /* Compute Mean of squares of the input samples 00143 * and then store the result in a temporary variable, meanOfSquares. */ 00144 t = (q15_t) ((1.0 / (blockSize - 1)) * 16384LL); 00145 00146 #ifdef CCS 00147 meanOfSquares = __SSATA((sumSquare), 15u, 16u); 00148 #else 00149 meanOfSquares = __SSAT((sumSquare >> 15u), 16u); 00150 #endif 00151 00152 meanOfSquares = (q31_t) ((meanOfSquares * t) >> 14u); 00153 00154 /* Compute mean of all input values */ 00155 t = (q15_t) ((1.0 / (blockSize * (blockSize - 1))) * 32768LL); 00156 00157 #ifdef CCS 00158 mean = (q15_t) __SSATA(sum, 15u, 16u); 00159 #else 00160 mean = (q15_t) __SSAT(sum >> 15u, 16u); 00161 #endif 00162 00163 /* Compute square of mean */ 00164 squareOfMean = ((q31_t) mean * mean) >> 15; 00165 squareOfMean = (q31_t) (((q63_t) squareOfMean * t) >> 15); 00166 00167 /* mean of the squares minus the square of the mean. */ 00168 in1 = (q15_t) (meanOfSquares - squareOfMean); 00169 00170 /* Compute standard deviation and store the result to the destination */ 00171 arm_sqrt_q15(in1, pResult); 00172 } 00173