Cortex-R DSP Software Library: arm_dot_prod

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------      
00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 
00003 *      
00004 * $Date:        15. December 2011   
00005 * $Revision:    V2.0.0  
00006 *      
00007 * Project:      Cortex-R DSP Library 
00008 * Title:        arm_dot_prod_q31.c      
00009 *      
00010 * Description:  Q31 dot product.      
00011 *      
00012 * Target Processor:          Cortex-R4/R5
00013 *
00014 * Version 1.0.0 2011/03/08
00015 *     Alpha release.
00016 *
00017 * Version 1.0.1 2011/09/30
00018 *     Beta release.
00019 *
00020 * Version 2.0.0 2011/12/15
00021 *     Final release. 
00022 * 
00023 * -------------------------------------------------------------------- */     
00024 #include "arm_math.h"     
00025      
00053 void arm_dot_prod_q31(     
00054   q31_t * pSrcA,     
00055   q31_t * pSrcB,     
00056   uint32_t blockSize,     
00057   q63_t * result)     
00058 {     
00059   q63_t sum = 0, sum1 = 0;                       /* Temporary result storage */     
00060   uint32_t blkCnt;                               /* loop counter */     
00061   q31_t inA1, inA2, inB1, inB2;                  /* Temporary variables to store input data */  
00062     
00063   /*loop Unrolling */     
00064   blkCnt = blockSize >> 3u;     
00065      
00066   /* First part of the processing with loop unrolling.  Compute 8 outputs at a time.      
00067    ** a second loop below computes the remaining 1 to 7 samples. */     
00068   while(blkCnt > 0u)     
00069   {     
00070     /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */     
00071     /* Calculate dot product and then store the result in a temporary buffer. */   
00072   
00073     /* read input from sourceA buffer */  
00074     inA1 = *pSrcA;  
00075     /* read input from sourceB buffer */  
00076     inB1 = *pSrcB;  
00077     inA2 = *(pSrcA + 1);  
00078     inB2 = *(pSrcB + 1);  
00079   
00080     /* multiply and accumulate in 16.48 format */  
00081     sum += ((q63_t)inA1 * inB1) >> 14u;     
00082     sum1 += ((q63_t)inA2 * inB2) >> 14u;   
00083         
00084     inA1 = *(pSrcA + 2);  
00085     inB1 = *(pSrcB + 2);  
00086     inA2 = *(pSrcA + 3);  
00087     inB2 = *(pSrcB + 3);  
00088     /* multiply and accumulate in 16.48 format */  
00089     sum += ((q63_t)inA1 * inB1) >> 14u;     
00090     sum1 += ((q63_t)inA2 * inB2) >> 14u;   
00091         
00092     inA1 = *(pSrcA + 4);  
00093     inB1 = *(pSrcB + 4);  
00094     inA2 = *(pSrcA + 5);  
00095     inB2 = *(pSrcB + 5);  
00096     /* multiply and accumulate in 16.48 format */  
00097     sum += ((q63_t)inA1 * inB1) >> 14u;     
00098     sum1 += ((q63_t)inA2 * inB2) >> 14u;  
00099          
00100     inA1 = *(pSrcA + 6);  
00101     inB1 = *(pSrcB + 6);  
00102     inA2 = *(pSrcA + 7);  
00103     inB2 = *(pSrcB + 7);  
00104     /* multiply and accumulate in 16.48 format */  
00105     sum += ((q63_t)inA1 * inB1) >> 14u;     
00106     sum1 += ((q63_t)inA2 * inB2) >> 14u;   
00107         
00108     /* increment sourceA pointer by 8 */  
00109     pSrcA += 8u;  
00110     /* increment sourceB pointer bhy 8 */  
00111     pSrcB += 8u;     
00112     
00113     /* Decrement the loop counter */     
00114     blkCnt--;     
00115   }     
00116   sum = sum + sum1;  
00117   /* If the blockSize is not a multiple of 8, compute any remaining output samples here.      
00118    ** No loop unrolling is used. */     
00119   blkCnt = blockSize % 0x8u;     
00120      
00121   while(blkCnt > 0u)     
00122   {     
00123     /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */     
00124     /* Calculate dot product and then store the result in a temporary buffer. */     
00125     sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;     
00126      
00127     /* Decrement the loop counter */     
00128     blkCnt--;     
00129   }     
00130      
00131   /* Store the result in the destination buffer in 16.48 format */     
00132   *result = sum;     
00133 }     
00134