Cortex-R DSP Software Library: arm_offset

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------      
00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 
00003 *      
00004 * $Date:        15. December 2011   
00005 * $Revision:    V2.0.0  
00006 *      
00007 * Project:      Cortex-R DSP Library 
00008 * Title:        arm_offset_q7.c      
00009 *      
00010 * Description:  Q7 vector offset.      
00011 *      
00012 * Target Processor:          Cortex-R4/R5
00013 *
00014 * Version 1.0.0 2011/03/08
00015 *     Alpha release.
00016 *
00017 * Version 1.0.1 2011/09/30
00018 *     Beta release.
00019 *
00020 * Version 2.0.0 2011/12/15
00021 *     Final release. 
00022 * 
00023 * -------------------------------------------------------------------- */     
00024 #include "arm_math.h"     
00025      
00053 void arm_offset_q7(     
00054   q7_t * pSrc,     
00055   q7_t offset,     
00056   q7_t * pDst,     
00057   uint32_t blockSize)     
00058 {     
00059   uint32_t blkCnt;                               /* loop counter */     
00060   q31_t offset_packed;                           /* Offset packed to 32 bit */     
00061   q31_t in1, in2, in3, in4;                      /* Temporary variabels */  
00062   q7_t in;                                       /* Temporary variable */  
00063      
00064   /*loop Unrolling */     
00065   blkCnt = blockSize >> 4u;     
00066      
00067   /* Offset is packed to 32 bit in order to use SIMD32 for addition */     
00068   offset_packed = __PACKq7(offset, offset, offset, offset);     
00069      
00070   /* First part of the processing with loop unrolling.  Compute 8 outputs at a time.      
00071    ** a second loop below computes the remaining 1 to 7 samples. */     
00072   while(blkCnt > 0u)     
00073   {     
00074     /* C = A + offset */     
00075     /* Add offset and then store the results in the destination bufferfor 8 samples at a time. */     
00076     /* read 4 samples at a time from the source */  
00077     in1 = _SIMD32_OFFSET(pSrc);  
00078     in2 = _SIMD32_OFFSET(pSrc + 4);  
00079   
00080     /* added offset to the four samples at a time */  
00081     in1 = __QADD8(in1, offset_packed);  
00082   
00083     /* read 4 samples at a time from the source */  
00084     in3 = _SIMD32_OFFSET(pSrc + 8);  
00085   
00086     /* added offset to the four samples at a time */  
00087     in2 = __QADD8(in2, offset_packed);  
00088   
00089     /* read 4 samples at a time from the source */  
00090     in4 = _SIMD32_OFFSET(pSrc + 12);  
00091   
00092     /* store result to the destination four samples at a time */  
00093     _SIMD32_OFFSET(pDst) = in1;  
00094   
00095     /* added offset to the four samples at a time */  
00096     in3 = __QADD8(in3, offset_packed);  
00097   
00098     /* store result to the destination four samples at a time */  
00099     _SIMD32_OFFSET(pDst + 4) = in2;  
00100   
00101     /* added offset to the four samples at a time */  
00102     in4 = __QADD8(in4, offset_packed);  
00103   
00104     /* store result to the destination four samples at a time */  
00105     _SIMD32_OFFSET(pDst + 8) = in3;  
00106   
00107     /* increment source by 16 to process next samples */  
00108     pSrc += 16u;  
00109       
00110     /* store result to the destination four samples at a time */  
00111     _SIMD32_OFFSET(pDst + 12) = in4;  
00112       
00113     /* increment destination by 16 */  
00114     pDst += 16u;  
00115   
00116     /* Decrement the loop counter */     
00117     blkCnt--;     
00118   }     
00119      
00120   /* If the blockSize is not a multiple of 8, compute any remaining output samples here.      
00121    ** No loop unrolling is used. */     
00122   blkCnt = blockSize % 0x10u;     
00123      
00124   while(blkCnt > 0u)     
00125   {     
00126     /* C = A + offset */     
00127     /* Add offset and then store the result in the destination buffer. */     
00128     in = *pSrc++;  
00129     *pDst++ = (q7_t)(__QADD8(in, offset));  
00130      
00131     /* Decrement the loop counter */     
00132     blkCnt--;     
00133   }     
00134 }     
00135