00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_cmplx_dot_prod_q31.c 00009 * 00010 * Description: Q31 complex dot product 00011 * 00012 * Target Processor: Cortex-R4/R5 00013 * 00014 * Version 1.0.0 2011/03/08 00015 * Alpha release. 00016 * 00017 * Version 1.0.1 2011/09/30 00018 * Beta release. 00019 * 00020 * Version 2.0.0 2011/12/15 00021 * Final release. 00022 * 00023 * -------------------------------------------------------------------- */ 00024 #include "arm_math.h" 00025 00054 void arm_cmplx_dot_prod_q31( 00055 q31_t * pSrcA, 00056 q31_t * pSrcB, 00057 uint32_t numSamples, 00058 q63_t * realResult, 00059 q63_t * imagResult) 00060 { 00061 q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */ 00062 uint32_t blkCnt; /* loop counter */ 00063 q31_t realA1, realB1; 00064 q31_t imagA1, imagB1; 00065 q63_t mul1, mul2; 00066 00067 /*loop Unrolling */ 00068 blkCnt = numSamples >> 2u; 00069 00070 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00071 ** a second loop below computes the remaining 1 to 3 samples. */ 00072 while(blkCnt > 0u) 00073 { 00074 /* read real input from sourceA */ 00075 realA1 = pSrcA[0]; 00076 /* read real input from sourecB */ 00077 realB1 = pSrcB[0]; 00078 00079 /* multiply real(sourceA) * real(sourceB) inputs */ 00080 mul1 = ((q63_t)realA1 * realB1); 00081 00082 /* read imaginary input from sourceA */ 00083 imagA1 = pSrcA[1]; 00084 /* read imaginary input from sourceB */ 00085 imagB1 = pSrcB[1]; 00086 00087 /* multiply imaginary(sourceA) * imaginary(sourceB) inputs */ 00088 mul2 = ((q63_t)imagA1 * imagB1); 00089 00090 /* read real input from sourceA */ 00091 realA1 = pSrcA[2]; 00092 /* read real input from sourecB */ 00093 realB1 = pSrcB[2]; 00094 00095 /* shift the result to get 16.48 format */ 00096 mul1 = mul1 >> 14u; 00097 mul2 = mul2 >> 14u; 00098 00099 /* accumulate real result */ 00100 real_sum += mul1; 00101 /* accumulate imaginary result */ 00102 imag_sum += mul2; 00103 00104 /* multiply real(sourceA) * real(sourceB) inputs */ 00105 mul1 = ((q63_t)realA1 * realB1); 00106 00107 /* read imaginary input from sourceA */ 00108 imagA1 = pSrcA[3]; 00109 /* read imaginary input from sourceB */ 00110 imagB1 = pSrcB[3]; 00111 00112 /* multiply imaginary(sourceA) * imaginary(sourceB) inputs */ 00113 mul2 = ((q63_t)imagA1 * imagB1); 00114 00115 /* read real input from sourceA */ 00116 realA1 = pSrcA[4]; 00117 /* read real input from sourecB */ 00118 realB1 = pSrcB[4]; 00119 00120 /* shift the result to get 16.48 format */ 00121 mul1 = mul1 >> 14u; 00122 mul2 = mul2 >> 14u; 00123 00124 /* accumulate real result */ 00125 real_sum += mul1; 00126 /* accumulate imaginary result */ 00127 imag_sum += mul2; 00128 00129 /* multiply real(sourceA) * real(sourceB) inputs */ 00130 mul1 = ((q63_t)realA1 * realB1); 00131 00132 /* read imaginary input from sourceA */ 00133 imagA1 = pSrcA[5]; 00134 /* read imaginary input from sourceB */ 00135 imagB1 = pSrcB[5]; 00136 00137 /* multiply imaginary(sourceA) * imaginary(sourceB) inputs */ 00138 mul2 = ((q63_t)imagA1 * imagB1); 00139 00140 /* shift the result to get 16.48 format */ 00141 mul1 = mul1 >> 14u; 00142 mul2 = mul2 >> 14u; 00143 00144 /* read real input from sourceA */ 00145 realA1 = pSrcA[6]; 00146 /* read real input from sourecB */ 00147 realB1 = pSrcB[6]; 00148 00149 /* accumulate real result */ 00150 real_sum += mul1; 00151 /* accumulate imaginary result */ 00152 imag_sum += mul2; 00153 00154 /* multiply real(sourceA) * real(sourceB) inputs */ 00155 mul1 = ((q63_t)realA1 * realB1); 00156 00157 /* read imaginary input from sourceA */ 00158 imagA1 = pSrcA[7]; 00159 /* read imaginary input from sourceB */ 00160 imagB1 = pSrcB[7]; 00161 00162 /* multiply imaginary(sourceA) * imaginary(sourceB) inputs */ 00163 mul2 = ((q63_t)imagA1 * imagB1); 00164 00165 /* increment pointers by 8 to process next samples */ 00166 pSrcA += 8u; 00167 pSrcB += 8u; 00168 00169 /* shift the result to get 16.48 format */ 00170 mul1 = mul1 >> 14u; 00171 mul2 = mul2 >> 14u; 00172 00173 /* accumulate real result */ 00174 real_sum += mul1; 00175 /* accumulate imaginary result */ 00176 imag_sum += mul2; 00177 00178 /* Decrement the loop counter */ 00179 blkCnt--; 00180 } 00181 00182 /* If the numSamples is not a multiple of 4, compute any remaining output samples here. 00183 ** No loop unrolling is used. */ 00184 blkCnt = numSamples % 0x4u; 00185 00186 while(blkCnt > 0u) 00187 { 00188 /* CReal = A[0]* B[0] + A[2]* B[2] + A[4]* B[4] + .....+ A[numSamples-2]* B[numSamples-2] */ 00189 real_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00190 /* CImag = A[1]* B[1] + A[3]* B[3] + A[5]* B[5] + .....+ A[numSamples-1]* B[numSamples-1] */ 00191 imag_sum += (q63_t) * pSrcA++ * (*pSrcB++) >> 14; 00192 00193 /* Decrement the loop counter */ 00194 blkCnt--; 00195 } 00196 00197 /* Store the real and imaginary results in 16.48 format */ 00198 *realResult = real_sum; 00199 *imagResult = imag_sum; 00200 } 00201