00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2011 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. December 2011 00005 * $Revision: V2.0.0 00006 * 00007 * Project: Cortex-R DSP Library 00008 * Title: arm_cmplx_mult_cmplx_f32.c 00009 * 00010 * Description: Floating-point complex-by-complex multiplication 00011 * 00012 * Target Processor: Cortex-R4/R5 00013 * 00014 * Version 1.0.0 2011/03/08 00015 * Alpha release. 00016 * 00017 * Version 1.0.1 2011/09/30 00018 * Beta release. 00019 * 00020 * Version 2.0.0 2011/12/15 00021 * Final release. 00022 * 00023 * -------------------------------------------------------------------- */ 00024 #include "arm_math.h" 00025 00067 void arm_cmplx_mult_cmplx_f32( 00068 float32_t * pSrcA, 00069 float32_t * pSrcB, 00070 float32_t * pDst, 00071 uint32_t numSamples) 00072 { 00073 float32_t a1, b1, c1, d1; /* Temporary variables to store real and imaginary values */ 00074 float32_t a2, b2, c2, d2; /* Temporary variables to store real and imaginary values */ 00075 float32_t acc1, acc2, acc3, acc4; 00076 00077 uint32_t blkCnt; /* loop counters */ 00078 00079 /* loop Unrolling */ 00080 blkCnt = numSamples >> 2u; 00081 00082 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00083 ** a second loop below computes the remaining 1 to 3 samples. */ 00084 while(blkCnt > 0u) 00085 { 00086 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00087 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00088 a1 = *pSrcA; /* A[2 * i] */ 00089 c1 = *pSrcB; /* B[2 * i] */ 00090 00091 b1 = *(pSrcA + 1); /* A[2 * i + 1] */ 00092 acc1 = a1 * c1; /* acc1 = A[2 * i] * B[2 * i] */ 00093 00094 a2 = *(pSrcA + 2); /* A[2 * i + 2] */ 00095 acc2 = (b1 * c1); /* acc2 = A[2 * i + 1] * B[2 * i] */ 00096 00097 d1 = *(pSrcB + 1); /* B[2 * i + 1] */ 00098 c2 = *(pSrcB + 2); /* B[2 * i + 2] */ 00099 acc1 -= b1 * d1; /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */ 00100 00101 d2 = *(pSrcB + 3); /* B[2 * i + 3] */ 00102 acc3 = a2 * c2; /* acc3 = A[2 * i + 2] * B[2 * i + 2] */ 00103 00104 b2 = *(pSrcA + 3); /* A[2 * i + 3] */ 00105 acc2 += (a1 * d1); /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */ 00106 00107 a1 = *(pSrcA + 4); /* A[2 * i + 4] */ 00108 acc4 = (a2 * d2); /* acc4 = A[2 * i + 2] * B[2 * i + 3] */ 00109 00110 c1 = *(pSrcB + 4); /* B[2 * i + 4] */ 00111 acc3 -= (b2 * d2); /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */ 00112 *pDst = acc1; /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */ 00113 00114 b1 = *(pSrcA + 5); /* A[2 * i + 5] */ 00115 acc4 += b2 * c2; /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */ 00116 00117 *(pDst + 1) = acc2; /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */ 00118 acc1 = (a1 * c1); 00119 00120 d1 = *(pSrcB + 5); 00121 acc2 = (b1 * c1); 00122 00123 *(pDst + 2) = acc3; 00124 *(pDst + 3) = acc4; 00125 00126 a2 = *(pSrcA + 6); 00127 acc1 -= (b1 * d1); 00128 00129 c2 = *(pSrcB + 6); 00130 acc2 += (a1 * d1); 00131 00132 b2 = *(pSrcA + 7); 00133 acc3 = (a2 * c2); 00134 00135 d2 = *(pSrcB + 7); 00136 acc4 = (b2 * c2); 00137 00138 *(pDst + 4) = acc1; 00139 pSrcA += 8u; 00140 00141 acc3 -= (b2 * d2); 00142 acc4 += (a2 * d2); 00143 00144 *(pDst + 5) = acc2; 00145 pSrcB += 8u; 00146 00147 *(pDst + 6) = acc3; 00148 *(pDst + 7) = acc4; 00149 00150 pDst += 8u; 00151 00152 /* Decrement the numSamples loop counter */ 00153 blkCnt--; 00154 } 00155 00156 /* If the numSamples is not a multiple of 4, compute any remaining output samples here. 00157 ** No loop unrolling is used. */ 00158 blkCnt = numSamples % 0x4u; 00159 00160 while(blkCnt > 0u) 00161 { 00162 /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */ 00163 /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */ 00164 a1 = *pSrcA++; 00165 b1 = *pSrcA++; 00166 c1 = *pSrcB++; 00167 d1 = *pSrcB++; 00168 00169 /* store the result in the destination buffer. */ 00170 *pDst++ = (a1 * c1) - (b1 * d1); 00171 *pDst++ = (a1 * d1) + (b1 * c1); 00172 00173 /* Decrement the numSamples loop counter */ 00174 blkCnt--; 00175 } 00176 } 00177