Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_32fc_s32f_atan2_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
74 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
75 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
76 
77 #include <inttypes.h>
78 #include <math.h>
79 #include <stdio.h>
80 
81 #ifdef LV_HAVE_SSE4_1
82 #include <smmintrin.h>
83 
84 #ifdef LV_HAVE_LIB_SIMDMATH
85 #include <simdmath.h>
86 #endif /* LV_HAVE_LIB_SIMDMATH */
87 
88 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,
89  const lv_32fc_t* complexVector,
90  const float normalizeFactor,
91  unsigned int num_points)
92 {
93  const float* complexVectorPtr = (float*)complexVector;
94  float* outPtr = outputVector;
95 
96  unsigned int number = 0;
97  const float invNormalizeFactor = 1.0 / normalizeFactor;
98 
99 #ifdef LV_HAVE_LIB_SIMDMATH
100  const unsigned int quarterPoints = num_points / 4;
101  __m128 testVector = _mm_set_ps1(2 * M_PI);
102  __m128 correctVector = _mm_set_ps1(M_PI);
103  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
104  __m128 phase;
105  __m128 complex1, complex2, iValue, qValue;
106  __m128 keepMask;
107 
108  for (; number < quarterPoints; number++) {
109  // Load IQ data:
110  complex1 = _mm_load_ps(complexVectorPtr);
111  complexVectorPtr += 4;
112  complex2 = _mm_load_ps(complexVectorPtr);
113  complexVectorPtr += 4;
114  // Deinterleave IQ data:
115  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
116  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
117  // Arctan to get phase:
118  phase = atan2f4(qValue, iValue);
119  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
120  // Compare to 2pi:
121  keepMask = _mm_cmpneq_ps(phase, testVector);
122  phase = _mm_blendv_ps(correctVector, phase, keepMask);
123  // done with above correction.
124  phase = _mm_mul_ps(phase, vNormalizeFactor);
125  _mm_store_ps((float*)outPtr, phase);
126  outPtr += 4;
127  }
128  number = quarterPoints * 4;
129 #endif /* LV_HAVE_LIB_SIMDMATH */
130 
131  for (; number < num_points; number++) {
132  const float real = *complexVectorPtr++;
133  const float imag = *complexVectorPtr++;
134  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
135  }
136 }
137 #endif /* LV_HAVE_SSE4_1 */
138 
139 
140 #ifdef LV_HAVE_SSE
141 #include <xmmintrin.h>
142 
143 #ifdef LV_HAVE_LIB_SIMDMATH
144 #include <simdmath.h>
145 #endif /* LV_HAVE_LIB_SIMDMATH */
146 
147 static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,
148  const lv_32fc_t* complexVector,
149  const float normalizeFactor,
150  unsigned int num_points)
151 {
152  const float* complexVectorPtr = (float*)complexVector;
153  float* outPtr = outputVector;
154 
155  unsigned int number = 0;
156  const float invNormalizeFactor = 1.0 / normalizeFactor;
157 
158 #ifdef LV_HAVE_LIB_SIMDMATH
159  const unsigned int quarterPoints = num_points / 4;
160  __m128 testVector = _mm_set_ps1(2 * M_PI);
161  __m128 correctVector = _mm_set_ps1(M_PI);
162  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
163  __m128 phase;
164  __m128 complex1, complex2, iValue, qValue;
165  __m128 mask;
166  __m128 keepMask;
167 
168  for (; number < quarterPoints; number++) {
169  // Load IQ data:
170  complex1 = _mm_load_ps(complexVectorPtr);
171  complexVectorPtr += 4;
172  complex2 = _mm_load_ps(complexVectorPtr);
173  complexVectorPtr += 4;
174  // Deinterleave IQ data:
175  iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
176  qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
177  // Arctan to get phase:
178  phase = atan2f4(qValue, iValue);
179  // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
180  // Compare to 2pi:
181  keepMask = _mm_cmpneq_ps(phase, testVector);
182  phase = _mm_and_ps(phase, keepMask);
183  mask = _mm_andnot_ps(keepMask, correctVector);
184  phase = _mm_or_ps(phase, mask);
185  // done with above correction.
186  phase = _mm_mul_ps(phase, vNormalizeFactor);
187  _mm_store_ps((float*)outPtr, phase);
188  outPtr += 4;
189  }
190  number = quarterPoints * 4;
191 #endif /* LV_HAVE_LIB_SIMDMATH */
192 
193  for (; number < num_points; number++) {
194  const float real = *complexVectorPtr++;
195  const float imag = *complexVectorPtr++;
196  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
197  }
198 }
199 #endif /* LV_HAVE_SSE */
200 
201 #ifdef LV_HAVE_GENERIC
202 
203 static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector,
204  const lv_32fc_t* inputVector,
205  const float normalizeFactor,
206  unsigned int num_points)
207 {
208  float* outPtr = outputVector;
209  const float* inPtr = (float*)inputVector;
210  const float invNormalizeFactor = 1.0 / normalizeFactor;
211  unsigned int number;
212  for (number = 0; number < num_points; number++) {
213  const float real = *inPtr++;
214  const float imag = *inPtr++;
215  *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
216  }
217 }
218 #endif /* LV_HAVE_GENERIC */
219 
220 
221 #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
static void volk_32fc_s32f_atan2_32f_generic(float *outputVector, const lv_32fc_t *inputVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:203
static void volk_32fc_s32f_atan2_32f_a_sse(float *outputVector, const lv_32fc_t *complexVector, const float normalizeFactor, unsigned int num_points)
Definition: volk_32fc_s32f_atan2_32f.h:147
float complex lv_32fc_t
Definition: volk_complex.h:70