Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_32f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
74 #define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
75 
76 #include <inttypes.h>
77 #include <stdio.h>
78 
79 #ifdef LV_HAVE_AVX
80 #include <immintrin.h>
81 static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer,
82  float* qBuffer,
83  const lv_32fc_t* complexVector,
84  unsigned int num_points)
85 {
86  const float* complexVectorPtr = (float*)complexVector;
87  float* iBufferPtr = iBuffer;
88  float* qBufferPtr = qBuffer;
89 
90  unsigned int number = 0;
91  // Mask for real and imaginary parts
92  const unsigned int eighthPoints = num_points / 8;
93  __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
94  for (; number < eighthPoints; number++) {
95  cplxValue1 = _mm256_load_ps(complexVectorPtr);
96  complexVectorPtr += 8;
97 
98  cplxValue2 = _mm256_load_ps(complexVectorPtr);
99  complexVectorPtr += 8;
100 
101  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
102  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
103 
104  // Arrange in i1i2i3i4 format
105  iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
106  // Arrange in q1q2q3q4 format
107  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
108 
109  _mm256_store_ps(iBufferPtr, iValue);
110  _mm256_store_ps(qBufferPtr, qValue);
111 
112  iBufferPtr += 8;
113  qBufferPtr += 8;
114  }
115 
116  number = eighthPoints * 8;
117  for (; number < num_points; number++) {
118  *iBufferPtr++ = *complexVectorPtr++;
119  *qBufferPtr++ = *complexVectorPtr++;
120  }
121 }
122 #endif /* LV_HAVE_AVX */
123 
124 #ifdef LV_HAVE_SSE
125 #include <xmmintrin.h>
126 
127 static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer,
128  float* qBuffer,
129  const lv_32fc_t* complexVector,
130  unsigned int num_points)
131 {
132  const float* complexVectorPtr = (float*)complexVector;
133  float* iBufferPtr = iBuffer;
134  float* qBufferPtr = qBuffer;
135 
136  unsigned int number = 0;
137  const unsigned int quarterPoints = num_points / 4;
138  __m128 cplxValue1, cplxValue2, iValue, qValue;
139  for (; number < quarterPoints; number++) {
140  cplxValue1 = _mm_load_ps(complexVectorPtr);
141  complexVectorPtr += 4;
142 
143  cplxValue2 = _mm_load_ps(complexVectorPtr);
144  complexVectorPtr += 4;
145 
146  // Arrange in i1i2i3i4 format
147  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
148  // Arrange in q1q2q3q4 format
149  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
150 
151  _mm_store_ps(iBufferPtr, iValue);
152  _mm_store_ps(qBufferPtr, qValue);
153 
154  iBufferPtr += 4;
155  qBufferPtr += 4;
156  }
157 
158  number = quarterPoints * 4;
159  for (; number < num_points; number++) {
160  *iBufferPtr++ = *complexVectorPtr++;
161  *qBufferPtr++ = *complexVectorPtr++;
162  }
163 }
164 #endif /* LV_HAVE_SSE */
165 
166 
167 #ifdef LV_HAVE_NEON
168 #include <arm_neon.h>
169 
170 static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
171  float* qBuffer,
172  const lv_32fc_t* complexVector,
173  unsigned int num_points)
174 {
175  unsigned int number = 0;
176  unsigned int quarter_points = num_points / 4;
177  const float* complexVectorPtr = (float*)complexVector;
178  float* iBufferPtr = iBuffer;
179  float* qBufferPtr = qBuffer;
180  float32x4x2_t complexInput;
181 
182  for (number = 0; number < quarter_points; number++) {
183  complexInput = vld2q_f32(complexVectorPtr);
184  vst1q_f32(iBufferPtr, complexInput.val[0]);
185  vst1q_f32(qBufferPtr, complexInput.val[1]);
186  complexVectorPtr += 8;
187  iBufferPtr += 4;
188  qBufferPtr += 4;
189  }
190 
191  for (number = quarter_points * 4; number < num_points; number++) {
192  *iBufferPtr++ = *complexVectorPtr++;
193  *qBufferPtr++ = *complexVectorPtr++;
194  }
195 }
196 #endif /* LV_HAVE_NEON */
197 
198 
199 #ifdef LV_HAVE_GENERIC
200 
201 static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer,
202  float* qBuffer,
203  const lv_32fc_t* complexVector,
204  unsigned int num_points)
205 {
206  const float* complexVectorPtr = (float*)complexVector;
207  float* iBufferPtr = iBuffer;
208  float* qBufferPtr = qBuffer;
209  unsigned int number;
210  for (number = 0; number < num_points; number++) {
211  *iBufferPtr++ = *complexVectorPtr++;
212  *qBufferPtr++ = *complexVectorPtr++;
213  }
214 }
215 #endif /* LV_HAVE_GENERIC */
216 
217 #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
218 
219 
220 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
221 #define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
222 
223 #include <inttypes.h>
224 #include <stdio.h>
225 
226 #ifdef LV_HAVE_AVX
227 #include <immintrin.h>
228 static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer,
229  float* qBuffer,
230  const lv_32fc_t* complexVector,
231  unsigned int num_points)
232 {
233  const float* complexVectorPtr = (float*)complexVector;
234  float* iBufferPtr = iBuffer;
235  float* qBufferPtr = qBuffer;
236 
237  unsigned int number = 0;
238  // Mask for real and imaginary parts
239  const unsigned int eighthPoints = num_points / 8;
240  __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
241  for (; number < eighthPoints; number++) {
242  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
243  complexVectorPtr += 8;
244 
245  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
246  complexVectorPtr += 8;
247 
248  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
249  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
250 
251  // Arrange in i1i2i3i4 format
252  iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
253  // Arrange in q1q2q3q4 format
254  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
255 
256  _mm256_storeu_ps(iBufferPtr, iValue);
257  _mm256_storeu_ps(qBufferPtr, qValue);
258 
259  iBufferPtr += 8;
260  qBufferPtr += 8;
261  }
262 
263  number = eighthPoints * 8;
264  for (; number < num_points; number++) {
265  *iBufferPtr++ = *complexVectorPtr++;
266  *qBufferPtr++ = *complexVectorPtr++;
267  }
268 }
269 #endif /* LV_HAVE_AVX */
270 #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */
static void volk_32fc_deinterleave_32f_x2_generic(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:201
static void volk_32fc_deinterleave_32f_x2_a_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:81
static void volk_32fc_deinterleave_32f_x2_a_sse(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:127
static void volk_32fc_deinterleave_32f_x2_neon(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:170
static void volk_32fc_deinterleave_32f_x2_u_avx(float *iBuffer, float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_32f_x2.h:228
float complex lv_32fc_t
Definition: volk_complex.h:70