Vector Optimized Library of Kernels  2.4
Architecture-tuned implementations of math kernels
volk_64f_convert_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
64 #ifndef INCLUDED_volk_64f_convert_32f_u_H
65 #define INCLUDED_volk_64f_convert_32f_u_H
66 
67 #include <inttypes.h>
68 #include <stdio.h>
69 
70 #ifdef LV_HAVE_AVX512F
71 #include <immintrin.h>
72 
/*!
 * \brief Converts doubles to floats with AVX-512F, using unaligned memory access.
 *
 * Sixteen points are handled per loop iteration: two 512-bit unaligned loads
 * (8 doubles each) are narrowed to two 256-bit float vectors and written with
 * unaligned stores. Points left over after the last full group of sixteen are
 * converted one at a time with a plain cast.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_u_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    const unsigned int groupCount = num_points / 16;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int group;
    unsigned int idx;

    for (group = 0; group < groupCount; group++) {
        /* Both loads are issued before any store, as in the reference ordering. */
        const __m512d lowerDoubles = _mm512_loadu_pd(src);
        const __m512d upperDoubles = _mm512_loadu_pd(src + 8);

        const __m256 lowerFloats = _mm512_cvtpd_ps(lowerDoubles);
        const __m256 upperFloats = _mm512_cvtpd_ps(upperDoubles);

        _mm256_storeu_ps(dst, lowerFloats);
        _mm256_storeu_ps(dst + 8, upperFloats);

        src += 16;
        dst += 16;
    }

    /* Scalar tail: fewer than sixteen points remain. */
    for (idx = groupCount * 16; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
107 #endif /* LV_HAVE_AVX512F */
108 
109 
110 #ifdef LV_HAVE_AVX
111 #include <immintrin.h>
112 
/*!
 * \brief Converts doubles to floats with AVX, using unaligned memory access.
 *
 * Eight points are handled per loop iteration: two 256-bit unaligned loads
 * (4 doubles each) are narrowed to two 128-bit float vectors and written with
 * unaligned stores. Points left over after the last full group of eight are
 * converted one at a time with a plain cast.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_u_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    const unsigned int groupCount = num_points / 8;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int group;
    unsigned int idx;

    for (group = 0; group < groupCount; group++) {
        /* Both loads are issued before any store, as in the reference ordering. */
        const __m256d lowerDoubles = _mm256_loadu_pd(src);
        const __m256d upperDoubles = _mm256_loadu_pd(src + 4);

        const __m128 lowerFloats = _mm256_cvtpd_ps(lowerDoubles);
        const __m128 upperFloats = _mm256_cvtpd_ps(upperDoubles);

        _mm_storeu_ps(dst, lowerFloats);
        _mm_storeu_ps(dst + 4, upperFloats);

        src += 8;
        dst += 8;
    }

    /* Scalar tail: fewer than eight points remain. */
    for (idx = groupCount * 8; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
147 #endif /* LV_HAVE_AVX */
148 
149 
150 #ifdef LV_HAVE_SSE2
151 #include <emmintrin.h>
152 
/*!
 * \brief Converts doubles to floats with SSE2, using unaligned memory access.
 *
 * Four points are handled per loop iteration: two 128-bit unaligned loads
 * (2 doubles each) are each narrowed to a pair of floats, the two pairs are
 * merged into one 128-bit vector, and that vector is written with a single
 * unaligned store. Points left over after the last full group of four are
 * converted one at a time with a plain cast.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    const unsigned int groupCount = num_points / 4;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int group;
    unsigned int idx;

    for (group = 0; group < groupCount; group++) {
        const __m128d firstPair = _mm_loadu_pd(src);
        const __m128d secondPair = _mm_loadu_pd(src + 2);

        /* Each conversion leaves its two floats in the low half of the result. */
        const __m128 firstFloats = _mm_cvtpd_ps(firstPair);
        const __m128 secondFloats = _mm_cvtpd_ps(secondPair);

        /* Low half of secondFloats becomes the high half of the packed vector. */
        _mm_storeu_ps(dst, _mm_movelh_ps(firstFloats, secondFloats));

        src += 4;
        dst += 4;
    }

    /* Scalar tail: fewer than four points remain. */
    for (idx = groupCount * 4; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
186 #endif /* LV_HAVE_SSE2 */
187 
188 
189 #ifdef LV_HAVE_GENERIC
190 
/*!
 * \brief Portable scalar conversion of doubles to floats.
 *
 * Every input value is narrowed with a plain C cast, in order from the first
 * point to the last.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
203 #endif /* LV_HAVE_GENERIC */
204 
205 
206 #endif /* INCLUDED_volk_64f_convert_32f_u_H */
207 #ifndef INCLUDED_volk_64f_convert_32f_a_H
208 #define INCLUDED_volk_64f_convert_32f_a_H
209 
210 #include <inttypes.h>
211 #include <stdio.h>
212 
213 #ifdef LV_HAVE_AVX512F
214 #include <immintrin.h>
215 
/*!
 * \brief Converts doubles to floats with AVX-512F, using aligned memory access.
 *
 * Sixteen points are handled per loop iteration: two 512-bit aligned loads
 * (8 doubles each) are narrowed to two 256-bit float vectors and written with
 * aligned stores. Points left over after the last full group of sixteen are
 * converted one at a time with a plain cast.
 *
 * The aligned load/store intrinsics used here require inputVector to be
 * 64-byte aligned and outputVector to be 32-byte aligned whenever at least
 * sixteen points are converted.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_avx512f(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    const unsigned int groupCount = num_points / 16;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int group;
    unsigned int idx;

    for (group = 0; group < groupCount; group++) {
        /* Both loads are issued before any store, as in the reference ordering. */
        const __m512d lowerDoubles = _mm512_load_pd(src);
        const __m512d upperDoubles = _mm512_load_pd(src + 8);

        const __m256 lowerFloats = _mm512_cvtpd_ps(lowerDoubles);
        const __m256 upperFloats = _mm512_cvtpd_ps(upperDoubles);

        _mm256_store_ps(dst, lowerFloats);
        _mm256_store_ps(dst + 8, upperFloats);

        src += 16;
        dst += 16;
    }

    /* Scalar tail: fewer than sixteen points remain. */
    for (idx = groupCount * 16; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
250 #endif /* LV_HAVE_AVX512F */
251 
252 
253 #ifdef LV_HAVE_AVX
254 #include <immintrin.h>
255 
/*!
 * \brief Converts doubles to floats with AVX, using aligned memory access.
 *
 * Eight points are handled per loop iteration: two 256-bit aligned loads
 * (4 doubles each) are narrowed to two 128-bit float vectors and written with
 * aligned stores. Points left over after the last full group of eight are
 * converted one at a time with a plain cast.
 *
 * The aligned load/store intrinsics used here require inputVector to be
 * 32-byte aligned and outputVector to be 16-byte aligned whenever at least
 * eight points are converted.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_avx(float* outputVector,
                                              const double* inputVector,
                                              unsigned int num_points)
{
    const unsigned int groupCount = num_points / 8;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int group;
    unsigned int idx;

    for (group = 0; group < groupCount; group++) {
        /* Both loads are issued before any store, as in the reference ordering. */
        const __m256d lowerDoubles = _mm256_load_pd(src);
        const __m256d upperDoubles = _mm256_load_pd(src + 4);

        const __m128 lowerFloats = _mm256_cvtpd_ps(lowerDoubles);
        const __m128 upperFloats = _mm256_cvtpd_ps(upperDoubles);

        _mm_store_ps(dst, lowerFloats);
        _mm_store_ps(dst + 4, upperFloats);

        src += 8;
        dst += 8;
    }

    /* Scalar tail: fewer than eight points remain. */
    for (idx = groupCount * 8; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
290 #endif /* LV_HAVE_AVX */
291 
292 
293 #ifdef LV_HAVE_SSE2
294 #include <emmintrin.h>
295 
/*!
 * \brief Converts doubles to floats with SSE2, using aligned memory access.
 *
 * Four points are handled per loop iteration: two 128-bit aligned loads
 * (2 doubles each) are each narrowed to a pair of floats, the two pairs are
 * merged into one 128-bit vector, and that vector is written with a single
 * aligned store. Points left over after the last full group of four are
 * converted one at a time with a plain cast.
 *
 * The aligned load/store intrinsics used here require both inputVector and
 * outputVector to be 16-byte aligned whenever at least four points are
 * converted.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    const unsigned int groupCount = num_points / 4;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int group;
    unsigned int idx;

    for (group = 0; group < groupCount; group++) {
        const __m128d firstPair = _mm_load_pd(src);
        const __m128d secondPair = _mm_load_pd(src + 2);

        /* Each conversion leaves its two floats in the low half of the result. */
        const __m128 firstFloats = _mm_cvtpd_ps(firstPair);
        const __m128 secondFloats = _mm_cvtpd_ps(secondPair);

        /* Low half of secondFloats becomes the high half of the packed vector. */
        _mm_store_ps(dst, _mm_movelh_ps(firstFloats, secondFloats));

        src += 4;
        dst += 4;
    }

    /* Scalar tail: fewer than four points remain. */
    for (idx = groupCount * 4; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
329 #endif /* LV_HAVE_SSE2 */
330 
331 
332 #ifdef LV_HAVE_GENERIC
333 
/*!
 * \brief Portable scalar conversion of doubles to floats (aligned-kernel entry).
 *
 * Every input value is narrowed with a plain C cast, in order from the first
 * point to the last. The body performs no alignment-dependent operation.
 *
 * \param outputVector destination buffer; receives num_points floats
 * \param inputVector  source buffer; num_points doubles are read
 * \param num_points   number of values to convert
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
346 #endif /* LV_HAVE_GENERIC */
347 
348 
349 #endif /* INCLUDED_volk_64f_convert_32f_a_H */
static void volk_64f_convert_32f_u_avx(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:113
static void volk_64f_convert_32f_generic(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:191
static void volk_64f_convert_32f_a_avx(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:256
static void volk_64f_convert_32f_a_generic(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:334
static void volk_64f_convert_32f_u_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:153
static void volk_64f_convert_32f_a_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:296