53 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
54 #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
61 #include <immintrin.h>
/*
 * AVX2 kernel using aligned 256-bit load/store intrinsics.
 *
 * Per vector iteration it loads 32 input bytes, applies a byte shuffle and a
 * 64-bit lane permute, sign-extends the low 16 bytes to int16_t and shifts
 * each value left by 7 (the vector counterpart of the "* 128" in the scalar
 * tail). Remaining points (num_points % 16) go through the scalar tail loop.
 *
 * iBuffer       - output buffer of int16_t values; written with
 *                 _mm256_store_si256, which requires 32-byte alignment.
 * complexVector - input samples, read as int8_t and loaded with
 *                 _mm256_load_si256 (32-byte alignment required). Its
 *                 parameter line is not visible in this listing.
 * num_points    - number of output values to produce.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers jump), so
 * several statements of the function are not visible here; see notes below.
 */
63 static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
65 unsigned int num_points)
67 unsigned int number = 0;
68 const int8_t* complexVectorPtr = (int8_t*)complexVector;
69 int16_t* iBufferPtr = iBuffer;
/* NOTE(review): only the first mask argument (0x80 = "write zero") is visible;
 * the remaining arguments (listing lines 71-101) are missing. Presumably the
 * mask selects the even-indexed bytes of each 128-bit lane, as the SSE4.1
 * variant's mask does -- confirm against the full file. */
70 __m256i moveMask = _mm256_set_epi8(0x80,
/* NOTE(review): outputVal0 is used below but its declaration is not visible
 * in this listing (line 103 is missing). */
102 __m256i complexVal, outputVal;
/* Each vector iteration produces 16 outputs. */
105 unsigned int sixteenthPoints = num_points / 16;
107 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 32-byte load; the input pointer then advances by those 32 bytes. */
108 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
109 complexVectorPtr += 32;
/* The byte shuffle works within each 128-bit lane independently. */
111 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/* 0xd8 orders the 64-bit quarters as [0, 2, 1, 3], bringing the low quarter
 * of the upper lane next to the low quarter of the lower lane. */
112 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Take the low 128 bits (16 bytes) for widening. */
114 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Sign-extend 16 x int8 to 16 x int16, then shift left by 7 (x128). */
116 outputVal = _mm256_cvtepi8_epi16(outputVal0);
117 outputVal = _mm256_slli_epi16(outputVal, 7);
/* Aligned 32-byte store of the 16 results. */
119 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/* NOTE(review): listing lines 120-123 are not visible; the output-pointer
 * advance and the loop's closing brace presumably live there -- confirm. */
/* Scalar tail: resume at the first point not covered by the vector loop. */
124 number = sixteenthPoints * 16;
125 for (; number < num_points; number++) {
/* Widen one input byte to int16_t and scale by 128.
 * NOTE(review): as shown, the input pointer advances one byte per output; if
 * the input is interleaved real/imag pairs, an extra advance is needed. The
 * lines following 126 are not visible here -- confirm. */
126 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
132 #ifdef LV_HAVE_SSE4_1
133 #include <smmintrin.h>
/*
 * SSE4.1 kernel using aligned 128-bit load/store intrinsics.
 *
 * Per vector iteration it loads 16 input bytes, gathers the even-indexed
 * bytes into the low 8 byte positions, sign-extends them to int16_t and
 * shifts each left by 7 (the vector counterpart of the "* 128" in the scalar
 * tail). Remaining points (num_points % 8) go through the scalar tail loop.
 *
 * iBuffer       - output buffer of int16_t values; written with
 *                 _mm_store_si128, which requires 16-byte alignment.
 * complexVector - input samples, read as int8_t and loaded with
 *                 _mm_load_si128 (16-byte alignment required). Its parameter
 *                 line is not visible in this listing.
 * num_points    - number of output values to produce.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers jump), so
 * some statements of the function are not visible here; see notes below.
 */
135 static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
137 unsigned int num_points)
139 unsigned int number = 0;
140 const int8_t* complexVectorPtr = (int8_t*)complexVector;
141 int16_t* iBufferPtr = iBuffer;
/* _mm_set_epi8 lists bytes from index 15 down to 0: result bytes 0..7 take
 * source bytes 0,2,...,14; result bytes 8..15 are zeroed (0x80). */
142 __m128i moveMask = _mm_set_epi8(
143 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
144 __m128i complexVal, outputVal;
/* Each vector iteration produces 8 outputs. */
146 unsigned int eighthPoints = num_points / 8;
148 for (number = 0; number < eighthPoints; number++) {
/* Aligned 16-byte load; the input pointer then advances by those 16 bytes. */
149 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
150 complexVectorPtr += 16;
/* Pack the even-indexed bytes into the low 64 bits. */
152 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
/* Sign-extend the low 8 x int8 to 8 x int16, then shift left by 7 (x128). */
154 outputVal = _mm_cvtepi8_epi16(complexVal);
155 outputVal = _mm_slli_epi16(outputVal, 7);
/* Aligned 16-byte store of the 8 results. */
157 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
/* NOTE(review): listing lines 158-160 are not visible; the output-pointer
 * advance and the loop's closing brace presumably live there -- confirm. */
/* Scalar tail: resume at the first point not covered by the vector loop. */
161 number = eighthPoints * 8;
162 for (; number < num_points; number++) {
/* Widen one input byte to int16_t and scale by 128.
 * NOTE(review): as shown, the input pointer advances one byte per output,
 * whereas the vector path consumes two input bytes per output; the lines
 * following 163 are not visible here -- confirm an extra advance exists. */
163 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
171 #include <immintrin.h>
/*
 * AVX kernel using aligned 256-bit load/store but 128-bit integer operations.
 *
 * NOTE(review): the function header (listing lines 173-174) is not visible
 * here; the name volk_8ic_deinterleave_real_16i_a_avx and the parameters
 * (int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
 * are taken from the declaration summary shown elsewhere in this listing.
 *
 * Per vector iteration it loads 32 input bytes, splits them into two 128-bit
 * halves, gathers the even-indexed bytes of each half, sign-extends them to
 * int16_t, shifts left by 7 (x128), and reassembles the two halves into one
 * 256-bit result for a single aligned store. Remaining points
 * (num_points % 16) go through the scalar tail loop.
 */
175 unsigned int num_points)
177 unsigned int number = 0;
178 const int8_t* complexVectorPtr = (int8_t*)complexVector;
179 int16_t* iBufferPtr = iBuffer;
/* _mm_set_epi8 lists bytes from index 15 down to 0: result bytes 0..7 take
 * source bytes 0,2,...,14; result bytes 8..15 are zeroed (0x80). */
180 __m128i moveMask = _mm_set_epi8(
181 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
182 __m256i complexVal, outputVal;
183 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
/* Each vector iteration produces 16 outputs. */
185 unsigned int sixteenthPoints = num_points / 16;
187 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 32-byte load; the input pointer then advances by those 32 bytes. */
188 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
189 complexVectorPtr += 32;
/* Split into upper (1) and lower (0) 128-bit halves so the 128-bit integer
 * intrinsics below can be applied to each. */
191 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
192 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Pack the even-indexed bytes of each half into its low 64 bits. */
194 outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
195 outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
/* Sign-extend 8 x int8 to 8 x int16 and shift left by 7 (x128), per half. */
197 outputVal1 = _mm_cvtepi8_epi16(outputVal1);
198 outputVal1 = _mm_slli_epi16(outputVal1, 7);
199 outputVal0 = _mm_cvtepi8_epi16(outputVal0);
200 outputVal0 = _mm_slli_epi16(outputVal0, 7);
/* Rebuild a 256-bit vector: lower half first, then upper half. The zeroed
 * dummy only serves as the initial value for the first insert. */
202 __m256i dummy = _mm256_setzero_si256();
203 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
204 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
/* Aligned 32-byte store of the 16 results. */
205 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
/* NOTE(review): listing lines 206-209 are not visible; the output-pointer
 * advance and the loop's closing brace presumably live there -- confirm. */
/* Scalar tail: resume at the first point not covered by the vector loop. */
210 number = sixteenthPoints * 16;
211 for (; number < num_points; number++) {
/* Widen one input byte to int16_t and scale by 128.
 * NOTE(review): as shown, the input pointer advances one byte per output,
 * whereas the vector path consumes two input bytes per output; the lines
 * following 212 are not visible here -- confirm an extra advance exists. */
212 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
219 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel (no SIMD intrinsics).
 *
 * NOTE(review): the function header (listing lines 221-222) is not visible
 * here; the name volk_8ic_deinterleave_real_16i_generic and the parameters
 * (int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
 * are taken from the declaration summary shown elsewhere in this listing.
 *
 * For each of num_points outputs, one input byte is read as int8_t, widened
 * to int16_t, multiplied by 128 and written to iBuffer.
 */
223 unsigned int num_points)
225 unsigned int number = 0;
/* View the input as a flat stream of signed bytes. */
226 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
227 int16_t* iBufferPtr = iBuffer;
228 for (number = 0; number < num_points; number++) {
/* NOTE(review): as shown, the input pointer advances one byte per output; if
 * the input is interleaved real/imag pairs, an extra advance is needed to
 * skip the imaginary byte. The lines following 229 are not visible in this
 * listing -- confirm against the full file. */
229 *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
238 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
239 #define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
241 #include <inttypes.h>
246 #include <immintrin.h>
/*
 * AVX2 kernel using unaligned 256-bit load/store intrinsics.
 *
 * Per vector iteration it loads 32 input bytes, applies a byte shuffle and a
 * 64-bit lane permute, sign-extends the low 16 bytes to int16_t and shifts
 * each value left by 7 (the vector counterpart of the "* 128" in the scalar
 * tail). Remaining points (num_points % 16) go through the scalar tail loop.
 *
 * iBuffer       - output buffer of int16_t values; written with
 *                 _mm256_storeu_si256, so no alignment is required.
 * complexVector - input samples, read as int8_t and loaded with
 *                 _mm256_loadu_si256 (no alignment required). Its parameter
 *                 line is not visible in this listing.
 * num_points    - number of output values to produce.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers jump), so
 * several statements of the function are not visible here; see notes below.
 */
248 static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
250 unsigned int num_points)
252 unsigned int number = 0;
253 const int8_t* complexVectorPtr = (int8_t*)complexVector;
254 int16_t* iBufferPtr = iBuffer;
/* NOTE(review): only the first mask argument (0x80 = "write zero") is visible;
 * the remaining arguments (listing lines 256-286) are missing. Presumably the
 * mask selects the even-indexed bytes of each 128-bit lane -- confirm against
 * the full file. */
255 __m256i moveMask = _mm256_set_epi8(0x80,
/* NOTE(review): outputVal0 is used below but its declaration is not visible
 * in this listing (line 288 is missing). */
287 __m256i complexVal, outputVal;
/* Each vector iteration produces 16 outputs. */
290 unsigned int sixteenthPoints = num_points / 16;
292 for (number = 0; number < sixteenthPoints; number++) {
/* Unaligned 32-byte load; the input pointer then advances by those 32 bytes. */
293 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
294 complexVectorPtr += 32;
/* The byte shuffle works within each 128-bit lane independently. */
296 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
/* 0xd8 orders the 64-bit quarters as [0, 2, 1, 3], bringing the low quarter
 * of the upper lane next to the low quarter of the lower lane. */
297 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Take the low 128 bits (16 bytes) for widening. */
299 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Sign-extend 16 x int8 to 16 x int16, then shift left by 7 (x128). */
301 outputVal = _mm256_cvtepi8_epi16(outputVal0);
302 outputVal = _mm256_slli_epi16(outputVal, 7);
/* Unaligned 32-byte store of the 16 results. */
304 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
/* NOTE(review): listing lines 305-308 are not visible; the output-pointer
 * advance and the loop's closing brace presumably live there -- confirm. */
/* Scalar tail: resume at the first point not covered by the vector loop. */
309 number = sixteenthPoints * 16;
310 for (; number < num_points; number++) {
/* Widen one input byte to int16_t and scale by 128.
 * NOTE(review): as shown, the input pointer advances one byte per output; if
 * the input is interleaved real/imag pairs, an extra advance is needed. The
 * lines following 311 are not visible here -- confirm. */
311 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
static void volk_8ic_deinterleave_real_16i_a_avx(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:173
static void volk_8ic_deinterleave_real_16i_generic(int16_t *iBuffer, const lv_8sc_t *complexVector, unsigned int num_points)
Definition: volk_8ic_deinterleave_real_16i.h:221
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:66