41#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
42#define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
/*
 * Aligned AVX2 kernel: splits interleaved 16-bit complex samples into the
 * separate iBuffer (first value of each pair) and qBuffer (second value of
 * each pair) streams, 16 points per vector iteration plus a scalar tail.
 *
 * Uses _mm256_load_si256 / _mm256_store_si256, which require 32-byte aligned
 * addresses.
 *
 * NOTE(review): this listing has the viewer's line numbers fused onto each
 * line and skips lines, so the qBuffer / complexVector parameter
 * declarations, most of the shuffle mask, the permute2x128 selectors, any
 * advance of the output pointers and all braces are not visible here --
 * confirm against the real header before relying on the notes below.
 */
49static inline void volk_16ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
52 unsigned int num_points)
54 unsigned int number = 0;
/* Byte-wise view of the input so it can be advanced in 32-byte steps. */
55 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 int16_t* iBufferPtr = iBuffer;
57 int16_t* qBufferPtr = qBuffer;
/* Byte-shuffle mask; only its first argument (15) is visible in this listing. */
59 __m256i MoveMask = _mm256_set_epi8(15,
92 __m256i iMove2, iMove1;
93 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* Number of full 16-point groups handled by the vector loop. */
95 unsigned int sixteenthPoints = num_points / 16;
97 for (number = 0; number < sixteenthPoints; number++) {
/* Two 32-byte loads = 64 bytes = 16 points (each point is two int16 values). */
98 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
99 complexVectorPtr += 32;
100 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
101 complexVectorPtr += 32;
/*
 * Reorder bytes within each 128-bit lane according to MoveMask.
 * Presumably this groups the I words and the Q words into separate 64-bit
 * halves of each lane -- mask not fully visible, TODO confirm.
 */
103 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
104 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/*
 * permute4x64 immediate 0x08 places source qwords 0 and 2 in the low 128
 * bits; 0x80 places them in the high 128 bits. The permute2x128 selector
 * that combines the two operands is not visible in this listing.
 */
106 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
107 _mm256_permute4x64_epi64(iMove2, 0x80),
/* Same pattern with immediates 0x0d / 0xd0, which select source qwords 1 and 3. */
109 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
110 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* Aligned 32-byte stores of the assembled I and Q vectors. */
113 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
114 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: copy the num_points % 16 points the vector loop did not cover. */
120 number = sixteenthPoints * 16;
/* NOTE(review): this cast drops the const qualifier; the pointer is only read below. */
121 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
122 for (; number < num_points; number++) {
123 *iBufferPtr++ = *int16ComplexVectorPtr++;
124 *qBufferPtr++ = *int16ComplexVectorPtr++;
130#include <tmmintrin.h>
/*
 * Visible remainder of volk_16ic_deinterleave_16i_x2_a_ssse3 (the listing's
 * index places that definition at line 132; its opening signature lines are
 * not visible here). The visible code walks the input in 8-point groups and
 * finishes with a scalar copy of the leftover points into iBuffer / qBuffer.
 *
 * NOTE(review): this listing has the viewer's line numbers fused onto each
 * line and skips lines; the mask variable declarations, the loads, shuffles
 * and stores inside the loop, and all braces are missing -- confirm against
 * the real header.
 */
135 unsigned int num_points)
137 unsigned int number = 0;
/* Byte-wise view of the input so it can be advanced in 16-byte steps. */
138 const int8_t* complexVectorPtr = (int8_t*)complexVector;
139 int16_t* iBufferPtr = iBuffer;
140 int16_t* qBufferPtr = qBuffer;
/*
 * Four byte-index lists. Indices 13,12,9,8,5,4,1,0 name the bytes of the
 * even-numbered 16-bit words of a 128-bit vector; 15,14,11,10,7,6,3,2 name
 * the bytes of the odd-numbered words. Each list appears once in the first
 * eight positions and once in the last eight, padded with 0x80.
 * Presumably these are _mm_set_epi8 arguments forming _mm_shuffle_epi8
 * masks (where a 0x80 control byte zeroes the output byte) -- the calls and
 * variable names are not visible, TODO confirm.
 */
143 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
145 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
148 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 14, 11, 10, 7, 6, 3, 2);
150 15, 14, 11, 10, 7, 6, 3, 2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
152 __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* Number of full 8-point groups handled by the vector loop. */
154 unsigned int eighthPoints = num_points / 8;
156 for (number = 0; number < eighthPoints; number++) {
/* Two 16-byte advances per iteration = 32 bytes = 8 points. */
158 complexVectorPtr += 16;
160 complexVectorPtr += 16;
/* Scalar tail: copy the num_points % 8 points the vector loop did not cover. */
174 number = eighthPoints * 8;
/* NOTE(review): this cast drops the const qualifier; the pointer is only read below. */
175 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
176 for (; number < num_points; number++) {
177 *iBufferPtr++ = *int16ComplexVectorPtr++;
178 *qBufferPtr++ = *int16ComplexVectorPtr++;
184#include <emmintrin.h>
/*
 * Visible remainder of volk_16ic_deinterleave_16i_x2_a_sse2 (the listing's
 * index places that definition at line 186; its opening signature lines are
 * not visible here). The visible code walks the input in 8-point groups and
 * finishes with a scalar copy of the leftover points into iBuffer / qBuffer.
 *
 * NOTE(review): this listing has the viewer's line numbers fused onto each
 * line and skips lines; the loads, the whole shuffle sequence, the stores
 * and all braces inside the vector loop are missing -- confirm against the
 * real header.
 */
189 unsigned int num_points)
191 unsigned int number = 0;
/* Input viewed as a flat int16 stream: two consecutive values per point. */
192 const int16_t* complexVectorPtr = (int16_t*)complexVector;
193 int16_t* iBufferPtr = iBuffer;
194 int16_t* qBufferPtr = qBuffer;
195 __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1,
196 qComplexVal2, iOutputVal, qOutputVal;
/* Number of full 8-point groups handled by the vector loop. */
200 unsigned int eighthPoints = num_points / 8;
202 for (number = 0; number < eighthPoints; number++) {
/* Two advances of 8 int16 values each = 16 values = 8 points per iteration. */
204 complexVectorPtr += 8;
206 complexVectorPtr += 8;
/* Scalar tail: copy the num_points % 8 points the vector loop did not cover. */
246 number = eighthPoints * 8;
247 for (; number < num_points; number++) {
248 *iBufferPtr++ = *complexVectorPtr++;
249 *qBufferPtr++ = *complexVectorPtr++;
/*
 * Portable scalar implementation, guarded by LV_HAVE_GENERIC. For each of the
 * num_points points it reads two consecutive int16 values from the input and
 * writes the first to iBuffer and the second to qBuffer.
 *
 * The listing's index gives the full signature as
 *   static void volk_16ic_deinterleave_16i_x2_generic(int16_t *iBuffer,
 *       int16_t *qBuffer, const lv_16sc_t *complexVector,
 *       unsigned int num_points)
 *
 * NOTE(review): this listing has the viewer's line numbers fused onto each
 * line and skips lines; the opening signature lines, the declaration of
 * `number`, the braces and the matching #endif are not visible -- confirm
 * against the real header.
 */
254#ifdef LV_HAVE_GENERIC
259 unsigned int num_points)
/* Input viewed as a flat int16 stream: two consecutive values per point. */
261 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
262 int16_t* iBufferPtr = iBuffer;
263 int16_t* qBufferPtr = qBuffer;
/* One point per iteration: first value to the I stream, second to the Q stream. */
265 for (number = 0; number < num_points; number++) {
266 *iBufferPtr++ = *complexVectorPtr++;
267 *qBufferPtr++ = *complexVectorPtr++;
/*
 * ORC-backed variant: volk_16ic_deinterleave_16i_x2_u_orc is a thin wrapper
 * that forwards its four arguments unchanged to the externally defined
 * volk_16ic_deinterleave_16i_x2_a_orc_impl.
 *
 * NOTE(review): this listing has the viewer's line numbers fused onto each
 * line and skips lines; only the first parameter of the extern prototype and
 * the first and last parameters of the wrapper are visible, and the braces
 * are missing -- confirm against the real header.
 */
274extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer,
278static inline void volk_16ic_deinterleave_16i_x2_u_orc(int16_t* iBuffer,
281 unsigned int num_points)
/* NOTE(review): the wrapper is named _u_ but calls the _a_ implementation symbol. */
283 volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
290#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
291#define INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
296#include <immintrin.h>
/*
 * Unaligned AVX2 kernel: splits interleaved 16-bit complex samples into the
 * separate iBuffer (first value of each pair) and qBuffer (second value of
 * each pair) streams, 16 points per vector iteration plus a scalar tail.
 *
 * Uses _mm256_loadu_si256 / _mm256_storeu_si256, which place no alignment
 * requirement on the addresses.
 *
 * NOTE(review): this listing has the viewer's line numbers fused onto each
 * line and skips lines, so the qBuffer / complexVector parameter
 * declarations, most of the shuffle mask, the permute2x128 selectors, any
 * advance of the output pointers and all braces are not visible here --
 * confirm against the real header before relying on the notes below.
 */
298static inline void volk_16ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
301 unsigned int num_points)
303 unsigned int number = 0;
/* Byte-wise view of the input so it can be advanced in 32-byte steps. */
304 const int8_t* complexVectorPtr = (int8_t*)complexVector;
305 int16_t* iBufferPtr = iBuffer;
306 int16_t* qBufferPtr = qBuffer;
/* Byte-shuffle mask; only its first argument (15) is visible in this listing. */
308 __m256i MoveMask = _mm256_set_epi8(15,
341 __m256i iMove2, iMove1;
342 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* Number of full 16-point groups handled by the vector loop. */
344 unsigned int sixteenthPoints = num_points / 16;
346 for (number = 0; number < sixteenthPoints; number++) {
/* Two 32-byte loads = 64 bytes = 16 points (each point is two int16 values). */
347 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
348 complexVectorPtr += 32;
349 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
350 complexVectorPtr += 32;
/*
 * Reorder bytes within each 128-bit lane according to MoveMask.
 * Presumably this groups the I words and the Q words into separate 64-bit
 * halves of each lane -- mask not fully visible, TODO confirm.
 */
352 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
353 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/*
 * permute4x64 immediate 0x08 places source qwords 0 and 2 in the low 128
 * bits; 0x80 places them in the high 128 bits. The permute2x128 selector
 * that combines the two operands is not visible in this listing.
 */
355 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
356 _mm256_permute4x64_epi64(iMove2, 0x80),
/* Same pattern with immediates 0x0d / 0xd0, which select source qwords 1 and 3. */
358 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
359 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* Unaligned 32-byte stores of the assembled I and Q vectors. */
362 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
363 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: copy the num_points % 16 points the vector loop did not cover. */
369 number = sixteenthPoints * 16;
/* NOTE(review): this cast drops the const qualifier; the pointer is only read below. */
370 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
371 for (; number < num_points; number++) {
372 *iBufferPtr++ = *int16ComplexVectorPtr++;
373 *qBufferPtr++ = *int16ComplexVectorPtr++;
FORCE_INLINE void _mm_store_si128(__m128i *p, __m128i a)
Definition sse2neon.h:5452
FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b14, signed char b13, signed char b12, signed char b11, signed char b10, signed char b9, signed char b8, signed char b7, signed char b6, signed char b5, signed char b4, signed char b3, signed char b2, signed char b1, signed char b0)
Definition sse2neon.h:4863
FORCE_INLINE __m128i _mm_set_epi32(int, int, int, int)
Definition sse2neon.h:4840
FORCE_INLINE __m128i _mm_and_si128(__m128i, __m128i)
Definition sse2neon.h:3034
FORCE_INLINE __m128i _mm_shuffle_epi8(__m128i a, __m128i b)
Definition sse2neon.h:6445
FORCE_INLINE __m128i _mm_load_si128(const __m128i *p)
Definition sse2neon.h:4260
#define _mm_shufflelo_epi16(a, imm)
Definition sse2neon.h:5157
FORCE_INLINE __m128i _mm_or_si128(__m128i, __m128i)
Definition sse2neon.h:4761
#define _mm_shufflehi_epi16(a, imm)
Definition sse2neon.h:5142
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0)
Definition sse2neon.h:306
int64x2_t __m128i
Definition sse2neon.h:375
#define _mm_shuffle_epi32(a, imm)
Definition sse2neon.h:5063
static void volk_16ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_16i_x2.h:256
static void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_16i_x2.h:186
static void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition volk_16ic_deinterleave_16i_x2.h:132
short complex lv_16sc_t
Definition volk_complex.h:71