48 #ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
49 #define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
58 #include <immintrin.h>
// Fragment of an AVX spectral-noise-floor kernel that uses aligned loads/stores.
// NOTE(review): the embedded line numbers skip values, so this listing omits
// lines (function head, braces, several declarations) - confirm against the
// full file before relying on any detail below.
//
// Visible inputs:
//   realDataPoints         - float samples to analyse
//   spectralExclusionValue - offset added to the mean to form the threshold
//   num_points             - number of samples in realDataPoints
// Visible output: the result is written through *noiseFloorAmplitude, which
// is not declared in the visible lines (presumably a parameter on an omitted
// line).
62 const float* realDataPoints,
63 const float spectralExclusionValue,
64 const unsigned int num_points)
66 unsigned int number = 0;
// Number of complete groups of 8 floats; the remainder is handled by scalar loops.
67 const unsigned int eighthPoints = num_points / 8;
69 const float* dataPointsPtr = realDataPoints;
73 __m256 avgPointsVal = _mm256_setzero_ps();
// Pass 1: accumulate all samples, 8 lanes at a time.
// _mm256_load_ps is the aligned load form (32-byte aligned address expected).
// NOTE(review): no pointer advance is visible in this loop; presumably on an
// omitted line - confirm.
75 for (; number < eighthPoints; number++) {
77 dataPointsVal = _mm256_load_ps(dataPointsPtr);
81 avgPointsVal = _mm256_add_ps(avgPointsVal, dataPointsVal);
// Spill the 8 partial sums and fold them into the scalar sumMean.
84 _mm256_store_ps(avgPointsVector, avgPointsVal);
87 sumMean += avgPointsVector[0];
88 sumMean += avgPointsVector[1];
89 sumMean += avgPointsVector[2];
90 sumMean += avgPointsVector[3];
91 sumMean += avgPointsVector[4];
92 sumMean += avgPointsVector[5];
93 sumMean += avgPointsVector[6];
94 sumMean += avgPointsVector[7];
// Scalar tail: samples past the last complete group of 8.
96 number = eighthPoints * 8;
97 for (; number < num_points; number++) {
98 sumMean += realDataPoints[number];
// Threshold = mean of all samples plus the caller-supplied exclusion offset.
104 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Pass 2 setup: rewind the read pointer and clear the vector accumulators.
106 dataPointsPtr = realDataPoints;
107 __m256 vMeanAmplitudeVector = _mm256_set1_ps(meanAmplitude);
108 __m256 vOnesVector = _mm256_set1_ps(1.0);
109 __m256 vValidBinCount = _mm256_setzero_ps();
110 avgPointsVal = _mm256_setzero_ps();
// NOTE(review): `number` still equals num_points here in the visible lines;
// a reset to 0 is presumably on an omitted line (numbers jump 110 -> 114),
// otherwise this loop would never execute - confirm.
114 for (; number < eighthPoints; number++) {
116 dataPointsVal = _mm256_load_ps(dataPointsPtr);
// All-ones lanes where sample <= threshold (ordered compare: NaN lanes yield 0).
121 compareMask = _mm256_cmp_ps(dataPointsVal, vMeanAmplitudeVector, _CMP_LE_OQ);
// AND with the mask zeroes the lanes above the threshold before accumulating.
// NOTE(review): the assignment target of this expression is on an omitted line.
126 _mm256_add_ps(avgPointsVal, _mm256_and_ps(compareMask, dataPointsVal));
// AND of the mask with 1.0 contributes 1.0 per accepted lane, i.e. a per-lane count.
// NOTE(review): the assignment target of this expression is on an omitted line.
130 _mm256_add_ps(vValidBinCount, _mm256_and_ps(compareMask, vOnesVector));
// Fold the 8 partial sums of accepted samples into sumMean.
// NOTE(review): no reset of sumMean is visible before these additions;
// presumably on an omitted line - confirm.
134 _mm256_store_ps(avgPointsVector, avgPointsVal);
137 sumMean += avgPointsVector[0];
138 sumMean += avgPointsVector[1];
139 sumMean += avgPointsVector[2];
140 sumMean += avgPointsVector[3];
141 sumMean += avgPointsVector[4];
142 sumMean += avgPointsVector[5];
143 sumMean += avgPointsVector[6];
144 sumMean += avgPointsVector[7];
// Fold the 8 per-lane counts into one scalar count of accepted samples.
148 _mm256_store_ps(validBinCountVector, vValidBinCount);
150 float validBinCount = 0;
151 validBinCount += validBinCountVector[0];
152 validBinCount += validBinCountVector[1];
153 validBinCount += validBinCountVector[2];
154 validBinCount += validBinCountVector[3];
155 validBinCount += validBinCountVector[4];
156 validBinCount += validBinCountVector[5];
157 validBinCount += validBinCountVector[6];
158 validBinCount += validBinCountVector[7];
// Scalar tail of pass 2: same threshold test on the leftover samples.
160 number = eighthPoints * 8;
161 for (; number < num_points; number++) {
162 if (realDataPoints[number] <= meanAmplitude) {
163 sumMean += realDataPoints[number];
164 validBinCount += 1.0;
// Result: mean of the accepted samples when at least one was accepted.
168 float localNoiseFloorAmplitude = 0;
169 if (validBinCount > 0.0) {
170 localNoiseFloorAmplitude = sumMean / validBinCount;
// NOTE(review): the right-hand side of this fallback assignment is on an
// omitted line - confirm the value used when no sample was accepted.
172 localNoiseFloorAmplitude =
176 *noiseFloorAmplitude = localNoiseFloorAmplitude;
181 #include <xmmintrin.h>
// Fragment of an SSE spectral-noise-floor kernel that uses aligned loads/stores.
// NOTE(review): the embedded line numbers skip values, so this listing omits
// lines (function head, braces, several declarations) - confirm against the
// full file before relying on any detail below.
//
// Visible inputs:
//   realDataPoints         - float samples to analyse
//   spectralExclusionValue - offset added to the mean to form the threshold
//   num_points             - number of samples in realDataPoints
// Visible output: the result is written through *noiseFloorAmplitude, which
// is not declared in the visible lines (presumably a parameter on an omitted
// line).
185 const float* realDataPoints,
186 const float spectralExclusionValue,
187 const unsigned int num_points)
189 unsigned int number = 0;
// Number of complete groups of 4 floats; the remainder is handled by scalar loops.
190 const unsigned int quarterPoints = num_points / 4;
192 const float* dataPointsPtr = realDataPoints;
195 __m128 dataPointsVal;
196 __m128 avgPointsVal = _mm_setzero_ps();
// Pass 1: accumulate all samples, 4 lanes at a time.
// _mm_load_ps is the aligned load form (16-byte aligned address expected).
// NOTE(review): no pointer advance is visible in this loop; presumably on an
// omitted line - confirm.
198 for (; number < quarterPoints; number++) {
200 dataPointsVal = _mm_load_ps(dataPointsPtr);
204 avgPointsVal = _mm_add_ps(avgPointsVal, dataPointsVal);
// Spill the 4 partial sums and fold them into the scalar sumMean.
207 _mm_store_ps(avgPointsVector, avgPointsVal);
210 sumMean += avgPointsVector[0];
211 sumMean += avgPointsVector[1];
212 sumMean += avgPointsVector[2];
213 sumMean += avgPointsVector[3];
// Scalar tail: samples past the last complete group of 4.
215 number = quarterPoints * 4;
216 for (; number < num_points; number++) {
217 sumMean += realDataPoints[number];
// Threshold = mean of all samples plus the caller-supplied exclusion offset.
223 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Pass 2 setup: rewind the read pointer and clear the vector accumulators.
225 dataPointsPtr = realDataPoints;
226 __m128 vMeanAmplitudeVector = _mm_set_ps1(meanAmplitude);
227 __m128 vOnesVector = _mm_set_ps1(1.0);
228 __m128 vValidBinCount = _mm_setzero_ps();
229 avgPointsVal = _mm_setzero_ps();
// NOTE(review): `number` still equals num_points here in the visible lines;
// a reset to 0 is presumably on an omitted line (numbers jump 229 -> 233),
// otherwise this loop would never execute - confirm.
233 for (; number < quarterPoints; number++) {
235 dataPointsVal = _mm_load_ps(dataPointsPtr);
// All-ones lanes where sample <= threshold.
240 compareMask = _mm_cmple_ps(dataPointsVal, vMeanAmplitudeVector);
// AND with the mask zeroes the lanes above the threshold before accumulating.
244 avgPointsVal = _mm_add_ps(avgPointsVal, _mm_and_ps(compareMask, dataPointsVal));
// AND of the mask with 1.0 contributes 1.0 per accepted lane, i.e. a per-lane count.
247 vValidBinCount = _mm_add_ps(vValidBinCount, _mm_and_ps(compareMask, vOnesVector));
// Fold the 4 partial sums of accepted samples into sumMean.
// NOTE(review): no reset of sumMean is visible before these additions;
// presumably on an omitted line - confirm.
251 _mm_store_ps(avgPointsVector, avgPointsVal);
254 sumMean += avgPointsVector[0];
255 sumMean += avgPointsVector[1];
256 sumMean += avgPointsVector[2];
257 sumMean += avgPointsVector[3];
// Fold the 4 per-lane counts into one scalar count of accepted samples.
261 _mm_store_ps(validBinCountVector, vValidBinCount);
263 float validBinCount = 0;
264 validBinCount += validBinCountVector[0];
265 validBinCount += validBinCountVector[1];
266 validBinCount += validBinCountVector[2];
267 validBinCount += validBinCountVector[3];
// Scalar tail of pass 2: same threshold test on the leftover samples.
269 number = quarterPoints * 4;
270 for (; number < num_points; number++) {
271 if (realDataPoints[number] <= meanAmplitude) {
272 sumMean += realDataPoints[number];
273 validBinCount += 1.0;
// Result: mean of the accepted samples when at least one was accepted.
277 float localNoiseFloorAmplitude = 0;
278 if (validBinCount > 0.0) {
279 localNoiseFloorAmplitude = sumMean / validBinCount;
// NOTE(review): the right-hand side of this fallback assignment is on an
// omitted line - confirm the value used when no sample was accepted.
281 localNoiseFloorAmplitude =
285 *noiseFloorAmplitude = localNoiseFloorAmplitude;
290 #ifdef LV_HAVE_GENERIC
// Fragment of the scalar (generic) spectral-noise-floor kernel.
// NOTE(review): the embedded line numbers skip values, so this listing omits
// lines (function head, braces, declarations of sumMean/number) - confirm
// against the full file before relying on any detail below.
//
// Visible inputs:
//   realDataPoints         - float samples to analyse
//   spectralExclusionValue - offset added to the mean to form the threshold
//   num_points             - number of samples in realDataPoints
// Visible output: the result is written through *noiseFloorAmplitude, which
// is not declared in the visible lines (presumably a parameter on an omitted
// line).
294 const float* realDataPoints,
295 const float spectralExclusionValue,
296 const unsigned int num_points)
// Pass 1: sum every sample.
301 for (number = 0; number < num_points; number++) {
303 sumMean += realDataPoints[number];
// Threshold = mean of all samples plus the caller-supplied exclusion offset.
309 const float meanAmplitude = (sumMean / num_points) + spectralExclusionValue;
// Pass 2: accumulate only the samples that do not exceed the threshold.
// The accepted-sample counter starts at the full sample count.
// NOTE(review): no reset of sumMean and no decrement of newNumDataPoints are
// visible; presumably both are on omitted lines (numbers jump 309 -> 313 and
// 316 -> 321) - confirm.
313 unsigned int newNumDataPoints = num_points;
314 for (number = 0; number < num_points; number++) {
315 if (realDataPoints[number] <= meanAmplitude)
316 sumMean += realDataPoints[number];
// Result: the threshold itself when the counter is zero, otherwise the mean
// of the accumulated samples over the counter.
321 float localNoiseFloorAmplitude = 0.0;
322 if (newNumDataPoints == 0)
323 localNoiseFloorAmplitude = meanAmplitude;
325 localNoiseFloorAmplitude = sumMean / ((float)newNumDataPoints);
327 *noiseFloorAmplitude = localNoiseFloorAmplitude;
334 #ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H
335 #define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_u_H
337 #include <inttypes.h>
342 #include <immintrin.h>
// Fragment of an AVX spectral-noise-floor kernel that uses unaligned
// loads/stores (_mm256_loadu_ps / _mm256_storeu_ps).
// NOTE(review): the embedded line numbers skip values, so this listing omits
// lines (function head, braces, several declarations) - confirm against the
// full file before relying on any detail below.
//
// Visible inputs:
//   realDataPoints         - float samples to analyse
//   spectralExclusionValue - offset added to the mean to form the threshold
//   num_points             - number of samples in realDataPoints
// Visible output: the result is written through *noiseFloorAmplitude, which
// is not declared in the visible lines (presumably a parameter on an omitted
// line).
346 const float* realDataPoints,
347 const float spectralExclusionValue,
348 const unsigned int num_points)
350 unsigned int number = 0;
// Number of complete groups of 8 floats; the remainder is handled by scalar loops.
351 const unsigned int eighthPoints = num_points / 8;
353 const float* dataPointsPtr = realDataPoints;
356 __m256 dataPointsVal;
357 __m256 avgPointsVal = _mm256_setzero_ps();
// Pass 1: accumulate all samples, 8 lanes at a time, with unaligned loads.
// NOTE(review): no pointer advance is visible in this loop; presumably on an
// omitted line - confirm.
359 for (; number < eighthPoints; number++) {
361 dataPointsVal = _mm256_loadu_ps(dataPointsPtr);
365 avgPointsVal = _mm256_add_ps(avgPointsVal, dataPointsVal);
// Spill the 8 partial sums and fold them into the scalar sumMean.
368 _mm256_storeu_ps(avgPointsVector, avgPointsVal);
371 sumMean += avgPointsVector[0];
372 sumMean += avgPointsVector[1];
373 sumMean += avgPointsVector[2];
374 sumMean += avgPointsVector[3];
375 sumMean += avgPointsVector[4];
376 sumMean += avgPointsVector[5];
377 sumMean += avgPointsVector[6];
378 sumMean += avgPointsVector[7];
// Scalar tail: samples past the last complete group of 8.
380 number = eighthPoints * 8;
381 for (; number < num_points; number++) {
382 sumMean += realDataPoints[number];
// Threshold = mean of all samples plus the caller-supplied exclusion offset.
388 const float meanAmplitude = (sumMean / ((float)num_points)) + spectralExclusionValue;
// Pass 2 setup: rewind the read pointer and clear the vector accumulators.
390 dataPointsPtr = realDataPoints;
391 __m256 vMeanAmplitudeVector = _mm256_set1_ps(meanAmplitude);
392 __m256 vOnesVector = _mm256_set1_ps(1.0);
393 __m256 vValidBinCount = _mm256_setzero_ps();
394 avgPointsVal = _mm256_setzero_ps();
// NOTE(review): `number` still equals num_points here in the visible lines;
// a reset to 0 is presumably on an omitted line (numbers jump 394 -> 398),
// otherwise this loop would never execute - confirm.
398 for (; number < eighthPoints; number++) {
400 dataPointsVal = _mm256_loadu_ps(dataPointsPtr);
// All-ones lanes where sample <= threshold (ordered compare: NaN lanes yield 0).
405 compareMask = _mm256_cmp_ps(dataPointsVal, vMeanAmplitudeVector, _CMP_LE_OQ);
// AND with the mask zeroes the lanes above the threshold before accumulating.
// NOTE(review): the assignment target of this expression is on an omitted line.
410 _mm256_add_ps(avgPointsVal, _mm256_and_ps(compareMask, dataPointsVal));
// AND of the mask with 1.0 contributes 1.0 per accepted lane, i.e. a per-lane count.
// NOTE(review): the assignment target of this expression is on an omitted line.
414 _mm256_add_ps(vValidBinCount, _mm256_and_ps(compareMask, vOnesVector));
// Fold the 8 partial sums of accepted samples into sumMean.
// NOTE(review): no reset of sumMean is visible before these additions;
// presumably on an omitted line - confirm.
418 _mm256_storeu_ps(avgPointsVector, avgPointsVal);
421 sumMean += avgPointsVector[0];
422 sumMean += avgPointsVector[1];
423 sumMean += avgPointsVector[2];
424 sumMean += avgPointsVector[3];
425 sumMean += avgPointsVector[4];
426 sumMean += avgPointsVector[5];
427 sumMean += avgPointsVector[6];
428 sumMean += avgPointsVector[7];
// Fold the 8 per-lane counts into one scalar count of accepted samples.
432 _mm256_storeu_ps(validBinCountVector, vValidBinCount);
434 float validBinCount = 0;
435 validBinCount += validBinCountVector[0];
436 validBinCount += validBinCountVector[1];
437 validBinCount += validBinCountVector[2];
438 validBinCount += validBinCountVector[3];
439 validBinCount += validBinCountVector[4];
440 validBinCount += validBinCountVector[5];
441 validBinCount += validBinCountVector[6];
442 validBinCount += validBinCountVector[7];
// Scalar tail of pass 2: same threshold test on the leftover samples.
444 number = eighthPoints * 8;
445 for (; number < num_points; number++) {
446 if (realDataPoints[number] <= meanAmplitude) {
447 sumMean += realDataPoints[number];
448 validBinCount += 1.0;
// Result: mean of the accepted samples when at least one was accepted.
452 float localNoiseFloorAmplitude = 0;
453 if (validBinCount > 0.0) {
454 localNoiseFloorAmplitude = sumMean / validBinCount;
// NOTE(review): the right-hand side of this fallback assignment is on an
// omitted line - confirm the value used when no sample was accepted.
456 localNoiseFloorAmplitude =
460 *noiseFloorAmplitude = localNoiseFloorAmplitude;
465 #include <riscv_vector.h>
// RISC-V Vector (RVV) spectral-noise-floor kernel (fragment).
// NOTE(review): the embedded line numbers skip values, so this listing omits
// lines (return type, braces, declarations of `sum` and `v`) - confirm
// against the full file before relying on any detail below.
//
// Parameters:
//   noiseFloorAmplitude    - receives the computed result
//   realDataPoints         - float samples to analyse
//   spectralExclusionValue - offset added to the mean to form the threshold
//   num_points             - number of samples in realDataPoints
468 volk_32f_s32f_calc_spectral_noise_floor_32f_rvv(
float* noiseFloorAmplitude,
469 const float* realDataPoints,
470 const float spectralExclusionValue,
471 const unsigned int num_points)
// Helper defined elsewhere; presumably stores the sum of all samples in `sum` - confirm.
474 volk_32f_accumulator_s32f_rvv(&sum, realDataPoints, num_points);
// Threshold = mean of all samples plus the caller-supplied exclusion offset.
475 float meanAmplitude = sum / num_points + spectralExclusionValue;
// vbin: threshold broadcast to every lane; vsum: per-lane accumulators cleared to 0.
477 vfloat32m8_t vbin = __riscv_vfmv_v_f_f32m8(meanAmplitude, __riscv_vsetvlmax_e32m8());
478 vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0, __riscv_vsetvlmax_e32m8());
479 size_t n = num_points, binCount = 0;
// Each iteration processes vl samples, then advances the pointer and shrinks n by vl.
480 for (
size_t vl; n > 0; n -= vl, realDataPoints += vl) {
481 vl = __riscv_vsetvl_e32m8(n);
482 vfloat32m8_t v = __riscv_vle32_v_f32m8(realDataPoints, vl);
// Mask of lanes whose sample is <= threshold; its population count is the
// number of accepted samples in this chunk.
483 vbool4_t m = __riscv_vmfle(v, vbin, vl);
484 binCount += __riscv_vcpop(m, vl);
// Masked add with the _tumu policy (tail-undisturbed, mask-undisturbed):
// accepted lanes get vsum + v, all other lanes keep their previous vsum.
485 vsum = __riscv_vfadd_tumu(m, vsum, vsum, v, vl);
// Final reduction of the per-lane sums to a single scalar.
// NOTE(review): `v` used below is defined on an omitted line (numbers jump
// 487 -> 489); presumably a narrowed form of vsum - confirm.
487 size_t vl = __riscv_vsetvlmax_e32m1();
489 vfloat32m1_t z = __riscv_vfmv_s_f_f32m1(0, vl);
490 sum = __riscv_vfmv_f(__riscv_vfredusum(v, z, vl));
// Result: the threshold itself when no sample was accepted, otherwise the
// mean of the accepted samples.
492 *noiseFloorAmplitude = binCount == 0 ? meanAmplitude : sum / binCount;
static void volk_32f_s32f_calc_spectral_noise_floor_32f_u_avx(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition: volk_32f_s32f_calc_spectral_noise_floor_32f.h:345
static void volk_32f_s32f_calc_spectral_noise_floor_32f_generic(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition: volk_32f_s32f_calc_spectral_noise_floor_32f.h:293
static void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition: volk_32f_s32f_calc_spectral_noise_floor_32f.h:184
static void volk_32f_s32f_calc_spectral_noise_floor_32f_a_avx(float *noiseFloorAmplitude, const float *realDataPoints, const float spectralExclusionValue, const unsigned int num_points)
Definition: volk_32f_s32f_calc_spectral_noise_floor_32f.h:61
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:62
#define RISCV_SHRINK8(op, T, S, v)
Definition: volk_rvv_intrinsics.h:33