Skip to content

Commit 2637444

Browse files
committed
Added option to HampelFilterVisitor to populate indices to datapoints affected
1 parent c56d010 commit 2637444

File tree

2 files changed

+55
-29
lines changed

2 files changed

+55
-29
lines changed

docs/HTML/HampelFilterVisitor.html

+9-3
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@
6868
<td bgcolor="blue"> <font color="white">
6969
<PRE><B>#include &lt;DataFrame/DataFrameTransformVisitors.h&gt;
7070

71-
template&lt;typename T, typename I = unsigned long&gt;
71+
template&lt;typename T, typename I = unsigned long,
72+
std::size_t A = 0&gt;
7273
struct HampelFilterVisitor;
7374

7475
// -------------------------------------
@@ -85,13 +86,18 @@
8586
explicit
8687
HampelFilterVisitor(std::size_t window_size,
8788
hampel_type ht = hampel_type::median,
88-
T num_of_std = 3);
89+
T num_of_std = 3,
90+
bool populate_idxs = false);
8991
</PRE>
9092
</I>
93+
If <I>populate_idxs</I> is true, the input data column is left unchanged. Instead, a vector of the indices of the affected data points is populated; it can be accessed by calling <I>get_idxs()</I>.<BR><BR>
94+
<I>get_result()</I> returns the number of data points affected<BR>
95+
<I>get_idxs()</I> returns a std::vector of the indices of the data points affected<BR>
9196
</td>
9297
<td width="30%">
9398
<B>T</B>: Column data type.<BR>
94-
<B>I</B>: Index type.
99+
<B>I</B>: Index type.<BR>
100+
<B>A</B>: Memory alignment boundary for vectors. Default is the system default alignment.<BR>
95101
</td>
96102
</tr>
97103

include/DataFrame/DataFrameTransformVisitors.h

+46-26
Original file line numberDiff line numberDiff line change
@@ -367,33 +367,43 @@ struct HampelFilterVisitor {
367367

368368
const value_type factor = num_of_std_ * unbiased_factor_;
369369

370-
if (col_s >= ThreadPool::MUL_THR_THHOLD &&
371-
ThreadGranularity::get_thread_level() > 2) {
372-
auto futures =
373-
ThreadGranularity::thr_pool_.parallel_loop(
374-
size_type(0),
375-
col_s,
376-
[&column_begin, &diff = std::as_const(diff), factor,
377-
&aggr = std::as_const(aggr.get_result())]
378-
(auto begin, auto end) -> result_type {
379-
result_type count { 0 };
380-
381-
for (size_type i = begin; i < end; ++i) {
382-
if (diff[i] > (aggr[i] * factor)) {
383-
*(column_begin + i) =
384-
std::numeric_limits<T>::quiet_NaN();
385-
count += 1;
386-
}
370+
if (! populate_idxs_) {
371+
auto lbd =
372+
[&column_begin, &diff = std::as_const(diff), factor,
373+
&aggr = std::as_const(aggr.get_result())]
374+
(size_type begin, size_type end) -> result_type {
375+
result_type count { 0 };
376+
377+
for (size_type i = begin; i < end; ++i) {
378+
if (diff[i] > (aggr[i] * factor)) {
379+
*(column_begin + i) =
380+
std::numeric_limits<T>::quiet_NaN();
381+
count += 1;
387382
}
388-
return (count);
389-
});
390-
391-
for (auto &fut : futures) count_ += fut.get();
383+
}
384+
return (count);
385+
};
386+
387+
if (col_s >= ThreadPool::MUL_THR_THHOLD &&
388+
ThreadGranularity::get_thread_level() > 2) {
389+
auto futures =
390+
ThreadGranularity::thr_pool_.parallel_loop(size_type(0),
391+
col_s,
392+
lbd);
393+
394+
for (auto &fut : futures) count_ += fut.get();
395+
}
396+
else {
397+
count_ = lbd(0, col_s);
398+
}
392399
}
393400
else {
401+
const auto &aggr_res = aggr.get_result();
402+
403+
idxs_.reserve(col_s / 20);
394404
for (size_type i = 0; i < col_s; ++i) {
395-
if (diff[i] > (aggr.get_result()[i] * factor)) {
396-
*(column_begin + i) = std::numeric_limits<T>::quiet_NaN();
405+
if (diff[i] > (aggr_res[i] * factor)) {
406+
idxs_.push_back(i);
397407
count_ += 1;
398408
}
399409
}
@@ -402,6 +412,9 @@ struct HampelFilterVisitor {
402412

403413
public:
404414

415+
using idxs_vec_t =
416+
std::vector<size_type, typename allocator_declare<size_type, A>::type>;
417+
405418
template<typename K, typename H>
406419
inline void
407420
operator() (K idx_begin, K idx_end, H column_begin, H column_end) {
@@ -416,15 +429,20 @@ struct HampelFilterVisitor {
416429
(MeanVisitor<T, I> { true }, window_size_));
417430
}
418431

419-
DEFINE_PRE_POST_2
432+
inline void pre () { count_ = 0; idxs_.clear(); }
433+
inline void post () { }
434+
inline result_type get_result () const { return (count_); }
435+
inline const idxs_vec_t &get_idxs () const { return (idxs_); }
420436

421437
explicit
422438
HampelFilterVisitor(size_type window_size,
423439
hampel_type ht = hampel_type::median,
424-
value_type num_of_std = 3)
440+
value_type num_of_std = 3,
441+
bool populate_idxs = false)
425442
: window_size_(window_size),
426443
type_(ht),
427-
num_of_std_(num_of_std) { }
444+
num_of_std_(num_of_std),
445+
populate_idxs_(populate_idxs) { }
428446

429447
private:
430448

@@ -434,7 +452,9 @@ struct HampelFilterVisitor {
434452
const size_type window_size_;
435453
const hampel_type type_;
436454
const value_type num_of_std_;
455+
const bool populate_idxs_;
437456
result_type count_ { 0 };
457+
idxs_vec_t idxs_;
438458
};
439459

440460
template<typename T, typename I = unsigned long, std::size_t A = 0>

0 commit comments

Comments
 (0)