12#ifndef CHOOSE_N_FARTHEST_POINTS_H_
13#define CHOOSE_N_FARTHEST_POINTS_H_
15#include <boost/version.hpp>
16#include <boost/range.hpp>
17#include <boost/heap/d_ary_heap.hpp>
18#if BOOST_VERSION >= 108100
19 #include <boost/unordered/unordered_flat_set.hpp>
21 #include <boost/unordered_set.hpp>
24#include <gudhi/Null_output_iterator.h>
34namespace subsampling {
71template <
typename Distance,
73typename PointOutputIterator,
76 Point_range
const &input_pts,
77 std::size_t final_size,
78 std::size_t starting_point,
79 PointOutputIterator output_it,
80 DistanceOutputIterator dist_it = {}) {
81 std::size_t nb_points = boost::size(input_pts);
82 if (final_size > nb_points)
83 final_size = nb_points;
91 std::random_device rd;
92 std::mt19937 gen(rd());
93 std::uniform_int_distribution<std::size_t> dis(0, nb_points - 1);
94 starting_point = dis(gen);
99 static_assert(std::numeric_limits<FT>::has_infinity,
"the number type needs to support infinity()");
101 *output_it++ = input_pts[starting_point];
102 *dist_it++ = std::numeric_limits<FT>::infinity();
103 if (final_size == 1)
return;
105 std::vector<std::size_t> points(nb_points);
106 std::vector< FT > dist_to_L(nb_points);
107 for(std::size_t i = 0; i < nb_points; ++i) {
109 dist_to_L[i] = dist(input_pts[i], input_pts[starting_point]);
118 std::size_t curr_max_w = starting_point;
120 for (std::size_t current_number_of_landmarks = 1; current_number_of_landmarks != final_size; current_number_of_landmarks++) {
121 std::size_t latest_landmark = points[curr_max_w];
124 std::size_t last = points.size() - 1;
125 if (curr_max_w != last) {
126 points[curr_max_w] = points[last];
127 dist_to_L[curr_max_w] = dist_to_L[last];
133 for (
auto p : points) {
134 FT curr_dist = dist(input_pts[p], input_pts[latest_landmark]);
135 if (curr_dist < dist_to_L[i])
136 dist_to_L[i] = curr_dist;
141 FT curr_max_dist = dist_to_L[curr_max_w];
142 for (i = 1; i < points.size(); i++)
143 if (dist_to_L[i] > curr_max_dist) {
144 curr_max_dist = dist_to_L[i];
147 *output_it++ = input_pts[points[curr_max_w]];
148 *dist_it++ = dist_to_L[curr_max_w];
156template<
class FT>
struct Landmark_info;
158struct Compare_landmark_radius {
159 std::vector<Landmark_info<FT>>* landmarks_p;
160 Compare_landmark_radius(std::vector<Landmark_info<FT>>* p): landmarks_p(p) {}
161 bool operator()(std::size_t, std::size_t)
const;
166using radius_priority_ds =
167 boost::heap::d_ary_heap<std::size_t, boost::heap::arity<7>, boost::heap::compare<Compare_landmark_radius<FT>>,
168 boost::heap::mutable_<true>, boost::heap::constant_time_size<false>>;
170struct Landmark_info {
171 std::size_t farthest; FT radius;
173 std::vector<std::pair<std::size_t, FT>> voronoi;
176 std::vector<std::pair<std::size_t, FT>> neighbors;
178 typename radius_priority_ds<FT>::handle_type position_in_queue;
181bool Compare_landmark_radius<FT>::operator()(std::size_t a, std::size_t b)
const{
return (*landmarks_p)[a].radius < (*landmarks_p)[b].radius; }
204template <
typename Distance,
206typename PointOutputIterator,
207typename DistanceOutputIterator = Null_output_iterator>
209 Point_range
const &input_pts,
210 std::size_t final_size,
211 std::size_t starting_point,
212 PointOutputIterator output_it,
213 DistanceOutputIterator dist_it = {}) {
214 std::size_t nb_points = boost::size(input_pts);
215 if (final_size > nb_points)
216 final_size = nb_points;
224 std::random_device rd;
225 std::mt19937 gen(rd());
226 std::uniform_int_distribution<std::size_t> dis(0, nb_points - 1);
227 starting_point = dis(gen);
232 static_assert(std::numeric_limits<FT>::has_infinity,
"the number type needs to support infinity()");
234 *output_it++ = input_pts[starting_point];
235 *dist_it++ = std::numeric_limits<FT>::infinity();
236 if (final_size == 1)
return;
238 auto dist = [&](std::size_t a, std::size_t b){
return dist_(input_pts[a], input_pts[b]); };
240 std::vector<Landmark_info<FT>> landmarks(nb_points);
241 radius_priority_ds<FT> radius_priority(&landmarks);
243 auto compute_radius = [&](std::size_t i)
245 FT r = -std::numeric_limits<FT>::infinity();
246 std::size_t jmax = -1;
247 for(
auto [ j, d ] : landmarks[i].voronoi) {
253 landmarks[i].radius = r;
254 landmarks[i].farthest = jmax;
256 auto update_radius = [&](std::size_t i)
259 radius_priority.decrease(landmarks[i].position_in_queue);
264 auto& ini = landmarks[starting_point];
265 ini.voronoi.reserve(nb_points - 1);
266 for (std::size_t i = 0; i < nb_points; ++i)
267 if (i != starting_point)
268 ini.voronoi.emplace_back(i, dist(starting_point, i));
269 compute_radius(starting_point);
270 ini.position_in_queue = radius_priority.push(starting_point);
273 std::vector<std::size_t> modified_neighbors;
274#if BOOST_VERSION >= 108100
275 boost::unordered_flat_set<std::size_t>
277 boost::unordered_set<std::size_t>
280 for (std::size_t current_number_of_landmarks = 1; current_number_of_landmarks != final_size; current_number_of_landmarks++) {
281 std::size_t l_parent = radius_priority.top();
282 auto& parent_info = landmarks[l_parent];
283 std::size_t l = parent_info.farthest;
284 FT radius = parent_info.radius;
285 auto& info = landmarks[l];
286 *output_it++ = input_pts[l];
289 modified_neighbors.clear();
292 auto max_dist = [](FT a, FT b){
return a + b + std::max(a, b); };
294 auto handle_neighbor_voronoi = [&](std::size_t ngb)
296 auto& ngb_info = landmarks[ngb];
297 auto it = std::remove_if(ngb_info.voronoi.begin(), ngb_info.voronoi.end(), [&](
auto wd)
299 std::size_t w = wd.first;
301 FT newd = dist(l, w);
304 info.voronoi.emplace_back(w, newd);
309 if (it != ngb_info.voronoi.end()) {
310 ngb_info.voronoi.erase(it, ngb_info.voronoi.end());
311 modified_neighbors.push_back(ngb);
320 auto handle_neighbor_neighbors = [&](std::size_t ngb)
322 auto& ngb_info = landmarks[ngb];
323 auto it = std::remove_if(ngb_info.neighbors.begin(), ngb_info.neighbors.end(), [&](
auto near_){
324 std::size_t near = near_.first;
327 if (d <= 3 * radius) { l_neighbors.insert(near); }
329 return d >= max_dist(ngb_info.radius, landmarks[near].radius);
331 ngb_info.neighbors.erase(it, ngb_info.neighbors.end());
337 handle_neighbor_voronoi(l_parent);
339 for (
auto ngb_ : parent_info.neighbors) {
340 std::size_t ngb = ngb_.first;
344 if(dist(l, ngb) < 2 * landmarks[ngb].radius)
345 handle_neighbor_voronoi(ngb);
350 for (std::size_t ngb : modified_neighbors)
351 handle_neighbor_neighbors(ngb);
354 info.position_in_queue = radius_priority.push(l);
356 l_neighbors.insert(l_parent);
357 for (std::size_t ngb : l_neighbors) {
359 if (d < max_dist(info.radius, landmarks[ngb].radius)) {
360 info.neighbors.emplace_back(ngb, d);
361 landmarks[ngb].neighbors.emplace_back(l, d);
void choose_n_farthest_points(Distance dist, Point_range const &input_pts, std::size_t final_size, std::size_t starting_point, PointOutputIterator output_it, DistanceOutputIterator dist_it={})
Subsample by an iterative, greedy strategy.
Definition: choose_n_farthest_points.h:75
void choose_n_farthest_points_metric(Distance dist_, Point_range const &input_pts, std::size_t final_size, std::size_t starting_point, PointOutputIterator output_it, DistanceOutputIterator dist_it={})
Subsample by an iterative, greedy strategy.
Definition: choose_n_farthest_points.h:208
@ random_starting_point
Definition: choose_n_farthest_points.h:43
Gudhi namespace.
Definition: SimplicialComplexForAlpha.h:14
Definition: Null_output_iterator.h:19