choose_n_farthest_points.h
1/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2 * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3 * Author(s): Siargey Kachanovich
4 *
5 * Copyright (C) 2016 Inria
6 *
7 * Modification(s):
8 * - YYYY/MM Author: Description of the modification
9 */
10
11#ifndef CHOOSE_N_FARTHEST_POINTS_H_
12#define CHOOSE_N_FARTHEST_POINTS_H_
13
14#include <boost/range.hpp>
15
16#include <gudhi/Null_output_iterator.h>
17
18#include <iterator>
19#include <vector>
20#include <random>
21#include <limits> // for numeric_limits<>
22
23namespace Gudhi {
24
25namespace subsampling {
26
30enum : std::size_t {
34 random_starting_point = std::size_t(-1)
35};
36
65template < typename Distance,
66typename Point_range,
67typename PointOutputIterator,
68typename DistanceOutputIterator = Null_output_iterator>
69void choose_n_farthest_points(Distance dist,
70 Point_range const &input_pts,
71 std::size_t final_size,
72 std::size_t starting_point,
73 PointOutputIterator output_it,
74 DistanceOutputIterator dist_it = {}) {
75 std::size_t nb_points = boost::size(input_pts);
76 if (final_size > nb_points)
77 final_size = nb_points;
78
79 // Tests to the limit
80 if (final_size < 1)
81 return;
82
83 if (starting_point == random_starting_point) {
84 // Choose randomly the first landmark
85 std::random_device rd;
86 std::mt19937 gen(rd());
87 std::uniform_int_distribution<std::size_t> dis(0, nb_points - 1);
88 starting_point = dis(gen);
89 }
90
91 // FIXME: don't hard-code the type as double. For Epeck_d, we also want to handle types that do not have an infinity.
92 static_assert(std::numeric_limits<double>::has_infinity, "the number type needs to support infinity()");
93
94 *output_it++ = input_pts[starting_point];
95 *dist_it++ = std::numeric_limits<double>::infinity();
96 if (final_size == 1) return;
97
98 std::vector<std::size_t> points(nb_points); // map from remaining points to indexes in input_pts
99 std::vector< double > dist_to_L(nb_points); // vector of current distances to L from points
100 for(std::size_t i = 0; i < nb_points; ++i) {
101 points[i] = i;
102 dist_to_L[i] = dist(input_pts[i], input_pts[starting_point]);
103 }
104 // The indirection through points makes the program a bit slower. Some alternatives:
105 // - the original code never removed points and counted on them not
106 // reappearing because of a self-distance of 0. This causes unnecessary
107 // computations when final_size is large. It also causes trouble if there are
108 // input points at distance 0 from each other.
109 // - copy input_pts and update the local copy when removing points.
110
111 std::size_t curr_max_w = starting_point;
112
113 for (std::size_t current_number_of_landmarks = 1; current_number_of_landmarks != final_size; current_number_of_landmarks++) {
114 std::size_t latest_landmark = points[curr_max_w];
115 // To remove the latest landmark at index curr_max_w, replace it
116 // with the last point and reduce the length of the vector.
117 std::size_t last = points.size() - 1;
118 if (curr_max_w != last) {
119 points[curr_max_w] = points[last];
120 dist_to_L[curr_max_w] = dist_to_L[last];
121 }
122 points.pop_back();
123
124 // Update distances to L.
125 std::size_t i = 0;
126 for (auto p : points) {
127 double curr_dist = dist(input_pts[p], input_pts[latest_landmark]);
128 if (curr_dist < dist_to_L[i])
129 dist_to_L[i] = curr_dist;
130 ++i;
131 }
132 // choose the next landmark
133 curr_max_w = 0;
134 double curr_max_dist = dist_to_L[curr_max_w]; // used for defining the furthest point from L
135 for (i = 1; i < points.size(); i++)
136 if (dist_to_L[i] > curr_max_dist) {
137 curr_max_dist = dist_to_L[i];
138 curr_max_w = i;
139 }
140 *output_it++ = input_pts[points[curr_max_w]];
141 *dist_it++ = dist_to_L[curr_max_w];
142 }
143}
144
145} // namespace subsampling
146
147} // namespace Gudhi
148
149#endif // CHOOSE_N_FARTHEST_POINTS_H_
void choose_n_farthest_points(Distance dist, Point_range const &input_pts, std::size_t final_size, std::size_t starting_point, PointOutputIterator output_it, DistanceOutputIterator dist_it={})
Subsample by a greedy strategy of iteratively adding the farthest point from the current chosen point...
Definition: choose_n_farthest_points.h:69
@ random_starting_point
Definition: choose_n_farthest_points.h:34
Definition: Null_output_iterator.h:19