Loading...
Searching...
No Matches
Persistence_intervals.h
1/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2 * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3 * Author(s): Pawel Dlotko
4 *
5 * Copyright (C) 2016 Inria
6 *
7 * Modification(s):
8 * - 2019/12 Vincent Rouvreau: Fix #118 - Make histogram_of_lengths and cumulative_histogram_of_lengths
9 * return the exact number_of_bins (was failing on x86)
10 * - 2025/06 Hannah Schreiber: Various small bug fixes (missing `inline`s, `DEBUG_TRACES`s etc.)
11 * - YYYY/MM Author: Description of the modification
12 */
13
14#ifndef PERSISTENCE_INTERVALS_H_
15#define PERSISTENCE_INTERVALS_H_
16
17// standard include
18#ifdef DEBUG_TRACES
19#include <iostream> // std::clog
20#endif
21#include <cstddef> // std::size_t
22#include <ostream> // std::ostream
23#include <fstream> // std::ofstream
24#include <sstream> // std::stringstream
25#include <limits> // std::numeric_limits
26#include <algorithm> // std::sort
27#include <cmath> // std::sqrt
28#include <utility> // std::pair
29#include <vector>
30
31// gudhi include
32#include <gudhi/read_persistence_from_file.h>
33#include <gudhi/Debug_utils.h>
34
35namespace Gudhi {
36namespace Persistence_representations {
37
38// TODO: it would have been better to have this file in a subfolder "Persistence_representations"
39// to avoid including it with "<gudhi/Persistence_intervals.h>" which makes it sound universal within gudhi
40// even though it is only used in this format within this module.
41// How critical would it be for retro-compatibility to change that?
42
51{
52 public:
64 Persistence_intervals(const char* filename, unsigned int dimension = std::numeric_limits<unsigned int>::max())
66 filename,
67 dimension == std::numeric_limits<unsigned int>::max() ? -1 : static_cast<int>(dimension))),
68 number_of_functions_for_vectorization_(intervals_.size()),
69 number_of_functions_for_projections_to_reals_(1)
70 {}
71
78 Persistence_intervals(const std::vector<std::pair<double, double> >& intervals)
79 : intervals_(intervals),
80 number_of_functions_for_vectorization_(intervals.size()),
81 number_of_functions_for_projections_to_reals_(1)
82 {}
83
87 std::pair<double, double> get_x_range() const
88 {
89 double min = std::numeric_limits<int>::max();
90 double max = -std::numeric_limits<int>::max();
91 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
92 if (this->intervals_[i].first < min) min = this->intervals_[i].first;
93 if (this->intervals_[i].second > max) max = this->intervals_[i].second;
94 }
95 return std::make_pair(min, max);
96 }
97
101 std::pair<double, double> get_y_range() const
102 {
103 double min = std::numeric_limits<int>::max();
104 double max = -std::numeric_limits<int>::max();
105 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
106 if (this->intervals_[i].second < min) min = this->intervals_[i].second;
107 if (this->intervals_[i].second > max) max = this->intervals_[i].second;
108 }
109 return std::make_pair(min, max);
110 }
111
116 std::vector<double> length_of_dominant_intervals(std::size_t where_to_cut = 100) const;
117
122 std::vector<std::pair<double, double> > dominant_intervals(std::size_t where_to_cut = 100) const;
123
132 std::vector<std::size_t> histogram_of_lengths(std::size_t number_of_bins = 10) const;
133
139 std::vector<std::size_t> cumulative_histogram_of_lengths(std::size_t number_of_bins = 10) const;
140
147 std::vector<double> characteristic_function_of_diagram(double x_min,
148 double x_max,
149 std::size_t number_of_bins = 10) const;
150
154 std::vector<double> cumulative_characteristic_function_of_diagram(double x_min,
155 double x_max,
156 std::size_t number_of_bins = 10) const;
157
163 std::vector<std::pair<double, std::size_t> > compute_persistent_betti_numbers() const;
164
171 std::vector<double> k_n_n(std::size_t k, std::size_t where_to_cut = 10) const;
172
176 friend std::ostream& operator<<(std::ostream& out, const Persistence_intervals& intervals)
177 {
178 for (std::size_t i = 0; i != intervals.intervals_.size(); ++i) {
179 out << intervals.intervals_[i].first << " " << intervals.intervals_[i].second << std::endl;
180 }
181 return out;
182 }
183
187 void plot(const char* filename,
188 double min_x = std::numeric_limits<double>::max(),
189 double max_x = std::numeric_limits<double>::max(),
190 double min_y = std::numeric_limits<double>::max(),
191 double max_y = std::numeric_limits<double>::max()) const
192 {
193 // this program create a gnuplot script file that allows to plot persistence diagram.
194 std::ofstream out;
195
196 std::stringstream gnuplot_script;
197 gnuplot_script << filename << "_GnuplotScript";
198
199 out.open(gnuplot_script.str().c_str());
200
201 std::pair<double, double> min_max_values = this->get_x_range();
202 if (min_x == max_x) {
203 out << "set xrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
204 << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
205 out << "set yrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
206 << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
207 } else {
208 out << "set xrange [" << min_x << " : " << max_x << " ]" << std::endl;
209 out << "set yrange [" << min_y << " : " << max_y << " ]" << std::endl;
210 }
211 out << "plot '-' using 1:2 notitle \"" << filename << "\", \\" << std::endl;
212 out << " '-' using 1:2 notitle with lp" << std::endl;
213 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
214 out << this->intervals_[i].first << " " << this->intervals_[i].second << std::endl;
215 }
216 out << "EOF" << std::endl;
217 out << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " "
218 << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
219 out << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " "
220 << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
221
222 out.close();
223
224#ifdef DEBUG_TRACES
225 std::clog << "To visualize, install gnuplot and type the command: gnuplot -persist -e \"load \'"
226 << gnuplot_script.str().c_str() << "\'\"" << std::endl;
227#endif
228 }
229
233 std::size_t size() const { return this->intervals_.size(); }
234
239 const std::pair<double, double>& operator[](std::size_t i) const
240 {
241 if (i >= this->intervals_.size()) throw("Index out of range! Operator [], one_d_gaussians class\n");
242 return this->intervals_[i];
243 }
244
245 // Implementations of functions for various concepts.
246
254 double project_to_R(int number_of_function) const;
255
260 std::size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals_; }
261
266 std::vector<double> vectorize(int number_of_function) const
267 {
268 return this->length_of_dominant_intervals(number_of_function);
269 }
270
275 std::size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization_; }
276
277 // end of implementation of functions needed for concepts.
278
279 // For visualization use output from vectorize and build histograms.
280 const std::vector<std::pair<double, double> >& output_for_visualization() { return this->intervals_; }
281
282 protected:
283 std::vector<std::pair<double, double> > intervals_;
284
285 private:
286 std::size_t number_of_functions_for_vectorization_;
287 std::size_t number_of_functions_for_projections_to_reals_;
288};
289
290inline std::vector<double> Persistence_intervals::length_of_dominant_intervals(std::size_t where_to_cut) const
291{
292 std::vector<double> result(this->intervals_.size());
293 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
294 result[i] = this->intervals_[i].second - this->intervals_[i].first;
295 }
296 std::sort(result.begin(), result.end(), std::greater<double>());
297
298 result.resize(std::min(where_to_cut, result.size()));
299 return result;
300}
301
302inline std::vector<std::pair<double, double> > Persistence_intervals::dominant_intervals(std::size_t where_to_cut) const
303{
304 std::vector<std::pair<std::size_t, double> > position_length_vector(this->intervals_.size());
305 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
306 position_length_vector[i] = std::make_pair(i, this->intervals_[i].second - this->intervals_[i].first);
307 }
308
309 std::sort(position_length_vector.begin(),
310 position_length_vector.end(),
311 [](const std::pair<std::size_t, double>& first, const std::pair<std::size_t, double>& second) -> bool {
312 return first.second > second.second;
313 });
314
315 std::vector<std::pair<double, double> > result;
316 result.reserve(std::min(where_to_cut, position_length_vector.size()));
317
318 for (std::size_t i = 0; i != std::min(where_to_cut, position_length_vector.size()); ++i) {
319 result.push_back(this->intervals_[position_length_vector[i].first]);
320#ifdef DEBUG_TRACES
321 std::clog << "Position : " << position_length_vector[i].first << " length : " << position_length_vector[i].second
322 << std::endl;
323#endif
324 }
325
326 return result;
327} // dominant_intervals
328
329inline std::vector<std::size_t> Persistence_intervals::histogram_of_lengths(std::size_t number_of_bins) const
330{
331#ifdef DEBUG_TRACES
332 std::clog << "this->intervals.size() : " << this->intervals_.size() << std::endl;
333#endif
334
335 // first find the length of the longest interval:
336 double lengthOfLongest = 0;
337 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
338 if ((this->intervals_[i].second - this->intervals_[i].first) > lengthOfLongest) {
339 lengthOfLongest = this->intervals_[i].second - this->intervals_[i].first;
340 }
341 }
342
343#ifdef DEBUG_TRACES
344 std::clog << "lengthOfLongest : " << lengthOfLongest << std::endl;
345#endif
346
347 // this is a container we will use to store the resulting histogram
348 std::vector<std::size_t> result(number_of_bins + 1, 0);
349
350 // for every persistence interval in our collection.
351 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
352 // compute its length relative to the length of the dominant interval:
353 double relative_length_of_this_interval =
354 (this->intervals_[i].second - this->intervals_[i].first) / lengthOfLongest;
355
356 // given the relative length (between 0 and 1) compute to which bin should it contribute.
357 std::size_t position = (std::size_t)(relative_length_of_this_interval * number_of_bins);
358
359 ++result[position];
360
361#ifdef DEBUG_TRACES
362 std::clog << "i : " << i << std::endl;
363 std::clog << "Interval : [" << this->intervals_[i].first << " , " << this->intervals_[i].second << " ] \n";
364 std::clog << "relative_length_of_this_interval : " << relative_length_of_this_interval << std::endl;
365 std::clog << "position : " << position << std::endl;
366#endif
367 }
368 // we want number of bins equals to number_of_bins (some unexpected results on x86)
369 result[number_of_bins - 1] += result[number_of_bins];
370 result.resize(number_of_bins);
371
372#ifdef DEBUG_TRACES
373 for (std::size_t i = 0; i != result.size(); ++i) std::clog << result[i] << std::endl;
374#endif
375 return result;
376}
377
378inline std::vector<std::size_t> Persistence_intervals::cumulative_histogram_of_lengths(std::size_t number_of_bins) const
379{
380 std::vector<std::size_t> histogram = this->histogram_of_lengths(number_of_bins);
381 std::vector<std::size_t> result(histogram.size());
382
383 std::size_t sum = 0;
384 for (std::size_t i = 0; i != histogram.size(); ++i) {
385 sum += histogram[i];
386 result[i] = sum;
387 }
388 return result;
389}
390
391inline std::vector<double> Persistence_intervals::characteristic_function_of_diagram(double x_min,
392 double x_max,
393 std::size_t number_of_bins) const
394{
395 std::vector<double> result(number_of_bins);
396 std::fill(result.begin(), result.end(), 0);
397
398 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
399#ifdef DEBUG_TRACES
400 std::clog << "Interval : " << this->intervals_[i].first << " , " << this->intervals_[i].second << std::endl;
401#endif
402
403 std::size_t beginIt = 0;
404 if (this->intervals_[i].first < x_min) beginIt = 0;
405 if (this->intervals_[i].first >= x_max) beginIt = result.size();
406 if ((this->intervals_[i].first > x_min) && (this->intervals_[i].first < x_max)) {
407 beginIt = number_of_bins * (this->intervals_[i].first - x_min) / (x_max - x_min);
408 }
409
410 std::size_t endIt = 0;
411 if (this->intervals_[i].second < x_min) endIt = 0;
412 if (this->intervals_[i].second >= x_max) endIt = result.size();
413 if ((this->intervals_[i].second > x_min) && (this->intervals_[i].second < x_max)) {
414 endIt = number_of_bins * (this->intervals_[i].second - x_min) / (x_max - x_min);
415 }
416
417 if (beginIt > endIt) {
418 beginIt = endIt;
419 }
420
421#ifdef DEBUG_TRACES
422 std::clog << "beginIt : " << beginIt << std::endl;
423 std::clog << "endIt : " << endIt << std::endl;
424#endif
425
426 for (std::size_t pos = beginIt; pos != endIt; ++pos) {
427 result[pos] += ((x_max - x_min) / static_cast<double>(number_of_bins)) *
428 (this->intervals_[i].second - this->intervals_[i].first);
429 }
430#ifdef DEBUG_TRACES
431 std::clog << "Result at this stage \n";
432 for (std::size_t aa = 0; aa != result.size(); ++aa) {
433 std::clog << result[aa] << " ";
434 }
435 std::clog << std::endl;
436#endif
437 }
438 return result;
439} // characteristic_function_of_diagram
440
442 double x_min,
443 double x_max,
444 std::size_t number_of_bins) const
445{
446 std::vector<double> intsOfBars = this->characteristic_function_of_diagram(x_min, x_max, number_of_bins);
447 std::vector<double> result(intsOfBars.size());
448 double sum = 0;
449 for (std::size_t i = 0; i != intsOfBars.size(); ++i) {
450 sum += intsOfBars[i];
451 result[i] = sum;
452 }
453 return result;
454}
455
456inline std::vector<std::pair<double, std::size_t> > Persistence_intervals::compute_persistent_betti_numbers() const
457{
458 std::vector<std::pair<double, bool> > places_where_pbs_change(2 * this->intervals_.size());
459
460 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
461 places_where_pbs_change[2 * i] = std::make_pair(this->intervals_[i].first, true);
462 places_where_pbs_change[2 * i + 1] = std::make_pair(this->intervals_[i].second, false);
463 }
464
465 std::sort(
466 places_where_pbs_change.begin(),
467 places_where_pbs_change.end(),
468 [](const std::pair<double, bool>& f, const std::pair<double, bool>& s) -> bool { return f.first < s.first; });
469
470 std::size_t pbn = 0;
471 std::vector<std::pair<double, std::size_t> > pbns(places_where_pbs_change.size());
472 for (std::size_t i = 0; i != places_where_pbs_change.size(); ++i) {
473 if (places_where_pbs_change[i].second == true) {
474 ++pbn;
475 } else {
476 --pbn;
477 }
478 pbns[i] = std::make_pair(places_where_pbs_change[i].first, pbn);
479 }
480 return pbns;
481}
482
483inline std::vector<double> Persistence_intervals::k_n_n(std::size_t k, std::size_t where_to_cut) const
484{
485#ifdef DEBUG_TRACES
486 std::clog << "Here are the intervals : \n";
487 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
488 std::clog << "[ " << this->intervals_[i].first << " , " << this->intervals_[i].second << "] \n";
489 }
490#endif
491
492 auto compute_euclidean_distance = [](const std::pair<double, double>& f,
493 const std::pair<double, double>& s) -> double {
494 return std::sqrt((f.first - s.first) * (f.first - s.first) + (f.second - s.second) * (f.second - s.second));
495 };
496
497 std::vector<double> result;
498 // compute all to all distance between point in the diagram. Also, consider points in the diagonal with the infinite
499 // multiplicity.
500 std::vector<std::vector<double> > distances(this->intervals_.size());
501 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
502 std::vector<double> aa(this->intervals_.size());
503 std::fill(aa.begin(), aa.end(), 0);
504 distances[i] = aa;
505 }
506 std::vector<double> distances_from_diagonal(this->intervals_.size());
507 std::fill(distances_from_diagonal.begin(), distances_from_diagonal.end(), 0);
508
509 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
510 std::vector<double> distancesFromI;
511 for (std::size_t j = i + 1; j != this->intervals_.size(); ++j) {
512 distancesFromI.push_back(compute_euclidean_distance(this->intervals_[i], this->intervals_[j]));
513 }
514 // also add a distance from this guy to diagonal:
515 double distanceToDiagonal =
516 compute_euclidean_distance(this->intervals_[i],
517 std::make_pair(0.5 * (this->intervals_[i].first + this->intervals_[i].second),
518 0.5 * (this->intervals_[i].first + this->intervals_[i].second)));
519 distances_from_diagonal[i] = distanceToDiagonal;
520
521#ifdef DEBUG_TRACES
522 std::clog << "Here are the distances form the point : [" << this->intervals_[i].first << " , "
523 << this->intervals_[i].second << "] in the diagram \n";
524 for (std::size_t aa = 0; aa != distancesFromI.size(); ++aa) {
525 std::clog << "To : " << i + aa << " : " << distancesFromI[aa] << " ";
526 }
527 std::clog << std::endl;
528#endif
529
530 // filling in the distances matrix:
531 for (std::size_t j = i + 1; j != this->intervals_.size(); ++j) {
532 distances[i][j] = distancesFromI[j - i - 1];
533 distances[j][i] = distancesFromI[j - i - 1];
534 }
535 }
536
537#ifdef DEBUG_TRACES
538 std::clog << "Here is the distance matrix : \n";
539 for (std::size_t i = 0; i != distances.size(); ++i) {
540 for (std::size_t j = 0; j != distances.size(); ++j) {
541 std::clog << distances[i][j] << " ";
542 }
543 std::clog << std::endl;
544 }
545 std::clog << std::endl << std::endl << "And here are the distances to the diagonal : " << std::endl;
546 for (std::size_t i = 0; i != distances_from_diagonal.size(); ++i) {
547 std::clog << distances_from_diagonal[i] << " ";
548 }
549 std::clog << std::endl << std::endl;
550#endif
551
552 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
553 std::vector<double> distancesFromI = distances[i];
554 distancesFromI.push_back(distances_from_diagonal[i]);
555
556 // sort it:
557 std::sort(distancesFromI.begin(), distancesFromI.end(), std::greater<double>());
558
559 if (k > distancesFromI.size()) {
560#ifdef DEBUG_TRACES
561 std::clog << "There are not enough neighbors in your set. We set the result to plus infty \n";
562#endif
563 result.push_back(std::numeric_limits<double>::max());
564 } else {
565 if (distances_from_diagonal[i] > distancesFromI[k]) {
566#ifdef DEBUG_TRACES
567 std::clog << "The k-th n.n. is on a diagonal. Therefore we set up a distance to diagonal \n";
568#endif
569 result.push_back(distances_from_diagonal[i]);
570 } else {
571 result.push_back(distancesFromI[k]);
572 }
573 }
574 }
575 std::sort(result.begin(), result.end(), std::greater<double>());
576 result.resize(std::min(result.size(), where_to_cut));
577
578 return result;
579}
580
581inline double Persistence_intervals::project_to_R(int number_of_function) const
582{
583 double result = 0;
584
585 for (std::size_t i = 0; i != this->intervals_.size(); ++i) {
586 auto diff = this->intervals_[i].second - this->intervals_[i].first;
587 result += diff * diff;
588 }
589
590 return result;
591}
592
593} // namespace Persistence_representations
594} // namespace Gudhi
595
596#endif // PERSISTENCE_INTERVALS_H_
std::vector< std::size_t > histogram_of_lengths(std::size_t number_of_bins=10) const
Definition Persistence_intervals.h:329
std::vector< double > characteristic_function_of_diagram(double x_min, double x_max, std::size_t number_of_bins=10) const
Definition Persistence_intervals.h:391
std::vector< double > vectorize(int number_of_function) const
Definition Persistence_intervals.h:266
double project_to_R(int number_of_function) const
Definition Persistence_intervals.h:581
Persistence_intervals(const std::vector< std::pair< double, double > > &intervals)
Constructor from a vector of pairs.
Definition Persistence_intervals.h:78
std::pair< double, double > get_x_range() const
Definition Persistence_intervals.h:87
std::vector< std::size_t > cumulative_histogram_of_lengths(std::size_t number_of_bins=10) const
Definition Persistence_intervals.h:378
std::vector< std::pair< double, double > > dominant_intervals(std::size_t where_to_cut=100) const
Definition Persistence_intervals.h:302
friend std::ostream & operator<<(std::ostream &out, const Persistence_intervals &intervals)
Definition Persistence_intervals.h:176
void plot(const char *filename, double min_x=std::numeric_limits< double >::max(), double max_x=std::numeric_limits< double >::max(), double min_y=std::numeric_limits< double >::max(), double max_y=std::numeric_limits< double >::max()) const
Definition Persistence_intervals.h:187
std::size_t number_of_vectorize_functions() const
Definition Persistence_intervals.h:275
std::size_t size() const
Definition Persistence_intervals.h:233
std::size_t number_of_projections_to_R() const
Definition Persistence_intervals.h:260
std::vector< double > cumulative_characteristic_function_of_diagram(double x_min, double x_max, std::size_t number_of_bins=10) const
Definition Persistence_intervals.h:441
const std::pair< double, double > & operator[](std::size_t i) const
Definition Persistence_intervals.h:239
std::vector< double > k_n_n(std::size_t k, std::size_t where_to_cut=10) const
Definition Persistence_intervals.h:483
std::vector< double > length_of_dominant_intervals(std::size_t where_to_cut=100) const
Definition Persistence_intervals.h:290
std::pair< double, double > get_y_range() const
Definition Persistence_intervals.h:101
std::vector< std::pair< double, std::size_t > > compute_persistent_betti_numbers() const
Definition Persistence_intervals.h:456
Persistence_intervals(const char *filename, unsigned int dimension=std::numeric_limits< unsigned int >::max())
Constructor from a text file.
Definition Persistence_intervals.h:64
std::vector< std::pair< double, double > > read_persistence_intervals_in_one_dimension_from_file(std::string const &filename, int dimension=-1, double what_to_substitute_for_infinite_bar=-1)
Definition read_persistence_from_file.h:44
Gudhi namespace.
Definition SimplicialComplexForAlpha.h:14
STL namespace.