Persistence_intervals.h
1 /* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2  * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3  * Author(s): Pawel Dlotko
4  *
5  * Copyright (C) 2016 Inria
6  *
7  * Modification(s):
8  * - YYYY/MM Author: Description of the modification
9  */
10 
11 #ifndef PERSISTENCE_INTERVALS_H_
12 #define PERSISTENCE_INTERVALS_H_
13 
14 // gudhi include
15 #include <gudhi/read_persistence_from_file.h>
16 
17 // standard include
18 #include <limits>
19 #include <iostream>
20 #include <fstream>
21 #include <vector>
22 #include <algorithm>
23 #include <cmath>
24 #include <functional>
25 #include <utility>
26 #include <string>
27 
28 namespace Gudhi {
29 namespace Persistence_representations {
30 
36  public:
45  Persistence_intervals(const char* filename, unsigned dimension = std::numeric_limits<unsigned>::max());
46 
52  Persistence_intervals(const std::vector<std::pair<double, double> >& intervals);
53 
57  std::pair<double, double> get_x_range() const {
58  double min_ = std::numeric_limits<int>::max();
59  double max_ = -std::numeric_limits<int>::max();
60  for (size_t i = 0; i != this->intervals.size(); ++i) {
61  if (this->intervals[i].first < min_) min_ = this->intervals[i].first;
62  if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
63  }
64  return std::make_pair(min_, max_);
65  }
66 
70  std::pair<double, double> get_y_range() const {
71  double min_ = std::numeric_limits<int>::max();
72  double max_ = -std::numeric_limits<int>::max();
73  for (size_t i = 0; i != this->intervals.size(); ++i) {
74  if (this->intervals[i].second < min_) min_ = this->intervals[i].second;
75  if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
76  }
77  return std::make_pair(min_, max_);
78  }
79 
84  std::vector<double> length_of_dominant_intervals(size_t where_to_cut = 100) const;
85 
90  std::vector<std::pair<double, double> > dominant_intervals(size_t where_to_cut = 100) const;
91 
100  std::vector<size_t> histogram_of_lengths(size_t number_of_bins = 10) const;
101 
107  std::vector<size_t> cumulative_histogram_of_lengths(size_t number_of_bins = 10) const;
108 
116  std::vector<double> characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins = 10) const;
117 
121  std::vector<double> cumulative_characteristic_function_of_diagram(double x_min, double x_max,
122  size_t number_of_bins = 10) const;
123 
129  std::vector<std::pair<double, size_t> > compute_persistent_betti_numbers() const;
130 
137  std::vector<double> k_n_n(size_t k, size_t where_to_cut = 10) const;
138 
142  friend std::ostream& operator<<(std::ostream& out, const Persistence_intervals& intervals) {
143  for (size_t i = 0; i != intervals.intervals.size(); ++i) {
144  out << intervals.intervals[i].first << " " << intervals.intervals[i].second << std::endl;
145  }
146  return out;
147  }
148 
152  void plot(const char* filename, double min_x = std::numeric_limits<double>::max(),
153  double max_x = std::numeric_limits<double>::max(), double min_y = std::numeric_limits<double>::max(),
154  double max_y = std::numeric_limits<double>::max()) const {
155  // this program create a gnuplot script file that allows to plot persistence diagram.
156  std::ofstream out;
157 
158  std::stringstream gnuplot_script;
159  gnuplot_script << filename << "_GnuplotScript";
160 
161  out.open(gnuplot_script.str().c_str());
162 
163  std::pair<double, double> min_max_values = this->get_x_range();
164  if (min_x == max_x) {
165  out << "set xrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
166  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
167  out << "set yrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
168  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
169  } else {
170  out << "set xrange [" << min_x << " : " << max_x << " ]" << std::endl;
171  out << "set yrange [" << min_y << " : " << max_y << " ]" << std::endl;
172  }
173  out << "plot '-' using 1:2 notitle \"" << filename << "\", \\" << std::endl;
174  out << " '-' using 1:2 notitle with lp" << std::endl;
175  for (size_t i = 0; i != this->intervals.size(); ++i) {
176  out << this->intervals[i].first << " " << this->intervals[i].second << std::endl;
177  }
178  out << "EOF" << std::endl;
179  out << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " "
180  << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
181  out << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " "
182  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
183 
184  out.close();
185 
186  std::cout << "To visualize, install gnuplot and type the command: gnuplot -persist -e \"load \'"
187  << gnuplot_script.str().c_str() << "\'\"" << std::endl;
188  }
189 
193  size_t size() const { return this->intervals.size(); }
194 
199  inline std::pair<double, double> operator[](size_t i) const {
200  if (i >= this->intervals.size()) throw("Index out of range! Operator [], one_d_gaussians class\n");
201  return this->intervals[i];
202  }
203 
204  // Implementations of functions for various concepts.
213  double project_to_R(int number_of_function) const;
218  size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals; }
219 
224  std::vector<double> vectorize(int number_of_function) const {
225  return this->length_of_dominant_intervals(number_of_function);
226  }
231  size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization; }
232 
233  // end of implementation of functions needed for concepts.
234 
235  // For visualization use output from vectorize and build histograms.
236  std::vector<std::pair<double, double> > output_for_visualization() { return this->intervals; }
237 
238  protected:
239  void set_up_numbers_of_functions_for_vectorization_and_projections_to_reals() {
240  // warning, this function can be only called after filling in the intervals vector.
241  this->number_of_functions_for_vectorization = this->intervals.size();
242  this->number_of_functions_for_projections_to_reals = 1;
243  }
244 
245  std::vector<std::pair<double, double> > intervals;
246  size_t number_of_functions_for_vectorization;
247  size_t number_of_functions_for_projections_to_reals;
248 };
249 
250 Persistence_intervals::Persistence_intervals(const char* filename, unsigned dimension) {
251  if (dimension == std::numeric_limits<unsigned>::max()) {
252  this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename);
253  } else {
254  this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename, dimension);
255  }
256  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
257 } // Persistence_intervals
258 
259 Persistence_intervals::Persistence_intervals(const std::vector<std::pair<double, double> >& intervals_)
260  : intervals(intervals_) {
261  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
262 }
263 
264 std::vector<double> Persistence_intervals::length_of_dominant_intervals(size_t where_to_cut) const {
265  std::vector<double> result(this->intervals.size());
266  for (size_t i = 0; i != this->intervals.size(); ++i) {
267  result[i] = this->intervals[i].second - this->intervals[i].first;
268  }
269  std::sort(result.begin(), result.end(), std::greater<double>());
270 
271  result.resize(std::min(where_to_cut, result.size()));
272  return result;
273 } // length_of_dominant_intervals
274 
275 bool compare(const std::pair<size_t, double>& first, const std::pair<size_t, double>& second) {
276  return first.second > second.second;
277 }
278 
279 std::vector<std::pair<double, double> > Persistence_intervals::dominant_intervals(size_t where_to_cut) const {
280  bool dbg = false;
281  std::vector<std::pair<size_t, double> > position_length_vector(this->intervals.size());
282  for (size_t i = 0; i != this->intervals.size(); ++i) {
283  position_length_vector[i] = std::make_pair(i, this->intervals[i].second - this->intervals[i].first);
284  }
285 
286  std::sort(position_length_vector.begin(), position_length_vector.end(), compare);
287 
288  std::vector<std::pair<double, double> > result;
289  result.reserve(std::min(where_to_cut, position_length_vector.size()));
290 
291  for (size_t i = 0; i != std::min(where_to_cut, position_length_vector.size()); ++i) {
292  result.push_back(this->intervals[position_length_vector[i].first]);
293  if (dbg)
294  std::cerr << "Position : " << position_length_vector[i].first << " length : " << position_length_vector[i].second
295  << std::endl;
296  }
297 
298  return result;
299 } // dominant_intervals
300 
301 std::vector<size_t> Persistence_intervals::histogram_of_lengths(size_t number_of_bins) const {
302  bool dbg = false;
303 
304  if (dbg) std::cerr << "this->intervals.size() : " << this->intervals.size() << std::endl;
305  // first find the length of the longest interval:
306  double lengthOfLongest = 0;
307  for (size_t i = 0; i != this->intervals.size(); ++i) {
308  if ((this->intervals[i].second - this->intervals[i].first) > lengthOfLongest) {
309  lengthOfLongest = this->intervals[i].second - this->intervals[i].first;
310  }
311  }
312 
313  if (dbg) {
314  std::cerr << "lengthOfLongest : " << lengthOfLongest << std::endl;
315  }
316 
317  // this is a container we will use to store the resulting histogram
318  std::vector<size_t> result(number_of_bins + 1, 0);
319 
320  // for every persistence interval in our collection.
321  for (size_t i = 0; i != this->intervals.size(); ++i) {
322  // compute its length relative to the length of the dominant interval:
323  double relative_length_of_this_interval = (this->intervals[i].second - this->intervals[i].first) / lengthOfLongest;
324 
325  // given the relative length (between 0 and 1) compute to which bin should it contribute.
326  size_t position = (size_t)(relative_length_of_this_interval * number_of_bins);
327 
328  ++result[position];
329 
330  if (dbg) {
331  std::cerr << "i : " << i << std::endl;
332  std::cerr << "Interval : [" << this->intervals[i].first << " , " << this->intervals[i].second << " ] \n";
333  std::cerr << "relative_length_of_this_interval : " << relative_length_of_this_interval << std::endl;
334  std::cerr << "position : " << position << std::endl;
335  getchar();
336  }
337  }
338 
339  if (dbg) {
340  for (size_t i = 0; i != result.size(); ++i) std::cerr << result[i] << std::endl;
341  }
342  return result;
343 }
344 
345 std::vector<size_t> Persistence_intervals::cumulative_histogram_of_lengths(size_t number_of_bins) const {
346  std::vector<size_t> histogram = this->histogram_of_lengths(number_of_bins);
347  std::vector<size_t> result(histogram.size());
348 
349  size_t sum = 0;
350  for (size_t i = 0; i != histogram.size(); ++i) {
351  sum += histogram[i];
352  result[i] = sum;
353  }
354  return result;
355 }
356 
357 std::vector<double> Persistence_intervals::characteristic_function_of_diagram(double x_min, double x_max,
358  size_t number_of_bins) const {
359  bool dbg = false;
360 
361  std::vector<double> result(number_of_bins);
362  std::fill(result.begin(), result.end(), 0);
363 
364  for (size_t i = 0; i != this->intervals.size(); ++i) {
365  if (dbg) {
366  std::cerr << "Interval : " << this->intervals[i].first << " , " << this->intervals[i].second << std::endl;
367  }
368 
369  size_t beginIt = 0;
370  if (this->intervals[i].first < x_min) beginIt = 0;
371  if (this->intervals[i].first >= x_max) beginIt = result.size();
372  if ((this->intervals[i].first > x_min) && (this->intervals[i].first < x_max)) {
373  beginIt = number_of_bins * (this->intervals[i].first - x_min) / (x_max - x_min);
374  }
375 
376  size_t endIt = 0;
377  if (this->intervals[i].second < x_min) endIt = 0;
378  if (this->intervals[i].second >= x_max) endIt = result.size();
379  if ((this->intervals[i].second > x_min) && (this->intervals[i].second < x_max)) {
380  endIt = number_of_bins * (this->intervals[i].second - x_min) / (x_max - x_min);
381  }
382 
383  if (beginIt > endIt) {
384  beginIt = endIt;
385  }
386 
387  if (dbg) {
388  std::cerr << "beginIt : " << beginIt << std::endl;
389  std::cerr << "endIt : " << endIt << std::endl;
390  }
391 
392  for (size_t pos = beginIt; pos != endIt; ++pos) {
393  result[pos] += ((x_max - x_min) / static_cast<double>(number_of_bins)) *
394  (this->intervals[i].second - this->intervals[i].first);
395  }
396  if (dbg) {
397  std::cerr << "Result at this stage \n";
398  for (size_t aa = 0; aa != result.size(); ++aa) {
399  std::cerr << result[aa] << " ";
400  }
401  std::cerr << std::endl;
402  }
403  }
404  return result;
405 } // characteristic_function_of_diagram
406 
407 std::vector<double> Persistence_intervals::cumulative_characteristic_function_of_diagram(double x_min, double x_max,
408  size_t number_of_bins) const {
409  std::vector<double> intsOfBars = this->characteristic_function_of_diagram(x_min, x_max, number_of_bins);
410  std::vector<double> result(intsOfBars.size());
411  double sum = 0;
412  for (size_t i = 0; i != intsOfBars.size(); ++i) {
413  sum += intsOfBars[i];
414  result[i] = sum;
415  }
416  return result;
417 } // cumulative_characteristic_function_of_diagram
418 
419 template <typename T>
420 bool compare_first_element_of_pair(const std::pair<T, bool>& f, const std::pair<T, bool>& s) {
421  return (f.first < s.first);
422 }
423 
424 std::vector<std::pair<double, size_t> > Persistence_intervals::compute_persistent_betti_numbers() const {
425  std::vector<std::pair<double, bool> > places_where_pbs_change(2 * this->intervals.size());
426 
427  for (size_t i = 0; i != this->intervals.size(); ++i) {
428  places_where_pbs_change[2 * i] = std::make_pair(this->intervals[i].first, true);
429  places_where_pbs_change[2 * i + 1] = std::make_pair(this->intervals[i].second, false);
430  }
431 
432  std::sort(places_where_pbs_change.begin(), places_where_pbs_change.end(), compare_first_element_of_pair<double>);
433  size_t pbn = 0;
434  std::vector<std::pair<double, size_t> > pbns(places_where_pbs_change.size());
435  for (size_t i = 0; i != places_where_pbs_change.size(); ++i) {
436  if (places_where_pbs_change[i].second == true) {
437  ++pbn;
438  } else {
439  --pbn;
440  }
441  pbns[i] = std::make_pair(places_where_pbs_change[i].first, pbn);
442  }
443  return pbns;
444 }
445 
446 inline double compute_euclidean_distance(const std::pair<double, double>& f, const std::pair<double, double>& s) {
447  return sqrt((f.first - s.first) * (f.first - s.first) + (f.second - s.second) * (f.second - s.second));
448 }
449 
450 std::vector<double> Persistence_intervals::k_n_n(size_t k, size_t where_to_cut) const {
451  bool dbg = false;
452  if (dbg) {
453  std::cerr << "Here are the intervals : \n";
454  for (size_t i = 0; i != this->intervals.size(); ++i) {
455  std::cerr << "[ " << this->intervals[i].first << " , " << this->intervals[i].second << "] \n";
456  }
457  getchar();
458  }
459 
460  std::vector<double> result;
461  // compute all to all distance between point in the diagram. Also, consider points in the diagonal with the infinite
462  // multiplicity.
463  std::vector<std::vector<double> > distances(this->intervals.size());
464  for (size_t i = 0; i != this->intervals.size(); ++i) {
465  std::vector<double> aa(this->intervals.size());
466  std::fill(aa.begin(), aa.end(), 0);
467  distances[i] = aa;
468  }
469  std::vector<double> distances_from_diagonal(this->intervals.size());
470  std::fill(distances_from_diagonal.begin(), distances_from_diagonal.end(), 0);
471 
472  for (size_t i = 0; i != this->intervals.size(); ++i) {
473  std::vector<double> distancesFromI;
474  for (size_t j = i + 1; j != this->intervals.size(); ++j) {
475  distancesFromI.push_back(compute_euclidean_distance(this->intervals[i], this->intervals[j]));
476  }
477  // also add a distance from this guy to diagonal:
478  double distanceToDiagonal = compute_euclidean_distance(
479  this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second),
480  0.5 * (this->intervals[i].first + this->intervals[i].second)));
481  distances_from_diagonal[i] = distanceToDiagonal;
482 
483  if (dbg) {
484  std::cerr << "Here are the distances form the point : [" << this->intervals[i].first << " , "
485  << this->intervals[i].second << "] in the diagram \n";
486  for (size_t aa = 0; aa != distancesFromI.size(); ++aa) {
487  std::cerr << "To : " << i + aa << " : " << distancesFromI[aa] << " ";
488  }
489  std::cerr << std::endl;
490  getchar();
491  }
492 
493  // filling in the distances matrix:
494  for (size_t j = i + 1; j != this->intervals.size(); ++j) {
495  distances[i][j] = distancesFromI[j - i - 1];
496  distances[j][i] = distancesFromI[j - i - 1];
497  }
498  }
499  if (dbg) {
500  std::cerr << "Here is the distance matrix : \n";
501  for (size_t i = 0; i != distances.size(); ++i) {
502  for (size_t j = 0; j != distances.size(); ++j) {
503  std::cerr << distances[i][j] << " ";
504  }
505  std::cerr << std::endl;
506  }
507  std::cerr << std::endl << std::endl << "And here are the distances to the diagonal : " << std::endl;
508  for (size_t i = 0; i != distances_from_diagonal.size(); ++i) {
509  std::cerr << distances_from_diagonal[i] << " ";
510  }
511  std::cerr << std::endl << std::endl;
512  getchar();
513  }
514 
515  for (size_t i = 0; i != this->intervals.size(); ++i) {
516  std::vector<double> distancesFromI = distances[i];
517  distancesFromI.push_back(distances_from_diagonal[i]);
518 
519  // sort it:
520  std::sort(distancesFromI.begin(), distancesFromI.end(), std::greater<double>());
521 
522  if (k > distancesFromI.size()) {
523  if (dbg) {
524  std::cerr << "There are not enough neighbors in your set. We set the result to plus infty \n";
525  }
526  result.push_back(std::numeric_limits<double>::max());
527  } else {
528  if (distances_from_diagonal[i] > distancesFromI[k]) {
529  if (dbg) {
530  std::cerr << "The k-th n.n. is on a diagonal. Therefore we set up a distance to diagonal \n";
531  }
532  result.push_back(distances_from_diagonal[i]);
533  } else {
534  result.push_back(distancesFromI[k]);
535  }
536  }
537  }
538  std::sort(result.begin(), result.end(), std::greater<double>());
539  result.resize(std::min(result.size(), where_to_cut));
540 
541  return result;
542 }
543 
544 double Persistence_intervals::project_to_R(int number_of_function) const {
545  double result = 0;
546 
547  for (size_t i = 0; i != this->intervals.size(); ++i) {
548  result +=
549  (this->intervals[i].second - this->intervals[i].first) * (this->intervals[i].second - this->intervals[i].first);
550  }
551 
552  return result;
553 }
554 
555 } // namespace Persistence_representations
556 } // namespace Gudhi
557 
558 #endif // PERSISTENCE_INTERVALS_H_
std::vector< std::pair< double, size_t > > compute_persistent_betti_numbers() const
Definition: Persistence_intervals.h:424
size_t size() const
Definition: Persistence_intervals.h:193
std::vector< double > characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:357
std::vector< double > k_n_n(size_t k, size_t where_to_cut=10) const
Definition: Persistence_intervals.h:450
std::vector< double > vectorize(int number_of_function) const
Definition: Persistence_intervals.h:224
std::vector< std::pair< double, double > > dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:279
Definition: SimplicialComplexForAlpha.h:14
std::vector< double > length_of_dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:264
std::vector< size_t > histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:301
void plot(const char *filename, double min_x=std::numeric_limits< double >::max(), double max_x=std::numeric_limits< double >::max(), double min_y=std::numeric_limits< double >::max(), double max_y=std::numeric_limits< double >::max()) const
Definition: Persistence_intervals.h:152
std::pair< double, double > get_x_range() const
Definition: Persistence_intervals.h:57
Persistence_intervals(const char *filename, unsigned dimension=std::numeric_limits< unsigned >::max())
Definition: Persistence_intervals.h:250
Definition: Persistence_intervals.h:35
size_t number_of_vectorize_functions() const
Definition: Persistence_intervals.h:231
friend std::ostream & operator<<(std::ostream &out, const Persistence_intervals &intervals)
Definition: Persistence_intervals.h:142
std::vector< double > cumulative_characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:407
std::pair< double, double > operator[](size_t i) const
Definition: Persistence_intervals.h:199
std::vector< size_t > cumulative_histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:345
double project_to_R(int number_of_function) const
Definition: Persistence_intervals.h:544
size_t number_of_projections_to_R() const
Definition: Persistence_intervals.h:218
std::pair< double, double > get_y_range() const
Definition: Persistence_intervals.h:70
GUDHI  Version 3.0.0  - C++ library for Topological Data Analysis (TDA) and Higher Dimensional Geometry Understanding.  - Copyright : MIT Generated on Tue Sep 24 2019 09:57:51 for GUDHI by Doxygen 1.8.13