Persistence_intervals.h
1 /* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2  * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3  * Author(s): Pawel Dlotko
4  *
5  * Copyright (C) 2016 Inria
6  *
7  * Modification(s):
8  * - YYYY/MM Author: Description of the modification
9  * - 2019/12 Vincent Rouvreau: Fix #118 - Make histogram_of_lengths and cumulative_histogram_of_lengths
10  * return the exact number_of_bins (was failing on x86)
11  */
12 
13 #ifndef PERSISTENCE_INTERVALS_H_
14 #define PERSISTENCE_INTERVALS_H_
15 
16 // gudhi include
17 #include <gudhi/read_persistence_from_file.h>
18 
19 // standard include
20 #include <limits>
21 #include <iostream>
22 #include <fstream>
23 #include <vector>
24 #include <algorithm>
25 #include <cmath>
26 #include <functional>
27 #include <utility>
28 #include <string>
29 
30 namespace Gudhi {
31 namespace Persistence_representations {
32 
38  public:
47  Persistence_intervals(const char* filename, unsigned dimension = std::numeric_limits<unsigned>::max());
48 
54  Persistence_intervals(const std::vector<std::pair<double, double> >& intervals);
55 
59  std::pair<double, double> get_x_range() const {
60  double min_ = std::numeric_limits<int>::max();
61  double max_ = -std::numeric_limits<int>::max();
62  for (size_t i = 0; i != this->intervals.size(); ++i) {
63  if (this->intervals[i].first < min_) min_ = this->intervals[i].first;
64  if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
65  }
66  return std::make_pair(min_, max_);
67  }
68 
72  std::pair<double, double> get_y_range() const {
73  double min_ = std::numeric_limits<int>::max();
74  double max_ = -std::numeric_limits<int>::max();
75  for (size_t i = 0; i != this->intervals.size(); ++i) {
76  if (this->intervals[i].second < min_) min_ = this->intervals[i].second;
77  if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
78  }
79  return std::make_pair(min_, max_);
80  }
81 
86  std::vector<double> length_of_dominant_intervals(size_t where_to_cut = 100) const;
87 
92  std::vector<std::pair<double, double> > dominant_intervals(size_t where_to_cut = 100) const;
93 
102  std::vector<size_t> histogram_of_lengths(size_t number_of_bins = 10) const;
103 
109  std::vector<size_t> cumulative_histogram_of_lengths(size_t number_of_bins = 10) const;
110 
118  std::vector<double> characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins = 10) const;
119 
123  std::vector<double> cumulative_characteristic_function_of_diagram(double x_min, double x_max,
124  size_t number_of_bins = 10) const;
125 
131  std::vector<std::pair<double, size_t> > compute_persistent_betti_numbers() const;
132 
139  std::vector<double> k_n_n(size_t k, size_t where_to_cut = 10) const;
140 
144  friend std::ostream& operator<<(std::ostream& out, const Persistence_intervals& intervals) {
145  for (size_t i = 0; i != intervals.intervals.size(); ++i) {
146  out << intervals.intervals[i].first << " " << intervals.intervals[i].second << std::endl;
147  }
148  return out;
149  }
150 
154  void plot(const char* filename, double min_x = std::numeric_limits<double>::max(),
155  double max_x = std::numeric_limits<double>::max(), double min_y = std::numeric_limits<double>::max(),
156  double max_y = std::numeric_limits<double>::max()) const {
157  // this program create a gnuplot script file that allows to plot persistence diagram.
158  std::ofstream out;
159 
160  std::stringstream gnuplot_script;
161  gnuplot_script << filename << "_GnuplotScript";
162 
163  out.open(gnuplot_script.str().c_str());
164 
165  std::pair<double, double> min_max_values = this->get_x_range();
166  if (min_x == max_x) {
167  out << "set xrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
168  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
169  out << "set yrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
170  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
171  } else {
172  out << "set xrange [" << min_x << " : " << max_x << " ]" << std::endl;
173  out << "set yrange [" << min_y << " : " << max_y << " ]" << std::endl;
174  }
175  out << "plot '-' using 1:2 notitle \"" << filename << "\", \\" << std::endl;
176  out << " '-' using 1:2 notitle with lp" << std::endl;
177  for (size_t i = 0; i != this->intervals.size(); ++i) {
178  out << this->intervals[i].first << " " << this->intervals[i].second << std::endl;
179  }
180  out << "EOF" << std::endl;
181  out << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " "
182  << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
183  out << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " "
184  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
185 
186  out.close();
187 
188  std::clog << "To visualize, install gnuplot and type the command: gnuplot -persist -e \"load \'"
189  << gnuplot_script.str().c_str() << "\'\"" << std::endl;
190  }
191 
195  size_t size() const { return this->intervals.size(); }
196 
201  inline std::pair<double, double> operator[](size_t i) const {
202  if (i >= this->intervals.size()) throw("Index out of range! Operator [], one_d_gaussians class\n");
203  return this->intervals[i];
204  }
205 
206  // Implementations of functions for various concepts.
215  double project_to_R(int number_of_function) const;
220  size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals; }
221 
226  std::vector<double> vectorize(int number_of_function) const {
227  return this->length_of_dominant_intervals(number_of_function);
228  }
233  size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization; }
234 
235  // end of implementation of functions needed for concepts.
236 
237  // For visualization use output from vectorize and build histograms.
238  std::vector<std::pair<double, double> > output_for_visualization() { return this->intervals; }
239 
240  protected:
241  void set_up_numbers_of_functions_for_vectorization_and_projections_to_reals() {
242  // warning, this function can be only called after filling in the intervals vector.
243  this->number_of_functions_for_vectorization = this->intervals.size();
244  this->number_of_functions_for_projections_to_reals = 1;
245  }
246 
247  std::vector<std::pair<double, double> > intervals;
248  size_t number_of_functions_for_vectorization;
249  size_t number_of_functions_for_projections_to_reals;
250 };
251 
252 Persistence_intervals::Persistence_intervals(const char* filename, unsigned dimension) {
253  if (dimension == std::numeric_limits<unsigned>::max()) {
254  this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename);
255  } else {
256  this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename, dimension);
257  }
258  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
259 } // Persistence_intervals
260 
261 Persistence_intervals::Persistence_intervals(const std::vector<std::pair<double, double> >& intervals_)
262  : intervals(intervals_) {
263  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
264 }
265 
266 std::vector<double> Persistence_intervals::length_of_dominant_intervals(size_t where_to_cut) const {
267  std::vector<double> result(this->intervals.size());
268  for (size_t i = 0; i != this->intervals.size(); ++i) {
269  result[i] = this->intervals[i].second - this->intervals[i].first;
270  }
271  std::sort(result.begin(), result.end(), std::greater<double>());
272 
273  result.resize(std::min(where_to_cut, result.size()));
274  return result;
275 } // length_of_dominant_intervals
276 
277 bool compare(const std::pair<size_t, double>& first, const std::pair<size_t, double>& second) {
278  return first.second > second.second;
279 }
280 
281 std::vector<std::pair<double, double> > Persistence_intervals::dominant_intervals(size_t where_to_cut) const {
282  bool dbg = false;
283  std::vector<std::pair<size_t, double> > position_length_vector(this->intervals.size());
284  for (size_t i = 0; i != this->intervals.size(); ++i) {
285  position_length_vector[i] = std::make_pair(i, this->intervals[i].second - this->intervals[i].first);
286  }
287 
288  std::sort(position_length_vector.begin(), position_length_vector.end(), compare);
289 
290  std::vector<std::pair<double, double> > result;
291  result.reserve(std::min(where_to_cut, position_length_vector.size()));
292 
293  for (size_t i = 0; i != std::min(where_to_cut, position_length_vector.size()); ++i) {
294  result.push_back(this->intervals[position_length_vector[i].first]);
295  if (dbg)
296  std::clog << "Position : " << position_length_vector[i].first << " length : " << position_length_vector[i].second
297  << std::endl;
298  }
299 
300  return result;
301 } // dominant_intervals
302 
303 std::vector<size_t> Persistence_intervals::histogram_of_lengths(size_t number_of_bins) const {
304  bool dbg = false;
305 
306  if (dbg) std::clog << "this->intervals.size() : " << this->intervals.size() << std::endl;
307  // first find the length of the longest interval:
308  double lengthOfLongest = 0;
309  for (size_t i = 0; i != this->intervals.size(); ++i) {
310  if ((this->intervals[i].second - this->intervals[i].first) > lengthOfLongest) {
311  lengthOfLongest = this->intervals[i].second - this->intervals[i].first;
312  }
313  }
314 
315  if (dbg) {
316  std::clog << "lengthOfLongest : " << lengthOfLongest << std::endl;
317  }
318 
319  // this is a container we will use to store the resulting histogram
320  std::vector<size_t> result(number_of_bins + 1, 0);
321 
322  // for every persistence interval in our collection.
323  for (size_t i = 0; i != this->intervals.size(); ++i) {
324  // compute its length relative to the length of the dominant interval:
325  double relative_length_of_this_interval = (this->intervals[i].second - this->intervals[i].first) / lengthOfLongest;
326 
327  // given the relative length (between 0 and 1) compute to which bin should it contribute.
328  size_t position = (size_t)(relative_length_of_this_interval * number_of_bins);
329 
330  ++result[position];
331 
332  if (dbg) {
333  std::clog << "i : " << i << std::endl;
334  std::clog << "Interval : [" << this->intervals[i].first << " , " << this->intervals[i].second << " ] \n";
335  std::clog << "relative_length_of_this_interval : " << relative_length_of_this_interval << std::endl;
336  std::clog << "position : " << position << std::endl;
337  getchar();
338  }
339  }
340  // we want number of bins equals to number_of_bins (some unexpected results on x86)
341  result[number_of_bins-1]+=result[number_of_bins];
342  result.resize(number_of_bins);
343 
344  if (dbg) {
345  for (size_t i = 0; i != result.size(); ++i) std::clog << result[i] << std::endl;
346  }
347  return result;
348 }
349 
350 std::vector<size_t> Persistence_intervals::cumulative_histogram_of_lengths(size_t number_of_bins) const {
351  std::vector<size_t> histogram = this->histogram_of_lengths(number_of_bins);
352  std::vector<size_t> result(histogram.size());
353 
354  size_t sum = 0;
355  for (size_t i = 0; i != histogram.size(); ++i) {
356  sum += histogram[i];
357  result[i] = sum;
358  }
359  return result;
360 }
361 
362 std::vector<double> Persistence_intervals::characteristic_function_of_diagram(double x_min, double x_max,
363  size_t number_of_bins) const {
364  bool dbg = false;
365 
366  std::vector<double> result(number_of_bins);
367  std::fill(result.begin(), result.end(), 0);
368 
369  for (size_t i = 0; i != this->intervals.size(); ++i) {
370  if (dbg) {
371  std::clog << "Interval : " << this->intervals[i].first << " , " << this->intervals[i].second << std::endl;
372  }
373 
374  size_t beginIt = 0;
375  if (this->intervals[i].first < x_min) beginIt = 0;
376  if (this->intervals[i].first >= x_max) beginIt = result.size();
377  if ((this->intervals[i].first > x_min) && (this->intervals[i].first < x_max)) {
378  beginIt = number_of_bins * (this->intervals[i].first - x_min) / (x_max - x_min);
379  }
380 
381  size_t endIt = 0;
382  if (this->intervals[i].second < x_min) endIt = 0;
383  if (this->intervals[i].second >= x_max) endIt = result.size();
384  if ((this->intervals[i].second > x_min) && (this->intervals[i].second < x_max)) {
385  endIt = number_of_bins * (this->intervals[i].second - x_min) / (x_max - x_min);
386  }
387 
388  if (beginIt > endIt) {
389  beginIt = endIt;
390  }
391 
392  if (dbg) {
393  std::clog << "beginIt : " << beginIt << std::endl;
394  std::clog << "endIt : " << endIt << std::endl;
395  }
396 
397  for (size_t pos = beginIt; pos != endIt; ++pos) {
398  result[pos] += ((x_max - x_min) / static_cast<double>(number_of_bins)) *
399  (this->intervals[i].second - this->intervals[i].first);
400  }
401  if (dbg) {
402  std::clog << "Result at this stage \n";
403  for (size_t aa = 0; aa != result.size(); ++aa) {
404  std::clog << result[aa] << " ";
405  }
406  std::clog << std::endl;
407  }
408  }
409  return result;
410 } // characteristic_function_of_diagram
411 
412 std::vector<double> Persistence_intervals::cumulative_characteristic_function_of_diagram(double x_min, double x_max,
413  size_t number_of_bins) const {
414  std::vector<double> intsOfBars = this->characteristic_function_of_diagram(x_min, x_max, number_of_bins);
415  std::vector<double> result(intsOfBars.size());
416  double sum = 0;
417  for (size_t i = 0; i != intsOfBars.size(); ++i) {
418  sum += intsOfBars[i];
419  result[i] = sum;
420  }
421  return result;
422 } // cumulative_characteristic_function_of_diagram
423 
424 template <typename T>
425 bool compare_first_element_of_pair(const std::pair<T, bool>& f, const std::pair<T, bool>& s) {
426  return (f.first < s.first);
427 }
428 
429 std::vector<std::pair<double, size_t> > Persistence_intervals::compute_persistent_betti_numbers() const {
430  std::vector<std::pair<double, bool> > places_where_pbs_change(2 * this->intervals.size());
431 
432  for (size_t i = 0; i != this->intervals.size(); ++i) {
433  places_where_pbs_change[2 * i] = std::make_pair(this->intervals[i].first, true);
434  places_where_pbs_change[2 * i + 1] = std::make_pair(this->intervals[i].second, false);
435  }
436 
437  std::sort(places_where_pbs_change.begin(), places_where_pbs_change.end(), compare_first_element_of_pair<double>);
438  size_t pbn = 0;
439  std::vector<std::pair<double, size_t> > pbns(places_where_pbs_change.size());
440  for (size_t i = 0; i != places_where_pbs_change.size(); ++i) {
441  if (places_where_pbs_change[i].second == true) {
442  ++pbn;
443  } else {
444  --pbn;
445  }
446  pbns[i] = std::make_pair(places_where_pbs_change[i].first, pbn);
447  }
448  return pbns;
449 }
450 
451 inline double compute_euclidean_distance(const std::pair<double, double>& f, const std::pair<double, double>& s) {
452  return sqrt((f.first - s.first) * (f.first - s.first) + (f.second - s.second) * (f.second - s.second));
453 }
454 
455 std::vector<double> Persistence_intervals::k_n_n(size_t k, size_t where_to_cut) const {
456  bool dbg = false;
457  if (dbg) {
458  std::clog << "Here are the intervals : \n";
459  for (size_t i = 0; i != this->intervals.size(); ++i) {
460  std::clog << "[ " << this->intervals[i].first << " , " << this->intervals[i].second << "] \n";
461  }
462  getchar();
463  }
464 
465  std::vector<double> result;
466  // compute all to all distance between point in the diagram. Also, consider points in the diagonal with the infinite
467  // multiplicity.
468  std::vector<std::vector<double> > distances(this->intervals.size());
469  for (size_t i = 0; i != this->intervals.size(); ++i) {
470  std::vector<double> aa(this->intervals.size());
471  std::fill(aa.begin(), aa.end(), 0);
472  distances[i] = aa;
473  }
474  std::vector<double> distances_from_diagonal(this->intervals.size());
475  std::fill(distances_from_diagonal.begin(), distances_from_diagonal.end(), 0);
476 
477  for (size_t i = 0; i != this->intervals.size(); ++i) {
478  std::vector<double> distancesFromI;
479  for (size_t j = i + 1; j != this->intervals.size(); ++j) {
480  distancesFromI.push_back(compute_euclidean_distance(this->intervals[i], this->intervals[j]));
481  }
482  // also add a distance from this guy to diagonal:
483  double distanceToDiagonal = compute_euclidean_distance(
484  this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second),
485  0.5 * (this->intervals[i].first + this->intervals[i].second)));
486  distances_from_diagonal[i] = distanceToDiagonal;
487 
488  if (dbg) {
489  std::clog << "Here are the distances form the point : [" << this->intervals[i].first << " , "
490  << this->intervals[i].second << "] in the diagram \n";
491  for (size_t aa = 0; aa != distancesFromI.size(); ++aa) {
492  std::clog << "To : " << i + aa << " : " << distancesFromI[aa] << " ";
493  }
494  std::clog << std::endl;
495  getchar();
496  }
497 
498  // filling in the distances matrix:
499  for (size_t j = i + 1; j != this->intervals.size(); ++j) {
500  distances[i][j] = distancesFromI[j - i - 1];
501  distances[j][i] = distancesFromI[j - i - 1];
502  }
503  }
504  if (dbg) {
505  std::clog << "Here is the distance matrix : \n";
506  for (size_t i = 0; i != distances.size(); ++i) {
507  for (size_t j = 0; j != distances.size(); ++j) {
508  std::clog << distances[i][j] << " ";
509  }
510  std::clog << std::endl;
511  }
512  std::clog << std::endl << std::endl << "And here are the distances to the diagonal : " << std::endl;
513  for (size_t i = 0; i != distances_from_diagonal.size(); ++i) {
514  std::clog << distances_from_diagonal[i] << " ";
515  }
516  std::clog << std::endl << std::endl;
517  getchar();
518  }
519 
520  for (size_t i = 0; i != this->intervals.size(); ++i) {
521  std::vector<double> distancesFromI = distances[i];
522  distancesFromI.push_back(distances_from_diagonal[i]);
523 
524  // sort it:
525  std::sort(distancesFromI.begin(), distancesFromI.end(), std::greater<double>());
526 
527  if (k > distancesFromI.size()) {
528  if (dbg) {
529  std::clog << "There are not enough neighbors in your set. We set the result to plus infty \n";
530  }
531  result.push_back(std::numeric_limits<double>::max());
532  } else {
533  if (distances_from_diagonal[i] > distancesFromI[k]) {
534  if (dbg) {
535  std::clog << "The k-th n.n. is on a diagonal. Therefore we set up a distance to diagonal \n";
536  }
537  result.push_back(distances_from_diagonal[i]);
538  } else {
539  result.push_back(distancesFromI[k]);
540  }
541  }
542  }
543  std::sort(result.begin(), result.end(), std::greater<double>());
544  result.resize(std::min(result.size(), where_to_cut));
545 
546  return result;
547 }
548 
549 double Persistence_intervals::project_to_R(int number_of_function) const {
550  double result = 0;
551 
552  for (size_t i = 0; i != this->intervals.size(); ++i) {
553  result +=
554  (this->intervals[i].second - this->intervals[i].first) * (this->intervals[i].second - this->intervals[i].first);
555  }
556 
557  return result;
558 }
559 
560 } // namespace Persistence_representations
561 } // namespace Gudhi
562 
563 #endif // PERSISTENCE_INTERVALS_H_
Definition: Persistence_intervals.h:37
std::vector< std::pair< double, size_t > > compute_persistent_betti_numbers() const
Definition: Persistence_intervals.h:429
double project_to_R(int number_of_function) const
Definition: Persistence_intervals.h:549
std::vector< double > cumulative_characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:412
size_t number_of_projections_to_R() const
Definition: Persistence_intervals.h:220
std::vector< double > characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:362
std::vector< std::pair< double, double > > dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:281
std::pair< double, double > get_y_range() const
Definition: Persistence_intervals.h:72
std::vector< size_t > cumulative_histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:350
void plot(const char *filename, double min_x=std::numeric_limits< double >::max(), double max_x=std::numeric_limits< double >::max(), double min_y=std::numeric_limits< double >::max(), double max_y=std::numeric_limits< double >::max()) const
Definition: Persistence_intervals.h:154
std::vector< double > length_of_dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:266
std::vector< size_t > histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:303
size_t number_of_vectorize_functions() const
Definition: Persistence_intervals.h:233
size_t size() const
Definition: Persistence_intervals.h:195
std::vector< double > k_n_n(size_t k, size_t where_to_cut=10) const
Definition: Persistence_intervals.h:455
friend std::ostream & operator<<(std::ostream &out, const Persistence_intervals &intervals)
Definition: Persistence_intervals.h:144
std::pair< double, double > get_x_range() const
Definition: Persistence_intervals.h:59
std::pair< double, double > operator[](size_t i) const
Definition: Persistence_intervals.h:201
std::vector< double > vectorize(int number_of_function) const
Definition: Persistence_intervals.h:226
Persistence_intervals(const char *filename, unsigned dimension=std::numeric_limits< unsigned >::max())
Definition: Persistence_intervals.h:252
Gudhi namespace.
Definition: SimplicialComplexForAlpha.h:14