Persistence_intervals.h
1 /* This file is part of the Gudhi hiLibrary. The Gudhi library
2  * (Geometric Understanding in Higher Dimensions) is a generic C++
3  * library for computational topology.
4  *
5  * Author(s): Pawel Dlotko
6  *
7  * Copyright (C) 2016 Inria
8  *
9  * This program is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation, either version 3 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program. If not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #ifndef PERSISTENCE_INTERVALS_H_
24 #define PERSISTENCE_INTERVALS_H_
25 
26 // gudhi include
27 #include <gudhi/read_persistence_from_file.h>
28 
29 // standard include
30 #include <limits>
31 #include <iostream>
32 #include <fstream>
33 #include <vector>
34 #include <algorithm>
35 #include <cmath>
36 #include <functional>
37 #include <utility>
38 #include <string>
39 
40 namespace Gudhi {
41 namespace Persistence_representations {
42 
48  public:
57  Persistence_intervals(const char* filename, unsigned dimension = std::numeric_limits<unsigned>::max());
58 
64  Persistence_intervals(const std::vector<std::pair<double, double> >& intervals);
65 
69  std::pair<double, double> get_x_range() const {
70  double min_ = std::numeric_limits<int>::max();
71  double max_ = -std::numeric_limits<int>::max();
72  for (size_t i = 0; i != this->intervals.size(); ++i) {
73  if (this->intervals[i].first < min_) min_ = this->intervals[i].first;
74  if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
75  }
76  return std::make_pair(min_, max_);
77  }
78 
82  std::pair<double, double> get_y_range() const {
83  double min_ = std::numeric_limits<int>::max();
84  double max_ = -std::numeric_limits<int>::max();
85  for (size_t i = 0; i != this->intervals.size(); ++i) {
86  if (this->intervals[i].second < min_) min_ = this->intervals[i].second;
87  if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
88  }
89  return std::make_pair(min_, max_);
90  }
91 
96  std::vector<double> length_of_dominant_intervals(size_t where_to_cut = 100) const;
97 
102  std::vector<std::pair<double, double> > dominant_intervals(size_t where_to_cut = 100) const;
103 
112  std::vector<size_t> histogram_of_lengths(size_t number_of_bins = 10) const;
113 
119  std::vector<size_t> cumulative_histogram_of_lengths(size_t number_of_bins = 10) const;
120 
128  std::vector<double> characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins = 10) const;
129 
133  std::vector<double> cumulative_characteristic_function_of_diagram(double x_min, double x_max,
134  size_t number_of_bins = 10) const;
135 
141  std::vector<std::pair<double, size_t> > compute_persistent_betti_numbers() const;
142 
149  std::vector<double> k_n_n(size_t k, size_t where_to_cut = 10) const;
150 
154  friend std::ostream& operator<<(std::ostream& out, const Persistence_intervals& intervals) {
155  for (size_t i = 0; i != intervals.intervals.size(); ++i) {
156  out << intervals.intervals[i].first << " " << intervals.intervals[i].second << std::endl;
157  }
158  return out;
159  }
160 
164  void plot(const char* filename, double min_x = std::numeric_limits<double>::max(),
165  double max_x = std::numeric_limits<double>::max(), double min_y = std::numeric_limits<double>::max(),
166  double max_y = std::numeric_limits<double>::max()) const {
167  // this program create a gnuplot script file that allows to plot persistence diagram.
168  std::ofstream out;
169 
170  std::stringstream gnuplot_script;
171  gnuplot_script << filename << "_GnuplotScript";
172 
173  out.open(gnuplot_script.str().c_str());
174 
175  std::pair<double, double> min_max_values = this->get_x_range();
176  if (min_x == max_x) {
177  out << "set xrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
178  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
179  out << "set yrange [" << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " : "
180  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " ]" << std::endl;
181  } else {
182  out << "set xrange [" << min_x << " : " << max_x << " ]" << std::endl;
183  out << "set yrange [" << min_y << " : " << max_y << " ]" << std::endl;
184  }
185  out << "plot '-' using 1:2 notitle \"" << filename << "\", \\" << std::endl;
186  out << " '-' using 1:2 notitle with lp" << std::endl;
187  for (size_t i = 0; i != this->intervals.size(); ++i) {
188  out << this->intervals[i].first << " " << this->intervals[i].second << std::endl;
189  }
190  out << "EOF" << std::endl;
191  out << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << " "
192  << min_max_values.first - 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
193  out << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << " "
194  << min_max_values.second + 0.1 * (min_max_values.second - min_max_values.first) << std::endl;
195 
196  out.close();
197 
198  std::cout << "To visualize, install gnuplot and type the command: gnuplot -persist -e \"load \'"
199  << gnuplot_script.str().c_str() << "\'\"" << std::endl;
200  }
201 
205  size_t size() const { return this->intervals.size(); }
206 
211  inline std::pair<double, double> operator[](size_t i) const {
212  if (i >= this->intervals.size()) throw("Index out of range! Operator [], one_d_gaussians class\n");
213  return this->intervals[i];
214  }
215 
216  // Implementations of functions for various concepts.
225  double project_to_R(int number_of_function) const;
230  size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals; }
231 
236  std::vector<double> vectorize(int number_of_function) const {
237  return this->length_of_dominant_intervals(number_of_function);
238  }
243  size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization; }
244 
245  // end of implementation of functions needed for concepts.
246 
247  // For visualization use output from vectorize and build histograms.
248  std::vector<std::pair<double, double> > output_for_visualization() { return this->intervals; }
249 
250  protected:
251  void set_up_numbers_of_functions_for_vectorization_and_projections_to_reals() {
252  // warning, this function can be only called after filling in the intervals vector.
253  this->number_of_functions_for_vectorization = this->intervals.size();
254  this->number_of_functions_for_projections_to_reals = 1;
255  }
256 
257  std::vector<std::pair<double, double> > intervals;
258  size_t number_of_functions_for_vectorization;
259  size_t number_of_functions_for_projections_to_reals;
260 };
261 
262 Persistence_intervals::Persistence_intervals(const char* filename, unsigned dimension) {
263  if (dimension == std::numeric_limits<unsigned>::max()) {
264  this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename);
265  } else {
266  this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename, dimension);
267  }
268  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
269 } // Persistence_intervals
270 
271 Persistence_intervals::Persistence_intervals(const std::vector<std::pair<double, double> >& intervals_)
272  : intervals(intervals_) {
273  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
274 }
275 
276 std::vector<double> Persistence_intervals::length_of_dominant_intervals(size_t where_to_cut) const {
277  std::vector<double> result(this->intervals.size());
278  for (size_t i = 0; i != this->intervals.size(); ++i) {
279  result[i] = this->intervals[i].second - this->intervals[i].first;
280  }
281  std::sort(result.begin(), result.end(), std::greater<double>());
282 
283  result.resize(std::min(where_to_cut, result.size()));
284  return result;
285 } // length_of_dominant_intervals
286 
287 bool compare(const std::pair<size_t, double>& first, const std::pair<size_t, double>& second) {
288  return first.second > second.second;
289 }
290 
291 std::vector<std::pair<double, double> > Persistence_intervals::dominant_intervals(size_t where_to_cut) const {
292  bool dbg = false;
293  std::vector<std::pair<size_t, double> > position_length_vector(this->intervals.size());
294  for (size_t i = 0; i != this->intervals.size(); ++i) {
295  position_length_vector[i] = std::make_pair(i, this->intervals[i].second - this->intervals[i].first);
296  }
297 
298  std::sort(position_length_vector.begin(), position_length_vector.end(), compare);
299 
300  std::vector<std::pair<double, double> > result;
301  result.reserve(std::min(where_to_cut, position_length_vector.size()));
302 
303  for (size_t i = 0; i != std::min(where_to_cut, position_length_vector.size()); ++i) {
304  result.push_back(this->intervals[position_length_vector[i].first]);
305  if (dbg)
306  std::cerr << "Position : " << position_length_vector[i].first << " length : " << position_length_vector[i].second
307  << std::endl;
308  }
309 
310  return result;
311 } // dominant_intervals
312 
313 std::vector<size_t> Persistence_intervals::histogram_of_lengths(size_t number_of_bins) const {
314  bool dbg = false;
315 
316  if (dbg) std::cerr << "this->intervals.size() : " << this->intervals.size() << std::endl;
317  // first find the length of the longest interval:
318  double lengthOfLongest = 0;
319  for (size_t i = 0; i != this->intervals.size(); ++i) {
320  if ((this->intervals[i].second - this->intervals[i].first) > lengthOfLongest) {
321  lengthOfLongest = this->intervals[i].second - this->intervals[i].first;
322  }
323  }
324 
325  if (dbg) {
326  std::cerr << "lengthOfLongest : " << lengthOfLongest << std::endl;
327  }
328 
329  // this is a container we will use to store the resulting histogram
330  std::vector<size_t> result(number_of_bins + 1, 0);
331 
332  // for every persistence interval in our collection.
333  for (size_t i = 0; i != this->intervals.size(); ++i) {
334  // compute its length relative to the length of the dominant interval:
335  double relative_length_of_this_interval = (this->intervals[i].second - this->intervals[i].first) / lengthOfLongest;
336 
337  // given the relative length (between 0 and 1) compute to which bin should it contribute.
338  size_t position = (size_t)(relative_length_of_this_interval * number_of_bins);
339 
340  ++result[position];
341 
342  if (dbg) {
343  std::cerr << "i : " << i << std::endl;
344  std::cerr << "Interval : [" << this->intervals[i].first << " , " << this->intervals[i].second << " ] \n";
345  std::cerr << "relative_length_of_this_interval : " << relative_length_of_this_interval << std::endl;
346  std::cerr << "position : " << position << std::endl;
347  getchar();
348  }
349  }
350 
351  if (dbg) {
352  for (size_t i = 0; i != result.size(); ++i) std::cerr << result[i] << std::endl;
353  }
354  return result;
355 }
356 
357 std::vector<size_t> Persistence_intervals::cumulative_histogram_of_lengths(size_t number_of_bins) const {
358  std::vector<size_t> histogram = this->histogram_of_lengths(number_of_bins);
359  std::vector<size_t> result(histogram.size());
360 
361  size_t sum = 0;
362  for (size_t i = 0; i != histogram.size(); ++i) {
363  sum += histogram[i];
364  result[i] = sum;
365  }
366  return result;
367 }
368 
369 std::vector<double> Persistence_intervals::characteristic_function_of_diagram(double x_min, double x_max,
370  size_t number_of_bins) const {
371  bool dbg = false;
372 
373  std::vector<double> result(number_of_bins);
374  std::fill(result.begin(), result.end(), 0);
375 
376  for (size_t i = 0; i != this->intervals.size(); ++i) {
377  if (dbg) {
378  std::cerr << "Interval : " << this->intervals[i].first << " , " << this->intervals[i].second << std::endl;
379  }
380 
381  size_t beginIt = 0;
382  if (this->intervals[i].first < x_min) beginIt = 0;
383  if (this->intervals[i].first >= x_max) beginIt = result.size();
384  if ((this->intervals[i].first > x_min) && (this->intervals[i].first < x_max)) {
385  beginIt = number_of_bins * (this->intervals[i].first - x_min) / (x_max - x_min);
386  }
387 
388  size_t endIt = 0;
389  if (this->intervals[i].second < x_min) endIt = 0;
390  if (this->intervals[i].second >= x_max) endIt = result.size();
391  if ((this->intervals[i].second > x_min) && (this->intervals[i].second < x_max)) {
392  endIt = number_of_bins * (this->intervals[i].second - x_min) / (x_max - x_min);
393  }
394 
395  if (beginIt > endIt) {
396  beginIt = endIt;
397  }
398 
399  if (dbg) {
400  std::cerr << "beginIt : " << beginIt << std::endl;
401  std::cerr << "endIt : " << endIt << std::endl;
402  }
403 
404  for (size_t pos = beginIt; pos != endIt; ++pos) {
405  result[pos] += ((x_max - x_min) / static_cast<double>(number_of_bins)) *
406  (this->intervals[i].second - this->intervals[i].first);
407  }
408  if (dbg) {
409  std::cerr << "Result at this stage \n";
410  for (size_t aa = 0; aa != result.size(); ++aa) {
411  std::cerr << result[aa] << " ";
412  }
413  std::cerr << std::endl;
414  }
415  }
416  return result;
417 } // characteristic_function_of_diagram
418 
419 std::vector<double> Persistence_intervals::cumulative_characteristic_function_of_diagram(double x_min, double x_max,
420  size_t number_of_bins) const {
421  std::vector<double> intsOfBars = this->characteristic_function_of_diagram(x_min, x_max, number_of_bins);
422  std::vector<double> result(intsOfBars.size());
423  double sum = 0;
424  for (size_t i = 0; i != intsOfBars.size(); ++i) {
425  sum += intsOfBars[i];
426  result[i] = sum;
427  }
428  return result;
429 } // cumulative_characteristic_function_of_diagram
430 
431 template <typename T>
432 bool compare_first_element_of_pair(const std::pair<T, bool>& f, const std::pair<T, bool>& s) {
433  return (f.first < s.first);
434 }
435 
436 std::vector<std::pair<double, size_t> > Persistence_intervals::compute_persistent_betti_numbers() const {
437  std::vector<std::pair<double, bool> > places_where_pbs_change(2 * this->intervals.size());
438 
439  for (size_t i = 0; i != this->intervals.size(); ++i) {
440  places_where_pbs_change[2 * i] = std::make_pair(this->intervals[i].first, true);
441  places_where_pbs_change[2 * i + 1] = std::make_pair(this->intervals[i].second, false);
442  }
443 
444  std::sort(places_where_pbs_change.begin(), places_where_pbs_change.end(), compare_first_element_of_pair<double>);
445  size_t pbn = 0;
446  std::vector<std::pair<double, size_t> > pbns(places_where_pbs_change.size());
447  for (size_t i = 0; i != places_where_pbs_change.size(); ++i) {
448  if (places_where_pbs_change[i].second == true) {
449  ++pbn;
450  } else {
451  --pbn;
452  }
453  pbns[i] = std::make_pair(places_where_pbs_change[i].first, pbn);
454  }
455  return pbns;
456 }
457 
458 inline double compute_euclidean_distance(const std::pair<double, double>& f, const std::pair<double, double>& s) {
459  return sqrt((f.first - s.first) * (f.first - s.first) + (f.second - s.second) * (f.second - s.second));
460 }
461 
462 std::vector<double> Persistence_intervals::k_n_n(size_t k, size_t where_to_cut) const {
463  bool dbg = false;
464  if (dbg) {
465  std::cerr << "Here are the intervals : \n";
466  for (size_t i = 0; i != this->intervals.size(); ++i) {
467  std::cerr << "[ " << this->intervals[i].first << " , " << this->intervals[i].second << "] \n";
468  }
469  getchar();
470  }
471 
472  std::vector<double> result;
473  // compute all to all distance between point in the diagram. Also, consider points in the diagonal with the infinite
474  // multiplicity.
475  std::vector<std::vector<double> > distances(this->intervals.size());
476  for (size_t i = 0; i != this->intervals.size(); ++i) {
477  std::vector<double> aa(this->intervals.size());
478  std::fill(aa.begin(), aa.end(), 0);
479  distances[i] = aa;
480  }
481  std::vector<double> distances_from_diagonal(this->intervals.size());
482  std::fill(distances_from_diagonal.begin(), distances_from_diagonal.end(), 0);
483 
484  for (size_t i = 0; i != this->intervals.size(); ++i) {
485  std::vector<double> distancesFromI;
486  for (size_t j = i + 1; j != this->intervals.size(); ++j) {
487  distancesFromI.push_back(compute_euclidean_distance(this->intervals[i], this->intervals[j]));
488  }
489  // also add a distance from this guy to diagonal:
490  double distanceToDiagonal = compute_euclidean_distance(
491  this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second),
492  0.5 * (this->intervals[i].first + this->intervals[i].second)));
493  distances_from_diagonal[i] = distanceToDiagonal;
494 
495  if (dbg) {
496  std::cerr << "Here are the distances form the point : [" << this->intervals[i].first << " , "
497  << this->intervals[i].second << "] in the diagram \n";
498  for (size_t aa = 0; aa != distancesFromI.size(); ++aa) {
499  std::cerr << "To : " << i + aa << " : " << distancesFromI[aa] << " ";
500  }
501  std::cerr << std::endl;
502  getchar();
503  }
504 
505  // filling in the distances matrix:
506  for (size_t j = i + 1; j != this->intervals.size(); ++j) {
507  distances[i][j] = distancesFromI[j - i - 1];
508  distances[j][i] = distancesFromI[j - i - 1];
509  }
510  }
511  if (dbg) {
512  std::cerr << "Here is the distance matrix : \n";
513  for (size_t i = 0; i != distances.size(); ++i) {
514  for (size_t j = 0; j != distances.size(); ++j) {
515  std::cerr << distances[i][j] << " ";
516  }
517  std::cerr << std::endl;
518  }
519  std::cerr << std::endl << std::endl << "And here are the distances to the diagonal : " << std::endl;
520  for (size_t i = 0; i != distances_from_diagonal.size(); ++i) {
521  std::cerr << distances_from_diagonal[i] << " ";
522  }
523  std::cerr << std::endl << std::endl;
524  getchar();
525  }
526 
527  for (size_t i = 0; i != this->intervals.size(); ++i) {
528  std::vector<double> distancesFromI = distances[i];
529  distancesFromI.push_back(distances_from_diagonal[i]);
530 
531  // sort it:
532  std::sort(distancesFromI.begin(), distancesFromI.end(), std::greater<double>());
533 
534  if (k > distancesFromI.size()) {
535  if (dbg) {
536  std::cerr << "There are not enough neighbors in your set. We set the result to plus infty \n";
537  }
538  result.push_back(std::numeric_limits<double>::max());
539  } else {
540  if (distances_from_diagonal[i] > distancesFromI[k]) {
541  if (dbg) {
542  std::cerr << "The k-th n.n. is on a diagonal. Therefore we set up a distance to diagonal \n";
543  }
544  result.push_back(distances_from_diagonal[i]);
545  } else {
546  result.push_back(distancesFromI[k]);
547  }
548  }
549  }
550  std::sort(result.begin(), result.end(), std::greater<double>());
551  result.resize(std::min(result.size(), where_to_cut));
552 
553  return result;
554 }
555 
556 double Persistence_intervals::project_to_R(int number_of_function) const {
557  double result = 0;
558 
559  for (size_t i = 0; i != this->intervals.size(); ++i) {
560  result +=
561  (this->intervals[i].second - this->intervals[i].first) * (this->intervals[i].second - this->intervals[i].first);
562  }
563 
564  return result;
565 }
566 
567 } // namespace Persistence_representations
568 } // namespace Gudhi
569 
570 #endif // PERSISTENCE_INTERVALS_H_
std::vector< std::pair< double, size_t > > compute_persistent_betti_numbers() const
Definition: Persistence_intervals.h:436
size_t size() const
Definition: Persistence_intervals.h:205
std::vector< double > characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:369
std::vector< double > k_n_n(size_t k, size_t where_to_cut=10) const
Definition: Persistence_intervals.h:462
std::vector< double > vectorize(int number_of_function) const
Definition: Persistence_intervals.h:236
std::vector< std::pair< double, double > > dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:291
Definition: SimplicialComplexForAlpha.h:26
std::vector< double > length_of_dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:276
std::vector< size_t > histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:313
void plot(const char *filename, double min_x=std::numeric_limits< double >::max(), double max_x=std::numeric_limits< double >::max(), double min_y=std::numeric_limits< double >::max(), double max_y=std::numeric_limits< double >::max()) const
Definition: Persistence_intervals.h:164
std::pair< double, double > get_x_range() const
Definition: Persistence_intervals.h:69
Persistence_intervals(const char *filename, unsigned dimension=std::numeric_limits< unsigned >::max())
Definition: Persistence_intervals.h:262
Definition: Persistence_intervals.h:47
size_t number_of_vectorize_functions() const
Definition: Persistence_intervals.h:243
friend std::ostream & operator<<(std::ostream &out, const Persistence_intervals &intervals)
Definition: Persistence_intervals.h:154
std::vector< double > cumulative_characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:419
std::pair< double, double > operator[](size_t i) const
Definition: Persistence_intervals.h:211
std::vector< size_t > cumulative_histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:357
double project_to_R(int number_of_function) const
Definition: Persistence_intervals.h:556
size_t number_of_projections_to_R() const
Definition: Persistence_intervals.h:230
std::pair< double, double > get_y_range() const
Definition: Persistence_intervals.h:82
GUDHI  Version 2.2.0  - C++ library for Topological Data Analysis (TDA) and Higher Dimensional Geometry Understanding.  - Copyright : GPL v3 Generated on Thu Jun 14 2018 15:00:54 for GUDHI by Doxygen 1.8.13