Persistence_vectors.h
1 /* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2  * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3  * Author(s): Pawel Dlotko
4  *
5  * Copyright (C) 2016 Inria
6  *
7  * Modification(s):
8  * - YYYY/MM Author: Description of the modification
9  */
10 
11 #ifndef PERSISTENCE_VECTORS_H_
12 #define PERSISTENCE_VECTORS_H_
13 
14 // gudhi include
15 #include <gudhi/read_persistence_from_file.h>
16 #include <gudhi/common_persistence_representations.h>
18 
19 #include <fstream>
20 #include <cmath>
21 #include <algorithm>
22 #include <iostream>
23 #include <limits>
24 #include <functional>
25 #include <utility>
26 #include <vector>
27 
28 namespace Gudhi {
29 namespace Persistence_representations {
30 
31 template <typename T>
32 struct Maximum_distance {
33  double operator()(const std::pair<T, T>& f, const std::pair<T, T>& s) {
34  return std::max(fabs(f.first - s.first), fabs(f.second - s.second));
35  }
36 };
37 
53 template <typename F>
55  public:
60 
65  Vector_distances_in_diagram(const std::vector<std::pair<double, double> >& intervals, size_t where_to_cut);
66 
71  Vector_distances_in_diagram(const char* filename, size_t where_to_cut,
72  unsigned dimension = std::numeric_limits<unsigned>::max());
73 
77  template <typename K>
78  friend std::ostream& operator<<(std::ostream& out, const Vector_distances_in_diagram<K>& d) {
79  for (size_t i = 0; i != std::min(d.sorted_vector_of_distances.size(), d.where_to_cut); ++i) {
80  out << d.sorted_vector_of_distances[i] << " ";
81  }
82  return out;
83  }
84 
88  inline double vector_in_position(size_t position) const {
89  if (position >= this->sorted_vector_of_distances.size())
90  throw("Wrong position in accessing Vector_distances_in_diagram::sorted_vector_of_distances\n");
91  return this->sorted_vector_of_distances[position];
92  }
93 
97  inline size_t size() const { return this->sorted_vector_of_distances.size(); }
98 
102  void write_to_file(const char* filename) const;
103 
107  void print_to_file(const char* filename) const { this->write_to_file(filename); }
108 
112  void load_from_file(const char* filename);
113 
117  bool operator==(const Vector_distances_in_diagram& second) const {
118  if (this->sorted_vector_of_distances.size() != second.sorted_vector_of_distances.size()) return false;
119  for (size_t i = 0; i != this->sorted_vector_of_distances.size(); ++i) {
120  if (!almost_equal(this->sorted_vector_of_distances[i], second.sorted_vector_of_distances[i])) return false;
121  }
122  return true;
123  }
124 
125  bool operator!=(const Vector_distances_in_diagram& second) const { return !(*this == second); }
126 
127  // Implementations of functions for various concepts.
135  double project_to_R(int number_of_function) const;
140  size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals; }
141 
145  std::vector<double> vectorize(int number_of_function) const;
150  size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization; }
151 
155  void compute_average(const std::vector<Vector_distances_in_diagram*>& to_average);
156 
161  double distance(const Vector_distances_in_diagram& second, double power = 1) const;
162 
167  double compute_scalar_product(const Vector_distances_in_diagram& second) const;
168  // end of implementation of functions needed for concepts.
169 
173  std::vector<double> output_for_visualization() const { return this->sorted_vector_of_distances; }
174 
178  void plot(const char* filename) const {
179  std::stringstream gnuplot_script;
180  gnuplot_script << filename << "_GnuplotScript";
181  std::ofstream out;
182  out.open(gnuplot_script.str().c_str());
183  out << "set style data histogram" << std::endl;
184  out << "set style histogram cluster gap 1" << std::endl;
185  out << "set style fill solid border -1" << std::endl;
186  out << "plot '-' notitle" << std::endl;
187  for (size_t i = 0; i != this->sorted_vector_of_distances.size(); ++i) {
188  out << this->sorted_vector_of_distances[i] << std::endl;
189  }
190  out << std::endl;
191  out.close();
192  std::clog << "To visualize, install gnuplot and type the command: gnuplot -persist -e \"load \'"
193  << gnuplot_script.str().c_str() << "\'\"" << std::endl;
194  }
195 
199  std::pair<double, double> get_x_range() const { return std::make_pair(0, this->sorted_vector_of_distances.size()); }
200 
204  std::pair<double, double> get_y_range() const {
205  if (this->sorted_vector_of_distances.size() == 0) return std::make_pair(0, 0);
206  return std::make_pair(this->sorted_vector_of_distances[0], 0);
207  }
208 
209  // arithmetic operations:
210  template <typename Operation_type>
211  friend Vector_distances_in_diagram operation_on_pair_of_vectors(const Vector_distances_in_diagram& first,
212  const Vector_distances_in_diagram& second,
213  Operation_type opertion) {
215  // Operation_type operation;
216  result.sorted_vector_of_distances.reserve(
217  std::max(first.sorted_vector_of_distances.size(), second.sorted_vector_of_distances.size()));
218  for (size_t i = 0; i != std::min(first.sorted_vector_of_distances.size(), second.sorted_vector_of_distances.size());
219  ++i) {
220  result.sorted_vector_of_distances.push_back(
221  opertion(first.sorted_vector_of_distances[i], second.sorted_vector_of_distances[i]));
222  }
223  if (first.sorted_vector_of_distances.size() ==
224  std::min(first.sorted_vector_of_distances.size(), second.sorted_vector_of_distances.size())) {
225  for (size_t i = std::min(first.sorted_vector_of_distances.size(), second.sorted_vector_of_distances.size());
226  i != std::max(first.sorted_vector_of_distances.size(), second.sorted_vector_of_distances.size()); ++i) {
227  result.sorted_vector_of_distances.push_back(opertion(0, second.sorted_vector_of_distances[i]));
228  }
229  } else {
230  for (size_t i = std::min(first.sorted_vector_of_distances.size(), second.sorted_vector_of_distances.size());
231  i != std::max(first.sorted_vector_of_distances.size(), second.sorted_vector_of_distances.size()); ++i) {
232  result.sorted_vector_of_distances.push_back(opertion(first.sorted_vector_of_distances[i], 0));
233  }
234  }
235  return result;
236  } // operation_on_pair_of_vectors
237 
243  result.sorted_vector_of_distances.reserve(this->sorted_vector_of_distances.size());
244  for (size_t i = 0; i != this->sorted_vector_of_distances.size(); ++i) {
245  result.sorted_vector_of_distances.push_back(scalar * this->sorted_vector_of_distances[i]);
246  }
247  return result;
248  } // multiply_by_scalar
249 
254  const Vector_distances_in_diagram& second) {
255  return operation_on_pair_of_vectors(first, second, std::plus<double>());
256  }
261  const Vector_distances_in_diagram& second) {
262  return operation_on_pair_of_vectors(first, second, std::minus<double>());
263  }
268  return A.multiply_by_scalar(scalar);
269  }
274  return A.multiply_by_scalar(scalar);
275  }
279  Vector_distances_in_diagram operator*(double scalar) { return this->multiply_by_scalar(scalar); }
284  *this = *this + rhs;
285  return *this;
286  }
291  *this = *this - rhs;
292  return *this;
293  }
298  *this = *this * x;
299  return *this;
300  }
305  if (x == 0) throw("In operator /=, division by 0. Program terminated.");
306  *this = *this * (1 / x);
307  return *this;
308  }
309 
310  private:
311  std::vector<std::pair<double, double> > intervals;
312  std::vector<double> sorted_vector_of_distances;
313  size_t number_of_functions_for_vectorization;
314  size_t number_of_functions_for_projections_to_reals;
315  size_t where_to_cut;
316 
317  void compute_sorted_vector_of_distances_via_heap(size_t where_to_cut);
318  void compute_sorted_vector_of_distances_via_vector_sorting(size_t where_to_cut);
319 
320  Vector_distances_in_diagram(const std::vector<double>& sorted_vector_of_distances_)
321  : sorted_vector_of_distances(sorted_vector_of_distances_) {
322  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
323  }
324 
325  void set_up_numbers_of_functions_for_vectorization_and_projections_to_reals() {
326  // warning, this function can be only called after filling in the intervals vector.
327  this->number_of_functions_for_vectorization = this->sorted_vector_of_distances.size();
328  this->number_of_functions_for_projections_to_reals = this->sorted_vector_of_distances.size();
329  }
330 };
331 
332 template <typename F>
333 Vector_distances_in_diagram<F>::Vector_distances_in_diagram(const std::vector<std::pair<double, double> >& intervals_,
334  size_t where_to_cut_)
335  : where_to_cut(where_to_cut_) {
336  std::vector<std::pair<double, double> > i(intervals_);
337  this->intervals = i;
338  // this->compute_sorted_vector_of_distances_via_heap( where_to_cut );
339  this->compute_sorted_vector_of_distances_via_vector_sorting(where_to_cut);
340  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
341 }
342 
343 template <typename F>
344 Vector_distances_in_diagram<F>::Vector_distances_in_diagram(const char* filename, size_t where_to_cut,
345  unsigned dimension)
346  : where_to_cut(where_to_cut) {
347  std::vector<std::pair<double, double> > intervals;
348  if (dimension == std::numeric_limits<unsigned>::max()) {
349  intervals = read_persistence_intervals_in_one_dimension_from_file(filename);
350  } else {
351  intervals = read_persistence_intervals_in_one_dimension_from_file(filename, dimension);
352  }
353  this->intervals = intervals;
354  this->compute_sorted_vector_of_distances_via_heap(where_to_cut);
355  // this->compute_sorted_vector_of_distances_via_vector_sorting( where_to_cut );
356  set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
357 }
358 
359 template <typename F>
361  bool dbg = false;
362  if (dbg) {
363  std::clog << "Here are the intervals : \n";
364  for (size_t i = 0; i != this->intervals.size(); ++i) {
365  std::clog << this->intervals[i].first << " , " << this->intervals[i].second << std::endl;
366  }
367  }
368  where_to_cut = std::min(
369  where_to_cut, (size_t)(0.5 * this->intervals.size() * (this->intervals.size() - 1) + this->intervals.size()));
370 
371  std::vector<double> heap(where_to_cut, std::numeric_limits<int>::max());
372  std::make_heap(heap.begin(), heap.end());
373  F f;
374 
375  // for every pair of points in the diagram, compute the minimum of their distance, and distance of those points from
376  // diagonal
377  for (size_t i = 0; i < this->intervals.size(); ++i) {
378  for (size_t j = i + 1; j < this->intervals.size(); ++j) {
379  double value = std::min(
380  f(this->intervals[i], this->intervals[j]),
381  std::min(
382  f(this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second),
383  0.5 * (this->intervals[i].first + this->intervals[i].second))),
384  f(this->intervals[j], std::make_pair(0.5 * (this->intervals[j].first + this->intervals[j].second),
385  0.5 * (this->intervals[j].first + this->intervals[j].second)))));
386 
387  if (dbg) {
388  std::clog << "Value : " << value << std::endl;
389  std::clog << "heap.front() : " << heap.front() << std::endl;
390  getchar();
391  }
392 
393  if (-value < heap.front()) {
394  if (dbg) {
395  std::clog << "Replacing : " << heap.front() << " with : " << -value << std::endl;
396  getchar();
397  }
398  // remove the first element from the heap
399  std::pop_heap(heap.begin(), heap.end());
400  // heap.pop_back();
401  // and put value there instead:
402  // heap.push_back(-value);
403  heap[where_to_cut - 1] = -value;
404  std::push_heap(heap.begin(), heap.end());
405  }
406  }
407  }
408 
409  // now add distances of all points from diagonal
410  for (size_t i = 0; i < this->intervals.size(); ++i) {
411  double value = f(this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second),
412  0.5 * (this->intervals[i].first + this->intervals[i].second)));
413  if (-value < heap.front()) {
414  // remove the first element from the heap
415  std::pop_heap(heap.begin(), heap.end());
416  // heap.pop_back();
417  // and put value there instead:
418  // heap.push_back(-value);
419  heap[where_to_cut - 1] = -value;
420  std::push_heap(heap.begin(), heap.end());
421  }
422  }
423 
424  std::sort_heap(heap.begin(), heap.end());
425  for (size_t i = 0; i != heap.size(); ++i) {
426  if (heap[i] == std::numeric_limits<int>::max()) {
427  heap[i] = 0;
428  } else {
429  heap[i] *= -1;
430  }
431  }
432 
433  if (dbg) {
434  std::clog << "This is the heap after all the operations :\n";
435  for (size_t i = 0; i != heap.size(); ++i) {
436  std::clog << heap[i] << " ";
437  }
438  std::clog << std::endl;
439  }
440 
441  this->sorted_vector_of_distances = heap;
442 }
443 
444 template <typename F>
446  std::vector<double> distances;
447  distances.reserve((size_t)(0.5 * this->intervals.size() * (this->intervals.size() - 1) + this->intervals.size()));
448  F f;
449 
450  // for every pair of points in the diagram, compute the minimum of their distance, and distance of those points from
451  // diagonal
452  for (size_t i = 0; i < this->intervals.size(); ++i) {
453  // add distance of i-th point in the diagram from the diagonal to the distances vector
454  distances.push_back(
455  f(this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second),
456  0.5 * (this->intervals[i].first + this->intervals[i].second))));
457  for (size_t j = i + 1; j < this->intervals.size(); ++j) {
458  double value = std::min(
459  f(this->intervals[i], this->intervals[j]),
460  std::min(
461  f(this->intervals[i], std::make_pair(0.5 * (this->intervals[i].first + this->intervals[i].second),
462  0.5 * (this->intervals[i].first + this->intervals[i].second))),
463  f(this->intervals[j], std::make_pair(0.5 * (this->intervals[j].first + this->intervals[j].second),
464  0.5 * (this->intervals[j].first + this->intervals[j].second)))));
465  distances.push_back(value);
466  }
467  }
468  std::sort(distances.begin(), distances.end(), std::greater<double>());
469  if (distances.size() > where_to_cut) distances.resize(where_to_cut);
470 
471  this->sorted_vector_of_distances = distances;
472 }
473 
474 // Implementations of functions for various concepts.
475 template <typename F>
476 double Vector_distances_in_diagram<F>::project_to_R(int number_of_function) const {
477  if ((size_t)number_of_function > this->number_of_functions_for_projections_to_reals)
478  throw "Wrong index of a function in a method Vector_distances_in_diagram<F>::project_to_R";
479  if (number_of_function < 0)
480  throw "Wrong index of a function in a method Vector_distances_in_diagram<F>::project_to_R";
481 
482  double result = 0;
483  for (size_t i = 0; i != (size_t)number_of_function; ++i) {
484  result += sorted_vector_of_distances[i];
485  }
486  return result;
487 }
488 
489 template <typename F>
490 void Vector_distances_in_diagram<F>::compute_average(const std::vector<Vector_distances_in_diagram*>& to_average) {
491  if (to_average.size() == 0) {
492  (*this) = Vector_distances_in_diagram<F>();
493  return;
494  }
495 
496  size_t maximal_length_of_vector = 0;
497  for (size_t i = 0; i != to_average.size(); ++i) {
498  if (to_average[i]->sorted_vector_of_distances.size() > maximal_length_of_vector) {
499  maximal_length_of_vector = to_average[i]->sorted_vector_of_distances.size();
500  }
501  }
502 
503  std::vector<double> av(maximal_length_of_vector, 0);
504  for (size_t i = 0; i != to_average.size(); ++i) {
505  for (size_t j = 0; j != to_average[i]->sorted_vector_of_distances.size(); ++j) {
506  av[j] += to_average[i]->sorted_vector_of_distances[j];
507  }
508  }
509 
510  for (size_t i = 0; i != maximal_length_of_vector; ++i) {
511  av[i] /= static_cast<double>(to_average.size());
512  }
513  this->sorted_vector_of_distances = av;
514  this->where_to_cut = av.size();
515 }
516 
517 template <typename F>
518 double Vector_distances_in_diagram<F>::distance(const Vector_distances_in_diagram& second_, double power) const {
519  bool dbg = false;
520 
521  if (dbg) {
522  std::clog << "Entering double Vector_distances_in_diagram<F>::distance( const Abs_Topological_data_with_distances* "
523  "second , double power ) procedure \n";
524  std::clog << "Power : " << power << std::endl;
525  std::clog << "This : " << *this << std::endl;
526  std::clog << "second : " << second_ << std::endl;
527  }
528 
529  double result = 0;
530  for (size_t i = 0; i != std::min(this->sorted_vector_of_distances.size(), second_.sorted_vector_of_distances.size());
531  ++i) {
532  if (power == 1) {
533  if (dbg) {
534  std::clog << "|" << this->sorted_vector_of_distances[i] << " - " << second_.sorted_vector_of_distances[i]
535  << " | : " << fabs(this->sorted_vector_of_distances[i] - second_.sorted_vector_of_distances[i])
536  << std::endl;
537  }
538  result += fabs(this->sorted_vector_of_distances[i] - second_.sorted_vector_of_distances[i]);
539  } else {
540  if (power < std::numeric_limits<double>::max()) {
541  result += std::pow(fabs(this->sorted_vector_of_distances[i] - second_.sorted_vector_of_distances[i]), power);
542  } else {
543  // max norm
544  if (result < fabs(this->sorted_vector_of_distances[i] - second_.sorted_vector_of_distances[i]))
545  result = fabs(this->sorted_vector_of_distances[i] - second_.sorted_vector_of_distances[i]);
546  }
547  if (dbg) {
548  std::clog << "| " << this->sorted_vector_of_distances[i] << " - " << second_.sorted_vector_of_distances[i]
549  << " : " << fabs(this->sorted_vector_of_distances[i] - second_.sorted_vector_of_distances[i])
550  << std::endl;
551  }
552  }
553  }
554  if (this->sorted_vector_of_distances.size() != second_.sorted_vector_of_distances.size()) {
555  if (this->sorted_vector_of_distances.size() > second_.sorted_vector_of_distances.size()) {
556  for (size_t i = second_.sorted_vector_of_distances.size(); i != this->sorted_vector_of_distances.size(); ++i) {
557  result += fabs(this->sorted_vector_of_distances[i]);
558  }
559  } else {
560  // this->sorted_vector_of_distances.size() < second_.sorted_vector_of_distances.size()
561  for (size_t i = this->sorted_vector_of_distances.size(); i != second_.sorted_vector_of_distances.size(); ++i) {
562  result += fabs(second_.sorted_vector_of_distances[i]);
563  }
564  }
565  }
566 
567  if (power != 1) {
568  result = std::pow(result, (1.0 / power));
569  }
570  return result;
571 }
572 
573 template <typename F>
574 std::vector<double> Vector_distances_in_diagram<F>::vectorize(int number_of_function) const {
575  if ((size_t)number_of_function > this->number_of_functions_for_vectorization)
576  throw "Wrong index of a function in a method Vector_distances_in_diagram<F>::vectorize";
577  if (number_of_function < 0) throw "Wrong index of a function in a method Vector_distances_in_diagram<F>::vectorize";
578 
579  std::vector<double> result(std::min((size_t)number_of_function, this->sorted_vector_of_distances.size()));
580  for (size_t i = 0; i != std::min((size_t)number_of_function, this->sorted_vector_of_distances.size()); ++i) {
581  result[i] = this->sorted_vector_of_distances[i];
582  }
583  return result;
584 }
585 
586 template <typename F>
587 void Vector_distances_in_diagram<F>::write_to_file(const char* filename) const {
588  std::ofstream out;
589  out.open(filename);
590 
591  for (size_t i = 0; i != this->sorted_vector_of_distances.size(); ++i) {
592  out << this->sorted_vector_of_distances[i] << " ";
593  }
594 
595  out.close();
596 }
597 
598 template <typename F>
600  std::ifstream in;
601  in.open(filename);
602  // check if the file exist.
603  if (!in.good()) {
604  std::cerr << "The file : " << filename << " do not exist. The program will now terminate \n";
605  throw "The persistence landscape file do not exist. The program will now terminate \n";
606  }
607 
608  double number;
609  while (in >> number) {
610  this->sorted_vector_of_distances.push_back(number);
611  }
612  in.close();
613 }
614 
615 template <typename F>
617  double result = 0;
618  for (size_t i = 0;
619  i != std::min(this->sorted_vector_of_distances.size(), second_vector.sorted_vector_of_distances.size()); ++i) {
620  result += this->sorted_vector_of_distances[i] * second_vector.sorted_vector_of_distances[i];
621  }
622  return result;
623 }
624 
625 } // namespace Persistence_representations
626 } // namespace Gudhi
627 
628 #endif // PERSISTENCE_VECTORS_H_
double vector_in_position(size_t position) const
Definition: Persistence_vectors.h:88
Vector_distances_in_diagram operator/=(double x)
Definition: Persistence_vectors.h:304
void load_from_file(const char *filename)
Definition: Persistence_vectors.h:599
void print_to_file(const char *filename) const
Definition: Persistence_vectors.h:107
std::vector< double > output_for_visualization() const
Definition: Persistence_vectors.h:173
std::pair< double, double > get_x_range() const
Definition: Persistence_vectors.h:199
A class implementing persistence vectors.
Definition: Persistence_vectors.h:54
Definition: SimplicialComplexForAlpha.h:14
Vector_distances_in_diagram operator*(double scalar)
Definition: Persistence_vectors.h:279
std::vector< double > vectorize(int number_of_function) const
Definition: Persistence_vectors.h:574
size_t number_of_vectorize_functions() const
Definition: Persistence_vectors.h:150
Vector_distances_in_diagram operator*=(double x)
Definition: Persistence_vectors.h:297
void write_to_file(const char *filename) const
Definition: Persistence_vectors.h:587
size_t size() const
Definition: Persistence_vectors.h:97
void plot(const char *filename) const
Definition: Persistence_vectors.h:178
size_t number_of_projections_to_R() const
Definition: Persistence_vectors.h:140
friend Vector_distances_in_diagram operator*(double scalar, const Vector_distances_in_diagram &A)
Definition: Persistence_vectors.h:267
void compute_average(const std::vector< Vector_distances_in_diagram *> &to_average)
Definition: Persistence_vectors.h:490
Global distance functions.
bool operator==(const Vector_distances_in_diagram &second) const
Definition: Persistence_vectors.h:117
Vector_distances_in_diagram operator+=(const Vector_distances_in_diagram &rhs)
Definition: Persistence_vectors.h:283
std::pair< double, double > get_y_range() const
Definition: Persistence_vectors.h:204
friend Vector_distances_in_diagram operator*(const Vector_distances_in_diagram &A, double scalar)
Definition: Persistence_vectors.h:273
Vector_distances_in_diagram()
Definition: Persistence_vectors.h:59
double compute_scalar_product(const Vector_distances_in_diagram &second) const
Definition: Persistence_vectors.h:616
double project_to_R(int number_of_function) const
Definition: Persistence_vectors.h:476
double distance(const Vector_distances_in_diagram &second, double power=1) const
Definition: Persistence_vectors.h:518
friend Vector_distances_in_diagram operator-(const Vector_distances_in_diagram &first, const Vector_distances_in_diagram &second)
Definition: Persistence_vectors.h:260
Vector_distances_in_diagram multiply_by_scalar(double scalar) const
Definition: Persistence_vectors.h:241
friend Vector_distances_in_diagram operator+(const Vector_distances_in_diagram &first, const Vector_distances_in_diagram &second)
Definition: Persistence_vectors.h:253
Vector_distances_in_diagram operator-=(const Vector_distances_in_diagram &rhs)
Definition: Persistence_vectors.h:290
GUDHI  Version 3.4.1  - C++ library for Topological Data Analysis (TDA) and Higher Dimensional Geometry Understanding.  - Copyright : MIT Generated on Fri Jan 22 2021 09:41:15 for GUDHI by Doxygen 1.8.13