doc/latest/_persistence__intervals_8h_source.html

/*    This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.

 *    See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.

 *    Author(s):       Pawel Dlotko

 *

 *    Copyright (C) 2016 Inria

 *

 *    Modification(s):

 *      - YYYY/MM Author: Description of the modification

 *      - 2019/12 Vincent Rouvreau: Fix #118 - Make histogram_of_lengths and cumulative_histogram_of_lengths

 *          return the exact number_of_bins (was failing on x86)

 */


#ifndef PERSISTENCE_INTERVALS_H_

#define PERSISTENCE_INTERVALS_H_


// gudhi include

#include <gudhi/read_persistence_from_file.h>


// standard include

#include <limits>

#include <iostream>

#include <fstream>

#include <vector>

#include <algorithm>

#include <cmath>

#include <functional>

#include <utility>

#include <string>


namespace Gudhi {

namespace Persistence_representations {


/**
 * \brief A collection of persistence intervals with a few simple descriptors computed from them.
 *
 * Every interval is a std::pair<double, double>; `first` is used as the left end (birth) and
 * `second` as the right end (death) of the interval.
 */
class Persistence_intervals {
 public:
  /**
   * Reads the intervals from the file `filename`. When `dimension` keeps its default value (the
   * largest `unsigned`), the reader is called without a dimension argument; otherwise `dimension`
   * is forwarded to it.
   */
  Persistence_intervals(const char* filename, unsigned dimension = std::numeric_limits<unsigned>::max());

  /** Builds the collection from a copy of the given (birth, death) pairs. */
  Persistence_intervals(const std::vector<std::pair<double, double> >& intervals);

  /**
   * Returns (smallest birth, largest death) over all intervals.
   * For an empty collection the pair (max double, -max double) is returned.
   */
  std::pair<double, double> get_x_range() const {
    // Seed with the extreme double values so that coordinates outside the int range are handled.
    double min_ = std::numeric_limits<double>::max();
    double max_ = -std::numeric_limits<double>::max();
    for (size_t i = 0; i != this->intervals.size(); ++i) {
      if (this->intervals[i].first < min_) min_ = this->intervals[i].first;
      if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
    }
    return std::make_pair(min_, max_);
  }

  /**
   * Returns (smallest death, largest death) over all intervals.
   * For an empty collection the pair (max double, -max double) is returned.
   */
  std::pair<double, double> get_y_range() const {
    double min_ = std::numeric_limits<double>::max();
    double max_ = -std::numeric_limits<double>::max();
    for (size_t i = 0; i != this->intervals.size(); ++i) {
      if (this->intervals[i].second < min_) min_ = this->intervals[i].second;
      if (this->intervals[i].second > max_) max_ = this->intervals[i].second;
    }
    return std::make_pair(min_, max_);
  }

  /** Lengths (death - birth) of the intervals, in decreasing order, at most `where_to_cut` of them. */
  std::vector<double> length_of_dominant_intervals(size_t where_to_cut = 100) const;

  /** The intervals ordered by decreasing length, at most `where_to_cut` of them. */
  std::vector<std::pair<double, double> > dominant_intervals(size_t where_to_cut = 100) const;

  /** Histogram, with `number_of_bins` bins, of the interval lengths relative to the longest one. */
  std::vector<size_t> histogram_of_lengths(size_t number_of_bins = 10) const;

  /** Running sum of histogram_of_lengths(number_of_bins). */
  std::vector<size_t> cumulative_histogram_of_lengths(size_t number_of_bins = 10) const;

  /**
   * Splits [x_min, x_max] into `number_of_bins` bins and, for every bin, accumulates
   * bin_width * length over the intervals whose extent reaches that bin.
   */
  std::vector<double> characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins = 10) const;

  /** Running sum of characteristic_function_of_diagram(x_min, x_max, number_of_bins). */
  std::vector<double> cumulative_characteristic_function_of_diagram(double x_min, double x_max,
                                                                    size_t number_of_bins = 10) const;

  /**
   * For every interval end point, in increasing order, the pair (end point value, number of
   * intervals open once that end point has been processed).
   */
  std::vector<std::pair<double, size_t> > compute_persistent_betti_numbers() const;

  /**
   * One value per interval, derived from the sorted distances between that interval, the other
   * intervals and the diagonal (see the definition for the exact rule); the values are returned in
   * decreasing order, at most `where_to_cut` of them.
   */
  std::vector<double> k_n_n(size_t k, size_t where_to_cut = 10) const;

  /** Writes one "birth death" line per interval. */
  friend std::ostream& operator<<(std::ostream& out, const Persistence_intervals& intervals) {
    for (size_t i = 0; i != intervals.intervals.size(); ++i) {
      out << intervals.intervals[i].first << " " << intervals.intervals[i].second << std::endl;
    }
    return out;
  }

  /**
   * Writes a gnuplot script drawing the diagram into the file `<filename>_GnuplotScript` and prints
   * on std::clog the command that loads it.
   *
   * When `min_x == max_x` (which is the case with the default arguments) both axis ranges are
   * taken from get_x_range(), enlarged by 10% on each side; otherwise the four given bounds are used.
   * If the script file cannot be opened, a message is printed on std::cerr and nothing is written.
   */
  void plot(const char* filename, double min_x = std::numeric_limits<double>::max(),
            double max_x = std::numeric_limits<double>::max(), double min_y = std::numeric_limits<double>::max(),
            double max_y = std::numeric_limits<double>::max()) const {
    const std::string gnuplot_script = std::string(filename) + "_GnuplotScript";

    std::ofstream out(gnuplot_script.c_str());
    if (!out.is_open()) {
      std::cerr << "Unable to open the file : " << gnuplot_script << " for writing" << std::endl;
      return;
    }

    // Bounds of the diagram enlarged by 10% of its extent; also used for the diagonal segment.
    const std::pair<double, double> min_max_values = this->get_x_range();
    const double margin = 0.1 * (min_max_values.second - min_max_values.first);
    const double lower = min_max_values.first - margin;
    const double upper = min_max_values.second + margin;

    if (min_x == max_x) {
      out << "set xrange [" << lower << " : " << upper << " ]" << std::endl;
      out << "set yrange [" << lower << " : " << upper << " ]" << std::endl;
    } else {
      out << "set xrange [" << min_x << " : " << max_x << " ]" << std::endl;
      out << "set yrange [" << min_y << " : " << max_y << " ]" << std::endl;
    }
    out << "plot '-' using 1:2 notitle \"" << filename << "\", \\" << std::endl;
    out << "     '-' using 1:2 notitle with lp" << std::endl;
    // First inline data set: the points of the diagram.
    for (size_t i = 0; i != this->intervals.size(); ++i) {
      out << this->intervals[i].first << " " << this->intervals[i].second << std::endl;
    }
    out << "EOF" << std::endl;
    // Second inline data set: the two end points of the diagonal segment.
    out << lower << " " << lower << std::endl;
    out << upper << " " << upper << std::endl;

    out.close();

    std::clog << "To visualize, install gnuplot and type the command: gnuplot -persist -e \"load \'"
              << gnuplot_script << "\'\"" << std::endl;
  }

  /** Number of stored intervals. */
  size_t size() const { return this->intervals.size(); }

  /**
   * Returns a copy of the i-th interval.
   * Throws a `const char*` message when `i` is not smaller than size().
   */
  std::pair<double, double> operator[](size_t i) const {
    if (i >= this->intervals.size()) throw("Index out of range! Operator [], Persistence_intervals class\n");
    return this->intervals[i];
  }

  // Implementations of functions for various concepts.

  /** Sum of the squared lengths of the intervals; `number_of_function` is not used. */
  double project_to_R(int number_of_function) const;

  /** Number of available projections to reals (set to 1 by the constructors). */
  size_t number_of_projections_to_R() const { return this->number_of_functions_for_projections_to_reals; }

  /** Same as length_of_dominant_intervals(number_of_function). */
  std::vector<double> vectorize(int number_of_function) const {
    return this->length_of_dominant_intervals(number_of_function);
  }

  /** Number of available vectorizations (set to the number of intervals by the constructors). */
  size_t number_of_vectorize_functions() const { return this->number_of_functions_for_vectorization; }

  // end of implementation of functions needed for concepts.

  // For visualization use output from vectorize and build histograms.
  /** Returns a copy of the stored intervals. */
  std::vector<std::pair<double, double> > output_for_visualization() const { return this->intervals; }

 protected:
  /** Initializes the two counters below from the current content of `intervals`. */
  void set_up_numbers_of_functions_for_vectorization_and_projections_to_reals() {
    // warning, this function can be only called after filling in the intervals vector.
    this->number_of_functions_for_vectorization = this->intervals.size();
    this->number_of_functions_for_projections_to_reals = 1;
  }

  // The stored (birth, death) pairs.
  std::vector<std::pair<double, double> > intervals;
  // Value reported by number_of_vectorize_functions().
  size_t number_of_functions_for_vectorization;
  // Value reported by number_of_projections_to_R().
  size_t number_of_functions_for_projections_to_reals;
};


/**
 * Reads the intervals from `filename`.
 *
 * When `dimension` equals the largest `unsigned` value (its default), the reader is called without
 * a dimension argument; otherwise `dimension` is forwarded to it.
 *
 * Declared `inline` because this definition lives in a header: without it, including the header
 * from two translation units produces a multiple-definition error at link time.
 */
inline Persistence_intervals::Persistence_intervals(const char* filename, unsigned dimension) {
  if (dimension == std::numeric_limits<unsigned>::max()) {
    this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename);
  } else {
    this->intervals = read_persistence_intervals_in_one_dimension_from_file(filename, dimension);
  }
  // Must come after `intervals` is filled: the counters are derived from its size.
  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
}  // Persistence_intervals


/**
 * Builds the collection from a copy of `intervals_`.
 *
 * Declared `inline` because this definition lives in a header: without it, including the header
 * from two translation units produces a multiple-definition error at link time.
 */
inline Persistence_intervals::Persistence_intervals(const std::vector<std::pair<double, double> >& intervals_)
    : intervals(intervals_) {
  // Must come after `intervals` is filled: the counters are derived from its size.
  this->set_up_numbers_of_functions_for_vectorization_and_projections_to_reals();
}


/**
 * Returns the lengths (death - birth) of the intervals in decreasing order, keeping at most
 * `where_to_cut` of them.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<double> Persistence_intervals::length_of_dominant_intervals(size_t where_to_cut) const {
  std::vector<double> result(this->intervals.size());
  for (size_t i = 0; i != this->intervals.size(); ++i) {
    result[i] = this->intervals[i].second - this->intervals[i].first;
  }

  // Only the `kept` largest lengths are returned, so only that prefix needs to be sorted.
  const size_t kept = std::min(where_to_cut, result.size());
  std::partial_sort(result.begin(), result.begin() + kept, result.end(), std::greater<double>());
  result.resize(kept);
  return result;
}  // length_of_dominant_intervals


/**
 * Ordering predicate on (position, length) pairs: true when `first` has a strictly larger length
 * (the `.second` member) than `second`. Sorting with it yields decreasing lengths.
 *
 * Declared `inline` because it is defined in a header: a non-inline free function here is defined
 * once per including translation unit and breaks the link.
 */
inline bool compare(const std::pair<size_t, double>& first, const std::pair<size_t, double>& second) {
  return first.second > second.second;
}


/**
 * Returns the intervals ordered by decreasing length (death - birth), keeping at most
 * `where_to_cut` of them.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<std::pair<double, double> > Persistence_intervals::dominant_intervals(size_t where_to_cut) const {
  // Pair every interval position with the interval length so positions survive the sort.
  std::vector<std::pair<size_t, double> > position_length_vector(this->intervals.size());
  for (size_t i = 0; i != this->intervals.size(); ++i) {
    position_length_vector[i] = std::make_pair(i, this->intervals[i].second - this->intervals[i].first);
  }

  // `compare` orders by decreasing length.
  std::sort(position_length_vector.begin(), position_length_vector.end(), compare);

  const size_t kept = std::min(where_to_cut, position_length_vector.size());
  std::vector<std::pair<double, double> > result;
  result.reserve(kept);
  for (size_t i = 0; i != kept; ++i) {
    result.push_back(this->intervals[position_length_vector[i].first]);
  }

  return result;
}  // dominant_intervals


/**
 * Histogram of the interval lengths (death - birth) relative to the longest interval.
 *
 * The relative length of an interval, a number between 0 and 1, is multiplied by `number_of_bins`
 * and truncated to get its bin; the longest interval(s) are counted in the last bin.
 *
 * Edge cases:
 *  - `number_of_bins == 0` returns an empty vector;
 *  - intervals whose length is zero, negative or NaN are counted in the first bin (this also
 *    covers the case where every interval has length zero);
 *  - when the ratio is not a number (infinite lengths) the interval is counted in the last bin.
 *
 * @param number_of_bins number of bins of the histogram.
 * @return a vector of exactly `number_of_bins` counters.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<size_t> Persistence_intervals::histogram_of_lengths(size_t number_of_bins) const {
  // With zero bins there is no bin to count into.
  if (number_of_bins == 0) return std::vector<size_t>();

  // first find the length of the longest interval:
  double lengthOfLongest = 0;
  for (size_t i = 0; i != this->intervals.size(); ++i) {
    const double length = this->intervals[i].second - this->intervals[i].first;
    if (length > lengthOfLongest) lengthOfLongest = length;
  }

  std::vector<size_t> result(number_of_bins, 0);
  const double bins = static_cast<double>(number_of_bins);

  for (size_t i = 0; i != this->intervals.size(); ++i) {
    const double length = this->intervals[i].second - this->intervals[i].first;

    size_t position = 0;
    // length > 0 implies lengthOfLongest > 0, so the division below is well defined.
    if (length > 0) {
      const double scaled = (length / lengthOfLongest) * bins;
      // The longest interval gives scaled == bins: fold it (and any NaN ratio) into the last bin
      // so that exactly number_of_bins bins are returned.
      position = (scaled < bins) ? static_cast<size_t>(scaled) : number_of_bins - 1;
    }
    ++result[position];
  }

  return result;
}


/**
 * Cumulative version of histogram_of_lengths(): entry i is the sum of the histogram entries 0..i.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<size_t> Persistence_intervals::cumulative_histogram_of_lengths(size_t number_of_bins) const {
  std::vector<size_t> result = this->histogram_of_lengths(number_of_bins);

  // Turn the histogram into its running sum in place.
  size_t sum = 0;
  for (size_t i = 0; i != result.size(); ++i) {
    sum += result[i];
    result[i] = sum;
  }
  return result;
}


/**
 * Splits [x_min, x_max] into `number_of_bins` bins of equal width and, for every interval, adds
 * bin_width * (death - birth) to each bin between the bin of its birth and the bin of its death.
 *
 * End points below `x_min` (or equal to it) map to bin 0, end points at or above `x_max` map to
 * one past the last bin, and end points strictly inside are mapped proportionally (truncated).
 *
 * @return a vector of `number_of_bins` accumulated values.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<double> Persistence_intervals::characteristic_function_of_diagram(double x_min, double x_max,
                                                                                     size_t number_of_bins) const {
  std::vector<double> result(number_of_bins, 0.);
  const double bin_width = (x_max - x_min) / static_cast<double>(number_of_bins);

  for (size_t i = 0; i != this->intervals.size(); ++i) {
    const double birth = this->intervals[i].first;
    const double death = this->intervals[i].second;

    // Index of the first bin touched by the interval.
    size_t beginIt = 0;
    if (birth >= x_max) {
      beginIt = result.size();
    } else if ((birth > x_min) && (birth < x_max)) {
      beginIt = static_cast<size_t>(number_of_bins * (birth - x_min) / (x_max - x_min));
    }

    // Index one past the last bin touched by the interval.
    size_t endIt = 0;
    if (death >= x_max) {
      endIt = result.size();
    } else if ((death > x_min) && (death < x_max)) {
      endIt = static_cast<size_t>(number_of_bins * (death - x_min) / (x_max - x_min));
    }

    // Keep the half-open range [beginIt, endIt) well formed.
    if (beginIt > endIt) {
      beginIt = endIt;
    }

    const double contribution = bin_width * (death - birth);
    for (size_t pos = beginIt; pos != endIt; ++pos) {
      result[pos] += contribution;
    }
  }
  return result;
}  // characteristic_function_of_diagram


/**
 * Cumulative version of characteristic_function_of_diagram(): entry i is the sum of the entries
 * 0..i of the characteristic function computed with the same arguments.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<double> Persistence_intervals::cumulative_characteristic_function_of_diagram(
    double x_min, double x_max, size_t number_of_bins) const {
  std::vector<double> result = this->characteristic_function_of_diagram(x_min, x_max, number_of_bins);

  // Turn the values into their running sum in place.
  double sum = 0;
  for (size_t i = 0; i != result.size(); ++i) {
    sum += result[i];
    result[i] = sum;
  }
  return result;
}  // cumulative_characteristic_function_of_diagram


/**
 * Ordering predicate on (key, flag) pairs that looks at the key only: returns true when the key
 * of `f` is strictly smaller than the key of `s`. The boolean flag is ignored.
 */
template <typename T>
bool compare_first_element_of_pair(const std::pair<T, bool>& f, const std::pair<T, bool>& s) {
  const T& left_key = f.first;
  const T& right_key = s.first;
  return left_key < right_key;
}


/**
 * Ordering of (value, is_birth) events used by compute_persistent_betti_numbers(): increasing
 * value, and for equal values births (true) before deaths (false).
 */
inline bool compare_betti_events_births_first(const std::pair<double, bool>& f, const std::pair<double, bool>& s) {
  if (f.first < s.first) return true;
  if (s.first < f.first) return false;
  return f.second && !s.second;
}

/**
 * Computes how the number of open intervals changes along the real line.
 *
 * Every interval contributes two events, its birth (+1) and its death (-1). The events are
 * processed in increasing order and, for each of them, the pair (event value, number of open
 * intervals after the event) is stored.
 *
 * At equal values births are processed before deaths. Otherwise a death could be handled before
 * the matching birth (e.g. for a zero-length interval) and wrap the unsigned counter around.
 * The counter can still wrap if an interval has its death strictly before its birth.
 *
 * @return one (value, count) pair per interval end point, i.e. 2 * size() entries.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<std::pair<double, size_t> > Persistence_intervals::compute_persistent_betti_numbers() const {
  // (value, true) marks a birth, (value, false) marks a death.
  std::vector<std::pair<double, bool> > places_where_pbs_change(2 * this->intervals.size());

  for (size_t i = 0; i != this->intervals.size(); ++i) {
    places_where_pbs_change[2 * i] = std::make_pair(this->intervals[i].first, true);
    places_where_pbs_change[2 * i + 1] = std::make_pair(this->intervals[i].second, false);
  }

  std::sort(places_where_pbs_change.begin(), places_where_pbs_change.end(), compare_betti_events_births_first);

  size_t pbn = 0;
  std::vector<std::pair<double, size_t> > pbns(places_where_pbs_change.size());
  for (size_t i = 0; i != places_where_pbs_change.size(); ++i) {
    if (places_where_pbs_change[i].second) {
      ++pbn;
    } else {
      --pbn;
    }
    pbns[i] = std::make_pair(places_where_pbs_change[i].first, pbn);
  }
  return pbns;
}


/**
 * Euclidean distance between two points of the plane, each given as a (first, second)
 * coordinate pair.
 */
inline double compute_euclidean_distance(const std::pair<double, double>& f, const std::pair<double, double>& s) {
  const double delta_first = f.first - s.first;
  const double delta_second = f.second - s.second;
  return std::sqrt(delta_first * delta_first + delta_second * delta_second);
}


/**
 * Computes one value per interval from its distances to the other points of the diagram.
 *
 * For the interval i the list of Euclidean distances from i to every interval (itself included,
 * at distance 0) plus the distance from i to the diagonal is built and sorted in decreasing
 * order. The value kept for i is:
 *  - the largest double when the list has no entry at index `k`;
 *  - otherwise the larger of the entry at index `k` and the distance from i to the diagonal.
 * The values obtained for all the intervals are then sorted in decreasing order and at most
 * `where_to_cut` of them are returned.
 *
 * @param k index read in the sorted list of distances of every interval.
 * @param where_to_cut maximal number of returned values.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline std::vector<double> Persistence_intervals::k_n_n(size_t k, size_t where_to_cut) const {
  const size_t number_of_intervals = this->intervals.size();

  // compute all to all distance between point in the diagram. Also, consider points in the diagonal with the infinite
  // multiplicity.
  std::vector<std::vector<double> > distances(number_of_intervals, std::vector<double>(number_of_intervals, 0.));
  std::vector<double> distances_from_diagonal(number_of_intervals, 0.);

  for (size_t i = 0; i != number_of_intervals; ++i) {
    // The matrix is symmetric with a zero diagonal: compute the upper part and mirror it.
    for (size_t j = i + 1; j != number_of_intervals; ++j) {
      const double distance = compute_euclidean_distance(this->intervals[i], this->intervals[j]);
      distances[i][j] = distance;
      distances[j][i] = distance;
    }

    // Distance from the point to its orthogonal projection on the diagonal.
    const double middle = 0.5 * (this->intervals[i].first + this->intervals[i].second);
    distances_from_diagonal[i] = compute_euclidean_distance(this->intervals[i], std::make_pair(middle, middle));
  }

  std::vector<double> result;
  result.reserve(number_of_intervals);
  for (size_t i = 0; i != number_of_intervals; ++i) {
    std::vector<double> distancesFromI = distances[i];
    distancesFromI.push_back(distances_from_diagonal[i]);

    std::sort(distancesFromI.begin(), distancesFromI.end(), std::greater<double>());

    if (k >= distancesFromI.size()) {
      // There is no entry at index k (note the >=: index size() is already out of range).
      result.push_back(std::numeric_limits<double>::max());
    } else if (distances_from_diagonal[i] > distancesFromI[k]) {
      result.push_back(distances_from_diagonal[i]);
    } else {
      result.push_back(distancesFromI[k]);
    }
  }

  std::sort(result.begin(), result.end(), std::greater<double>());
  result.resize(std::min(result.size(), where_to_cut));

  return result;
}


/**
 * Returns the sum of the squared lengths (death - birth) of the intervals.
 *
 * @param number_of_function not used: the same projection is computed whatever its value.
 *
 * Declared `inline` because this definition lives in a header (avoids multiple definitions when
 * the header is included from several translation units).
 */
inline double Persistence_intervals::project_to_R(int number_of_function) const {
  (void)number_of_function;  // intentionally unused, kept for the interface

  double result = 0;
  for (size_t i = 0; i != this->intervals.size(); ++i) {
    const double length = this->intervals[i].second - this->intervals[i].first;
    result += length * length;
  }

  return result;
}


}  // namespace Persistence_representations

}  // namespace Gudhi


#endif  // PERSISTENCE_INTERVALS_H_

Gudhi::Persistence_representations::Persistence_intervals
Definition: Persistence_intervals.h:37

Gudhi::Persistence_representations::Persistence_intervals::compute_persistent_betti_numbers
std::vector< std::pair< double, size_t > > compute_persistent_betti_numbers() const
Definition: Persistence_intervals.h:429

Gudhi::Persistence_representations::Persistence_intervals::vectorize
std::vector< double > vectorize(int number_of_function) const
Definition: Persistence_intervals.h:226

Gudhi::Persistence_representations::Persistence_intervals::project_to_R
double project_to_R(int number_of_function) const
Definition: Persistence_intervals.h:549

Gudhi::Persistence_representations::Persistence_intervals::get_x_range
std::pair< double, double > get_x_range() const
Definition: Persistence_intervals.h:59

Gudhi::Persistence_representations::Persistence_intervals::cumulative_characteristic_function_of_diagram
std::vector< double > cumulative_characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:412

Gudhi::Persistence_representations::Persistence_intervals::number_of_projections_to_R
size_t number_of_projections_to_R() const
Definition: Persistence_intervals.h:220

Gudhi::Persistence_representations::Persistence_intervals::characteristic_function_of_diagram
std::vector< double > characteristic_function_of_diagram(double x_min, double x_max, size_t number_of_bins=10) const
Definition: Persistence_intervals.h:362

Gudhi::Persistence_representations::Persistence_intervals::dominant_intervals
std::vector< std::pair< double, double > > dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:281

Gudhi::Persistence_representations::Persistence_intervals::operator<<
friend std::ostream & operator<<(std::ostream &out, const Persistence_intervals &intervals)
Definition: Persistence_intervals.h:144

Gudhi::Persistence_representations::Persistence_intervals::cumulative_histogram_of_lengths
std::vector< size_t > cumulative_histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:350

Gudhi::Persistence_representations::Persistence_intervals::plot
void plot(const char *filename, double min_x=std::numeric_limits< double >::max(), double max_x=std::numeric_limits< double >::max(), double min_y=std::numeric_limits< double >::max(), double max_y=std::numeric_limits< double >::max()) const
Definition: Persistence_intervals.h:154

Gudhi::Persistence_representations::Persistence_intervals::length_of_dominant_intervals
std::vector< double > length_of_dominant_intervals(size_t where_to_cut=100) const
Definition: Persistence_intervals.h:266

Gudhi::Persistence_representations::Persistence_intervals::histogram_of_lengths
std::vector< size_t > histogram_of_lengths(size_t number_of_bins=10) const
Definition: Persistence_intervals.h:303

Gudhi::Persistence_representations::Persistence_intervals::number_of_vectorize_functions
size_t number_of_vectorize_functions() const
Definition: Persistence_intervals.h:233

Gudhi::Persistence_representations::Persistence_intervals::operator[]
std::pair< double, double > operator[](size_t i) const
Definition: Persistence_intervals.h:201

Gudhi::Persistence_representations::Persistence_intervals::size
size_t size() const
Definition: Persistence_intervals.h:195

Gudhi::Persistence_representations::Persistence_intervals::k_n_n
std::vector< double > k_n_n(size_t k, size_t where_to_cut=10) const
Definition: Persistence_intervals.h:455

Gudhi::Persistence_representations::Persistence_intervals::get_y_range
std::pair< double, double > get_y_range() const
Definition: Persistence_intervals.h:72

Gudhi::Persistence_representations::Persistence_intervals::Persistence_intervals
Persistence_intervals(const char *filename, unsigned dimension=std::numeric_limits< unsigned >::max())
Definition: Persistence_intervals.h:252

Gudhi
Gudhi namespace.
Definition: SimplicialComplexForAlpha.h:14