Flag_complex_edge_collapser.h
1/* This file is part of the Gudhi Library - https://gudhi.inria.fr/ - which is released under MIT.
2 * See file LICENSE or go to https://gudhi.inria.fr/licensing/ for full license details.
3 * Author(s): Siddharth Pritam, Marc Glisse
4 *
5 * Copyright (C) 2020 Inria
6 *
7 * Modification(s):
8 * - 2020/03 Vincent Rouvreau: integration to the gudhi library
9 * - 2021 Marc Glisse: complete rewrite
10 * - YYYY/MM Author: Description of the modification
11 */
12
13#ifndef FLAG_COMPLEX_EDGE_COLLAPSER_H_
14#define FLAG_COMPLEX_EDGE_COLLAPSER_H_
15
16#include <gudhi/Debug_utils.h>
17
18#include <boost/container/flat_map.hpp>
19#include <boost/container/flat_set.hpp>
20
21#ifdef GUDHI_USE_TBB
22#include <tbb/parallel_sort.h>
23#endif
24
25#include <utility>
26#include <vector>
27#include <tuple>
28#include <algorithm>
29#include <limits>
30
31namespace Gudhi {
32
33namespace collapse {
34
42template<typename Vertex, typename Filtration_value>
43struct Flag_complex_edge_collapser {
44 using Filtered_edge = std::tuple<Vertex, Vertex, Filtration_value>;
45 typedef std::pair<Vertex,Vertex> Edge;
46 struct Cmpi { template<class T, class U> bool operator()(T const&a, U const&b)const{return b<a; } };
47 typedef boost::container::flat_map<Vertex, Filtration_value> Ngb_list;
48 typedef std::vector<Ngb_list> Neighbors;
49 Neighbors neighbors; // closed neighborhood
50 std::size_t num_vertices;
51 std::vector<std::tuple<Vertex, Vertex, Filtration_value>> res;
52
53#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
54 // Minimal matrix interface
55 // Using this matrix generally helps performance, but the memory use may be excessive for a very sparse graph
56 // (and in extreme cases the constant initialization of the matrix may start to dominate the running time).
57 // Are there cases where the matrix is too big but a hash table would help?
58 std::vector<Filtration_value> neighbors_data;
59 void init_neighbors_dense(){
60 neighbors_data.clear();
61 neighbors_data.resize(num_vertices*num_vertices, std::numeric_limits<Filtration_value>::infinity());
62 }
63 Filtration_value& neighbors_dense(Vertex i, Vertex j){return neighbors_data[num_vertices*j+i];}
64#endif
65
66 // This does not touch the events list, only the adjacency matrix(es)
67 void delay_neighbor(Vertex u, Vertex v, Filtration_value f) {
68 neighbors[u][v]=f;
69 neighbors[v][u]=f;
70#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
71 neighbors_dense(u,v)=f;
72 neighbors_dense(v,u)=f;
73#endif
74 }
75 void remove_neighbor(Vertex u, Vertex v) {
76 neighbors[u].erase(v);
77 neighbors[v].erase(u);
78#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
79 neighbors_dense(u,v)=std::numeric_limits<Filtration_value>::infinity();
80 neighbors_dense(v,u)=std::numeric_limits<Filtration_value>::infinity();
81#endif
82 }
83
84 template<class FilteredEdgeRange>
85 void read_edges(FilteredEdgeRange const&r){
86 neighbors.resize(num_vertices);
87#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
88 init_neighbors_dense();
89#endif
90 // Use the raw sequence to avoid maintaining the order
91 std::vector<typename Ngb_list::sequence_type> neighbors_seq(num_vertices);
92 for(auto&&e : r){
93 using std::get;
94 Vertex u = get<0>(e);
95 Vertex v = get<1>(e);
96 Filtration_value f = get<2>(e);
97 neighbors_seq[u].emplace_back(v, f);
98 neighbors_seq[v].emplace_back(u, f);
99#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
100 neighbors_dense(u,v)=f;
101 neighbors_dense(v,u)=f;
102#endif
103 }
104 for(std::size_t i=0;i<neighbors_seq.size();++i){
105 neighbors_seq[i].emplace_back(i, -std::numeric_limits<Filtration_value>::infinity());
106 neighbors[i].adopt_sequence(std::move(neighbors_seq[i])); // calls sort
107#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
108 neighbors_dense(i,i)=-std::numeric_limits<Filtration_value>::infinity();
109#endif
110 }
111 }
112
113 // Open neighborhood
114 // At some point it helped gcc to add __attribute__((noinline)) here, otherwise we had +50% on the running time
115 // on one example. It looks ok now, or I forgot which example that was.
116 void common_neighbors(boost::container::flat_set<Vertex>& e_ngb,
117 std::vector<std::pair<Filtration_value, Vertex>>& e_ngb_later,
118 Vertex u, Vertex v, Filtration_value f_event){
119 // Using neighbors_dense here seems to hurt, even if we loop on the smaller of nu and nv.
120 Ngb_list const&nu = neighbors[u];
121 Ngb_list const&nv = neighbors[v];
122 auto ui = nu.begin();
123 auto ue = nu.end();
124 auto vi = nv.begin();
125 auto ve = nv.end();
126 assert(ui != ue && vi != ve);
127 while(ui != ue && vi != ve){
128 Vertex w = ui->first;
129 if(w < vi->first) { ++ui; continue; }
130 if(w > vi->first) { ++vi; continue; }
131 // nu and nv are closed, so we need to exclude e here.
132 if(w != u && w != v) {
133 Filtration_value f = std::max(ui->second, vi->second);
134 if(f > f_event)
135 e_ngb_later.emplace_back(f, w);
136 else
137 e_ngb.insert(e_ngb.end(), w);
138 }
139 ++ui; ++vi;
140 }
141 }
142
143 // Test if the neighborhood of e is included in the closed neighborhood of c
144 template<class Ngb>
145 bool is_dominated_by(Ngb const& e_ngb, Vertex c, Filtration_value f){
146 // The best strategy probably depends on the distribution, how sparse / dense the adjacency matrix is,
147 // how (un)balanced the sizes of e_ngb and nc are.
148 // Some efficient operations on sets work best with bitsets, although the need for a map complicates things.
149#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
150 for(auto v : e_ngb) {
151 // if(v==c)continue;
152 if(neighbors_dense(v,c) > f) return false;
153 }
154 return true;
155#else
156 auto&&nc = neighbors[c];
157 // if few neighbors, use dichotomy? Seems slower.
158 // I tried storing a copy of neighbors as a vector<absl::flat_hash_map> and using it for nc, but it was
159 // a bit slower here. It did help with neighbors[dominator].find(w) in the main function though,
160 // sometimes enough, sometimes not.
161 auto ci = nc.begin();
162 auto ce = nc.end();
163 auto eni = e_ngb.begin();
164 auto ene = e_ngb.end();
165 assert(eni != ene);
166 assert(ci != ce);
167 // if(*eni == c && ++eni == ene) return true;
168 for(;;){
169 Vertex ve = *eni;
170 Vertex vc = ci->first;
171 while(ve > vc) {
172 // try a gallop strategy (exponential search)? Seems slower
173 if(++ci == ce) return false;
174 vc = ci->first;
175 }
176 if(ve < vc) return false;
177 // ve == vc
178 if(ci->second > f) return false;
179 if(++eni == ene)return true;
180 // If we stored an open neighborhood of c (excluding c), we would need to test for c here and before the loop
181 // if(*eni == c && ++eni == ene)return true;
182 if(++ci == ce) return false;
183 }
184#endif
185 }
186
187 template<class FilteredEdgeRange, class Delay>
188 void process_edges(FilteredEdgeRange const& edges, Delay&& delay) {
189 {
190 Vertex maxi = 0, maxj = 0;
191 for(auto& fe : edges) {
192 Vertex i = std::get<0>(fe);
193 Vertex j = std::get<1>(fe);
194 if (i > maxi) maxi = i;
195 if (j > maxj) maxj = j;
196 }
197 num_vertices = std::max(maxi, maxj) + 1;
198 }
199
200 read_edges(edges);
201
202 boost::container::flat_set<Vertex> e_ngb;
203 e_ngb.reserve(num_vertices);
204 std::vector<std::pair<Filtration_value, Vertex>> e_ngb_later;
205 for(auto&e:edges) {
206 {
207 Vertex u = std::get<0>(e);
208 Vertex v = std::get<1>(e);
209 Filtration_value input_time = std::get<2>(e);
210 auto time = delay(input_time);
211 auto start_time = time;
212 e_ngb.clear();
213 e_ngb_later.clear();
214 common_neighbors(e_ngb, e_ngb_later, u, v, time);
215 // If we identify a good candidate (the first common neighbor) for being a dominator of e until infinity,
216 // we could check that a bit more cheaply. It does not seem to help though.
217 auto cmp1=[](auto const&a, auto const&b){return a.first > b.first;};
218 auto e_ngb_later_begin=e_ngb_later.begin();
219 auto e_ngb_later_end=e_ngb_later.end();
220 bool heapified = false;
221
222 bool dead = false;
223 while(true) {
224 Vertex dominator = -1;
225 // special case for size 1
226 // if(e_ngb.size()==1){dominator=*e_ngb.begin();}else
227 // It is tempting to test the dominators in increasing order of filtration value, which is likely to reduce
228 // the number of calls to is_dominated_by before finding a dominator, but sorting, even partially / lazily,
229 // is very expensive.
230 for(auto c : e_ngb){
231 if(is_dominated_by(e_ngb, c, time)){
232 dominator = c;
233 break;
234 }
235 }
236 if(dominator==-1) break;
237 // Push as long as dominator remains a dominator.
238 // Iterate on times where at least one neighbor appears.
239 for (bool still_dominated = true; still_dominated; ) {
240 if(e_ngb_later_begin == e_ngb_later_end) {
241 dead = true; goto end_move;
242 }
243 if(!heapified) {
244 // Eagerly sorting can be slow
245 std::make_heap(e_ngb_later_begin, e_ngb_later_end, cmp1);
246 heapified=true;
247 }
248 time = e_ngb_later_begin->first; // first place it may become critical
249 // Update the neighborhood for this new time, while checking if any of the new neighbors break domination.
250 while (e_ngb_later_begin != e_ngb_later_end && e_ngb_later_begin->first <= time) {
251 Vertex w = e_ngb_later_begin->second;
252#ifdef GUDHI_COLLAPSE_USE_DENSE_ARRAY
253 if (neighbors_dense(dominator,w) > e_ngb_later_begin->first)
254 still_dominated = false;
255#else
256 auto& ngb_dom = neighbors[dominator];
257 auto wit = ngb_dom.find(w); // neighborhood may be open or closed, it does not matter
258 if (wit == ngb_dom.end() || wit->second > e_ngb_later_begin->first)
259 still_dominated = false;
260#endif
261 e_ngb.insert(w);
262 std::pop_heap(e_ngb_later_begin, e_ngb_later_end--, cmp1);
263 }
264 } // this doesn't seem to help that much...
265 }
266end_move:
267 if(dead) {
268 remove_neighbor(u, v);
269 } else if(start_time != time) {
270 delay_neighbor(u, v, time);
271 res.emplace_back(u, v, time);
272 } else {
273 res.emplace_back(u, v, input_time);
274 }
275 }
276 }
277 }
278
279 std::vector<Filtered_edge> output() {
280 return std::move(res);
281 }
282
283};
284
285template<class R> R to_range(R&& r) { return std::move(r); }
286template<class R, class T> R to_range(T&& t) { R r; r.insert(r.end(), t.begin(), t.end()); return r; }
287
288template<class FilteredEdgeRange, class Delay>
289auto flag_complex_collapse_edges(FilteredEdgeRange&& edges, Delay&&delay) {
290 // Would it help to label the points according to some spatial sorting?
291 auto first_edge_itr = std::begin(edges);
292 using Vertex = std::decay_t<decltype(std::get<0>(*first_edge_itr))>;
293 using Filtration_value = std::decay_t<decltype(std::get<2>(*first_edge_itr))>;
294 using Edge_collapser = Flag_complex_edge_collapser<Vertex, Filtration_value>;
295 if (first_edge_itr != std::end(edges)) {
296 auto edges2 = to_range<std::vector<typename Edge_collapser::Filtered_edge>>(std::forward<FilteredEdgeRange>(edges));
297#ifdef GUDHI_USE_TBB
298 // I think this sorting is always negligible compared to the collapse, but parallelizing it shouldn't hurt.
299 tbb::parallel_sort(edges2.begin(), edges2.end(),
300 [](auto const&a, auto const&b){return std::get<2>(a)>std::get<2>(b);});
301#else
302 std::sort(edges2.begin(), edges2.end(), [](auto const&a, auto const&b){return std::get<2>(a)>std::get<2>(b);});
303#endif
304 Edge_collapser edge_collapser;
305 edge_collapser.process_edges(edges2, std::forward<Delay>(delay));
306 return edge_collapser.output();
307 }
308 return std::vector<typename Edge_collapser::Filtered_edge>();
309}
310
329template<class FilteredEdgeRange> auto flag_complex_collapse_edges(const FilteredEdgeRange& edges) {
330 return flag_complex_collapse_edges(edges, [](auto const&d){return d;});
331}
332
333} // namespace collapse
334
335} // namespace Gudhi
336
337#endif // FLAG_COMPLEX_EDGE_COLLAPSER_H_
auto flag_complex_collapse_edges(const FilteredEdgeRange &edges)
Implicitly constructs a flag complex from edges as an input, collapses edges while preserving the per...
Definition: Flag_complex_edge_collapser.h:329
Value type for a filtration function on a cell complex.
Definition: FiltrationValue.h:20