graphnode.h 38.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
#ifndef _SPEL_BAYES_GRAPH_NODE_H_
#define _SPEL_BAYES_GRAPH_NODE_H_

#include <memory>
#include <vector>
#include <algorithm>
#include <iostream>
#include <deque>
#include <list>
#include <map>

12
#include "factor.h"
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48


struct colour_proxy_impl;
typedef std::shared_ptr<colour_proxy_impl> colour_proxy;
struct colour_proxy_impl {
    colour_proxy proxy;

    friend
        colour_proxy
        get_colour_impl(colour_proxy col)
        {
            while (col->proxy) { col = col->proxy; }
            return col;
        }

    friend
        colour_proxy
        assign_colour_impl(colour_proxy old_col, colour_proxy new_col)
        {
            get_colour_impl(old_col)->proxy = get_colour_impl(new_col);
            return old_col;
        }

    friend
        bool
        colour_equal(colour_proxy c1, colour_proxy c2)
        {
            return get_colour_impl(c1) == get_colour_impl(c2);
        }
};

inline
colour_proxy
create_colour() { return std::make_shared<colour_proxy_impl>(); }


49
template <typename V>
50
void
51
sort_and_unique(std::vector<V>& v)
52
53
54
55
56
57
{
    std::sort(v.begin(), v.end());
    v.erase(std::unique(v.begin(), v.end()), v.end());
}


58
enum node_type { Factor, Interface, Aggregate };
59

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
struct graph_type {
    typedef int variable_index_type;
    typedef size_t node_index_type;
    typedef std::vector<node_index_type> node_vec;
    typedef std::vector<variable_index_type> var_vec;

    node_vec rank;
    node_vec represented_by;
    std::vector<node_type> type;
    std::vector<colour_proxy> colour;
    std::vector<node_vec> neighbours_in;
    std::vector<node_vec> neighbours_out;
    std::vector<node_vec> inner_nodes;
    std::vector<var_vec> rules;
    var_vec variables;
75

76
77
    std::map<variable_index_type, node_index_type> interface_to_node;
    std::map<node_index_type, variable_index_type> node_to_interface;
78

79
80
    bool aggregate_cycles = true;
    bool generate_interfaces = true;
81

82
83
84
85
86
87
88
89
90
91
92
93
94
95
    bool is_aggregate(node_index_type node) const { return inner_nodes[node].size() > 1; }
    bool
        is_compound_interface(node_index_type node) const
        {
            if (is_aggregate(node)) {
                for (node_index_type n: inner_nodes[node]) {
                    if (type[n] != Interface) {
                        return false;
                    }
                }
                return true;
            }
            return false;
        }
96

97
98
99
100
101
102
103
104
105
106
107
108
109
    bool
        is_interface(node_index_type node) const
        {
            if (is_aggregate(node)) {
                for (node_index_type n: inner_nodes[node]) {
                    if (type[n] != Interface) {
                        return false;
                    }
                }
                return true;
            }
            return type[node] == Interface;
        }
110

111
112
113
114
115
    bool
        is_computable(node_index_type node) const
        {
            return !is_interface(node);
        }
116
117
118

    size_t size() const { return rank.size(); }

119
    node_vec
120
121
        active_nodes() const
        {
122
            node_vec ret;
123
            ret.reserve(represented_by.size());
124
            for (node_index_type i = 0; i < represented_by.size(); ++i) {
125
126
127
128
129
130
131
                if (represented_by[i] == i) {
                    ret.push_back(i);
                }
            }
            return ret;
        }

132
133
    node_vec
        resolve_vector(const node_vec& vec) const
134
        {
135
            node_vec ret;
136
            ret.reserve(vec.size());
137
            for (node_index_type i: vec) {
138
139
140
141
142
143
                ret.push_back(resolve(i));
            }
            sort_and_unique(ret);
            return ret;
        }

144
145
    node_vec
        expand(node_index_type node) const
146
        {
147
            node_vec ret;
148
149
150
151
152
            build_inner_nodes(ret, node);
            sort_and_unique(ret);
            return ret;
        }

153
154
    node_vec
        expand(node_vec nodes) const
155
        {
156
157
            node_vec ret;
            for (node_index_type node: nodes) {
158
159
160
161
162
163
                build_inner_nodes(ret, node);
            }
            sort_and_unique(ret);
            return ret;
        }

164
165
    var_vec
        interface_nodes(var_vec inputs) const
166
        {
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
            var_vec ret;
            for (variable_index_type v: inputs) {
                ret.push_back(resolve(interface_to_node.find(v)->second));
            }
            sort_and_unique(ret);
            return ret;
        }

    std::vector<std::pair<node_index_type, node_index_type>>
        active_edges() const
        {
            std::vector<std::pair<node_index_type, node_index_type>> ret;
            for (node_index_type n: active_nodes()) {
                for (node_index_type o: resolve_vector(neighbours_out[n])) {
                    ret.emplace_back(n, o);
                }
            }
            return ret;
        }

#if 0
    node_vec
        restrict_inputs(const node_vec& inputs, const node_vec& restriction) const
        {
            node_vec ret(inputs);
192
193
194
195
196
197
            auto it = std::set_intersection(inputs.begin(), inputs.end(), restriction.begin(), restriction.end(), ret.begin());
            ret.resize(it - ret.begin());
            return ret;
        }

    void
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
        restrict_inputs(const var_vec& inputs, const node_vec& restriction, var_vec& remaining, var_vec& imported) const
        {
            auto in = interface_nodes(inputs);
            remaining = in % restriction;
            imported = in - remaining;
            /*node_vec ret(inputs);*/
            /*auto it = std::set_intersection(inputs.begin(), inputs.end(), restriction.begin(), restriction.end(), ret.begin());*/
            /*ret.resize(it - ret.begin());*/
            /*return ret;*/
        }
#endif


    void
        dump_node(node_index_type n)
213
214
        {
            std::cout
215
                << '[' << rank[n] << "] " << (type[n] == Interface ? "INTERFACE " : (n == inner_nodes[n][0] ? "FACTOR " : "AGGREGATE ")) << n << std::endl
216
217
218
219
220
221
                << "  creation rule " << rules[n] << std::endl
                << "  represented by " << represented_by[n] << " (" << resolve(n) << ')' << std::endl
                << "  colour " << get_colour_impl(colour[n]) << std::endl
                << "  inputs " << neighbours_in[n] << " (" << resolve_vector(neighbours_in[n]) << ')' << std::endl
                << "  outputs " << neighbours_out[n] << " (" << resolve_vector(neighbours_out[n]) << ')' << std::endl
                << "  inner nodes " << inner_nodes[n] << std::endl
222
                << "  variable " << variables[n] << std::endl
223
224
                ;
            if (inner_nodes[n].size() > 1) {
225
226
227
228
229
                for (node_index_type i: inner_nodes[n]) {
                    /*var_vec remaining, imported;*/
                    /*restrict_inputs(rules[i], inner_nodes[n], remaining, imported);*/
                    /*std::cout << "  * rule for " << i << ": " << remaining << " / " << imported << std::endl;*/
                    std::cout << "  * rule for " << i << ": " << rules[i] << std::endl;
230
231
232
233
234
235
                }
            }
            std::cout
                << std::endl;
        }

236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#define DUMP_SZ(_x) << " * " #_x " " << _x.size() << std::endl

    void
        dump_sizes() const
        {
            std::cout
                DUMP_SZ(rank)
                DUMP_SZ(type)
                DUMP_SZ(rules)
                DUMP_SZ(colour)
                DUMP_SZ(variables)
                DUMP_SZ(inner_nodes)
                DUMP_SZ(neighbours_in)
                DUMP_SZ(neighbours_out)
                DUMP_SZ(represented_by)
                ;
        }

254
255
256
257
    void
        dump()
        {
            std::cout << "ALL NODES" << std::endl;
258
259
            dump_sizes();
            for (node_index_type i = 0; i < rank.size(); ++i) {
260
261
262
263
264
265
266
267
                dump_node(i);
            }
        }

    void
        dump_active()
        {
            std::cout << "ACTIVE NODES" << std::endl;
268
269
            dump_sizes();
            for (node_index_type i: active_nodes()) {
270
271
272
273
                dump_node(i);
            }
        }

274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
    void
        compute_ranks()
        {
            std::vector<bool> visited(rank.size(), false);
            compute_ranks(active_nodes(), visited);
        }

    void
        compute_ranks(const node_vec& nodes, std::vector<bool>& visited)
        {
            for (node_index_type n: nodes) {
                if (visited[n]) { continue; }
                visited[n] = true;
                auto nin = resolve_vector(neighbours_in[n]);
                compute_ranks(nin, visited);
                rank[n] = 0;
                if (nin.size()) {
                    for (node_index_type i: nin) {
                        if (rank[n] < rank[i]) {
                            rank[n] = rank[n];
                        }
                    }
                    ++rank[n];
                }
            }
        }

    void
        cleanup()
        {
            /*for (node_index_type n: active_nodes()) {*/
                /*if (type[n] == Interface && neighbours_out[n].size() == 0) {*/
                    /*represented_by[n] = (node_index_type) -1;*/
                /*}*/
                /*node_vec rm = {n};*/
                /*for (node_index_type i: neighbours_in[n]) {*/
                    /*neighbours_out[i] = neighbours_out[i] - rm;*/
                /*}*/
            /*}*/

            dump_active();
            auto edges = active_edges();
            for (const auto& e: edges) {
                if (is_interface(e.first) ^ !is_interface(e.second)) {
                    /* By construction, it's a factor->factor edge */
                    auto varset = variables_of(e.first) % variables_of(e.second);
                    std::cout << "edge between factor(graph)s " << e.first << " and " << e.second << " carries variables " << varset << std::endl;
                    node_index_type i;
                    if (varset.size() == 1) {
                        i = add_interface(node_vec{e.first}, varset.front());
                    } else {
                        node_vec iv;
                        iv.reserve(varset.size());
                        for (variable_index_type v: varset) {
                            iv.push_back(add_interface(node_vec{}, v));
                        }
                        i = add_node(node_vec{e.first}, node_vec{e.second}, var_vec{}, colour[e.first], Aggregate, iv, -1);
                        for (node_index_type ni: iv) {
                            represented_by[ni] = i;
                        }
                    }
                    filter_out_and_replace_by(neighbours_out[e.first], node_vec{e.second}, i);
                    filter_out_and_replace_by(neighbours_in[e.second], node_vec{e.first}, i);
                    dump_node(e.first);
                    dump_node(i);
                    dump_node(e.second);
                }
            }

            if (size()) {
                var_vec all_variables;
                std::vector<bool> var_represented(1 + *std::max_element(variables.begin(), variables.end()), false);
                node_vec A = active_nodes();
                for (node_index_type n: A) {
                    if (is_interface(n)) {
                        for (variable_index_type v: variables_of(n)) {
                            var_represented[v] = true;
                        }
                    }
                }

                for (node_index_type n: A) {
                    if (!is_interface(n)) {
                        for (variable_index_type v: variables_of(n)) {
                            if (var_represented[v]) { continue; }
                            node_index_type i = add_interface(node_vec{n}, v);
                            neighbours_out[n].push_back(i);
                        }
                    }
                }
            }
        }

    node_index_type
        add_node(const node_vec& in, const node_vec& out,
                 const var_vec& rule, colour_proxy col, node_type t,
                 const node_vec& inner, variable_index_type var)
371
        {
372
            node_index_type ret = rank.size();
373
            std::cout << "adding node " << ret << std::endl;
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
            neighbours_out.emplace_back(out);
            neighbours_in.emplace_back(in);
            rules.emplace_back(rule);
            colour.emplace_back(col);
            if (in.size()) {
                size_t r = 0;
                for (node_index_type i: in) {
                    r = std::max(r, rank[i]);
                }
                rank.push_back(r + 1);
            } else {
                rank.push_back(0);
            }
            /*std::cout << "rank=" << rank.back() << std::endl;*/
            type.push_back(t);
            variables.push_back(var);
390
            represented_by.push_back(ret);
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
            if (inner.size()) {
                inner_nodes.emplace_back(inner);
            } else {
                inner_nodes.emplace_back(node_vec{ret});
            }
            /*dump();*/
            return ret;
        }

    node_index_type
        resolve_interface(variable_index_type var)
        {
            auto it = interface_to_node.find(var);
            if (it == interface_to_node.end()) {
                /*std::cout << "resolve_interface(" << var << ") => new interface" << std::endl;*/
                return add_interface(node_vec{}, var);
            }
            /*std::cout << "resolve_interface(" << var << ") => resolve(" << it->second << ") = " << resolve(it->second) << std::endl;*/
            return resolve(it->second);
        }

    node_index_type
        add_interface(const node_vec& producer, variable_index_type var)
        {
            node_index_type ret = add_node(producer, node_vec{}, var_vec{}, create_colour(), Interface, node_vec{}, var);
            interface_to_node[var] = ret;
            node_to_interface[ret] = var;
            for (node_index_type p: producer) {
                neighbours_out[p].push_back(ret);
            }
            return ret;
        }

    node_index_type
        add_factor(const var_vec& rule, colour_proxy col,
                 variable_index_type var)
        {
            node_vec in, out;
            if (generate_interfaces) {
                for (variable_index_type v: rule) {
                    in.push_back(resolve_interface(v));
                }
            } else {
                for (variable_index_type v: rule) {
                    auto it = interface_to_node.find(v);
                    if (it != interface_to_node.end()) {
                        in.push_back(resolve(it->second));
                    }
                }
            }
            sort_and_unique(in);
            node_index_type ret = add_node(in, node_vec{}, rule, col, Factor, node_vec{}, var);
            for (node_index_type n: in) {
                neighbours_out[n].push_back(ret);
            }
            if (generate_interfaces) {
                node_index_type i = add_node(node_vec{ret}, node_vec{}, var_vec{}, colour[ret], Interface, node_vec{}, var);
                interface_to_node[var] = i;
                node_to_interface[i] = var;
                neighbours_out[ret].push_back(i);
            } else {
                interface_to_node[var] = ret;
                node_to_interface[ret] = var;
            }
            return ret;
        }

    node_index_type
        add_factor(variable_index_type var)
        {
            std::cout << "add_factor(" << var << ')' << std::endl;
            node_index_type ret = add_factor(var_vec{}, create_colour(), var);
            /*dump();*/
464
            return ret;
465
466
467
468
469
470
471
472
473
474
475
            /*node_index_type ret = rank.size();*/
            /*std::cout << "adding node " << ret << std::endl;*/
            /*neighbours_out.emplace_back();*/
            /*neighbours_in.emplace_back();*/
            /*rules.emplace_back();*/
            /*colour.emplace_back(create_colour());*/
            /*rank.push_back(0);*/
            /*type.push_back(Factor);*/
            /*represented_by.push_back(ret);*/
            /*inner_nodes.emplace_back(std::vector<node_index_type>{ret});*/
            /*return ret;*/
476
477
        }

478
479
    node_index_type
        resolve(node_index_type n) const
480
481
482
483
484
        {
            while (n != represented_by[n]) { n = represented_by[n]; }
            return n;
        }

485
486
    node_index_type
        add_factor(variable_index_type v1, variable_index_type var)
487
        {
488
489
490
491
492
493
494
495
496
497
498
499
500
            std::cout << "add_factor(" << v1 << ", " << var << ')' << std::endl;
            node_index_type p1r;
            if (!generate_interfaces) {
                auto it = interface_to_node.find(v1);
                if (it == interface_to_node.end()) {
                    auto ret = add_factor(var);
                    rules.back() = {v1};
                    return ret;
                } else {
                    p1r = resolve(it->second);
                }
            } else {
                p1r = resolve_interface(v1);
501
            }
502
503
504
            node_index_type ret = add_factor(var_vec{v1}, colour[p1r], var);
            /*compute_ranks();*/
            /*dump();*/
505
            return ret;
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
            /*node_index_type ret = rank.size();*/
            /*rules.emplace_back(var_vec{v1});*/
            /*if (p1r != p1) {*/
                /*neighbours_out[p1].push_back(ret);*/
                /*p1 = p1r;*/
            /*}*/
            /*std::cout << "adding node (single parent) " << ret << std::endl;*/
            /*neighbours_out.emplace_back();*/
            /*neighbours_in.emplace_back(std::vector<node_index_type>{p1});*/
            /*neighbours_out[p1].push_back(ret);*/
            /*colour.emplace_back(colour[p1]);*/
            /*rank.push_back(1 + rank[p1]);*/
            /*represented_by.push_back(ret);*/
            /*inner_nodes.emplace_back(std::vector<node_index_type>{ret});*/
            /*return ret;*/
521
522
        }

523
524
    node_index_type
        add_factor(variable_index_type v1,variable_index_type v2, variable_index_type var)
525
        {
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
            std::cout << "add_factor(" << v1 << ", " << v2 << ", " << var << ')' << std::endl;
            node_index_type p1r, p2r;
            if (!generate_interfaces) {
                auto it = interface_to_node.find(v1);
                if (it == interface_to_node.end()) {
                    node_index_type ret = add_factor(v2, var);
                    rules.back() = {v1, v2};
                    return ret;
                } else {
                    p1r = resolve(it->second);
                }
                it = interface_to_node.find(v2);
                if (it == interface_to_node.end()) {
                    node_index_type ret = add_factor(v1, var);
                    rules.back() = {v1, v2};
                    return ret;
                } else {
                    p2r = resolve(it->second);
                }
            } else {
                p1r = resolve_interface(v1);
                p2r = resolve_interface(v2);
            }

            if (p1r > p2r) {
                p1r ^= p2r;
                p2r ^= p1r;
                p1r ^= p2r;
            }

            node_index_type ret = add_factor(var_vec{v1, v2}, colour[p1r], var);

            bool cycle = false;
            if (p1r != p2r) {
                if (colour_equal(colour[p1r], colour[p2r])) {
                    /* cycle! */
                    std::cout << "cycle!" << std::endl;
                    cycle = true;
                } else {
                    std::cout << "no cycle." << std::endl;
                    assign_colour_impl(colour[p1r], colour[p2r]);
                }
            }

            if (cycle && aggregate_cycles) {
                std::cout << "search a path between " << p1r << " and " << p2r << std::endl;
                auto path = find_path_between_parents(p1r, p2r, ret);
                /*dump_active();*/
                std::cout << "Found path: "; for (size_t n: path) { std::cout << ' ' << n; } std::cout << std::endl;
                parents_of_max_rank aggr_first;
                size_t min_rank = 1 + rank[*std::min_element(path.begin(), path.end(), [this](node_index_type i1, node_index_type i2) { return rank[i1] < rank[i2]; })];
                while (path.size() > 2 && ((aggr_first = find_parents_of_max_rank(path)), !aggregate(*aggr_first.p1, *aggr_first.p2, ret, path, aggr_first.p1))) {
                    /*bool long_path = path.size() > 3;*/
                    /*bool rank_min_p1 = rank[*aggr_first.p1] == min_rank;*/
                    /*bool rank_min_p2 = rank[*aggr_first.p2] == min_rank;*/
                    /*std::cout << (*aggr_first.p1) << '/' << (*aggr_first.child) << '/' << (*aggr_first.p2) << " " << long_path << rank_min_p1 << rank_min_p2 << std::endl;*/
                    /*if (!(long_path && rank_min_p1 && !rank_min_p2)) {*/
                        path.erase(aggr_first.p1);
                    /*}*/
                    /*if (!(long_path && rank_min_p2 && !rank_min_p1)) {*/
                        path.erase(aggr_first.p2);
                    /*}*/
                    path.erase(aggr_first.child);
                }
                /*while (path.size() > 2) {*/
                    /*auto aggr_first = find_parents_of_max_rank(path);*/
                    /*if (!aggregate(*aggr_first.p1, *aggr_first.p2)) {*/
                        /*path.erase(aggr_first.p1);*/
                        /*path.erase(aggr_first.p2);*/
                    /*}*/
                    /*path.erase(aggr_first.child);*/
                /*}*/
            }

#if 0
            node_index_type ret = rank.size();
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655

            if (p1 > p2) {
                p1 ^= p2;
                p2 ^= p1;
                p1 ^= p2;
            }

            rules.emplace_back(std::vector<size_t>{p1, p2});

            if (p1r != p1) {
                neighbours_out[p1].push_back(ret);
                p1 = p1r;
            }
            if (p2r != p2) {
                neighbours_out[p2].push_back(ret);
                p2 = p2r;
            }

            if (p1 == p2) {
                return add_node(p1, false);
            }

            std::cout << "adding node " << ret << std::endl;

            if (p1 > p2) {
                p1 ^= p2;
                p2 ^= p1;
                p1 ^= p2;
            }

            neighbours_out[p1].push_back(ret);
            neighbours_out[p2].push_back(ret);

            neighbours_out.emplace_back();
            neighbours_in.push_back({p1, p2});

            /* compute rank */
            rank.push_back(1 + std::max(rank[p1], rank[p2]));

            bool cycle = false;
            /* compute/update colours */
            if (colour_equal(colour[p1], colour[p2])) {
                /* cycle! */
                /*std::cout << "cycle!" << std::endl;*/
                cycle = true;
            } else {
                assign_colour_impl(colour[p1], colour[p2]);
            }
            colour.push_back(colour[p1]);

            represented_by.push_back(ret);

            inner_nodes.emplace_back(std::vector<size_t>{ret});

656
            if (cycle && aggregate_cycles) {
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
                auto path = find_path_between_parents(p1, p2, ret);
                /*std::cout << "Found path:"; for (size_t n: path) { std::cout << ' ' << n; } std::cout << std::endl;*/
                parents_of_max_rank aggr_first;
                while (path.size() > 2 && ((aggr_first = find_parents_of_max_rank(path)), !aggregate(*aggr_first.p1, *aggr_first.p2, path, aggr_first.p1))) {
                    path.erase(aggr_first.p1);
                    path.erase(aggr_first.p2);
                    path.erase(aggr_first.child);
                }
                /*while (path.size() > 2) {*/
                    /*auto aggr_first = find_parents_of_max_rank(path);*/
                    /*if (!aggregate(*aggr_first.p1, *aggr_first.p2)) {*/
                        /*path.erase(aggr_first.p1);*/
                        /*path.erase(aggr_first.p2);*/
                    /*}*/
                    /*path.erase(aggr_first.child);*/
                /*}*/
            }
674
675
676
#endif
            /*compute_ranks();*/
            /*dump();*/
677
678
679
680
            return ret;
        }

    bool
681
        find_ascending_path(node_index_type p1, node_index_type p2, node_vec& path)
682
683
684
685
686
        {
            /*std::cout << "path from " << p1 << " to " << p2 << std::endl;*/
            if (p1 == p2) {
                return true;
            }
687
            for (node_index_type i: neighbours_in[p1]) {
688
689
690
691
692
693
694
695
696
                i = resolve(i);
                if (find_ascending_path(i, p2, path)) {
                    path.push_back(i);
                    return true;
                }
            }
            return false;
        }

697
698
    node_vec
        find_aggregate_chain(node_index_type p1, node_index_type p2)
699
        {
700
            node_vec path;
701
702
703
704
705
706
707
708
709
710
711
712
713
714
            if (rank[p1] > rank[p2] && find_ascending_path(p1, p2, path)) {
                /*std::cout << "found ascending path from " << p1 << " to " << p2 << std::endl;*/
                path.push_back(p1);
            } else if (rank[p2] > rank[p1] && find_ascending_path(p2, p1, path)) {
                /*std::cout << "found ascending path from " << p2 << " to " << p1 << std::endl;*/
                path.push_back(p2);
            } else {
                /*std::cout << "no ascending path between parents" << std::endl;*/
                path = {p1, p2};
            }
            return path;
        }

    bool
715
        filter_out(node_vec& vec, const node_vec& aggr)
716
        {
717
            node_vec tmp(vec.size());
718
719
720
721
722
723
724
            auto it = std::set_difference(vec.begin(), vec.end(), aggr.begin(), aggr.end(), tmp.begin());
            tmp.resize(it - tmp.begin());
            tmp.swap(vec);
            return tmp.size() != vec.size();
        }

    void
725
        filter_out_and_replace_by(node_vec& vec, const node_vec& aggr, node_index_type new_node)
726
        {
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
            std::cout << "filter_out_and_replace_by(" << vec << ", " << aggr << ", " << new_node << ')' << std::endl;
            /*if (filter_out(vec, aggr)) {*/
                /*vec.push_back(new_node);*/
            /*}*/
            node_vec tmp;
            tmp.reserve(vec.size());
            for (auto n: vec) {
                node_index_type nr = resolve(n);
                std::cout << "" << nr << " vs " << aggr << std::endl;
                if (std::find(aggr.begin(), aggr.end(), nr) == aggr.end()) {
                    tmp.push_back(n);
                }
            }
            sort_and_unique(tmp);
            if (tmp.size() != vec.size()) {
                tmp.push_back(new_node);
743
            }
744
            vec.swap(tmp);
745
746
747
748
            std::cout << " => " << vec << std::endl;
        }

    bool
749
        update_rank(node_index_type node)
750
        {
751
            node_index_type r = 0;
752
            if (neighbours_in[node].size()) {
753
                for (node_index_type i: neighbours_in[node]) {
754
755
756
757
758
759
760
761
762
763
764
765
766
767
                    if (rank[i] > r) {
                        r = rank[i];
                    }
                }
                ++r;
            }
            if (rank[node] != r) {
                rank[node] = r;
                return true;
            }
            return false;
        }

    void
768
        propagate_update_rank(node_index_type source)
769
        {
770
            std::deque<node_index_type> stack(neighbours_out[source].begin(), neighbours_out[source].end());
771
            while (stack.size()) {
772
                node_index_type n = stack.front();
773
774
775
776
777
778
779
780
781
782
                stack.pop_front();
                if (update_rank(n)) {
                    stack.insert(stack.end(), neighbours_out[n].begin(), neighbours_out[n].end());
                }
            }
        }

    void
        check_neighbours()
        {
783
784
            for (node_index_type i = 0; i < rank.size(); ++i) {
                for (node_index_type n: neighbours_in[i]) {
785
786
787
788
                    if (represented_by[n] != n) {
                        std::cout << "error neighbour_in[" << i << "] " << n << " is represented by " << represented_by[n] << std::endl;
                    }
                }
789
                for (node_index_type n: neighbours_out[i]) {
790
791
792
793
794
795
796
797
                    if (represented_by[n] != n) {
                        std::cout << "error neighbour_out[" << i << "] " << n << " is represented by " << represented_by[n] << std::endl;
                    }
                }
            }
        }

    bool
798
        recursive_path_finder(node_index_type node, node_index_type goal, std::list<node_index_type>& path, std::vector<bool>& visited)
799
        {
800
            std::cout << "recursive_path_finder(" << node << ", " << goal << ')' << std::endl;
801
802
803
804
            if (node == goal) {
                return true;
            }
            visited[node] = true;
805
            for (node_index_type n: neighbours_in[node]) {
806
807
808
809
810
811
                n = resolve(n);
                if (!visited[n] && recursive_path_finder(n, goal, path, visited)) {
                    path.push_back(n);
                    return true;
                }
            }
812
            for (node_index_type n: neighbours_out[node]) {
813
814
815
816
817
818
819
820
821
                n = resolve(n);
                if (!visited[n] && recursive_path_finder(n, goal, path, visited)) {
                    path.push_back(n);
                    return true;
                }
            }
            return false;
        }

822
823
    std::list<node_index_type>
        find_path_between_parents(node_index_type p1, node_index_type p2, node_index_type child)
824
825
826
        {
            std::vector<bool> visited(rank.size(), false);
            visited[child] = true;
827
            std::list<node_index_type> path;
828
829
830
831
832
833
834
835
            if (recursive_path_finder(p1, p2, path, visited)) {
                path.push_back(p1);
                path.push_back(child);
            }
            return path;
        }

    struct parents_of_max_rank {
836
        std::list<node_index_type>::iterator p1, child, p2;
837
838
839
    };

    parents_of_max_rank
840
841
842
843
844
845
846
847
848
849
850
851
852
        find_parents_of_max_rank(std::list<node_index_type>& path)
        {
            auto child = std::max_element(path.begin(), path.end(), [this](node_index_type n1, node_index_type n2) { return rank[n1] < rank[n2]; });
            auto p1 = child, p2 = child;
            if (child == path.begin()) {
                p1 = path.end();
            }
            --p1;
            ++p2;
            if (p2 == path.end()) {
                p2 = path.begin();
            }
            return {p1, child, p2};
853
854
855
        }

    void
856
        cleanup_nodeset(std::vector<node_index_type>& v)
857
        {
858
            for (node_index_type& i: v) { i = represented_by[i]; }
859
860
861
862
863
864
            sort_and_unique(v);
        }

    void
        cleanup_neighbours()
        {
865
            for (node_index_type i = 0; i < rank.size(); ++i) {
866
867
868
869
870
871
                cleanup_nodeset(neighbours_in[i]);
                cleanup_nodeset(neighbours_out[i]);
            }
        }

    void
872
        build_inner_nodes(std::vector<node_index_type>& all_nodes, node_index_type node) const
873
        {
874
            for (node_index_type i: inner_nodes[node]) {
875
876
877
878
879
880
881
882
                if (inner_nodes[i].size() > 1) {
                    build_inner_nodes(all_nodes, i);
                } else {
                    all_nodes.push_back(i);
                }
            }
        }

883
884
885
886
887
888
889
890
891
892
893
894
    var_vec
        variables_of(node_index_type n)
        {
            var_vec ret;
            for (node_index_type i: inner_nodes[resolve(n)]) {
                ret.insert(ret.end(), rules[i].begin(), rules[i].end());
                ret.push_back(variables[i]);
            }
            sort_and_unique(ret);
            return ret;
        }

895
    bool
896
        aggregate(node_index_type p1, node_index_type p2, node_index_type new_node, std::list<node_index_type>& path, std::list<node_index_type>::iterator pos)
897
898
899
        {
            p1 = resolve(p1);
            p2 = resolve(p2);
900
901
902
            node_index_type ret = rank.size();
            std::cout << "-----------------------------------------------------------------------------" << std::endl;
            std::cout << "adding aggregate " << ret << std::endl;
903
904
905
906
            colour.emplace_back(colour[p1]);
            rank.push_back(1 + std::min(rank[p1], rank[p2]));
            represented_by.push_back(ret);

907
            node_vec aggr = find_aggregate_chain(p1, p2);
908
909
            sort_and_unique(aggr);

910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
            std::cout << "initial aggr " << aggr << std::endl;

            bool aggregate_down = 0 && (type[p1] == Interface || is_compound_interface(p1)) && aggr.size() > 2;

            std::cout << "aggregate_down " << aggregate_down << std::endl;

            if (aggregate_down) {
                /*node_vec tmp;*/
                /*tmp.reserve(aggr.size());*/
                /*tmp.assign(aggr.begin() + 1, aggr.end());*/
                /*tmp.push_back(new_node);*/
                /*aggr.swap(tmp);*/
                aggr = aggr - node_vec{rank[p1] > rank[p2] ? p2 : p1};
                aggr.push_back(new_node);
            }

            std::cout << "aggr " << aggr << std::endl;

#if 1
            bool compound_interface = true;
            for (node_index_type n: aggr) {
                if (type[n] != Interface && !is_compound_interface(n)) {
                    compound_interface = false;
                    break;
                }
            }

            node_vec interface_inputs, remove_interfaces;
            if (1 && aggregate_down && !compound_interface) {
                for (node_index_type n: aggr) {
                    if (type[n] == Interface || is_compound_interface(n)) {
                        /*interface_inputs.insert(interface_inputs.end(), neighbours_in[n].begin(), neighbours_in[n].end());*/
                        /*if (!(neighbours_in[n].size() * neighbours_out[n].size()) || !((neighbours_in[n] % aggr).size() * (neighbours_out[n] % aggr).size())) {*/
                        /*std::cout << "interface " << n << " has inputs " << neighbours_in[n] << ", % " << (neighbours_in[n] % aggr) << std::endl;*/
                        /*if (!(neighbours_in[n] % aggr).size()) {*/
                            remove_interfaces.push_back(n);
                        /*}*/
                    }
                }
                sort_and_unique(interface_inputs);
                std::cout << "interface inputs " << interface_inputs << std::endl;
                aggr.insert(aggr.end(), interface_inputs.begin(), interface_inputs.end());
                sort_and_unique(aggr);
                std::cout << "remove interface " << remove_interfaces << std::endl;
                aggr = aggr - remove_interfaces;
            }
#endif

            node_vec all_nodes, nin, nout;
            for (node_index_type i: aggr) {
960
                all_nodes.insert(all_nodes.end(), inner_nodes[i].begin(), inner_nodes[i].end());
961
                for (node_index_type n: neighbours_in[i]) {
962
963
                    nin.push_back(resolve(n));
                }
964
                for (node_index_type n: neighbours_out[i]) {
965
966
967
968
                    nout.push_back(resolve(n));
                }
            }
            sort_and_unique(all_nodes);
969
970
971
972
973
974
975
976
977
978
979
980
981
982

            for (node_index_type i: remove_interfaces) {
                filter_out(neighbours_out[i], all_nodes);
                filter_out(neighbours_out[i], aggr);
            }

            sort_and_unique(nin); std::cout << "nin " << nout << std::endl; filter_out(nin, all_nodes); filter_out(nin, aggr); filter_out(nin, remove_interfaces);
            sort_and_unique(nout); std::cout << "nout " << nout << std::endl; filter_out(nout, all_nodes); filter_out(nout, aggr);
            std::cout
                << "ready to aggregate nodes " << all_nodes << std::endl
                << "           aggr" << aggr << std::endl
                << "  neighbours in" << nin << std::endl
                << " neighbours out" << nout << std::endl;
            /*for (node_index_type i: nin) {*/
983
984
                /*filter_out_and_replace_by(neighbours_out[i], aggr, ret);*/
            /*}*/
985
            /*for (node_index_type i: nout) {*/
986
987
988
989
990
991
992
                /*filter_out_and_replace_by(neighbours_in[i], aggr, ret);*/
            /*}*/

            inner_nodes.emplace_back(); inner_nodes.back().swap(all_nodes);
            neighbours_in.emplace_back(); neighbours_in.back().swap(nin);
            neighbours_out.emplace_back(); neighbours_out.back().swap(nout);
            rules.emplace_back();
993
994
            type.push_back(Aggregate);
            variables.push_back(-1);
995

996
            for (node_index_type i: aggr) {
997
998
999
                represented_by[i] = ret;
            }

1000
            var_vec inner_vars = variables_of(ret);