partitioning.cc
// LIC// ====================================================================
// LIC// This file forms part of oomph-lib, the object-oriented,
// LIC// multi-physics finite-element library, available
// LIC// at http://www.oomph-lib.org.
// LIC//
// LIC// Copyright (C) 2006-2024 Matthias Heil and Andrew Hazel
// LIC//
// LIC// This library is free software; you can redistribute it and/or
// LIC// modify it under the terms of the GNU Lesser General Public
// LIC// License as published by the Free Software Foundation; either
// LIC// version 2.1 of the License, or (at your option) any later version.
// LIC//
// LIC// This library is distributed in the hope that it will be useful,
// LIC// but WITHOUT ANY WARRANTY; without even the implied warranty of
// LIC// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// LIC// Lesser General Public License for more details.
// LIC//
// LIC// You should have received a copy of the GNU Lesser General Public
// LIC// License along with this library; if not, write to the Free Software
// LIC// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
// LIC// 02110-1301 USA.
// LIC//
// LIC// The authors may be contacted at oomph-lib@maths.man.ac.uk.
// LIC//
// LIC//====================================================================
#include <float.h>

#include "partitioning.h"
#include "mesh.h"
#include "refineable_mesh.h"
// Include to fill in additional_setup_shared_node_scheme() function
#include "refineable_mesh.template.cc"

#ifdef OOMPH_TRANSITION_TO_VERSION_3

// For the new METIS API we need to use symbols defined in the standard header
// which aren't available in the current frozen (old) version of METIS.
// Version 3 will (presumably) have this header in the include path as standard
#include "metis.h"

#endif

namespace oomph
{
  //====================================================================
  /// Namespace for METIS graph partitioning routines
  //====================================================================
  namespace METIS
  {
    /// Default function that translates spatial
    /// error into weight for METIS partitioning (unit weight regardless
    /// of input).
    void default_error_to_weight_fct(const double& spatial_error,
                                     const double& max_error,
                                     const double& min_error,
                                     int& weight)
    {
      weight = 1;
    }

    /// Function pointer to function that translates spatial
    /// error into weight for METIS partitioning.
    ErrorToWeightFctPt Error_to_weight_fct_pt = &default_error_to_weight_fct;

  } // namespace METIS
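
  // For illustration: a driver code could bias the partitioning by
  // installing its own weight function before calling METIS::partition_mesh().
  // This is a hypothetical user-side sketch (my_error_to_weight is not part
  // of this file); it maps errors linearly onto weights between 1 and 10:
  //
  //   void my_error_to_weight(const double& spatial_error,
  //                           const double& max_error,
  //                           const double& min_error,
  //                           int& weight)
  //   {
  //     // Weight elements with larger errors more heavily (DBL_MIN guards
  //     // against division by zero if all errors are identical)
  //     weight = 1 + int(9.0 * (spatial_error - min_error) /
  //                      (max_error - min_error + DBL_MIN));
  //   }
  //
  //   METIS::Error_to_weight_fct_pt = &my_error_to_weight;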


  //==================================================================
  /// Partition mesh uniformly by dividing elements
  /// equally over the partitions, in the order
  /// in which they are returned by the problem.
  /// On return, element_domain[ielem] contains the number
  /// of the domain [0,1,...,ndomain-1] to which
  /// element ielem has been assigned.
  //==================================================================
  void METIS::uniform_partition_mesh(Problem* problem_pt,
                                     const unsigned& ndomain,
                                     Vector<unsigned>& element_domain)
  {
    // Number of elements
    unsigned nelem = problem_pt->mesh_pt()->nelement();

#ifdef PARANOID
    if (nelem != element_domain.size())
    {
      std::ostringstream error_stream;
      error_stream << "element_domain Vector has wrong length " << nelem << " "
                   << element_domain.size() << std::endl;

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif


    // Uniform partitioning
    unsigned nel_per_domain = int(float(nelem) / float(ndomain));
    for (unsigned ielem = 0; ielem < nelem; ielem++)
    {
      unsigned idomain = unsigned(float(ielem) / float(nel_per_domain));
      element_domain[ielem] = idomain;
    }
  }
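
  // For illustration: with nelem=12 and ndomain=3 the above gives
  // nel_per_domain=4, so elements 0-3 are assigned to domain 0, elements
  // 4-7 to domain 1 and elements 8-11 to domain 2.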


  //==================================================================
  /// Use METIS to assign each element to a domain.
  /// On return, element_domain[ielem] contains the number
  /// of the domain [0,1,...,ndomain-1] to which
  /// element ielem has been assigned.
  /// - objective=0: minimise edgecut.
  /// - objective=1: minimise total communications volume.
  /// .
  /// Partitioning is based on the dual graph of the mesh.
  //==================================================================
  void METIS::partition_mesh(Problem* problem_pt,
                             const unsigned& ndomain,
                             const unsigned& objective,
                             Vector<unsigned>& element_domain)
  {
    // Global mesh
    Mesh* mesh_pt = problem_pt->mesh_pt();

    // Number of elements
    unsigned nelem = mesh_pt->nelement();

#ifdef PARANOID
    if (nelem != element_domain.size())
    {
      std::ostringstream error_stream;
      error_stream << "element_domain Vector has wrong length " << nelem << " "
                   << element_domain.size() << std::endl;

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Setup dual graph
    //------------------

    // Start timer
    clock_t cpu_start = clock();

    // Container to collect all elements associated with given global eqn
    // number
    std::map<unsigned, std::set<unsigned>> elements_connected_with_global_eqn;

    // Container for all unique global eqn numbers
    std::set<unsigned> all_global_eqns;

    // Loop over all elements
    for (unsigned e = 0; e < nelem; e++)
    {
      GeneralisedElement* el_pt = mesh_pt->element_pt(e);

      // Add all global eqn numbers
      unsigned ndof = el_pt->ndof();
      for (unsigned j = 0; j < ndof; j++)
      {
        // Get global eqn number
        unsigned eqn_number = el_pt->eqn_number(j);
        elements_connected_with_global_eqn[eqn_number].insert(e);
        all_global_eqns.insert(eqn_number);
      }
    }

    // Now reverse the lookup scheme to find out all elements
    // that are connected because they share the same global eqn
    Vector<std::set<unsigned>> connected_elements(nelem);

    // Counter for total number of entries in connected_elements structure
    unsigned count = 0;

    // Loop over all global eqns
    for (std::set<unsigned>::iterator it = all_global_eqns.begin();
         it != all_global_eqns.end();
         it++)
    {
      // Get set of elements connected with this data item
      std::set<unsigned> elements = elements_connected_with_global_eqn[*it];

      // Double loop over connected elements: Everybody's connected to
      // everybody
      for (std::set<unsigned>::iterator it1 = elements.begin();
           it1 != elements.end();
           it1++)
      {
        for (std::set<unsigned>::iterator it2 = elements.begin();
             it2 != elements.end();
             it2++)
        {
          if ((*it1) != (*it2))
          {
            connected_elements[(*it1)].insert(*it2);
          }
        }
      }
    }


    // Now convert into C-style packed array for interface with METIS
    int* xadj = new int[nelem + 1];
    Vector<int> adjacency_vector;

    // Reserve (too much) space
    adjacency_vector.reserve(count);

    // Initialise counters
    unsigned ientry = 0;

    // Loop over all elements
    for (unsigned e = 0; e < nelem; e++)
    {
      // First entry for current element
      xadj[e] = ientry;

      // Loop over elements that are connected to current element
      typedef std::set<unsigned>::iterator IT;
      for (IT it = connected_elements[e].begin();
           it != connected_elements[e].end();
           it++)
      {
        // Copy into adjacency array
        adjacency_vector.push_back(*it);

        // We've just made another entry
        ientry++;
      }

      // Entry after last entry for current element:
      xadj[e + 1] = ientry;
    }
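
    // For illustration: this is the compressed-sparse-row layout that METIS
    // expects. For a three-element mesh in which elements 0 and 2 are each
    // connected to element 1 only:
    //
    //   xadj             = {0, 1, 3, 4}
    //   adjacency_vector = {1, 0, 2, 1}
    //
    // i.e. the neighbours of element e are stored in entries
    // xadj[e],...,xadj[e+1]-1 of adjacency_vector.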

    // End timer
    clock_t cpu_end = clock();

    // Doc
    double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info << "CPU time for setup of METIS data structures [nelem="
               << nelem << "]: " << cpu0 << " sec" << std::endl;


    // If the adjacency vector is empty then the elements are
    // actually unconnected (can happen in dummy problems where
    // each element only has internal data). In that case the
    // partition is irrelevant and we may as well distribute the
    // elements in round-robin fashion
    if (adjacency_vector.size() == 0)
    {
      unsigned n_proc = problem_pt->communicator_pt()->nproc();
      oomph_info
        << "Note: All elements in the Problem's Mesh appear to be\n"
        << "unconnected. This happens, e.g. if all elements only have\n"
        << "internal Data. Bypassing metis and distributing elements\n"
        << "in round-robin fashion amongst the " << n_proc << " processors."
        << std::endl;
      for (unsigned e = 0; e < nelem; e++)
      {
        element_domain[e] = e % n_proc;
      }
      return;
    }
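
    // For illustration: with 10 unconnected elements on n_proc=4 processors
    // the round-robin assignment e % n_proc places elements 0,1,...,9 on
    // domains 0,1,2,3,0,1,2,3,0,1.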


    // Call METIS graph partitioner
    //-----------------------------

    // Start timer
    cpu_start = clock();

    // Number of vertices in graph
    int nvertex = nelem;

    // No vertex weights
    int* vwgt = 0;

    // No edge weights
    int* adjwgt = 0;

    // Flag indicating that graph isn't weighted: 0; vertex weights only: 2
    // Note that wgtflag==2 requires nodal weights to be stored in vwgt.
    int wgtflag = 0;

    // Use C-style numbering (first array entry is zero)
    int numflag = 0;

    // Number of desired partitions
    int nparts = ndomain;

    // Use default options
    int* options = new int[10];
    options[0] = 0;

#ifdef OOMPH_TRANSITION_TO_VERSION_3
    switch (objective)
    {
      case 0:
        // Edge-cut minimisation
        options[0] = METIS_OBJTYPE_CUT;
        break;

      case 1:
        // Communication volume minimisation
        options[0] = METIS_OBJTYPE_VOL;
        break;

      default:
        std::ostringstream error_stream;
        error_stream << "Wrong objective for METIS. objective = " << objective
                     << std::endl;

        throw OomphLibError(
          error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Number of cut edges in graph
    int* edgecut = new int[nelem];

    // Array containing the partition information
    int* part = new int[nelem];

    // Can we get an error estimate?

    unsigned n_mesh = problem_pt->nsub_mesh();

    if (n_mesh == 0)
    {
      RefineableMeshBase* mmesh_pt = dynamic_cast<RefineableMeshBase*>(mesh_pt);
      if (mmesh_pt != 0)
      {
        // Bias distribution?
        if (Error_to_weight_fct_pt != &default_error_to_weight_fct)
        {
          oomph_info
            << "Biasing element distribution via spatial error estimate\n";

          // Adjust flag and provide storage for weights
          wgtflag = 2;
          vwgt = new int[nelem];

          // Get error for all elements
          Vector<double> elemental_error(nelem);
          mmesh_pt->spatial_error_estimator_pt()->get_element_errors(
            mesh_pt, elemental_error);

          double max_error = *(
            std::max_element(elemental_error.begin(), elemental_error.end()));
          double min_error = *(
            std::min_element(elemental_error.begin(), elemental_error.end()));

          // Bias weights
          int weight = 1;
          for (unsigned e = 0; e < nelem; e++)
          {
            // Translate error into weight
            Error_to_weight_fct_pt(
              elemental_error[e], max_error, min_error, weight);
            vwgt[e] = weight;
          }
        }
      }
    }
    else // There are submeshes
    {
      // Are any of the submeshes refineable?
      bool refineable_submesh_exists = false;
      // Vector to store "start and end point" for loops in submeshes
      Vector<unsigned> loop_helper(n_mesh + 1);
      loop_helper[0] = 0;

      // Loop over submeshes
      for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
      {
        // Store the end of the loop
        loop_helper[i_mesh + 1] =
          problem_pt->mesh_pt(i_mesh)->nelement() + loop_helper[i_mesh];

        RefineableMeshBase* mmesh_pt =
          dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh));
        if (mmesh_pt != 0)
        {
          refineable_submesh_exists = true;
        }
      }

      // If a refineable submesh exists
      if (refineable_submesh_exists)
      {
        // Bias distribution?
        if (Error_to_weight_fct_pt != &default_error_to_weight_fct)
        {
          oomph_info
            << "Biasing element distribution via spatial error estimate\n";

          // Adjust flag and provide storage for weights
          wgtflag = 2;
          vwgt = new int[nelem];

          // Loop over submeshes
          for (unsigned i_mesh = 0; i_mesh < n_mesh; i_mesh++)
          {
            RefineableMeshBase* mmesh_pt =
              dynamic_cast<RefineableMeshBase*>(problem_pt->mesh_pt(i_mesh));
            if (mmesh_pt != 0)
            {
              // Get error for all elements
              unsigned nsub_elem = problem_pt->mesh_pt(i_mesh)->nelement();
              Vector<double> elemental_error(nsub_elem);
              mmesh_pt->spatial_error_estimator_pt()->get_element_errors(
                problem_pt->mesh_pt(i_mesh), elemental_error);

              double max_error = *(std::max_element(elemental_error.begin(),
                                                    elemental_error.end()));
              double min_error = *(std::min_element(elemental_error.begin(),
                                                    elemental_error.end()));

              // Bias weights
              int weight = 1;
              unsigned start = loop_helper[i_mesh];
              unsigned end = loop_helper[i_mesh + 1];
              for (unsigned e = start; e < end; e++)
              {
                unsigned error_index = e - start;
                // Translate error into weight
                Error_to_weight_fct_pt(
                  elemental_error[error_index], max_error, min_error, weight);
                vwgt[e] = weight;
              }
            }
            else // This mesh is not refineable
            {
              // There's no error estimator, so use the default weight
              int weight = 1;
              unsigned start = loop_helper[i_mesh];
              unsigned end = loop_helper[i_mesh + 1];
              for (unsigned e = start; e < end; e++)
              {
                vwgt[e] = weight;
              }
            }
          }
        }
      }
    }

#ifdef OOMPH_TRANSITION_TO_VERSION_3

    // Call partitioner
    METIS_PartGraphKway(&nvertex,
                        xadj,
                        &adjacency_vector[0],
                        vwgt,
                        adjwgt,
                        &wgtflag,
                        &numflag,
                        &nparts,
                        options,
                        edgecut,
                        part);
#else
    // original code to delete in version 3

    // Call partitioner
    if (objective == 0)
    {
      // Partition with the objective of minimising the edge cut
      METIS_PartGraphKway(&nvertex,
                          xadj,
                          &adjacency_vector[0],
                          vwgt,
                          adjwgt,
                          &wgtflag,
                          &numflag,
                          &nparts,
                          options,
                          edgecut,
                          part);
    }
    else if (objective == 1)
    {
      // Partition with the objective of minimising the total communication
      // volume
      METIS_PartGraphVKway(&nvertex,
                           xadj,
                           &adjacency_vector[0],
                           vwgt,
                           adjwgt,
                           &wgtflag,
                           &numflag,
                           &nparts,
                           options,
                           edgecut,
                           part);
    }
    else
    {
      std::ostringstream error_stream;
      error_stream << "Wrong objective for METIS. objective = " << objective
                   << std::endl;

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif
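
    // For illustration: METIS returns one entry of part per graph vertex,
    // i.e. per element. With nparts=2 a possible outcome is
    // part = {0, 0, 1, 1, ...}: elements 0 and 1 go to domain 0, elements
    // 2 and 3 to domain 1, with the chosen objective minimised subject to
    // (roughly) balanced total vertex weight per domain.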

#ifdef PARANOID
    std::vector<bool> done(nparts, false);
#endif

    // Copy across
    for (unsigned e = 0; e < nelem; e++)
    {
      element_domain[e] = part[e];
#ifdef PARANOID
      done[part[e]] = true;
#endif
    }


#ifdef PARANOID
    // Check
    std::ostringstream error_stream;
    bool shout = false;
    for (int p = 0; p < nparts; p++)
    {
      if (!done[p])
      {
        shout = true;
        error_stream << "No elements on processor " << p
                     << " when trying to partition " << nelem
                     << " elements over " << nparts << " processors!\n";
      }
    }
    if (shout)
    {
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif


    // End timer
    cpu_end = clock();

    // Doc
    double cpu1 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info << "CPU time for METIS mesh partitioning [nelem="
               << nelem << "]: " << cpu1 << " sec" << std::endl;


    // Cleanup
    delete[] xadj;
    delete[] part;
    delete[] edgecut;
    delete[] options;
  }
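
  // For illustration: a typical driver-level call (hypothetical set-up code;
  // in practice this function is usually invoked indirectly when a Problem
  // is distributed). It partitions the Problem's global mesh into four
  // domains, minimising the edgecut:
  //
  //   unsigned ndomain = 4;
  //   unsigned objective = 0; // 0: minimise edgecut, 1: minimise comms volume
  //   Vector<unsigned> element_domain(problem.mesh_pt()->nelement());
  //   METIS::partition_mesh(&problem, ndomain, objective, element_domain);
  //
  //   // element_domain[e] now holds the domain [0,...,ndomain-1] of element e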


#ifdef OOMPH_HAS_MPI


  //==================================================================
  /// Use METIS to assign each element in an already-distributed mesh
  /// to a domain. On return, element_domain_on_this_proc[e] contains the
  /// number of the domain [0,1,...,ndomain-1] to which non-halo element e on
  /// THE CURRENT PROCESSOR ONLY has been assigned. The order of the non-halo
  /// elements is the same as in the Problem's mesh, with the halo
  /// elements being skipped.
  /// Objective:
  /// - objective=0: minimise edgecut.
  /// - objective=1: minimise total communications volume.
  /// .
  /// The partitioning is based on the dof graph of the complete mesh, i.e.
  /// it takes into account which global equation numbers are affected by
  /// each element and connects elements which affect the same global
  /// equation number. Partitioning is done such that all elements associated
  /// with the same tree root move together. Non-refineable elements are
  /// treated as their own root elements. If the optional boolean
  /// flag is set to true (it defaults to false) each processor
  /// assigns a dumb-but-repeatable equidistribution of its non-halo
  /// elements over the domains and outputs the input that would have
  /// gone into METIS in the file metis_input_for_validation.dat
  //==================================================================
  void METIS::partition_distributed_mesh(
    Problem* problem_pt,
    const unsigned& objective,
    Vector<unsigned>& element_domain_on_this_proc,
    const bool& bypass_metis)
  {
    // Start timer
    clock_t cpu_start = clock();

    // Communicator
    OomphCommunicator* comm_pt = problem_pt->communicator_pt();

    // Number of processors / domains
    unsigned n_proc = comm_pt->nproc();
    unsigned my_rank = comm_pt->my_rank();

    // Global mesh
    Mesh* mesh_pt = problem_pt->mesh_pt();

    // Total number of elements (halo and nonhalo) on this proc
    unsigned n_elem = mesh_pt->nelement();

    // Get elemental assembly times
    Vector<double> elemental_assembly_time =
      problem_pt->elemental_assembly_time();

#ifdef PARANOID
    unsigned n = elemental_assembly_time.size();
    if ((n != 0) && (n != n_elem))
    {
      std::ostringstream error_stream;
      error_stream << "Number of elements doesn't match the \n"
                   << "number of elemental assembly times: " << n_elem << " "
                   << n << std::endl;
      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // Can we base load balancing on assembly times?
    bool can_load_balance_on_assembly_times = false;
    if (elemental_assembly_time.size() != 0)
    {
      can_load_balance_on_assembly_times = true;
    }

    // Storage for global eqn numbers on current processor
    std::set<unsigned> global_eqns_on_this_proc;

    // Storage for pointers to root elements that are connected with given
    // eqn number -- assembled on local processor
    std::map<unsigned, std::set<GeneralisedElement*>>
      root_elements_connected_with_global_eqn_on_this_proc;

    // Storage for long sequence of equation numbers as encountered
    // by the root elements on this processor
    Vector<unsigned> eqn_numbers_with_root_elements_on_this_proc;

    // Reserve number of elements x average/estimate (?) for number of dofs
    // per element
    eqn_numbers_with_root_elements_on_this_proc.reserve(n_elem * 9);

    // Storage for the number of eqn numbers associated with each
    // root element on this processor -- once this and the previous
    // container have been collected from all processors we're
    // able to reconstruct which root element (in the nominal "global" mesh)
    // is connected with which global equations
    Vector<unsigned> number_of_dofs_for_root_element;
    number_of_dofs_for_root_element.reserve(n_elem);

    // Ditto for number of "leaf" elements connected with each root
    Vector<unsigned> number_of_non_halo_elements_for_root_element;
    number_of_non_halo_elements_for_root_element.reserve(n_elem);

    // Ditto for total assembly time of "leaf" elements connected with each
    // root
    Vector<double> total_assembly_time_for_root_element;
    total_assembly_time_for_root_element.reserve(n_elem);

    // Map storing the number of the root elements on this processor
    // (offset by one to bypass the zero default).
    std::map<GeneralisedElement*, unsigned> root_el_number_plus_one;

    // Loop over non-halo elements on this processor
    unsigned number_of_root_elements = 0;
    unsigned number_of_non_halo_elements = 0;
    for (unsigned e = 0; e < n_elem; e++)
    {
      double el_assembly_time = 0.0;
      GeneralisedElement* el_pt = mesh_pt->element_pt(e);
      if (!el_pt->is_halo())
      {
        if (can_load_balance_on_assembly_times)
        {
          el_assembly_time = elemental_assembly_time[e];
        }

        // Get the associated root element which is either...
        GeneralisedElement* root_el_pt = 0;
        RefineableElement* ref_el_pt = dynamic_cast<RefineableElement*>(el_pt);
        if (ref_el_pt != 0)
        {
          //...the actual root element
          root_el_pt = ref_el_pt->root_element_pt();
        }
        // ...or the element itself
        else
        {
          root_el_pt = el_pt;
        }

        // Have we already encountered this root element?
        // (offset of one to bypass the default return of zero)
        bool already_encountered = false;
        unsigned root_el_number = root_el_number_plus_one[root_el_pt];
        if (root_el_number == 0)
        {
          // This is a new one
          already_encountered = false;

          // Give it a number
          number_of_root_elements++;
          root_el_number_plus_one[root_el_pt] = number_of_root_elements;

          // Remove offset
          root_el_number = number_of_root_elements - 1;
        }
        else
        {
          // We've already visited this one before...
          already_encountered = true;

          // Remove offset
          root_el_number -= 1;
        }


        // Get global equation numbers of actual element
        unsigned n_dof = el_pt->ndof();
        for (unsigned i = 0; i < n_dof; i++)
        {
          unsigned eqn_no = el_pt->eqn_number(i);

          // Record which root elements are connected with this eqn number
          root_elements_connected_with_global_eqn_on_this_proc[eqn_no].insert(
            root_el_pt);

          // Record all global eqn numbers on this processor
          global_eqns_on_this_proc.insert(eqn_no);

          // Add eqn number of the current element to the long sequence
          // of eqn numbers
          eqn_numbers_with_root_elements_on_this_proc.push_back(eqn_no);
        }

        // Now record how many equations are associated with the current
        // non-halo element
        if (already_encountered)
        {
          number_of_dofs_for_root_element[root_el_number] += n_dof;
          number_of_non_halo_elements_for_root_element[root_el_number]++;
          total_assembly_time_for_root_element[root_el_number] +=
            el_assembly_time;
        }
        else
        {
          number_of_dofs_for_root_element.push_back(n_dof);
          number_of_non_halo_elements_for_root_element.push_back(1);
          total_assembly_time_for_root_element.push_back(el_assembly_time);
        }

        // Bump up number of non-halos
        number_of_non_halo_elements++;
      }
    }

    // Tell everybody how many root elements
    // are on each processor
    unsigned root_processor = 0;
    Vector<int> number_of_root_elements_on_each_proc(n_proc, 0);
    MPI_Allgather(&number_of_root_elements,
                  1,
                  MPI_INT,
                  &number_of_root_elements_on_each_proc[0],
                  1,
                  MPI_INT,
                  comm_pt->mpi_comm());


    // In the big sequence of concatenated root elements (enumerated
    // individually on the various processors) where do the root elements from
    // a given processor start? Also figure out how many root elements there
    // are in total by summing up their numbers
    Vector<int> start_index(n_proc, 0);
    unsigned total_number_of_root_elements = 0;
    for (unsigned i_proc = 0; i_proc < n_proc; i_proc++)
    {
      total_number_of_root_elements +=
        number_of_root_elements_on_each_proc[i_proc];
      if (i_proc != 0)
      {
        start_index[i_proc] = total_number_of_root_elements -
                              number_of_root_elements_on_each_proc[i_proc];
      }
      else
      {
        start_index[0] = 0;
      }
    }
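
    // For illustration: if processors 0, 1 and 2 hold 3, 5 and 2 root
    // elements respectively, then total_number_of_root_elements = 10 and
    // start_index = {0, 3, 8}, i.e. processor p's root elements occupy
    // slots start_index[p],...,
    // start_index[p]+number_of_root_elements_on_each_proc[p]-1 of the
    // concatenated (processor-by-processor) ordering.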


    // How many global equations are held on this processor?
    unsigned n_eqns_on_this_proc =
      eqn_numbers_with_root_elements_on_this_proc.size();

    // Gather this information for all processors:
    // n_eqns_on_each_proc[iproc] now contains the number of global
    // equations held on processor iproc.
    Vector<int> n_eqns_on_each_proc(n_proc, 0);
    MPI_Allgather(&n_eqns_on_this_proc,
                  1,
                  MPI_INT,
                  &n_eqns_on_each_proc[0],
                  1,
                  MPI_INT,
                  comm_pt->mpi_comm());


    // In the big sequence of equation numbers from the root elements
    // (enumerated individually on the various processors) where do the
    // equation numbers associated with the root elements from a given
    // processor start? Also figure out how long the sequence of equation
    // numbers is
    Vector<int> start_eqns_index(n_proc, 0);
    unsigned total_n_eqn = 0;
    for (unsigned i_proc = 0; i_proc < n_proc; i_proc++)
    {
      total_n_eqn += n_eqns_on_each_proc[i_proc];
      if (i_proc != 0)
      {
        start_eqns_index[i_proc] = total_n_eqn - n_eqns_on_each_proc[i_proc];
      }
      else
      {
        start_eqns_index[0] = 0;
      }
    }


    // Big vector that contains the number of dofs for each root element
    // (concatenated in processor-by-processor order)
    Vector<unsigned> number_of_dofs_for_global_root_element(
      total_number_of_root_elements);
    // Create at least one entry so we don't get a seg fault below
    if (number_of_dofs_for_root_element.size() == 0)
    {
      number_of_dofs_for_root_element.resize(1);
    }
    MPI_Gatherv(
      &number_of_dofs_for_root_element[0], // pointer to first entry in
                                           // vector to be gathered on root
      number_of_root_elements, // Number of entries to be sent
                               // from current processor
      MPI_UNSIGNED,
      &number_of_dofs_for_global_root_element[0], // Target -- this will
                                                  // store the concatenated
                                                  // vectors sent from
                                                  // everywhere
      &number_of_root_elements_on_each_proc[0], // Pointer to
                                                // vector containing
                                                // the length of the
                                                // vectors received
                                                // from elsewhere
      &start_index[0], // "offset" for storage of vector received
                       // from various processors in the global
                       // concatenated vector stored on root
      MPI_UNSIGNED,
      root_processor,
      comm_pt->mpi_comm());


    // ditto for number of non-halo elements associated with root element
    Vector<unsigned> number_of_non_halo_elements_for_global_root_element(
      total_number_of_root_elements);

    // Create at least one entry so we don't get a seg fault below
    if (number_of_non_halo_elements_for_root_element.size() == 0)
    {
      number_of_non_halo_elements_for_root_element.resize(1);
    }
    MPI_Gatherv(&number_of_non_halo_elements_for_root_element[0],
                // pointer to first entry in
                // vector to be gathered on root
                number_of_root_elements, // Number of entries to be sent
                                         // from current processor
                MPI_UNSIGNED,
                &number_of_non_halo_elements_for_global_root_element[0],
                // Target -- this will
                // store the concatenated
                // vectors sent from
                // everywhere
                &number_of_root_elements_on_each_proc[0], // Pointer to
                                                          // vector containing
                                                          // the length of the
                                                          // vectors received
                                                          // from elsewhere
                &start_index[0], // "offset" for storage of vector received
                                 // from various processors in the global
                                 // concatenated vector stored on root
                MPI_UNSIGNED,
                root_processor,
                comm_pt->mpi_comm());


    // ditto for assembly times of elements associated with root element
    Vector<double> total_assembly_time_for_global_root_element(
      total_number_of_root_elements);

    // Create at least one entry so we don't get a seg fault below
    if (total_assembly_time_for_root_element.size() == 0)
    {
      total_assembly_time_for_root_element.resize(1);
    }
    MPI_Gatherv(&total_assembly_time_for_root_element[0],
                // pointer to first entry in
                // vector to be gathered on root
                number_of_root_elements, // Number of entries to be sent
                                         // from current processor
                MPI_DOUBLE,
                &total_assembly_time_for_global_root_element[0],
                // Target -- this will
                // store the concatenated
                // vectors sent from
                // everywhere
                &number_of_root_elements_on_each_proc[0], // Pointer to
                                                          // vector containing
                                                          // the length of the
                                                          // vectors received
                                                          // from elsewhere
                &start_index[0], // "offset" for storage of vector received
                                 // from various processors in the global
                                 // concatenated vector stored on root
                MPI_DOUBLE,
                root_processor,
                comm_pt->mpi_comm());


    // Big vector to store the long sequence of global equation numbers
    // associated with the long sequence of root elements
    Vector<unsigned> eqn_numbers_with_root_elements(total_n_eqn);

    // Create at least one entry so we don't get a seg fault below
    if (eqn_numbers_with_root_elements_on_this_proc.size() == 0)
    {
      eqn_numbers_with_root_elements_on_this_proc.resize(1);
    }
    MPI_Gatherv(&eqn_numbers_with_root_elements_on_this_proc[0],
                n_eqns_on_this_proc,
                MPI_UNSIGNED,
                &eqn_numbers_with_root_elements[0],
                &n_eqns_on_each_proc[0],
                &start_eqns_index[0],
                MPI_UNSIGNED,
                root_processor,
                comm_pt->mpi_comm());

    // Doc
    clock_t cpu_end = clock();

    double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info
      << "CPU time for global setup of METIS data structures [nroot_elem="
      << total_number_of_root_elements << "]: " << cpu0 << " sec" << std::endl;


    // Now the root processor has gathered all the data needed to establish
    // the root element connectivity (as in the serial case) so use METIS
    // to determine "partitioning" for non-uniformly refined mesh
    //----------------------------------------------------------------------

    // Vector to store target domain for each of the root elements
    // (concatenated in processor-by-processor order)
    Vector<unsigned> root_element_domain(total_number_of_root_elements, 0);
    if (my_rank == root_processor) //--
    {
      // Start timer
      clock_t cpu_start = clock();

      // Repeat the steps used in the serial code: Storage for
      // the global equations (on root processor)
      std::set<unsigned> all_global_eqns_root_processor;

      // Set of root elements (as enumerated in the processor-by-processor
      // order) associated with given global equation number
      std::map<unsigned, std::set<unsigned>>
        root_elements_connected_with_global_eqn_on_root_processor;

      // Retrace the steps of the serial code: Who's connected with who
      unsigned count_all = 0;
      for (unsigned e = 0; e < total_number_of_root_elements; e++)
      {
        unsigned n_eqn_no = number_of_dofs_for_global_root_element[e];
        for (unsigned n = 0; n < n_eqn_no; n++)
        {
          unsigned eqn_no = eqn_numbers_with_root_elements[count_all];
          count_all++;
          root_elements_connected_with_global_eqn_on_root_processor[eqn_no]
            .insert(e);
          all_global_eqns_root_processor.insert(eqn_no);
        }
      }

      // Number of domains
      unsigned ndomain = n_proc;

      // Now reverse the lookup scheme to find out all root elements
      // that are connected because they share the same global eqn
      Vector<std::set<unsigned>> connected_root_elements(
        total_number_of_root_elements);

      // Counter for total number of entries in connected_root_elements
      // structure
      unsigned count = 0;

      // Loop over all global eqns
      for (std::set<unsigned>::iterator it =
             all_global_eqns_root_processor.begin();
           it != all_global_eqns_root_processor.end();
           it++)
      {
        // Get set of root elements connected with this data item
        std::set<unsigned> root_elements =
          root_elements_connected_with_global_eqn_on_root_processor[*it];

        // Double loop over connected root elements: Everybody's connected to
        // everybody
        for (std::set<unsigned>::iterator it1 = root_elements.begin();
             it1 != root_elements.end();
             it1++)
        {
          for (std::set<unsigned>::iterator it2 = root_elements.begin();
               it2 != root_elements.end();
               it2++)
          {
            if ((*it1) != (*it2))
            {
              connected_root_elements[(*it1)].insert(*it2);
            }
          }
        }
      }

      // End timer
      clock_t cpu_end = clock();

      // Doc
      double cpu0b = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
      oomph_info << "CPU time for setup of connected elements (load balance) "
                    "[nroot_elem="
                 << total_number_of_root_elements << "]: " << cpu0b << " sec"
                 << std::endl;

      // Now convert into C-style packed array for interface with METIS
      cpu_start = clock();
      int* xadj = new int[total_number_of_root_elements + 1];
      Vector<int> adjacency_vector;

      // Reserve (too much) space
      adjacency_vector.reserve(count);

      // Initialise counters
      unsigned ientry = 0;

      // Loop over all elements
      for (unsigned e = 0; e < total_number_of_root_elements; e++)
      {
        // First entry for current element
        xadj[e] = ientry;

        // Loop over elements that are connected to current element
        typedef std::set<unsigned>::iterator IT;
        for (IT it = connected_root_elements[e].begin();
             it != connected_root_elements[e].end();
             it++)
        {
          // Copy into adjacency array
          adjacency_vector.push_back(*it);

          // We've just made another entry
          ientry++;
        }

        // Entry after last entry for current element:
        xadj[e + 1] = ientry;
      }

      // End timer
      cpu_end = clock();

      // Doc
      double cpu0 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
      oomph_info << "CPU time for setup of METIS data structures (load "
                    "balance) [nroot_elem="
                 << total_number_of_root_elements << "]: " << cpu0 << " sec"
                 << std::endl;


      // Call METIS graph partitioner
      //-----------------------------

      // Start timer
      cpu_start = clock();

      // Number of vertices in graph
      int nvertex = total_number_of_root_elements;

      // No vertex weights
      int* vwgt = 0;

      // No edge weights
      int* adjwgt = 0;

      // Flag indicating that graph isn't weighted: 0; vertex weights only: 2
      // Note that wgtflag==2 requires nodal weights to be stored in vwgt.
      int wgtflag = 0;

      // Use C-style numbering (first array entry is zero)
      int numflag = 0;

      // Number of desired partitions
      int nparts = ndomain;

      // Use default options
      int* options = new int[10];
      options[0] = 0;

#ifdef OOMPH_TRANSITION_TO_VERSION_3
      switch (objective)
      {
        case 0:
          // Edge-cut minimisation
          options[0] = METIS_OBJTYPE_CUT;
          break;

        case 1:
          // Communication volume minimisation
          options[0] = METIS_OBJTYPE_VOL;
          break;

        default:
          std::ostringstream error_stream;
          error_stream << "Wrong objective for METIS. objective = " << objective
                       << std::endl;

          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
      }
#endif

      // Number of cut edges in graph
      int* edgecut = new int[total_number_of_root_elements];

      // Array containing the partition information
      int* part = new int[total_number_of_root_elements];

      // Now bias distribution by giving each root element
      // a weight equal to the number of elements associated with it

      // Adjust flag and provide storage for weights
      wgtflag = 2;
      vwgt = new int[total_number_of_root_elements];


      // Load balance based on assembly times of all leaf
      // elements associated with root
      if (can_load_balance_on_assembly_times)
      {
        oomph_info << "Basing distribution on assembly times of elements\n";

        // Normalise
        double min_time = *(
          std::min_element(total_assembly_time_for_global_root_element.begin(),
                           total_assembly_time_for_global_root_element.end()));
#ifdef PARANOID
        if (min_time == 0.0)
        {
          std::ostringstream error_stream;
          error_stream << "Minimum assemble time for element is zero!\n";
          throw OomphLibError(error_stream.str(),
                              OOMPH_CURRENT_FUNCTION,
                              OOMPH_EXCEPTION_LOCATION);
        }
#endif

        // Bypass METIS (usually for validation) and use made-up but
        // repeatable timings
        if (bypass_metis)
        {
          for (unsigned e = 0; e < total_number_of_root_elements; e++)
          {
            vwgt[e] = e;
          }
        }
        else
        {
          for (unsigned e = 0; e < total_number_of_root_elements; e++)
          {
            // Use assembly times (relative to minimum) as weight
            vwgt[e] =
              int(total_assembly_time_for_global_root_element[e] / min_time);
          }
        }
      }
      // Load balance based on number of leaf elements associated with
      // root
      else
      {
        oomph_info << "Basing distribution on number of elements\n";
        for (unsigned e = 0; e < total_number_of_root_elements; e++)
        {
          vwgt[e] = number_of_non_halo_elements_for_global_root_element[e];
        }
      }

      // Bypass METIS (usually for validation)
      if (bypass_metis)
      {
        // Simple repeatable partition: Equidistribute root elements
        for (unsigned e = 0; e < total_number_of_root_elements; e++)
        {
          // Simple repeatable partition: Equidistribute elements on each
          // processor
          part[e] = (n_proc - 1) -
                    unsigned(double(e) / double(total_number_of_root_elements) *
                             double(n_proc));
        }
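
        // For illustration: with n_proc=2 and 8 root elements the formula
        // above yields part = {1,1,1,1,0,0,0,0} -- the first half of the
        // root elements goes to the last processor, and so on; dumb but
        // repeatable.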

        oomph_info
          << "Bypassing METIS for validation purposes.\n"
          << "Appending input for metis in metis_input_for_validation.dat\n";
        std::ofstream outfile;
        outfile.open("metis_input_for_validation.dat", std::ios_base::app);

        // Dump out relevant input to metis
        for (unsigned e = 0; e < total_number_of_root_elements + 1; e++)
        {
          outfile << xadj[e] << std::endl;
        }
        unsigned n = adjacency_vector.size();
        for (unsigned i = 0; i < n; i++)
        {
          outfile << adjacency_vector[i] << std::endl;
        }
        for (unsigned e = 0; e < total_number_of_root_elements; e++)
        {
          outfile << vwgt[e] << std::endl;
        }
        outfile.close();
      }
      // Actually use METIS (good but not always repeatable!)
      else
      {
#ifdef OOMPH_TRANSITION_TO_VERSION_3

        METIS_PartGraphKway(&nvertex,
                            xadj,
                            &adjacency_vector[0],
                            vwgt,
                            adjwgt,
                            &wgtflag,
                            &numflag,
                            &nparts,
                            options,
                            edgecut,
                            part);
#else
        // for old version of METIS; these two functions have been merged
        // in the new METIS API

        if (objective == 0)
        {
          // Partition with the objective of minimising the edge cut
          METIS_PartGraphKway(&nvertex,
                              xadj,
                              &adjacency_vector[0],
                              vwgt,
                              adjwgt,
                              &wgtflag,
                              &numflag,
                              &nparts,
                              options,
                              edgecut,
                              part);
        }
        else if (objective == 1)
        {
          // Partition with the objective of minimising the total communication
          // volume
          METIS_PartGraphVKway(&nvertex,
                               xadj,
                               &adjacency_vector[0],
                               vwgt,
                               adjwgt,
                               &wgtflag,
                               &numflag,
                               &nparts,
                               options,
                               edgecut,
                               part);
        }
#endif
      }

      // Copy across
      Vector<double> total_weight_on_proc(n_proc, 0.0);
      for (unsigned e = 0; e < total_number_of_root_elements; e++)
      {
        root_element_domain[e] = part[e];
        total_weight_on_proc[part[e]] += vwgt[e];
      }

      // Document success of partitioning
      for (unsigned j = 0; j < n_proc; j++)
      {
        oomph_info << "Total weight on proc " << j << " is "
                   << total_weight_on_proc[j] << std::endl;
      }

      // Doc
      double cpu1 = double(clock() - cpu_start) / CLOCKS_PER_SEC;
      oomph_info << "CPU time for METIS mesh partitioning [nroot_elem="
                 << total_number_of_root_elements << "]: " << cpu1 << " sec"
                 << std::endl;

      // Cleanup
      delete[] xadj;
      delete[] part;
      delete[] vwgt;
      delete[] edgecut;
      delete[] options;
    }

    // Now scatter things back to processors: root_element_domain[] contains
    // the target domain for all elements (concatenated in processor-by-
    // processor order on the root processor). Distribute this back
    // to the processors so that root_element_domain_on_this_proc[e] contains
    // the target domain for root element e (in whatever order the processor
    // decided to line up its root elements).
    cpu_start = clock();
    Vector<unsigned> root_element_domain_on_this_proc(number_of_root_elements);

    // Create at least one entry so we don't get a seg fault below
    if (root_element_domain_on_this_proc.size() == 0)
    {
      root_element_domain_on_this_proc.resize(1);
    }
    MPI_Scatterv(&root_element_domain[0],
                 &number_of_root_elements_on_each_proc[0],
                 &start_index[0],
                 MPI_UNSIGNED,
                 &root_element_domain_on_this_proc[0],
                 number_of_root_elements,
                 MPI_UNSIGNED,
                 root_processor,
                 comm_pt->mpi_comm());


    // Now translate back into target domain for the actual (non-root)
    // elements
    element_domain_on_this_proc.resize(number_of_non_halo_elements);
    unsigned count_non_halo = 0;
    for (unsigned e = 0; e < n_elem; e++)
    {
      GeneralisedElement* el_pt = mesh_pt->element_pt(e);
      if (!el_pt->is_halo())
      {
        // Get the associated root element which is either...
        GeneralisedElement* root_el_pt = 0;
        RefineableElement* ref_el_pt = dynamic_cast<RefineableElement*>(el_pt);
        if (ref_el_pt != 0)
        {
          //...the actual root element
          root_el_pt = ref_el_pt->root_element_pt();
        }
        // ...or the element itself
        else
        {
          root_el_pt = el_pt;
        }

        // Recover the root element number (offset by one)
        unsigned root_el_number = root_el_number_plus_one[root_el_pt] - 1;

        // Copy target domain across from root element
        element_domain_on_this_proc[count_non_halo] =
          root_element_domain_on_this_proc[root_el_number];

        // Bump up counter for non-halo elements
        count_non_halo++;
      }
    }


#ifdef PARANOID
    if (count_non_halo != number_of_non_halo_elements)
    {
      std::ostringstream error_stream;
      error_stream << "Non-halo counts don't match: " << count_non_halo << " "
                   << number_of_non_halo_elements << std::endl;

      throw OomphLibError(
        error_stream.str(), OOMPH_CURRENT_FUNCTION, OOMPH_EXCEPTION_LOCATION);
    }
#endif

    // End timer
    cpu_end = clock();

    // Doc
    double cpu2 = double(cpu_end - cpu_start) / CLOCKS_PER_SEC;
    oomph_info << "CPU time for communication of partition to all processors "
                  "[nroot_elem="
               << total_number_of_root_elements << "]: " << cpu2 << " sec"
               << std::endl;
  }


#endif

} // namespace oomph