superlu_dist.c
Go to the documentation of this file.
1
2
3/*----------------------------------------------------------------
4 Interface to distributed SuperLU, created by JWB by adapting
5 code in the superlu distribution, i.e. files /SRC/pdgssvx.c
6 (function pdgssvx solves a system of linear equations) and
7 /EXAMPLE/pddrive.c demo (a driver program to illustrate how to
8 use pdgssvx).
9 Essentially the code below performs the same functions as
10 pdgssvx in a modified order which allows resolves. Much of the
11 code taken from pdgssvx remains essentially unchanged other
12 than changing the layout to match the oomph-lib standard. Comments
13 from the original code have been left unchanged whenever possible
14 to help match this code with that found in pdgssvx.c
15
16 To update this driver code for use with later versions of
17 Distributed SuperLU I suggest first looking at changes (if any) to
18 the two distributed SuperLU files, and then making the corresponding
19 changes to the code below.
20
21 Adapted from code found in:
22 -- Distributed SuperLU routine (version 2.0) --
23 Lawrence Berkeley National Lab, Univ. of California Berkeley.
24 March 15, 2003
25
26 ----------------------------------------------------------------
27*/
28#include <math.h>
29#ifdef USING_OOMPH_SUPERLU_DIST
30#include "oomph_superlu_dist_3.0.h"
31#else
32#include<superlu_defs.h>
33#include<superlu_ddefs.h>
34#include<Cnames.h>
35#include<machines.h>
36#include<psymbfact.h>
37#include<supermatrix.h>
38#include<old_colamd.h>
39#include<util_dist.h>
40#endif
41
42
43/* ================================================= */
44/* Struct for the lu factors */
45/* ================================================= */
46typedef struct
47{
48 gridinfo_t* grid;
49 SuperMatrix* A;
50 SuperMatrix* AC;
51 ScalePermstruct_t* ScalePermstruct;
52 LUstruct_t* LUstruct;
53 SOLVEstruct_t* SOLVEstruct;
54 superlu_options_t* options;
55 int_t rowequ;
56 int_t colequ;
57 double anorm;
59
60
61/* ================================================= */
62/* Can't think of any other way to store the memory */
63/* stats... (PM) */
64/* ================================================= */
66{
67 // Storage for the memory stats
68 mem_usage_t Memory_usage;
69
70 // Boolean
73
74/* ========================================================================= */
75/* Helper to record memory usage*/
76/* ========================================================================= */
78{
79 // If the LU decomposition has been stored
81 {
83 }
84 else
85 {
86 return 0.0;
87 }
88} // End of get_lu_factor_memory_usage_in_bytes
89
90/* ========================================================================= */
91/* Helper to record memory usage*/
92/* ========================================================================= */
94{
95 // If the LU decomposition has been stored
97 {
99 }
100 else
101 {
102 return 0.0;
103 }
104} // End of get_total_memory_usage_in_bytes
105
106/* ========================================================================= */
107/* Helper to record memory usage*/
108/* ========================================================================= */
109void get_memory_usage_in_bytes_dist(double* lu_factor_memory,
110 double* total_memory)
111{
112 (*lu_factor_memory)=symbolic_memory_statistics_storage.Memory_usage.for_lu;
114}
115
116//=============================================================================
117// helper method - just calls the superlu method dCompRow_to_CompCol to convert
118// the c-style vectors of a cr matrix to a cc matrix
119//=============================================================================
120void superlu_cr_to_cc(int nrow, int ncol, int nnz, double* cr_values,
121 int* cr_index, int* cr_start, double** cc_values,
122 int** cc_index, int** cc_start)
123{
124 dCompRow_to_CompCol(nrow,ncol,nnz,cr_values,cr_index,cr_start,cc_values,
125 cc_index,cc_start);
126}
127
128/*----------------------------------------------------------------
129 Bridge to distributed SuperLU with distributed memory (version 2.0).
130 Requires input of system matrix in compressed row form.
131
132 Parameters:
133 op_flag = int specifies the operation:
134 1, performs LU decomposition for the first time
135 2, performs triangular solve
136 3, free all the storage in the end
137 - n = size of system (square matrix)
138 - nnz = # of nonzero entries
139 - values = 1D C array of nonzero entries
140 - row_index = 1D C array of row indices
141 - column_start = 1D C array of column start indices
142 - b = 1D C array representing the rhs vector, is overwritten
143 by solution.
144 - nprow = # of rows in process grid
145 - npcol = # of columns in process grid
146 - doc = 0/1 for doc/no doc
147 - data = pointer to structure to contain LU solver data.
148 If *opt_flag == 1, it is an output. Otherwise, it it an input.
149
150 Return value for *info:
151 = 0: successful exit
152 > 0: if *info = i, and i is
153 <= A->ncol: U(i,i) is exactly zero. The factorization has
154 been completed, but the factor U is exactly singular,
155 so the solution could not be computed.
156 > A->ncol: number of bytes allocated when memory allocation
157 failure occurred, plus A->ncol.
158 < 0: some other error
159 ----------------------------------------------------------------
160*/
161void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations,
162 int n, int nnz_local,
163 int nrow_local,int first_row,
164 double *values, int *col_index,
165 int *row_start, double *b,
166 int nprow, int npcol,
167 int doc, void **data, int *info,
168 MPI_Comm comm)
169{
170 /* Some SuperLU structures */
171 superlu_options_t *options;
172 SuperLUStat_t stat;
173 SuperMatrix *A;
174 ScalePermstruct_t *ScalePermstruct;
175 LUstruct_t *LUstruct;
176 SOLVEstruct_t *SOLVEstruct;
177 gridinfo_t *grid;
178
179 /* Structure to hold SuperLU structures and data */
180 superlu_dist_data *superlu_data;
181
182 int_t *perm_r; /* row permutations from partial pivoting */
183 int_t *perm_c; /* column permutation vector */
184 int_t *etree; /* elimination tree */
185 int_t *rowptr=NULL; int_t *colind; /* Local A in NR*/
186 int_t job, rowequ, colequ, iinfo, need_value, i, j, irow, icol;
187 int_t m_loc, fst_row, nnz, nnz_loc; /* dist_mem_use; */
188 int_t *colptr, *rowind;
189 NRformat_loc *Astore;
190 SuperMatrix GA; /* Global A in NC format */
191 NCformat *GAstore;
192 double *a_GA=NULL;
193 SuperMatrix GAC; /* Global A in NCP format (add n end pointers) */
194 NCPformat *GACstore;
195 Glu_persist_t *Glu_persist;
196 Glu_freeable_t *Glu_freeable=NULL;
197
198 /* Other stuff needed by SuperLU */
199 double *berr=NULL;
200 double *a=NULL; double *X, *b_col;
201 double *B=b;
202 double *C, *R, *C1, *R1, *x_col; /* *bcol, */
203 double amax, t, colcnd, rowcnd; double anorm=0.0;
204 char equed[1], norm[1];
205 int ldx; /* LDA for matrix X (local). */
206 //static mem_usage_t symb_mem_usage;
207 fact_t Fact;
208 int_t Equil, factored, notran, permc_spec;
209
210 /* We're only doing single rhs problems
211 note: code will need modifying to deal with
212 multiple rhs (see function pdgssvx) */
213
214 int nrhs=1;
215
216 /* Square matrix */
217 int m=n;
218
219 /* Set 'Leading dimension' of rhs vector */
220 int ldb=n;
221
222 /* Initialize the statistics variables. */
223 PStatInit(&stat);
224
225 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
226 SET UP GRID, FACTORS, ETC
227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
228 if (opt_flag==1)
229 {
230 /* Allocate data structure to store data between calls to this function */
231 superlu_data =
232 (superlu_dist_data *) SUPERLU_MALLOC(sizeof(superlu_dist_data));
233
234 /* Initialize the superlu process grid. */
235 grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));
236 superlu_gridinit(comm, nprow, npcol, grid);
237 superlu_data->grid = grid;
238
239 /* Bail out if I do not belong in the grid. */
240 int iam = grid->iam;
241 if (iam >= nprow * npcol) return;
242
243 /* Allocate memory for SuperLU_DIST structures */
244 options = (superlu_options_t *) SUPERLU_MALLOC(sizeof(superlu_options_t));
245 A = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
246 ScalePermstruct =
247 (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));
248 LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));
249 SOLVEstruct = (SOLVEstruct_t *) SUPERLU_MALLOC(sizeof(SOLVEstruct_t));
250
251 /* Create SuperMatrix from compressed row representation */
252 dCreate_CompRowLoc_Matrix_dist(A,m,n,nnz_local,nrow_local,first_row,
253 values,col_index,row_start,
254 SLU_NR_loc,SLU_D,SLU_GE);
255
256 /* Set the default options */
257 set_default_options_dist(options);
258
259 /* Is the matrix transposed (NOTRANS or TRANS)? */
260 options->Trans = NOTRANS;
261
262 /* Row permutations (NATURAL [= do nothing], */
263 /* LargeDiag [default], ...)? */
264 /* options->RowPerm=NATURAL; */
265 options->RowPerm=LargeDiag;
266
267 /* Column permutations (NATURAL [= do nothing], */
268 /* MMD_AT_PLUS_A [default],...) */
269 options->ColPerm=MMD_AT_PLUS_A;
270
271 /* Use natural ordering instead? */
272 if (allow_permutations==0)
273 {
274 options->ColPerm=NATURAL;
275 options->RowPerm=NATURAL;
276 }
277
278 /* printf("\n\n\nSWITCHING OFF EQUILIBRATION\n\n\n"); */
279 /* options->Equil=NO; */
280
281 /* Iterative refinement (essential as far as I can tell).*/
282 /* Can be "NO" or "DOUBLE"*/
283 options->IterRefine = SLU_DOUBLE;
284
285 /* Print stats during solve? */
286 if (doc==0)
287 {
288 options->PrintStat = YES;
289 }
290 else
291 {
292 options->PrintStat = NO;
293 }
294
295
296 /* Doc output on process 0 if required: */
297 if ((!iam)&&(doc==0))
298 {
299 printf("\nPerforming SuperLU_DIST setup\n");
300 printf("Process grid\t%d X %d\n", grid->nprow, grid->npcol);
301 print_options_dist(options);
302 }
303
304 /* Initialize ScalePermstruct and LUstruct. */
305 ScalePermstructInit(m, n, ScalePermstruct);
306 LUstructInit(m, n, LUstruct);
307
308 /* Initialization. */
309 Glu_persist = LUstruct->Glu_persist;
310 Astore = (NRformat_loc *) A->Store;
311 nnz_loc = Astore->nnz_loc;
312 m_loc = Astore->m_loc;
313 fst_row = Astore->fst_row;
314 a = Astore->nzval;
315 rowptr = Astore->rowptr;
316 colind = Astore->colind;
317
318 job = 5;
319 Fact = options->Fact;
320 factored = (Fact == FACTORED);
321 Equil = (!factored && options->Equil == YES);
322 notran = (options->Trans == NOTRANS);
323 rowequ = colequ = FALSE;
324 if (factored || (Fact == SamePattern_SameRowPerm && Equil))
325 {
326 rowequ = (ScalePermstruct->DiagScale == ROW) ||
327 (ScalePermstruct->DiagScale == BOTH);
328 colequ = (ScalePermstruct->DiagScale == COL) ||
329 (ScalePermstruct->DiagScale == BOTH);
330 }
331 else
332 {
333 rowequ = colequ = FALSE;
334 }
335
336 /* Test the control parameters etc. */
337 *info = 0;
338 if (Fact < 0 || Fact > FACTORED)
339 {
340 *info = -1;
341 }
342 else if (options->RowPerm < 0 || options->RowPerm > MY_PERMR)
343 {
344 *info = -1;
345 }
346 else if (options->ColPerm < 0 || options->ColPerm > MY_PERMC)
347 {
348 *info = -1;
349 }
350 else if (options->IterRefine < 0 || options->IterRefine > SLU_EXTRA)
351 {
352 *info = -1;
353 }
354 else if (options->IterRefine == SLU_EXTRA)
355 {
356 *info = -1;
357 fprintf(stderr, "Extra precise iterative refinement yet to support.\n");
358 }
359 else if (A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
360 || A->Dtype != SLU_D || A->Mtype != SLU_GE)
361 {
362 *info = -2;
363 }
364 else if (ldb < m_loc)
365 {
366 *info = -5;
367 }
368 else if (nrhs < 0)
369 {
370 *info = -6;
371 }
372 if (*info)
373 {
374 printf("Trouble in pdgstrf. Info=%i\n",*info);
375 if (*info==-1)
376 {
377 printf("Error in options.\n");
378 }
379 else if (*info==-2)
380 {
381 printf("Error in matrix.\n");
382 }
383 else if (*info==-5)
384 {
385 printf("ldb < m_loc\n");
386 }
387 else if (*info==-6)
388 {
389 printf("nrhs < 0\n");
390 }
391 return;
392 }
393
394 /* The following arrays are replicated on all processes. */
395 perm_r = ScalePermstruct->perm_r;
396 perm_c = ScalePermstruct->perm_c;
397 etree = LUstruct->etree;
398 R = ScalePermstruct->R;
399 C = ScalePermstruct->C;
400
401 /* Allocate storage. */
402 if (Equil)
403 {
404 /* Not factored & ask for equilibration */
405 /* Allocate storage if not done so before. */
406 switch (ScalePermstruct->DiagScale)
407 {
408 case NOEQUIL:
409 if (!(R = (double *) doubleMalloc_dist(m)))
410 ABORT("Malloc fails for R[].");
411 if (!(C = (double *) doubleMalloc_dist(n)))
412 ABORT("Malloc fails for C[].");
413 ScalePermstruct->R = R;
414 ScalePermstruct->C = C;
415 break;
416 case ROW:
417 if (!(C = (double *) doubleMalloc_dist(n)))
418 ABORT("Malloc fails for C[].");
419 ScalePermstruct->C = C;
420 break;
421 case COL:
422 if (!(R = (double *) doubleMalloc_dist(m)))
423 ABORT("Malloc fails for R[].");
424 ScalePermstruct->R = R;
425 break;
426 default:
427 printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL);
428 ABORT("Never get here.");
429 break;
430 }
431 }
432
433 /* ------------------------------------------------------------
434 Diagonal scaling to equilibrate the matrix.
435 ------------------------------------------------------------*/
436 if (Equil)
437 {
438 t = SuperLU_timer_();
439
440 if (Fact == SamePattern_SameRowPerm)
441 {
442 /* Reuse R and C. */
443 switch (ScalePermstruct->DiagScale)
444 {
445 case NOEQUIL:
446 break;
447 case ROW:
448 irow = fst_row;
449 for (j = 0; j < m_loc; ++j)
450 {
451 for (i = rowptr[j]; i < rowptr[j+1]; ++i)
452 {
453 a[i] *= R[irow]; /* Scale rows. */
454 }
455 ++irow;
456 }
457 break;
458 case COL:
459 for (j = 0; j < m_loc; ++j)
460 {
461 for (i = rowptr[j]; i < rowptr[j+1]; ++i)
462 {
463 icol = colind[i];
464 a[i] *= C[icol]; /* Scale columns. */
465 }
466 }
467 break;
468 case BOTH:
469 irow = fst_row;
470 for (j = 0; j < m_loc; ++j)
471 {
472 for (i = rowptr[j]; i < rowptr[j+1]; ++i)
473 {
474 icol = colind[i];
475 a[i] *= R[irow] * C[icol]; /* Scale rows and cols. */
476 }
477 ++irow;
478 }
479 break;
480 }
481 }
482 else
483 {
484 /* Compute R & C from scratch */
485 /* Compute the row and column scalings. */
486 pdgsequ(A, R, C, &rowcnd, &colcnd, &amax, &iinfo, grid);
487
488 /* Equilibrate matrix A if it is badly-scaled. */
489 pdlaqgs(A, R, C, rowcnd, colcnd, amax, equed);
490
491 if (lsame_(equed, "R"))
492 {
493 ScalePermstruct->DiagScale = rowequ = ROW;
494 }
495 else if (lsame_(equed, "C"))
496 {
497 ScalePermstruct->DiagScale = colequ = COL;
498 }
499 else if (lsame_(equed, "B"))
500 {
501 ScalePermstruct->DiagScale = BOTH;
502 rowequ = ROW;
503 colequ = COL;
504 }
505 else
506 ScalePermstruct->DiagScale = NOEQUIL;
507 } /* if Fact ... */
508
509 stat.utime[EQUIL] = SuperLU_timer_() - t;
510 } /* if Equil ... */
511
512 if (!factored)
513 {
514 /* Skip this if already factored. */
515 /*
516 Gather A from the distributed compressed row format to
517 global A in compressed column format.
518 Numerical values are gathered only when a row permutation
519 for large diagonal is sought after.
520 */
521 if (Fact != SamePattern_SameRowPerm)
522 {
523 need_value = (options->RowPerm == LargeDiag);
524 pdCompRow_loc_to_CompCol_global(need_value, A, grid, &GA);
525 GAstore = (NCformat *) GA.Store;
526 colptr = GAstore->colptr;
527 rowind = GAstore->rowind;
528 nnz = GAstore->nnz;
529 if (need_value) a_GA = GAstore->nzval;
530 else assert(GAstore->nzval == NULL);
531 }
532
533 /* ------------------------------------------------------------
534 Find the row permutation for A.
535 ------------------------------------------------------------*/
536 if ((int) options->RowPerm != (int) NO)
537 {
538 t = SuperLU_timer_();
539 if (Fact != SamePattern_SameRowPerm)
540 {
541 if (options->RowPerm == MY_PERMR)
542 {
543 /* Use user's perm_r. */
544 /* Permute the global matrix GA for symbfact() */
545 for (i = 0; i < colptr[n]; ++i)
546 {
547 irow = rowind[i];
548 rowind[i] = perm_r[irow];
549 }
550 }
551 else
552 {
553 /* options->RowPerm == LargeDiag */
554 /* Get a new perm_r[] */
555 if (job == 5)
556 {
557 /* Allocate storage for scaling factors. */
558 if (!(R1 = doubleMalloc_dist(m)))
559 {
560 ABORT("SUPERLU_MALLOC fails for R1[]");
561 }
562 if (!(C1 = doubleMalloc_dist(n)))
563 {
564 ABORT("SUPERLU_MALLOC fails for C1[]");
565 }
566 }
567
568 if (!iam)
569 {
570 /* Process 0 finds a row permutation */
571 dldperm(job, m, nnz, colptr, rowind, a_GA, perm_r, R1, C1);
572
573 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
574 if (job == 5 && Equil)
575 {
576 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
577 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
578 }
579 }
580 else
581 {
582 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
583 if (job == 5 && Equil)
584 {
585 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
586 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
587 }
588 }
589
590 if (job == 5)
591 {
592 if (Equil)
593 {
594 for (i = 0; i < n; ++i)
595 {
596 R1[i] = exp(R1[i]);
597 C1[i] = exp(C1[i]);
598 }
599
600 /* Scale the distributed matrix */
601 irow = fst_row;
602 for (j = 0; j < m_loc; ++j)
603 {
604 for (i = rowptr[j]; i < rowptr[j+1]; ++i)
605 {
606 icol = colind[i];
607 a[i] *= R1[irow] * C1[icol];
608 }
609 ++irow;
610 }
611
612 /* Multiply together the scaling factors. */
613 if (rowequ)
614 {
615 for (i = 0; i < m; ++i)
616 {
617 R[i] *= R1[i];
618 }
619 }
620 else
621 {
622 for (i = 0; i < m; ++i)
623 {
624 R[i] = R1[i];
625 }
626 }
627 if (colequ)
628 {
629 for (i = 0; i < n; ++i)
630 {
631 C[i] *= C1[i];
632 }
633 }
634 else
635 {
636 for (i = 0; i < n; ++i)
637 {
638 C[i] = C1[i];
639 }
640 }
641
642 ScalePermstruct->DiagScale = BOTH;
643 rowequ = colequ = 1;
644
645 } /* end Equil */
646
647 /* Now permute global A to prepare for symbfact() */
648 for (j = 0; j < n; ++j)
649 {
650 for (i = colptr[j]; i < colptr[j+1]; ++i)
651 {
652 irow = rowind[i];
653 rowind[i] = perm_r[irow];
654 }
655 }
656 SUPERLU_FREE(R1);
657 SUPERLU_FREE(C1);
658 }
659 else
660 {
661 /* job = 2,3,4 */
662 for (j = 0; j < n; ++j)
663 {
664 for (i = colptr[j]; i < colptr[j+1]; ++i)
665 {
666 irow = rowind[i];
667 rowind[i] = perm_r[irow];
668 } /* end for i ... */
669 } /* end for j ... */
670 } /* end else job ... */
671 } /* end if options->RowPerm ... */
672
673 t = SuperLU_timer_() - t;
674 stat.utime[ROWPERM] = t;
675 } /* end if Fact ... */
676 }
677 else
678 {
679 /* options->RowPerm == NOROWPERM */
680 for (i = 0; i <m; ++i) perm_r[i] = i;
681 }
682 } /* end if (!factored) */
683
684 if (!factored || options->IterRefine)
685 {
686 /* Compute norm(A), which will be used to adjust small diagonal. */
687 if (notran)
688 *(unsigned char *)norm = '1';
689 else
690 *(unsigned char *)norm = 'I';
691 anorm = pdlangs(norm, A, grid);
692 }
693
694
695 /* ------------------------------------------------------------
696 Perform the LU factorization.
697 ------------------------------------------------------------*/
698 if (!factored)
699 {
700 t = SuperLU_timer_();
701 /*
702 Get column permutation vector perm_c[], according to permc_spec:
703 permc_spec = NATURAL: natural ordering
704 permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
705 permc_spec = MMD_ATA: minimum degree on structure of A'*A
706 permc_spec = COLAMD: approximate minimum degree column ordering
707 permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
708 */
709 permc_spec = options->ColPerm;
710 if (permc_spec != MY_PERMC && Fact == DOFACT)
711 {
712 get_perm_c_dist(iam, permc_spec, &GA, perm_c);
713 }
714
715 stat.utime[COLPERM] = SuperLU_timer_() - t;
716
717 /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'
718 (a.k.a. column etree), depending on the choice of ColPerm.
719 Adjust perm_c[] to be consistent with a postorder of etree.
720 Permute columns of A to form A*Pc'. */
721 if (Fact != SamePattern_SameRowPerm)
722 {
723 int_t *GACcolbeg, *GACcolend, *GACrowind;
724
725 sp_colorder(options, &GA, perm_c, etree, &GAC);
726
727 /* Form Pc*A*Pc' to preserve the diagonal of the matrix GAC. */
728 GACstore = GAC.Store;
729 GACcolbeg = GACstore->colbeg;
730 GACcolend = GACstore->colend;
731 GACrowind = GACstore->rowind;
732 for (j = 0; j < n; ++j)
733 {
734 for (i = GACcolbeg[j]; i < GACcolend[j]; ++i)
735 {
736 irow = GACrowind[i];
737 GACrowind[i] = perm_c[irow];
738 }
739 }
740
741 /* Perform a symbolic factorization on Pc*Pr*A*Pc' and set up the
742 nonzero data structures for L & U. */
743 t = SuperLU_timer_();
744 if (!(Glu_freeable = (Glu_freeable_t *)
745 SUPERLU_MALLOC(sizeof(Glu_freeable_t))))
746 {
747 ABORT("Malloc fails for Glu_freeable.");
748 }
749
750 /* Every process does this. */
751 iinfo = symbfact(options, iam, &GAC, perm_c, etree,
752 Glu_persist, Glu_freeable);
753
754 stat.utime[SYMBFAC] = SuperLU_timer_() - t;
755 if (iinfo < 0)
756 {
757 /* Successful return */
758 QuerySpace_dist(n, -iinfo, Glu_freeable,
760 }
761 else
762 {
763 if (!iam)
764 {
765 fprintf(stderr, "symbfact() error returns %d\n", iinfo);
766 exit(-1);
767 }
768 }
769 } /* end if Fact ... */
770
771 /* Apply column permutation to the original distributed A */
772 for (j = 0; j < nnz_loc; ++j)
773 {
774 colind[j] = perm_c[colind[j]];
775 }
776
777 /* Distribute Pc*Pr*diag(R)*A*diag(C)*Pc' into L and U storage.
778 NOTE: the row permutation Pc*Pr is applied internally in the
779 distribution routine. */
780 t = SuperLU_timer_();
781 /* dist_mem_use = */
782 pddistribute(Fact, n, A, ScalePermstruct,
783 Glu_freeable, LUstruct, grid);
784 stat.utime[DIST] = SuperLU_timer_() - t;
785
786 /* Deallocate storage used in symbolic factorization. */
787 if (Fact != SamePattern_SameRowPerm)
788 {
789 iinfo = symbfact_SubFree(Glu_freeable);
790 SUPERLU_FREE(Glu_freeable);
791 }
792
793 /* Perform numerical factorization in parallel. */
794 t = SuperLU_timer_();
795 pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
796 stat.utime[FACT] = SuperLU_timer_() - t;
797
798 /* Destroy GA and GAC */
799 if (Fact != SamePattern_SameRowPerm)
800 {
801 Destroy_CompCol_Matrix_dist(&GA);
802 Destroy_CompCol_Permuted_dist(&GAC);
803 }
804 } /* end if (!factored) */
805
806 if (*info!=0)
807 {
808 printf("Trouble in pdgstrf. Info=%i\n",*info);
809 if (*info>0)
810 {
811 printf("U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
812 printf("been completed, but the factor U is exactly singular,\n");
813 printf("and division by zero will occur if it is used to solve a\n");
814 printf("system of equations.\n");
815 }
816 else
817 {
818 printf("The %i-th argument had an illegal value.\n", *info);
819 }
820 }
821
822 /* ------------------------------------------------------------
823 Initialize the solver.
824 ------------------------------------------------------------*/
825 if (options->SolveInitialized == NO)
826 {
827 dSolveInit(options, A, perm_r, perm_c, nrhs, LUstruct, grid,
828 SOLVEstruct);
829 }
830
831 if (options->IterRefine)
832 {
833 if (options->RefineInitialized == NO || Fact == DOFACT)
834 {
835 /* All these cases need to re-initialize gsmv structure */
836 if (options->RefineInitialized)
837 {
838 pdgsmv_finalize(SOLVEstruct->gsmv_comm);
839 }
840 pdgsmv_init(A, SOLVEstruct->row_to_proc, grid,
841 SOLVEstruct->gsmv_comm);
842
843 options->RefineInitialized = YES;
844 }
845 }
846
847 /* Print the statistics. */
848 if ((doc==0) && (!iam))
849 {
850 printf("\nstats after setup....\n");
851 PStatPrint(options, &stat, grid);
852 }
853
854 /* ------------------------------------------------------------
855 Set up data structure.
856 ------------------------------------------------------------*/
857 superlu_data->A = A;
858 superlu_data->options = options;
859 superlu_data->ScalePermstruct = ScalePermstruct;
860 superlu_data->LUstruct = LUstruct;
861 superlu_data->SOLVEstruct = SOLVEstruct;
862 superlu_data->colequ = colequ;
863 superlu_data->rowequ = rowequ;
864 superlu_data->anorm = anorm;
865 *data = superlu_data;
866
867 } /* End of setup */
868
869
870 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
871 PERFORM A SOLVE
872 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
873 if (opt_flag==2)
874 {
875 /* Get pointer to the grid */
876 superlu_data = (superlu_dist_data *)*data;
877 grid = superlu_data->grid;
878
879 /* Bail out if I do not belong in the grid. */
880 int iam = grid->iam;
881 if (iam >= nprow * npcol)
882 {
883 return;
884 }
885
886 if ((doc==0)&&(!iam))
887 {
888 printf("\nPerforming SuperLU_DIST solve\n");
889 }
890
891 /* ------------------------------------------------------------
892 Set other pointers to data structure.
893 ------------------------------------------------------------*/
894 A = superlu_data->A;
895 options = superlu_data->options;
896 ScalePermstruct = superlu_data->ScalePermstruct;
897 LUstruct = superlu_data->LUstruct;
898 SOLVEstruct = superlu_data->SOLVEstruct;
899 colequ = superlu_data->colequ;
900 rowequ = superlu_data->rowequ;
901 anorm = superlu_data->anorm;
902
903 /* Initialization. */
904 Astore = (NRformat_loc *) A->Store;
905 nnz_loc = Astore->nnz_loc;
906 m_loc = Astore->m_loc;
907 fst_row = Astore->fst_row;
908 colind = Astore->colind;
909 R = ScalePermstruct->R;
910 C = ScalePermstruct->C;
911
912 /* Local control paramaters */
913 Fact = options->Fact;
914 factored = (Fact == FACTORED);
915 Equil = (!factored && options->Equil == YES);
916 notran = (options->Trans == NOTRANS);
917
918 /* ------------------------------------------------------------
919 Scale the right-hand side if equilibration was performed.
920 ------------------------------------------------------------*/
921 if (notran)
922 {
923 if (rowequ)
924 {
925 b_col = B;
926 for (j = 0; j < nrhs; ++j)
927 {
928 irow = fst_row;
929 for (i = 0; i < m_loc; ++i)
930 {
931 b_col[i] *= R[irow];
932 ++irow;
933 }
934 b_col += ldb;
935 }
936 }
937 }
938 else if (colequ)
939 {
940 b_col = B;
941 for (j = 0; j < nrhs; ++j)
942 {
943 irow = fst_row;
944 for (i = 0; i < m_loc; ++i)
945 {
946 b_col[i] *= C[irow];
947 ++irow;
948 }
949 b_col += ldb;
950 }
951 }
952
953 /* Save a copy of the right-hand side. */
954 ldx = ldb;
955 if (!(X = doubleMalloc_dist(((size_t)ldx) * nrhs)))
956 {
957 ABORT("Malloc fails for X[]");
958 }
959 x_col = X;
960 b_col = B;
961 for (j = 0; j < nrhs; ++j)
962 {
963 for (i = 0; i < m_loc; ++i)
964 {
965 x_col[i] = b_col[i];
966 }
967 x_col += ldx;
968 b_col += ldb;
969 }
970
971
972 /* ------------------------------------------------------------
973 Solve the linear system.
974 ------------------------------------------------------------*/
975 pdgstrs(n, LUstruct, ScalePermstruct, grid, X, m_loc,
976 fst_row, ldb, nrhs, SOLVEstruct, &stat, info);
977
978 if (*info!=0)
979 {
980 printf("Trouble in pdgstrs. Info=%i\n",*info);
981 printf("The %i-th argument had an illegal value.\n", *info);
982 }
983
984 /* ------------------------------------------------------------
985 Use iterative refinement to improve the computed solution and
986 compute error bounds and backward error estimates for it.
987 ------------------------------------------------------------*/
988 if (options->IterRefine)
989 {
990 /* Improve the solution by iterative refinement. */
991 int_t *it, *colind_gsmv = SOLVEstruct->A_colind_gsmv;
992 /*SOLVEstruct_t *SOLVEstruct1;*/ /* Used by refinement. */
993
994 t = SuperLU_timer_();
995 if (options->RefineInitialized == NO || Fact == DOFACT)
996 {
997 /* Save a copy of the transformed local col indices
998 in colind_gsmv[]. */
999 if (colind_gsmv)
1000 {
1001 SUPERLU_FREE(colind_gsmv);
1002 }
1003 if (!(it = intMalloc_dist(nnz_loc)))
1004 {
1005 ABORT("Malloc fails for colind_gsmv[]");
1006 }
1007 colind_gsmv = SOLVEstruct->A_colind_gsmv = it;
1008 for (i = 0; i < nnz_loc; ++i)
1009 {
1010 colind_gsmv[i] = colind[i];
1011 }
1012 }
1013 else if (Fact == SamePattern || Fact == SamePattern_SameRowPerm)
1014 {
1015 double at;
1016 int_t k, jcol, p;
1017 /* Swap to beginning the part of A corresponding to the
1018 local part of X, as was done in pdgsmv_init() */
1019 for (i = 0; i < m_loc; ++i)
1020 {
1021 /* Loop through each row */
1022 k = rowptr[i];
1023 for (j = rowptr[i]; j < rowptr[i+1]; ++j)
1024 {
1025 jcol = colind[j];
1026 p = SOLVEstruct->row_to_proc[jcol];
1027 if (p == iam)
1028 {
1029 /* Local */
1030 at = a[k]; a[k] = a[j]; a[j] = at;
1031 ++k;
1032 }
1033 }
1034 }
1035
1036 /* Re-use the local col indices of A obtained from the
1037 previous call to pdgsmv_init() */
1038 for (i = 0; i < nnz_loc; ++i)
1039 {
1040 colind[i] = colind_gsmv[i];
1041 }
1042 }
1043
1044 /* Storage for backward error */
1045 if (!(berr = doubleMalloc_dist(nrhs)))
1046 {
1047 ABORT("Malloc fails for berr[].");
1048 }
1049
1050 pdgsrfs(n, A, anorm, LUstruct, ScalePermstruct, grid,
1051 B, ldb, X, ldx, nrhs, SOLVEstruct, berr, &stat, info);
1052
1053 stat.utime[REFINE] = SuperLU_timer_() - t;
1054 }
1055
1056 if (*info!=0)
1057 {
1058 printf("Trouble in pdgsrfs. Info=%i\n",*info);
1059 printf("The %i-th argument had an illegal value.\n", *info);
1060 }
1061
1062 /* Print the statistics. */
1063 if ((doc==0) && (!iam))
1064 {
1065 printf("\nstats after solve....\n");
1066 PStatPrint(options, &stat, grid);
1067 }
1068
1069 /* Permute the solution matrix B <= Pc'*X. */
1070 pdPermute_Dense_Matrix(fst_row, m_loc, SOLVEstruct->row_to_proc,
1071 SOLVEstruct->inv_perm_c,
1072 X, ldx, B, ldb, nrhs, grid);
1073
1074 /* Transform the solution matrix X to a solution of the original
1075 system before the equilibration. */
1076 if (notran)
1077 {
1078 if (colequ)
1079 {
1080 b_col = B;
1081 for (j = 0; j < nrhs; ++j)
1082 {
1083 irow = fst_row;
1084 for (i = 0; i < m_loc; ++i)
1085 {
1086 b_col[i] *= C[irow];
1087 ++irow;
1088 }
1089 b_col += ldb;
1090 }
1091 }
1092 }
1093 else if (rowequ)
1094 {
1095 b_col = B;
1096 for (j = 0; j < nrhs; ++j)
1097 {
1098 irow = fst_row;
1099 for (i = 0; i < m_loc; ++i)
1100 {
1101 b_col[i] *= R[irow];
1102 ++irow;
1103 }
1104 b_col += ldb;
1105 }
1106 }
1107
1108 /* Clean up memory */
1109 if (options->IterRefine)
1110 {
1111 SUPERLU_FREE(berr);
1112 }
1113 SUPERLU_FREE(X);
1114
1115 } /* End of solve */
1116
1117 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1118 PERFORM CLEAN UP OF MEMORY
1119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1120 if (opt_flag==3)
1121 {
1122 /* Get pointer to the process grid */
1123 superlu_data = (superlu_dist_data *)*data;
1124 grid = superlu_data->grid;
1125
1126 /* Bail out if I do not belong in the grid. */
1127 int iam = grid->iam;
1128 if (iam >= nprow * npcol) goto out;
1129 if ((doc==0)&&(!iam))
1130 {
1131 printf("\nCleaning up memory allocated for SuperLU_DIST\n");
1132 }
1133
1134 /* ------------------------------------------------------------
1135 Set pointers to the data structure.
1136 ------------------------------------------------------------*/
1137 A = superlu_data->A;
1138 options = superlu_data->options;
1139 ScalePermstruct = superlu_data->ScalePermstruct;
1140 LUstruct = superlu_data->LUstruct;
1141 SOLVEstruct = superlu_data->SOLVEstruct;
1142
1143 /* -------------------------------
1144 Set the other pointers required
1145 -------------------------------*/
1146 R = ScalePermstruct->R;
1147 C = ScalePermstruct->C;
1148
1149 /* Local control paramaters */
1150 Fact = options->Fact;
1151 factored = (Fact == FACTORED);
1152 Equil = (!factored && options->Equil == YES);
1153 notran = (options->Trans == NOTRANS);
1154
1155 /* Deallocate R and/or C if it was not used. */
1156 if (Equil && Fact != SamePattern_SameRowPerm)
1157 {
1158 switch (ScalePermstruct->DiagScale)
1159 {
1160 case NOEQUIL:
1161 SUPERLU_FREE(R);
1162 SUPERLU_FREE(C);
1163 break;
1164 case ROW:
1165 SUPERLU_FREE(C);
1166 break;
1167 case COL:
1168 SUPERLU_FREE(R);
1169 break;
1170 default:
1171 /* Apparently this one is ok */
1172 /* printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL); */
1173 /* ABORT("Never get here. THIS IS THE ONE");*/
1174 break;
1175 }
1176 }
1177
1178 /* Free storage */
1179 ScalePermstructFree(ScalePermstruct);
1180 Destroy_LU(n, grid, LUstruct);
1181 LUstructFree(LUstruct);
1182 dSolveFinalize(options, SOLVEstruct);
1183 //Destroy_CompRowLoc_Matrix_dist(&A);
1184
1185 // Only destroy the store part of the matrix
1186 Destroy_SuperMatrix_Store_dist(A);
1187
1188 /* Deallocate memory */
1189 SUPERLU_FREE(A);
1190 SUPERLU_FREE(ScalePermstruct);
1191 SUPERLU_FREE(LUstruct);
1192 SUPERLU_FREE(SOLVEstruct);
1193 SUPERLU_FREE(options);
1194
1195 /* Release the superlu process grid. */
1196out:
1197 superlu_gridexit(grid);
1198
1199 SUPERLU_FREE(grid);
1200 SUPERLU_FREE(superlu_data);
1201
1202 }
1203
1204 /* Free storage */
1205 PStatFree(&stat);
1206
1207 return;
1208
1209}
1210
1211
1212
1213/*----------------------------------------------------------------
1214 Bridge to distributed SuperLU with distributed memory (version 2.0).
1215 Requires input of system matrix in compressed row form.
1216
1217 Parameters:
1218 op_flag = int specifies the operation:
1219 1, performs LU decomposition for the first time
1220 2, performs triangular solve
1221 3, free all the storage in the end
1222 - n = size of system (square matrix)
1223 - nnz = # of nonzero entries
1224 - values = 1D C array of nonzero entries
1225 - row_index = 1D C array of row indices
1226 - column_start = 1D C array of column start indices
1227 - b = 1D C array representing the rhs vector, is overwritten
1228 by solution.
1229 - nprow = # of rows in process grid
1230 - npcol = # of columns in process grid
1231 - doc = 0/1 for doc/no doc
1232 - data = pointer to structure to contain LU solver data.
1233 If *opt_flag == 1, it is an output. Otherwise, it it an input.
1234
1235 Return value of *info:
1236 = 0: successful exit
1237 > 0: if *info = i, and i is
1238 <= A->ncol: U(i,i) is exactly zero. The factorization has
1239 been completed, but the factor U is exactly singular,
1240 so the solution could not be computed.
1241 > A->ncol: number of bytes allocated when memory allocation
1242 failure occurred, plus A->ncol.
1243 < 0: some other error
1244 ----------------------------------------------------------------
1245*/
1246void superlu_dist_global_matrix(int opt_flag, int allow_permutations,
1247 int n_in, int nnz_in,
1248 double *values,
1249 int *row_index, int *col_start,
1250 double *b, int nprow, int npcol,
1251 int doc, void **data, int *info,
1252 MPI_Comm comm)
1253{
1254 /* Some SuperLU structures */
1255 superlu_options_t *options;
1256 SuperLUStat_t stat;
1257 SuperMatrix *A;
1258 SuperMatrix *AC;
1259 ScalePermstruct_t *ScalePermstruct;
1260 LUstruct_t *LUstruct;
1261 gridinfo_t *grid;
1262
1263 /* Structure to hold SuperLU structures and data */
1264 superlu_dist_data *superlu_data;
1265
1266 int_t *perm_r; /* row permutations from partial pivoting */
1267 int_t *perm_c; /* column permutation vector */
1268 int_t *etree; /* elimination tree */
1269 int_t job, rowequ, colequ, iinfo, i, j, irow; /* , need_value */
1270 int_t m, n, nnz;
1271 int_t *colptr, *rowind;
1272 int_t Equil, factored, notran, permc_spec; /*, dist_mem_use; */
1273 NCformat *Astore;
1274 NCPformat *ACstore;
1275 Glu_persist_t *Glu_persist;
1276 Glu_freeable_t *Glu_freeable=NULL;
1277
1278 /* Other stuff needed by SuperLU */
1279 double *berr=NULL;
1280 double *a, *X, *b_col;
1281 double *B=b;
1282 double *C, *R, *C1, *R1, *b_work, *x_col; /* *bcol, */
1283 double amax, t, colcnd, rowcnd; double anorm=0.0;
1284 char equed[1], norm[1];
1285 int ldx; /* LDA for matrix X (local). */
1286 int iam;
1287 //static mem_usage_t num_mem_usage, symb_mem_usage;
1288 fact_t Fact;
1289
1290
1291 /* We're only doing single rhs problems
1292 note: code will need modifying to deal with
1293 multiple rhs (see function pdgssvx) */
1294 int nrhs=1;
1295
1296 /* Square matrix */
1297 n=n_in;
1298 m=n_in;
1299 nnz=nnz_in;
1300
1301 /* Set 'Leading dimension' of rhs vector */
1302 int ldb=n;
1303
1304
1305 /* Initialize the statistics variables. */
1306 PStatInit(&stat);
1307
1308 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1309 SET UP GRID, FACTORS, ETC
1310 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1311 if (opt_flag==1)
1312 {
1313 /* Allocate data structure to store data between calls to this function */
1314 superlu_data =
1315 (superlu_dist_data *) SUPERLU_MALLOC(sizeof(superlu_dist_data));
1316
1317 /* Initialize the superlu process grid. */
1318 grid = (gridinfo_t *) SUPERLU_MALLOC(sizeof(gridinfo_t));
1319 superlu_gridinit(comm, nprow, npcol, grid);
1320 superlu_data->grid = grid;
1321
1322 /* Bail out if I do not belong in the grid. */
1323 iam = grid->iam;
1324 if (iam >= nprow * npcol)
1325 {
1326 return;
1327 }
1328
1329 /* Allocate memory for SuperLU_DIST structures */
1330 options = (superlu_options_t *) SUPERLU_MALLOC(sizeof(superlu_options_t));
1331 A = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
1332 AC = (SuperMatrix *) SUPERLU_MALLOC(sizeof(SuperMatrix));
1333 ScalePermstruct =
1334 (ScalePermstruct_t *) SUPERLU_MALLOC(sizeof(ScalePermstruct_t));
1335 LUstruct = (LUstruct_t *) SUPERLU_MALLOC(sizeof(LUstruct_t));
1336
1337 /* Set the default options */
1338 set_default_options_dist(options);
1339
1340 /* Is the matrix transposed (NOTRANS or TRANS)? */
1341 options->Trans = NOTRANS;
1342
1343 /* Row permutations (NATURAL [= do nothing], */
1344 /* LargeDiag [default], ...)? */
1345 /* options->RowPerm=NATURAL; */
1346 options->RowPerm=LargeDiag;
1347
1348 /* Column permutations (NATURAL [= do nothing], */
1349 /* MMD_AT_PLUS_A [default],...) */
1350 options->ColPerm=MMD_AT_PLUS_A;
1351
1352 /* Use natural ordering instead? */
1353 if (allow_permutations==0)
1354 {
1355 options->ColPerm=NATURAL;
1356 options->RowPerm=NATURAL;
1357 }
1358
1359 /* Iterative refinement (essential as far as I can tell).*/
1360 /* Can be "NO" or "DOUBLE"*/
1361 options->IterRefine = SLU_DOUBLE;
1362
1363 /* Print stats during solve? */
1364 if (doc==0)
1365 {
1366 options->PrintStat = YES;
1367 }
1368 else
1369 {
1370 options->PrintStat = NO;
1371 }
1372
1373 /* Doc output on process 0 if required: */
1374 if ((!iam)&&(doc==0))
1375 {
1376 printf("\nPerforming SuperLU_DIST setup\n");
1377 printf("Process grid\t%d X %d\n", grid->nprow, grid->npcol);
1378 print_options_dist(options);
1379 }
1380
1381
1382 /* Create SuperMatrix from compressed column representation */
1383 dCreate_CompCol_Matrix_dist(A,m,n,nnz,values,row_index,col_start,
1384 SLU_NC,SLU_D,SLU_GE);
1385
1386 /* Initialize ScalePermstruct and LUstruct. */
1387 ScalePermstructInit(m, n, ScalePermstruct);
1388 LUstructInit(m, n, LUstruct);
1389
1390 /* Test the control parameters etc. */
1391 *info = 0;
1392 Fact = options->Fact;
1393 if (Fact < 0 || Fact > FACTORED)
1394 {
1395 *info = -1;
1396 }
1397 else if (options->RowPerm < 0 || options->RowPerm > MY_PERMR)
1398 {
1399 *info = -1;
1400 }
1401 else if (options->ColPerm < 0 || options->ColPerm > MY_PERMC)
1402 {
1403 *info = -1;
1404 }
1405 else if (options->IterRefine < 0 || options->IterRefine > SLU_EXTRA)
1406 {
1407 *info = -1;
1408 }
1409 else if (options->IterRefine == SLU_EXTRA)
1410 {
1411 *info = -1;
1412 fprintf(stderr, "Extra precise iterative refinement yet to support.\n");
1413 }
1414 else if (A->nrow != A->ncol || A->nrow < 0 ||
1415 A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE)
1416 {
1417 *info = -2;
1418 }
1419 else if (ldb < A->nrow)
1420 {
1421 *info = -5;
1422 }
1423 else if (nrhs < 0)
1424 {
1425 *info = -6;
1426 }
1427 if (*info)
1428 {
1429 printf("Trouble in pdgstrf. Info=%i\n",-*info);
1430 if (*info==-1)
1431 {
1432 printf("Error in options.\n");
1433 }
1434 else if (*info==-2)
1435 {
1436 printf("Error in matrix.\n");
1437 }
1438 else if (*info==-5)
1439 {
1440 printf("ldb < A->nrow\n");
1441 }
1442 else if (*info==-6)
1443 {
1444 printf("nrhs < 0\n");
1445 }
1446 return;
1447 }
1448
1449 /* Initialization. */
1450 factored = (Fact == FACTORED);
1451 Equil = (!factored && options->Equil == YES);
1452 notran = (options->Trans == NOTRANS);
1453 job = 5;
1454 Astore = A->Store;
1455 nnz = Astore->nnz;
1456 a = Astore->nzval;
1457 colptr = Astore->colptr;
1458 rowind = Astore->rowind;
1459 if (factored || (Fact == SamePattern_SameRowPerm && Equil))
1460 {
1461 rowequ = (ScalePermstruct->DiagScale == ROW) ||
1462 (ScalePermstruct->DiagScale == BOTH);
1463 colequ = (ScalePermstruct->DiagScale == COL) ||
1464 (ScalePermstruct->DiagScale == BOTH);
1465 }
1466 else
1467 {
1468 rowequ = colequ = FALSE;
1469 }
1470
1471 perm_r = ScalePermstruct->perm_r;
1472 perm_c = ScalePermstruct->perm_c;
1473 etree = LUstruct->etree;
1474 R = ScalePermstruct->R;
1475 C = ScalePermstruct->C;
1476 Glu_persist = LUstruct->Glu_persist;
1477 if (Equil)
1478 {
1479 /* Allocate storage if not done so before. */
1480 switch (ScalePermstruct->DiagScale)
1481 {
1482 case NOEQUIL:
1483 if (!(R = (double *) doubleMalloc_dist(m)))
1484 ABORT("Malloc fails for R[].");
1485 if (!(C = (double *) doubleMalloc_dist(n)))
1486 ABORT("Malloc fails for C[].");
1487 ScalePermstruct->R = R;
1488 ScalePermstruct->C = C;
1489 break;
1490 case ROW:
1491 if (!(C = (double *) doubleMalloc_dist(n)))
1492 ABORT("Malloc fails for C[].");
1493 ScalePermstruct->C = C;
1494 break;
1495 case COL:
1496 if (!(R = (double *) doubleMalloc_dist(m)))
1497 ABORT("Malloc fails for R[].");
1498 ScalePermstruct->R = R;
1499 break;
1500 default:
1501 printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL);
1502 ABORT("Never get here.");
1503 break;
1504 }
1505 }
1506
1507 /* ------------------------------------------------------------
1508 Diagonal scaling to equilibrate the matrix.
1509 ------------------------------------------------------------*/
1510 if (Equil)
1511 {
1512 t = SuperLU_timer_();
1513
1514 if (Fact == SamePattern_SameRowPerm)
1515 {
1516 /* Reuse R and C. */
1517 switch (ScalePermstruct->DiagScale)
1518 {
1519 case NOEQUIL:
1520 break;
1521 case ROW:
1522 for (j = 0; j < n; ++j)
1523 {
1524 for (i = colptr[j]; i < colptr[j+1]; ++i)
1525 {
1526 irow = rowind[i];
1527 a[i] *= R[irow]; /* Scale rows. */
1528 }
1529 }
1530 break;
1531 case COL:
1532 for (j = 0; j < n; ++j)
1533 {
1534 for (i = colptr[j]; i < colptr[j+1]; ++i)
1535 {
1536 a[i] *= C[j]; /* Scale columns. */
1537 }
1538 }
1539 break;
1540 case BOTH:
1541 for (j = 0; j < n; ++j)
1542 {
1543 for (i = colptr[j]; i < colptr[j+1]; ++i)
1544 {
1545 irow = rowind[i];
1546 a[i] *= R[irow] * C[j]; /* Scale rows and columns. */
1547 }
1548 }
1549 break;
1550 }
1551 }
1552 else
1553 {
1554 if (!iam)
1555 {
1556 /* Compute row and column scalings to equilibrate matrix A. */
1557 dgsequ_dist(A, R, C, &rowcnd, &colcnd, &amax, &iinfo);
1558
1559 MPI_Bcast(&iinfo, 1, mpi_int_t, 0, grid->comm);
1560 if (iinfo == 0)
1561 {
1562 MPI_Bcast(R, m, MPI_DOUBLE, 0, grid->comm);
1563 MPI_Bcast(C, n, MPI_DOUBLE, 0, grid->comm);
1564 MPI_Bcast(&rowcnd, 1, MPI_DOUBLE, 0, grid->comm);
1565 MPI_Bcast(&colcnd, 1, MPI_DOUBLE, 0, grid->comm);
1566 MPI_Bcast(&amax, 1, MPI_DOUBLE, 0, grid->comm);
1567 }
1568 else
1569 {
1570 if (iinfo > 0)
1571 {
1572 if (iinfo <= m)
1573 {
1574 fprintf(stderr, "The %d-th row of A is exactly zero\n",
1575 iinfo);
1576 }
1577 else
1578 {
1579 fprintf(stderr, "The %d-th column of A is exactly zero\n",
1580 iinfo-n);
1581 }
1582 exit(-1);
1583 }
1584 }
1585 }
1586 else
1587 {
1588 MPI_Bcast(&iinfo, 1, mpi_int_t, 0, grid->comm);
1589 if (iinfo == 0)
1590 {
1591 MPI_Bcast(R, m, MPI_DOUBLE, 0, grid->comm);
1592 MPI_Bcast(C, n, MPI_DOUBLE, 0, grid->comm);
1593 MPI_Bcast(&rowcnd, 1, MPI_DOUBLE, 0, grid->comm);
1594 MPI_Bcast(&colcnd, 1, MPI_DOUBLE, 0, grid->comm);
1595 MPI_Bcast(&amax, 1, MPI_DOUBLE, 0, grid->comm);
1596 }
1597 else
1598 {
1599 ABORT("DGSEQU failed\n");
1600 }
1601 }
1602
1603 /* Equilibrate matrix A. */
1604 dlaqgs_dist(A, R, C, rowcnd, colcnd, amax, equed);
1605 if (lsame_(equed, "R"))
1606 {
1607 ScalePermstruct->DiagScale = rowequ = ROW;
1608 }
1609 else if (lsame_(equed, "C"))
1610 {
1611 ScalePermstruct->DiagScale = colequ = COL;
1612 }
1613 else if (lsame_(equed, "B"))
1614 {
1615 ScalePermstruct->DiagScale = BOTH;
1616 rowequ = ROW;
1617 colequ = COL;
1618 }
1619 else
1620 {
1621 ScalePermstruct->DiagScale = NOEQUIL;
1622 }
1623 } /* if Fact ... */
1624
1625 stat.utime[EQUIL] = SuperLU_timer_() - t;
1626 } /* if Equil ... */
1627
1628
1629 /* ------------------------------------------------------------
1630 Permute rows of A.
1631 ------------------------------------------------------------*/
1632 if ((int) options->RowPerm != (int) NO)
1633 {
1634 t = SuperLU_timer_();
1635
1636 if (Fact == SamePattern_SameRowPerm /* Reuse perm_r. */
1637 || options->RowPerm == MY_PERMR)
1638 {
1639 /* Use my perm_r. */
1640 /* for (j = 0; j < n; ++j) {
1641 for (i = colptr[j]; i < colptr[j+1]; ++i) {*/
1642 for (i = 0; i < colptr[n]; ++i)
1643 {
1644 irow = rowind[i];
1645 rowind[i] = perm_r[irow];
1646 /* }*/
1647 }
1648 }
1649 else if (!factored)
1650 {
1651 if (job == 5)
1652 {
1653 /* Allocate storage for scaling factors. */
1654 if (!(R1 = (double *) SUPERLU_MALLOC(m * sizeof(double))))
1655 ABORT("SUPERLU_MALLOC fails for R1[]");
1656 if (!(C1 = (double *) SUPERLU_MALLOC(n * sizeof(double))))
1657 ABORT("SUPERLU_MALLOC fails for C1[]");
1658 }
1659
1660 if (!iam)
1661 {
1662 /* Process 0 finds a row permutation for large diagonal. */
1663 dldperm(job, m, nnz, colptr, rowind, a, perm_r, R1, C1);
1664
1665 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
1666 if (job == 5 && Equil)
1667 {
1668 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
1669 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
1670 }
1671 }
1672 else
1673 {
1674 MPI_Bcast(perm_r, m, mpi_int_t, 0, grid->comm);
1675 if (job == 5 && Equil)
1676 {
1677 MPI_Bcast(R1, m, MPI_DOUBLE, 0, grid->comm);
1678 MPI_Bcast(C1, n, MPI_DOUBLE, 0, grid->comm);
1679 }
1680 }
1681
1682 if (job == 5)
1683 {
1684 if (Equil)
1685 {
1686 for (i = 0; i < n; ++i)
1687 {
1688 R1[i] = exp(R1[i]);
1689 C1[i] = exp(C1[i]);
1690 }
1691 for (j = 0; j < n; ++j)
1692 {
1693 for (i = colptr[j]; i < colptr[j+1]; ++i)
1694 {
1695 irow = rowind[i];
1696 a[i] *= R1[irow] * C1[j]; /* Scale the matrix. */
1697 rowind[i] = perm_r[irow];
1698 }
1699 }
1700
1701 /* Multiply together the scaling factors. */
1702 if (rowequ)
1703 {
1704 for (i = 0; i < m; ++i)
1705 {
1706 R[i] *= R1[i];
1707 }
1708 }
1709 else
1710 {
1711 for (i = 0; i < m; ++i)
1712 {
1713 R[i] = R1[i];
1714 }
1715 }
1716 if (colequ)
1717 {
1718 for (i = 0; i < n; ++i)
1719 {
1720 C[i] *= C1[i];
1721 }
1722 }
1723 else
1724 {
1725 for (i = 0; i < n; ++i)
1726 {
1727 C[i] = C1[i];
1728 }
1729 }
1730
1731 ScalePermstruct->DiagScale = BOTH;
1732 rowequ = colequ = 1;
1733 }
1734 else
1735 {
1736 /* No equilibration. */
1737 /* for (j = 0; j < n; ++j) {
1738 for (i = colptr[j]; i < colptr[j+1]; ++i) {*/
1739 for (i = colptr[0]; i < colptr[n]; ++i)
1740 {
1741 irow = rowind[i];
1742 rowind[i] = perm_r[irow];
1743 }
1744 /* }*/
1745 }
1746 SUPERLU_FREE(R1);
1747 SUPERLU_FREE(C1);
1748 }
1749 else
1750 {
1751 /* job = 2,3,4 */
1752 for (j = 0; j < n; ++j)
1753 {
1754 for (i = colptr[j]; i < colptr[j+1]; ++i)
1755 {
1756 irow = rowind[i];
1757 rowind[i] = perm_r[irow];
1758 }
1759 }
1760 }
1761 } /* else !factored */
1762
1763 t = SuperLU_timer_() - t;
1764 stat.utime[ROWPERM] = t;
1765 } /* if options->RowPerm ... */
1766
1767 if (!factored || options->IterRefine)
1768 {
1769 /* Compute norm(A), which will be used to adjust small diagonal. */
1770 if (notran)
1771 {
1772 *(unsigned char *)norm = '1';
1773 }
1774 else
1775 {
1776 *(unsigned char *)norm = 'I';
1777 }
1778 anorm = dlangs_dist(norm, A);
1779 }
1780
1781
1782
1783 /* ------------------------------------------------------------
1784 Perform the LU factorization.
1785 ------------------------------------------------------------*/
1786 if (!factored)
1787 {
1788 t = SuperLU_timer_();
1789 /*
1790 Get column permutation vector perm_c[], according to permc_spec:
1791 permc_spec = NATURAL: natural ordering
1792 permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
1793 permc_spec = MMD_ATA: minimum degree on structure of A'*A
1794 permc_spec = COLAMD: approximate minimum degree column ordering
1795 permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
1796 */
1797 permc_spec = options->ColPerm;
1798 if (permc_spec != MY_PERMC && Fact == DOFACT)
1799 {
1800 /* Use an ordering provided by SuperLU */
1801 get_perm_c_dist(iam, permc_spec, A, perm_c);
1802 }
1803
1804 /* Compute the elimination tree of Pc*(A'+A)*Pc' or Pc*A'*A*Pc'
1805 (a.k.a. column etree), depending on the choice of ColPerm.
1806 Adjust perm_c[] to be consistent with a postorder of etree.
1807 Permute columns of A to form A*Pc'. */
1808 sp_colorder(options, A, perm_c, etree, AC);
1809
1810 /* Form Pc*A*Pc' to preserve the diagonal of the matrix Pr*A. */
1811 ACstore = AC->Store;
1812 for (j = 0; j < n; ++j)
1813 {
1814 for (i = ACstore->colbeg[j]; i < ACstore->colend[j]; ++i)
1815 {
1816 irow = ACstore->rowind[i];
1817 ACstore->rowind[i] = perm_c[irow];
1818 }
1819 }
1820 stat.utime[COLPERM] = SuperLU_timer_() - t;
1821
1822 /* Perform a symbolic factorization on matrix A and set up the
1823 nonzero data structures which are suitable for supernodal GENP. */
1824 if (Fact != SamePattern_SameRowPerm)
1825 {
1826 t = SuperLU_timer_();
1827 if (!(Glu_freeable = (Glu_freeable_t *)
1828 SUPERLU_MALLOC(sizeof(Glu_freeable_t))))
1829 ABORT("Malloc fails for Glu_freeable.");
1830
1831 iinfo = symbfact(options, iam, AC, perm_c, etree,
1832 Glu_persist, Glu_freeable);
1833
1834 stat.utime[SYMBFAC] = SuperLU_timer_() - t;
1835
1836 if (iinfo < 0)
1837 {
1838 QuerySpace_dist(n, -iinfo, Glu_freeable,
1840 }
1841 else
1842 {
1843 if (!iam)
1844 {
1845 fprintf(stderr, "symbfact() error returns %d\n", iinfo);
1846 exit(-1);
1847 }
1848 }
1849 }
1850
1851 /* Distribute the L and U factors onto the process grid. */
1852 t = SuperLU_timer_();
1853 /* dist_mem_use = */
1854 ddistribute(Fact, n, AC, Glu_freeable, LUstruct, grid);
1855 stat.utime[DIST] = SuperLU_timer_() - t;
1856
1857 /* Deallocate storage used in symbolic factor. */
1858 if (Fact != SamePattern_SameRowPerm)
1859 {
1860 iinfo = symbfact_SubFree(Glu_freeable);
1861 SUPERLU_FREE(Glu_freeable);
1862 }
1863
1864 /* Perform numerical factorization in parallel. */
1865 t = SuperLU_timer_();
1866 pdgstrf(options, m, n, anorm, LUstruct, grid, &stat, info);
1867 stat.utime[FACT] = SuperLU_timer_() - t;
1868 }
1869 else if (options->IterRefine)
1870 {
1871 /* options->Fact==FACTORED */
1872 /* Permute columns of A to form A*Pc' using the existing perm_c.
1873 NOTE: rows of A were previously permuted to Pc*A.
1874 */
1875 sp_colorder(options, A, perm_c, NULL, AC);
1876 } /* if !factored ... */
1877
1878 if (*info!=0)
1879 {
1880 printf("Trouble in pdgstrf. Info=%i\n",*info);
1881 if (*info>0)
1882 {
1883 printf("U(%i,%i) is exactly zero. The factorization has\n",*info,*info);
1884 printf("been completed, but the factor U is exactly singular,\n");
1885 printf("and division by zero will occur if it is used to solve a\n");
1886 printf("system of equations.\n");
1887 }
1888 else
1889 {
1890 printf("The %i-th argument had an illegal value.\n", *info);
1891 }
1892 }
1893
1894 /* Print the statistics. */
1895 if ((doc==0) && (!iam))
1896 {
1897 printf("\nstats after setup....\n");
1898 PStatPrint(options, &stat, grid);
1899 }
1900
1901 /* ------------------------------------------------------------
1902 Set up data structure.
1903 ------------------------------------------------------------*/
1904 superlu_data->A = A;
1905 superlu_data->AC = AC;
1906 superlu_data->options = options;
1907 superlu_data->ScalePermstruct = ScalePermstruct;
1908 superlu_data->LUstruct = LUstruct;
1909 superlu_data->colequ = colequ;
1910 superlu_data->rowequ = rowequ;
1911 superlu_data->anorm = anorm;
1912 *data = superlu_data;
1913
1914 } /* End of setup */
1915
1916
1917 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1918 PERFORM A SOLVE
1919 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
1920 if (opt_flag==2)
1921 {
1922 /* Get pointer to the grid */
1923 superlu_data = (superlu_dist_data *)*data;
1924 grid = superlu_data->grid;
1925
1926 /* Bail out if I do not belong in the grid. */
1927 iam = grid->iam;
1928 if (iam >= nprow * npcol)
1929 {
1930 return;
1931 }
1932
1933 if ((doc==0)&&(!iam))
1934 {
1935 printf("\nPerforming SuperLU_DIST solve\n");
1936 }
1937
1938 /* ------------------------------------------------------------
1939 Set other pointers to data structure.
1940 ------------------------------------------------------------*/
1941 A = superlu_data->A;
1942 AC = superlu_data->AC;
1943 options = superlu_data->options;
1944 ScalePermstruct = superlu_data->ScalePermstruct;
1945 LUstruct = superlu_data->LUstruct;
1946 colequ = superlu_data->colequ;
1947 rowequ = superlu_data->rowequ;
1948 anorm = superlu_data->anorm;
1949
1950 /* Initialization. */
1951 Astore = A->Store;
1952 colptr = Astore->colptr;
1953 rowind = Astore->rowind;
1954 R = ScalePermstruct->R;
1955 C = ScalePermstruct->C;
1956 perm_r = ScalePermstruct->perm_r;
1957 perm_c = ScalePermstruct->perm_c;
1958
1959 /* Local control paramaters */
1960 Fact = options->Fact;
1961 factored = (Fact == FACTORED);
1962 Equil = (!factored && options->Equil == YES);
1963 notran = (options->Trans == NOTRANS);
1964
1965
1966 /* ------------------------------------------------------------
1967 Compute the solution matrix X.
1968 ------------------------------------------------------------*/
1969 if (!(b_work = doubleMalloc_dist(n)))
1970 {
1971 ABORT("Malloc fails for b_work[]");
1972 }
1973
1974 /* ------------------------------------------------------------
1975 Scale the right-hand side if equilibration was performed.
1976 ------------------------------------------------------------*/
1977 if (notran)
1978 {
1979 if (rowequ)
1980 {
1981 b_col = B;
1982 for (j = 0; j < nrhs; ++j)
1983 {
1984 for (i = 0; i < m; ++i)
1985 {
1986 b_col[i] *= R[i];
1987 }
1988 b_col += ldb;
1989 }
1990 }
1991 }
1992 else if (colequ)
1993 {
1994 b_col = B;
1995 for (j = 0; j < nrhs; ++j)
1996 {
1997 for (i = 0; i < m; ++i)
1998 {
1999 b_col[i] *= C[i];
2000 }
2001 b_col += ldb;
2002 }
2003 }
2004
2005 /* ------------------------------------------------------------
2006 Permute the right-hand side to form Pr*B.
2007 ------------------------------------------------------------*/
2008 if ((int) options->RowPerm != (int) NO)
2009 {
2010 if (notran)
2011 {
2012 b_col = B;
2013 for (j = 0; j < nrhs; ++j)
2014 {
2015 for (i = 0; i < m; ++i)
2016 {
2017 b_work[perm_r[i]] = b_col[i];
2018 }
2019 for (i = 0; i < m; ++i)
2020 {
2021 b_col[i] = b_work[i];
2022 }
2023 b_col += ldb;
2024 }
2025 }
2026 }
2027
2028
2029 /* ------------------------------------------------------------
2030 Permute the right-hand side to form Pc*B.
2031 ------------------------------------------------------------*/
2032 if (notran)
2033 {
2034 b_col = B;
2035 for (j = 0; j < nrhs; ++j)
2036 {
2037 for (i = 0; i < m; ++i)
2038 {
2039 b_work[perm_c[i]] = b_col[i];
2040 }
2041 for (i = 0; i < m; ++i)
2042 {
2043 b_col[i] = b_work[i];
2044 }
2045 b_col += ldb;
2046 }
2047 }
2048
2049
2050 /* Save a copy of the right-hand side. */
2051 ldx = ldb;
2052 if (!(X = doubleMalloc_dist(((size_t)ldx) * nrhs)))
2053 {
2054 ABORT("Malloc fails for X[]");
2055 }
2056
2057 x_col = X;
2058 b_col = B;
2059 for (j = 0; j < nrhs; ++j)
2060 {
2061 for (i = 0; i < ldb; ++i)
2062 {
2063 x_col[i] = b_col[i];
2064 }
2065 x_col += ldx;
2066 b_col += ldb;
2067 }
2068
2069 /* ------------------------------------------------------------
2070 Solve the linear system.
2071 ------------------------------------------------------------*/
2072 pdgstrs_Bglobal(n, LUstruct, grid, X, ldb, nrhs, &stat, info);
2073 if (*info!=0)
2074 {
2075 printf("Trouble in pdgstrs_Bglobal. Info=%i\n",*info);
2076 printf("The %i-th argument had an illegal value.\n", *info);
2077 }
2078
2079 /* ------------------------------------------------------------
2080 Use iterative refinement to improve the computed solution and
2081 compute error bounds and backward error estimates for it.
2082 ------------------------------------------------------------*/
2083 if (options->IterRefine)
2084 {
2085 /* Improve the solution by iterative refinement. */
2086 t = SuperLU_timer_();
2087
2088 /* Storage for backward error */
2089 if (!(berr = doubleMalloc_dist(nrhs)))
2090 {
2091 ABORT("Malloc fails for berr[].");
2092 }
2093
2094 pdgsrfs_ABXglobal(n, AC, anorm, LUstruct, grid, B, ldb,
2095 X, ldx, nrhs, berr, &stat, info);
2096 if (*info!=0)
2097 {
2098 printf("Trouble in pdgsrfs_ABXglobal. Info=%i\n",*info);
2099 printf("The %i-th argument had an illegal value.\n", *info);
2100 }
2101 stat.utime[REFINE] = SuperLU_timer_() - t;
2102 }
2103
2104 /* Print the statistics. */
2105 if ((doc==0) && (!iam))
2106 {
2107 printf("\nstats after solve....\n");
2108 PStatPrint(options, &stat, grid);
2109 }
2110
2111 /* Permute the solution matrix X <= Pc'*X. */
2112 for (j = 0; j < nrhs; j++)
2113 {
2114 b_col = &B[j*ldb];
2115 x_col = &X[j*ldx];
2116 for (i = 0; i < n; ++i)
2117 {
2118 b_col[i] = x_col[perm_c[i]];
2119 }
2120 }
2121
2122 /* Transform the solution matrix X to a solution of the original system
2123 before the equilibration. */
2124 if (notran)
2125 {
2126 if (colequ)
2127 {
2128 b_col = B;
2129 for (j = 0; j < nrhs; ++j)
2130 {
2131 for (i = 0; i < n; ++i)
2132 {
2133 b_col[i] *= C[i];
2134 }
2135 b_col += ldb;
2136 }
2137 }
2138 }
2139 else if (rowequ)
2140 {
2141 b_col = B;
2142 for (j = 0; j < nrhs; ++j)
2143 {
2144 for (i = 0; i < n; ++i)
2145 {
2146 b_col[i] *= R[i];
2147 }
2148 b_col += ldb;
2149 }
2150 }
2151
2152
2153 /* Clean up memory */
2154 if (options->IterRefine)
2155 {
2156 SUPERLU_FREE(berr);
2157 }
2158 SUPERLU_FREE(b_work);
2159 SUPERLU_FREE(X);
2160
2161 } /* End of solve */
2162
2163 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2164 PERFORM CLEAN UP OF MEMORY
2165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
2166 if (opt_flag==3)
2167 {
2168 /* Get pointer to the process grid */
2169 superlu_data = (superlu_dist_data *)*data;
2170 grid = superlu_data->grid;
2171
2172 /* Bail out if I do not belong in the grid. */
2173 iam = grid->iam;
2174 if (iam >= nprow * npcol) goto out;
2175 if ((doc==0)&&(!iam))
2176 {
2177 printf("\nCleaning up memory allocated for SuperLU_DIST\n");
2178 }
2179
2180 /* ------------------------------------------------------------
2181 Set pointers to the data structure.
2182 ------------------------------------------------------------*/
2183 A = superlu_data->A;
2184 AC = superlu_data->AC;
2185 options = superlu_data->options;
2186 ScalePermstruct = superlu_data->ScalePermstruct;
2187 LUstruct = superlu_data->LUstruct;
2188
2189 /* -------------------------------
2190 Set the other pointers required
2191 -------------------------------*/
2192 R = ScalePermstruct->R;
2193 C = ScalePermstruct->C;
2194
2195 /* Local control paramaters */
2196 Fact = options->Fact;
2197 factored = (Fact == FACTORED);
2198 Equil = (!factored && options->Equil == YES);
2199 rowequ = colequ = FALSE;
2200 if (factored || (Fact == SamePattern_SameRowPerm && Equil))
2201 {
2202 rowequ = (ScalePermstruct->DiagScale == ROW) ||
2203 (ScalePermstruct->DiagScale == BOTH);
2204 colequ = (ScalePermstruct->DiagScale == COL) ||
2205 (ScalePermstruct->DiagScale == BOTH);
2206 }
2207 else
2208 {
2209 rowequ = colequ = FALSE;
2210 }
2211
2212 /* Deallocate storage. */
2213 if (Equil && Fact != SamePattern_SameRowPerm)
2214 {
2215 switch (ScalePermstruct->DiagScale)
2216 {
2217 case NOEQUIL:
2218 SUPERLU_FREE(R);
2219 SUPERLU_FREE(C);
2220 break;
2221 case ROW:
2222 SUPERLU_FREE(C);
2223 break;
2224 case COL:
2225 SUPERLU_FREE(R);
2226 break;
2227 default:
2228 /* Apparently this one is ok */
2229 /* printf("diagscale: %i %i %i %i\n",ScalePermstruct->DiagScale,NOEQUIL,ROW,COL); */
2230 /* ABORT("Never get here."); */
2231 break;
2232 }
2233 }
2234 if (!factored || (factored && options->IterRefine))
2235 {
2236 Destroy_CompCol_Permuted_dist(AC);
2237 }
2238
2239 /* Free storage */
2240 ScalePermstructFree(ScalePermstruct);
2241 Destroy_LU(n, grid, LUstruct);
2242 LUstructFree(LUstruct);
2243 //Destroy_CompRowLoc_Matrix_dist(&A);
2244 // Only destroy the store part of the matrix
2245 Destroy_SuperMatrix_Store_dist(A);
2246
2247 /* Deallocate memory */
2248 SUPERLU_FREE(A);
2249 SUPERLU_FREE(AC);
2250 SUPERLU_FREE(ScalePermstruct);
2251 SUPERLU_FREE(LUstruct);
2252 SUPERLU_FREE(options);
2253
2254 /* Release the superlu process grid. */
2255out:
2256 superlu_gridexit(grid);
2257
2258 SUPERLU_FREE(grid);
2259 SUPERLU_FREE(superlu_data);
2260 }
2261
2262 /* Free storage */
2263 PStatFree(&stat);
2264
2265 return;
2266}
2267
2268
cstr elem_len * i
Definition cfortran.h:603
char t
Definition cfortran.h:568
mem_usage_t Memory_usage
Definition superlu.c:44
int Memory_usage_has_been_recorded
Definition superlu.c:47
SuperMatrix * AC
ScalePermstruct_t * ScalePermstruct
gridinfo_t * grid
SOLVEstruct_t * SOLVEstruct
SuperMatrix * A
superlu_options_t * options
LUstruct_t * LUstruct
double get_total_memory_usage_in_bytes_dist()
struct MemoryStatisticsStorage symbolic_memory_statistics_storage
double get_lu_factor_memory_usage_in_bytes_dist()
void superlu_cr_to_cc(int nrow, int ncol, int nnz, double *cr_values, int *cr_index, int *cr_start, double **cc_values, int **cc_index, int **cc_start)
void get_memory_usage_in_bytes_dist(double *lu_factor_memory, double *total_memory)
void superlu_dist_distributed_matrix(int opt_flag, int allow_permutations, int n, int nnz_local, int nrow_local, int first_row, double *values, int *col_index, int *row_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)
void superlu_dist_global_matrix(int opt_flag, int allow_permutations, int n_in, int nnz_in, double *values, int *row_index, int *col_start, double *b, int nprow, int npcol, int doc, void **data, int *info, MPI_Comm comm)