KTH framework for Nek5000 toolboxes; testing version  0.0.1
fem_amg_preco.c
Go to the documentation of this file.
1 /*
2  * Low-Order finite element preconditioner computed with HYPRE's AMG solver
3 */
4 
5 #include <math.h>
6 #include "gslib.h"
7 
8 #define fem_amg_setup FORTRAN_UNPREFIXED(fem_amg_setup, FEM_AMG_SETUP)
9 #define fem_amg_solve FORTRAN_UNPREFIXED(fem_amg_solve, FEM_AMG_SOLVE)
10 
11 #ifdef HYPRE
12 
13 /* Headers */
14 #include "_hypre_utilities.h"
15 #include "HYPRE_parcsr_ls.h"
16 #include "_hypre_parcsr_ls.h"
17 #include "HYPRE.h"
18 
19 #include "fem_amg_preco.h"
20 
21 /* Global variables */
22 int precond_type;
23 int meshing_type;
24 int n_x, n_y, n_z, n_elem, n_dim;
25 int n_xyz, n_xyze;
26 double *x_m, *y_m, *z_m;
27 long long *glo_num;
28 double *pmask;
29 double *binv;
30 int num_loc_dofs;
31 long long *dof_map;
32 int amg_ready = 0;
33 long long row_start;
34 long long row_end;
35 HYPRE_IJMatrix A_bc;
36 HYPRE_ParCSRMatrix A_fem;
37 HYPRE_IJMatrix B_bc;
38 HYPRE_ParCSRMatrix B_fem;
39 HYPRE_IJVector Bd_bc;
40 HYPRE_ParVector Bd_fem;
41 HYPRE_IJVector f_bc;
42 HYPRE_ParVector f_fem;
43 HYPRE_IJVector u_bc;
44 HYPRE_ParVector u_fem;
45 HYPRE_IJVector Bf_bc;
46 HYPRE_ParVector Bf_fem;
47 HYPRE_IJVector Binv_sem_bc;
48 HYPRE_ParVector Binv_sem;
49 HYPRE_Solver amg_preconditioner;
50 
51 struct comm comm;
52 struct gs_data *gsh;
53 
54 #define NPARAM 10
55 double HYPREsettings[NPARAM];
56 
57 /* Interface definition */
58 void fem_amg_setup(const sint *n_x_, const sint *n_y_, const sint *n_z_,
59  const sint *n_elem_, const sint *n_dim_,
60  double *x_m_, double *y_m_, double *z_m_,
61  double *pmask_, double *binv_, const sint *nullspace,
62  const sint *gshf, double *param)
63 {
64  precond_type = 1;
65  meshing_type = 0;
66  n_x = *n_x_;
67  n_y = *n_y_;
68  n_z = *n_z_;
69  n_elem = *n_elem_;
70  n_dim = *n_dim_;
71  x_m = x_m_;
72  y_m = y_m_;
73  z_m = z_m_;
74  pmask = pmask_;
75  binv = binv_;
76 
77  n_xyz = n_x * n_y * n_z;
78  n_xyze = n_x * n_y * n_z * n_elem;
79 
80  gsh = gs_hf2c(*gshf);
81  comm_init(&comm, gsh->comm.c);
82 
83  if (comm.id == 0) printf("fem_amg_setup ...\n");
84 
85  if (sizeof(HYPRE_Int) != sizeof(long long)) {
86  if (comm.id == 0)
87  fail(1,__FILE__,__LINE__,"incompatible long long size");
88  exit(EXIT_FAILURE);
89  }
90 
91  double time0 = comm_time();
93  fem_assembly();
94 
95  long long row;
96  long long row_start = hypre_ParCSRMatrixFirstRowIndex(A_fem);
97  long long row_end = hypre_ParCSRMatrixLastRowIndex(A_fem);
98 
99  HYPRE_IJVectorCreate(comm.c, row_start, row_end, &u_bc);
100  HYPRE_IJVectorSetObjectType(u_bc, HYPRE_PARCSR);
101  HYPRE_IJVectorInitialize(u_bc);
102  HYPRE_IJVectorAssemble(u_bc);
103  HYPRE_IJVectorGetObject(u_bc, (void**) &u_fem);
104 
105  HYPRE_IJVectorCreate(comm.c, row_start, row_end, &f_bc);
106  HYPRE_IJVectorSetObjectType(f_bc, HYPRE_PARCSR);
107  HYPRE_IJVectorInitialize(f_bc);
108  HYPRE_IJVectorAssemble(f_bc);
109  HYPRE_IJVectorGetObject(f_bc, (void**) &f_fem);
110 
111  HYPRE_IJVectorCreate(comm.c, row_start, row_end, &Bf_bc);
112  HYPRE_IJVectorSetObjectType(Bf_bc, HYPRE_PARCSR);
113  HYPRE_IJVectorInitialize(Bf_bc);
114  HYPRE_IJVectorAssemble(Bf_bc);
115  HYPRE_IJVectorGetObject(Bf_bc, (void**) &Bf_fem);
116 
117  HYPRE_IJVectorCreate(comm.c, row_start, row_end, &Binv_sem_bc);
118  HYPRE_IJVectorSetObjectType(Binv_sem_bc, HYPRE_PARCSR);
119  HYPRE_IJVectorInitialize(Binv_sem_bc);
120 
121  for (row = row_start; row <= row_end; row++)
122  HYPRE_IJVectorSetValues(Binv_sem_bc, 1, &row, &(binv[dof_map[row - row_start]]));
123 
124  HYPRE_IJVectorAssemble(Binv_sem_bc);
125  HYPRE_IJVectorGetObject(Binv_sem_bc, (void**) &Binv_sem);
126 
127  HYPRE_BoomerAMGCreate(&amg_preconditioner);
128 
129  int uparam = (int) param[0];
130 
131  int i;
132  if (uparam) {
133  for (i=1; i < NPARAM; i++) {
134  HYPREsettings[i-1] = param[i];
135  if (comm.id == 0)
136  printf("Custom HYPREsettings[%d]: %.2f\n", i, HYPREsettings[i-1]);
137  }
138  } else {
139  HYPREsettings[0] = 10; /* HMIS */
140  HYPREsettings[1] = 6; /* Extended+i */
141  HYPREsettings[2] = 3; /* SOR is default smoother */
142  HYPREsettings[3] = 3; /* SOR smoother for crs level */
143  HYPREsettings[4] = 1;
144  HYPREsettings[5] = 0.25;
145  HYPREsettings[6] = 0.1;
146  }
147 
148  HYPRE_BoomerAMGSetCoarsenType(amg_preconditioner, HYPREsettings[0]);
149  HYPRE_BoomerAMGSetInterpType(amg_preconditioner, HYPREsettings[1]);
150  if (*nullspace == 1 || (int) param[0] != 0) {
151  HYPRE_BoomerAMGSetRelaxType(amg_preconditioner, HYPREsettings[2]);
152  HYPRE_BoomerAMGSetCycleRelaxType(amg_preconditioner, HYPREsettings[3], 3); /* Coarse grid solver */
153  HYPRE_BoomerAMGSetCycleNumSweeps(amg_preconditioner, HYPREsettings[4], 3); /* Number of sweeps at coarse level */
154  }
155  HYPRE_BoomerAMGSetStrongThreshold(amg_preconditioner, HYPREsettings[5]);
156  HYPRE_BoomerAMGSetNonGalerkinTol(amg_preconditioner, HYPREsettings[6]);
157  if (uparam) {
158  HYPRE_BoomerAMGSetLevelNonGalerkinTol(amg_preconditioner, HYPREsettings[7], 0);
159  HYPRE_BoomerAMGSetLevelNonGalerkinTol(amg_preconditioner, HYPREsettings[8], 1);
160 
161  fflush(stdout);HYPRE_BoomerAMGSetLevelNonGalerkinTol(amg_preconditioner, HYPREsettings[9], 2);
162  }
163 
164  HYPRE_BoomerAMGSetMaxRowSum(amg_preconditioner, 1); /* Don't check for maximum row sum */
165  HYPRE_BoomerAMGSetPMaxElmts(amg_preconditioner, 4);
166  HYPRE_BoomerAMGSetMaxCoarseSize(amg_preconditioner, 50);
167  HYPRE_BoomerAMGSetMaxIter(amg_preconditioner,1);
168  HYPRE_BoomerAMGSetTol(amg_preconditioner, 0);
169  HYPRE_BoomerAMGSetPrintLevel(amg_preconditioner, 1);
170 
171  HYPRE_BoomerAMGSetup(amg_preconditioner, A_fem, NULL, NULL);
172 
173  double time1 = comm_time();
174  if (comm.id == 0) printf("fem_amg_setup: done %fs\n",time1-time0);
175  fflush(stdout);
176  amg_ready = 1;
177 }
178 
179 void fem_amg_solve(double *z, double *w)
180 {
181  long long row;
182  int idx;
183 
184  if (amg_ready == 0)
185  {
186  if (comm.id == 0)
187  fail(1,__FILE__,__LINE__,"AMG hasn't been setup");
188  exit(EXIT_FAILURE);
189  }
190 
191  /* Choose preconditioner type */
192  HYPRE_IJVectorInitialize(f_bc);
193 
194  switch (precond_type)
195  {
196  /* No mass operator */
197  case 1:
198  for (row = row_start; row <= row_end; row++)
199  HYPRE_IJVectorSetValues(f_bc, 1, &row, &(w[dof_map[row - row_start]]));
200 
201  break;
202 
203  /* Diagonal mass operator */
204  case 2:
205  for (row = row_start; row <= row_end; row++)
206  {
207  double Bd_fem_value;
208  double Binv_sem_value;
209 
210  HYPRE_IJVectorGetValues(Bd_bc, 1, &row, &Bd_fem_value);
211  HYPRE_IJVectorGetValues(Binv_sem_bc, 1, &row, &Binv_sem_value);
212 
213  double value = Bd_fem_value * Binv_sem_value * w[dof_map[row - row_start]];
214 
215  HYPRE_IJVectorSetValues(f_bc, 1, &row, &value);
216  }
217 
218  break;
219 
220  /* Full mass operator */
221  case 3:
222  for (row = row_start; row <= row_end; row++)
223  {
224  double Binv_sem_value;
225 
226  HYPRE_IJVectorGetValues(Binv_sem_bc, 1, &row, &Binv_sem_value);
227 
228  double value = Binv_sem_value * w[dof_map[row - row_start]];
229 
230  HYPRE_IJVectorSetValues(Bf_bc, 1, &row, &value);
231  }
232 
233  HYPRE_ParCSRMatrixMatvec(1.0, B_fem, Bf_fem, 0.0, f_fem);
234 
235  break;
236  }
237 
238  HYPRE_IJVectorAssemble(f_bc);
239 
240  /* Solve preconditioned system */
241  HYPRE_BoomerAMGSolve(amg_preconditioner, A_fem, f_fem, u_fem);
242 
243  /* Map data back to SEM mesh */
244  for (idx = 0; idx < n_xyze; idx++)
245  z[idx] = 0.0;
246 
247  for (row = row_start; row <= row_end; row++)
248  HYPRE_IJVectorGetValues(u_bc, 1, &row, &(z[dof_map[row - row_start]]));
249 
250  gs(z, gs_double, gs_add, 0, gsh, 0);
251 }
252 
253 /* FEM Assembly definition */
254 void matrix_distribution()
255 {
256  /*
257  * Ranks the global numbering array after removing the Dirichlet nodes
258  * which is then used in the assembly of the matrices to map degrees of
259  * freedom to rows of the matrix
260  */
261 
262  int idx;
263  buffer my_buffer;
264  long long idx_start = n_xyze;
265  long long scan_out[2], scan_buf[2];
266  comm_scan(scan_out, &comm, gs_long_long, gs_add, &idx_start, 1, scan_buf);
267  idx_start = scan_out[0];
268 
269  glo_num = mem_alloc_1D_long(n_xyze);
270 
271  for (idx = 0; idx < n_xyze; idx++)
272  {
273  if (pmask[idx] > 0.0)
274  glo_num[idx] = idx_start + (long long)idx;
275  else
276  glo_num[idx] = -1;
277  }
278 
279  gs(glo_num, gs_long_long, gs_min, 0, gsh, 0);
280 
281  /* Rank ids */
282  long long maximum_value_local = 0;
283  long long maximum_value = 0;
284 
285  for (idx = 0; idx < n_xyze; idx++)
286  {
287  maximum_value_local = (glo_num[idx] > maximum_value_local) ? glo_num[idx] : maximum_value_local;
288  }
289 
290  comm_allreduce(&comm, gs_long_long, gs_max, &maximum_value_local, 1, &maximum_value);
291  const long long nstar = maximum_value/comm.np + 1;
292 
293  struct ranking_tuple
294  {
295  long long rank;
296  unsigned int proc;
297  unsigned int idx;
298  };
299 
300  struct array ranking_transfer;
301  array_init(struct ranking_tuple, &ranking_transfer, n_xyze);
302  ranking_transfer.n = n_xyze;
303  struct ranking_tuple *ranking_tuple_array = ranking_transfer.ptr;
304 
305  for (idx = 0; idx < ranking_transfer.n; idx++)
306  {
307  ranking_tuple_array[idx].rank = glo_num[idx];
308  ranking_tuple_array[idx].proc = glo_num[idx] / nstar;
309  ranking_tuple_array[idx].idx = idx;
310  }
311 
312  struct crystal crystal_router_handle;
313  crystal_init(&crystal_router_handle, &comm);
314  sarray_transfer(struct ranking_tuple, &ranking_transfer, proc, 1, &crystal_router_handle);
315  ranking_tuple_array = ranking_transfer.ptr;
316 
317  buffer_init(&my_buffer, 1);
318  sarray_sort(struct ranking_tuple, ranking_transfer.ptr, ranking_transfer.n, rank, 1, &my_buffer);
319 
320  long long current_rank = ranking_tuple_array[0].rank;
321  long long current_count = 0;
322  ranking_tuple_array[0].rank = current_count;
323 
324  for (idx = 1; idx < ranking_transfer.n; idx++) {
325 
326  if (ranking_tuple_array[idx].rank > current_rank) {
327  current_count++;
328  current_rank = ranking_tuple_array[idx].rank;
329  ranking_tuple_array[idx].rank = current_count;
330  } else if (ranking_tuple_array[idx].rank == current_rank) {
331  ranking_tuple_array[idx].rank = current_count;
332  } else {
333  break;
334  }
335  }
336 
337  current_count += 1;
338 
339  long long rank_start;
340  comm_scan(scan_out, &comm, gs_long_long, gs_add, &current_count, 1, scan_buf);
341  rank_start = scan_out[0];
342 
343  for (idx = 0; idx < ranking_transfer.n; idx++)
344  {
345  ranking_tuple_array[idx].rank += rank_start;
346  }
347 
348  sarray_transfer(struct ranking_tuple, &ranking_transfer, proc, 1, &crystal_router_handle);
349  ranking_tuple_array = ranking_transfer.ptr;
350 
351  buffer_init(&my_buffer, 1);
352  sarray_sort(struct ranking_tuple, ranking_transfer.ptr, ranking_transfer.n, idx, 0, &my_buffer);
353 
354  for (idx = 0; idx < n_xyze; idx++)
355  {
356  glo_num[idx] = ranking_tuple_array[idx].rank;
357  }
358 
359  array_free(&ranking_transfer);
360  crystal_free(&crystal_router_handle);
361 }
362 
363 void fem_assembly()
364 {
365  /*
366  * Assembles the low-order FEM matrices from the spectral element mesh
367  *
368  * Returns A_fem and B_fem
369  */
370 
371  /* Variables */
372  int i, j, k, e, d, t, q;
373  int idx;
374  long long row;
375 
376  /*
377  * Rank and prepare data to be mapped to rows of the matrix so it can
378  * be solved with Hypre (Ranking done on the Fortran side since here it fails
379  * for some reason I couldn't figure out)
380  */
381  long long *ranking = mem_alloc_1D_long(n_xyze);
382 
383  for (idx = 0; idx < n_xyze; idx++)
384  ranking[idx] = glo_num[idx];
385 
386  row_start = 0;
387  row_end = 0;
388 
389  for (idx = 0; idx < n_xyze; idx++)
390  if (ranking[idx] >= 0) row_end = maximum(row_end, ranking[idx]);
391 
392  long long scan_out[2], scan_buf[2];
393  comm_scan(scan_out, &comm, gs_long_long, gs_max, &row_end, 1, scan_buf);
394  if (comm.id > 0) row_start = scan_out[0] + 1;
395 
396  num_loc_dofs = row_end - row_start + 1;
397 
398  dof_map = mem_alloc_1D_long(num_loc_dofs);
399 
400  for (idx = 0; idx < n_xyze; idx++)
401  {
402  if ((row_start <= ranking[idx]) && (ranking[idx] <= row_end))
403  {
404  dof_map[ranking[idx] - row_start] = idx;
405  }
406  }
407 
408  /* Assemble FE matrices with boundary conditions applied */
409  HYPRE_IJMatrixCreate(comm.c, row_start, row_end, row_start, row_end, &A_bc);
410  HYPRE_IJMatrixSetObjectType(A_bc, HYPRE_PARCSR);
411  HYPRE_IJMatrixInitialize(A_bc);
412 
413  HYPRE_IJMatrixCreate(comm.c, row_start, row_end, row_start, row_end, &B_bc);
414  HYPRE_IJMatrixSetObjectType(B_bc, HYPRE_PARCSR);
415  HYPRE_IJMatrixInitialize(B_bc);
416 
417  HYPRE_IJVectorCreate(comm.c, row_start, row_end, &Bd_bc);
418  HYPRE_IJVectorSetObjectType(Bd_bc, HYPRE_PARCSR);
419  HYPRE_IJVectorInitialize(Bd_bc);
420 
421  double *Bd_sum = mem_alloc_1D_double(n_xyze);
422  for (i = 0; i < n_xyze; i++) Bd_sum[i] = 0.0;
423 
424  /* Set quadrature rule */
425  int n_quad = (n_dim == 2) ? 3 : 4;
426  double **q_r;
427  double *q_w;
428 
429  quadrature_rule(&q_r, &q_w, n_quad, n_dim);
430 
431  /* Mesh connectivity (Can be changed to fill-out or one-per-vertex) */
432  int num_fem;
433  int **v_coord;
434  int **t_map;
435 
436  if (n_dim == 2)
437  num_fem = (meshing_type == 0) ? 4 : 2;
438  else
439  num_fem = (meshing_type == 0) ? 8 : 6;
440 
441  mesh_connectivity(&v_coord, &t_map, num_fem, n_dim);
442 
443  /* Finite element assembly */
444  double **A_loc = mem_alloc_2D_double(n_dim + 1, n_dim + 1);
445  double **B_loc = mem_alloc_2D_double(n_dim + 1, n_dim + 1);
446  double **J_xr = mem_alloc_2D_double(n_dim, n_dim);
447  double **J_rx = mem_alloc_2D_double(n_dim, n_dim);
448  double **x_t = mem_alloc_2D_double(n_dim, n_dim + 1);
449  double *q_x = mem_alloc_1D_double(n_dim);
450  double *dp = mem_alloc_1D_double(n_dim);
451 
452  Basis phi[n_dim + 1];
453  DBasis dphi[n_dim + 1];
454 
455  if (n_dim == 2)
456  {
457  phi[0] = phi_2D_1; phi[1] = phi_2D_2; phi[2] = phi_2D_3;
458  dphi[0] = dphi_2D_1; dphi[1] = dphi_2D_2; dphi[2] = dphi_2D_3;
459  }
460  else
461  {
462  phi[0] = phi_3D_1; phi[1] = phi_3D_2; phi[2] = phi_3D_3; phi[3] = phi_3D_4;
463  dphi[0] = dphi_3D_1; dphi[1] = dphi_3D_2; dphi[2] = dphi_3D_3; dphi[3] = dphi_3D_4;
464  }
465 
466  int s_x, s_y, s_z;
467  int E_x = n_x - 1;
468  int E_y = n_y - 1;
469  int E_z = (n_dim == 2) ? 1 : n_z - 1;
470 
471  for (e = 0; e < n_elem; e++)
472  {
473  /* Cycle through collocated quads/hexes */
474  for (s_z = 0; s_z < E_z; s_z++)
475  {
476  for (s_y = 0; s_y < E_y; s_y++)
477  {
478  for (s_x = 0; s_x < E_x; s_x++)
479  {
480  /* Get indices */
481  int s[n_dim];
482 
483  if (n_dim == 2)
484  {
485  s[0] = s_x; s[1] = s_y;
486  }
487  else
488  {
489  s[0] = s_x; s[1] = s_y; s[2] = s_z;
490  }
491 
492  int idx[(int)(pow(2, n_dim))];
493 
494  for (i = 0; i < pow(2, n_dim); i++)
495  {
496  idx[i] = 0;
497 
498  for (d = 0; d < n_dim; d++)
499  {
500  idx[i] += (s[d] + v_coord[i][d]) * pow(n_x, d);
501  }
502  }
503 
504  /* Cycle through collocated triangles/tets */
505  for (t = 0; t < num_fem; t++)
506  {
507  /* Get vertices */
508  for (i = 0; i < n_dim + 1; i++)
509  {
510  if (n_dim == 2)
511  {
512  x_t[0][i] = x_m[idx[t_map[t][i]] + e * n_xyz];
513  x_t[1][i] = y_m[idx[t_map[t][i]] + e * n_xyz];
514  }
515  else
516  {
517  x_t[0][i] = x_m[idx[t_map[t][i]] + e * n_xyz];
518  x_t[1][i] = y_m[idx[t_map[t][i]] + e * n_xyz];
519  x_t[2][i] = z_m[idx[t_map[t][i]] + e * n_xyz];
520  }
521  }
522 
523  /* Local FEM matrices */
524  /* Reset local stiffness and mass matrices */
525  for (i = 0; i < n_dim + 1; i++)
526  {
527  for (j = 0; j < n_dim + 1; j++)
528  {
529  A_loc[i][j] = 0.0;
530  B_loc[i][j] = 0.0;
531  }
532  }
533 
534  /* Build local stiffness matrices by applying quadrature rules */
535  for (q = 0; q < n_quad; q++)
536  {
537  /* From r to x */
538  x_map(&q_x, q_r[q], x_t, n_dim, phi);
539  J_xr_map(&J_xr, q_r[q], x_t, n_dim, dphi);
540  inverse(&J_rx, J_xr, n_dim);
541  double det_J_xr = determinant(J_xr, n_dim);
542 
543  /* Integrand */
544  for (i = 0; i < n_dim + 1; i++)
545  {
546  for (j = 0; j < n_dim + 1; j++)
547  {
548  int alpha, beta;
549  double func = 0.0;
550 
551  for (alpha = 0; alpha < n_dim; alpha++)
552  {
553  double a = 0.0, b = 0.0;
554 
555  for (beta = 0; beta < n_dim; beta++)
556  {
557  dphi[i](&dp, q_r[q]);
558  a += dp[beta] * J_rx[beta][alpha];
559 
560  dphi[j](&dp, q_r[q]);
561  b += dp[beta] * J_rx[beta][alpha];
562  }
563 
564  func += a * b;
565  }
566 
567  A_loc[i][j] += func * det_J_xr * q_w[q];
568  B_loc[i][j] += phi[i](q_r[q]) * phi[j](q_r[q]) * det_J_xr * q_w[q];
569  }
570  }
571  }
572 
573  /* Add to global matrix */
574  for (i = 0; i < n_dim + 1; i++)
575  {
576  for (j = 0; j < n_dim + 1; j++)
577  {
578  if ((pmask[idx[t_map[t][i]] + e * n_xyz] > 0.0) && (pmask[idx[t_map[t][j]] + e * n_xyz] > 0.0))
579  {
580  long long row = ranking[idx[t_map[t][i]] + e * n_xyz];
581  long long col = ranking[idx[t_map[t][j]] + e * n_xyz];
582 
583  double A_val = A_loc[i][j];
584  double B_val = B_loc[i][j];
585 
586  long long ncols = 1;
587  int insert_error;
588 
589  if (fabs(A_val) > 1.0e-14)
590  {
591  insert_error = HYPRE_IJMatrixAddToValues(A_bc, 1, &ncols, &row, &col, &A_val);
592  }
593 
594  if (fabs(B_val) > 1.0e-14)
595  {
596  insert_error = HYPRE_IJMatrixAddToValues(B_bc, 1, &ncols, &row, &col, &B_val);
597  }
598 
599  if (insert_error != 0)
600  {
601  if (comm.id == 0)
602  printf("There was an error with entry A(%lld, %lld) = %f or B(%lld, %lld) = %f\n", row, col, A_val, row, col, B_val);
603 
604  exit(EXIT_FAILURE);
605  }
606  }
607 
608  int row = idx[t_map[t][i]] + e * n_xyz;
609  int col = idx[t_map[t][j]] + e * n_xyz;
610  Bd_sum[row] += B_loc[i][j];
611  Bd_sum[col] += B_loc[i][j];
612  }
613  }
614  }
615  }
616  }
617  }
618  }
619 
620  for (idx = 0; idx < n_xyze; idx++)
621  Bd_sum[idx] /= 2.0;
622 
623  gs(Bd_sum, gs_double, gs_add, 0, gsh, 0);
624 
625  for (row = row_start; row <= row_end; row++)
626  HYPRE_IJVectorSetValues(Bd_bc, 1, &row, &(Bd_sum[dof_map[row - row_start]]));
627 
628  HYPRE_IJMatrixAssemble(A_bc);
629  HYPRE_IJMatrixGetObject(A_bc, (void**) &A_fem);
630 
631  HYPRE_IJMatrixAssemble(B_bc);
632  HYPRE_IJMatrixGetObject(B_bc, (void**) &B_fem);
633 
634  HYPRE_IJVectorAssemble(Bd_bc);
635  HYPRE_IJVectorGetObject(Bd_bc, (void**) &Bd_fem);
636 
637  /* Free memory */
638  mem_free_1D_long(&ranking, n_xyze);
639  mem_free_1D_long(&glo_num, n_xyze);
640  mem_free_1D_double(&Bd_sum, n_xyze);
641  mem_free_2D_double(&q_r, n_quad, n_dim);
642  mem_free_1D_double(&q_w, n_quad);
643  mem_free_2D_int(&v_coord, pow(n_dim, 2), n_dim);
644  mem_free_2D_int(&t_map, num_fem, n_dim + 1);
645  mem_free_2D_double(&A_loc, n_dim + 1, n_dim + 1);
646  mem_free_2D_double(&B_loc, n_dim + 1, n_dim + 1);
647  mem_free_2D_double(&J_xr, n_dim, n_dim);
648  mem_free_2D_double(&J_rx, n_dim, n_dim);
649  mem_free_2D_double(&x_t, n_dim, n_dim + 1);
650  mem_free_1D_double(&q_x, n_dim);
651  mem_free_1D_double(&dp, n_dim);
652 }
653 
654 void quadrature_rule(double ***q_r, double **q_w, int n_quad, int n_dim)
655 {
656  (*q_r) = mem_alloc_2D_double(n_quad, n_dim);
657  (*q_w) = mem_alloc_1D_double(n_quad);
658 
659  if (n_dim == 2)
660  {
661  if (n_quad == 3)
662  {
663  (*q_r)[0][0] = 1.0 / 6.0; (*q_r)[0][1] = 1.0 / 6.0;
664  (*q_r)[1][0] = 2.0 / 3.0; (*q_r)[1][1] = 1.0 / 6.0;
665  (*q_r)[2][0] = 1.0 / 6.0; (*q_r)[2][1] = 2.0 / 3.0;
666 
667  (*q_w)[0] = 1.0 / 6.0;
668  (*q_w)[1] = 1.0 / 6.0;
669  (*q_w)[2] = 1.0 / 6.0;
670  }
671  else if (n_quad == 4)
672  {
673  (*q_r)[0][0] = 1.0 / 3.0; (*q_r)[0][1] = 1.0 / 3.0;
674  (*q_r)[1][0] = 1.0 / 5.0; (*q_r)[1][1] = 3.0 / 5.0;
675  (*q_r)[2][0] = 1.0 / 5.0; (*q_r)[2][1] = 1.0 / 5.0;
676  (*q_r)[3][0] = 3.0 / 5.0; (*q_r)[3][1] = 1.0 / 5.0;
677 
678  (*q_w)[0] = - 27.0 / 96.0;
679  (*q_w)[1] = 25.0 / 96.0;
680  (*q_w)[2] = 25.0 / 96.0;
681  (*q_w)[3] = 25.0 / 96.0;
682  }
683  else
684  {
685  printf("No quadrature rule for %d points available\n", n_quad);
686  exit(EXIT_FAILURE);
687  }
688  }
689  else
690  {
691  if (n_quad == 4)
692  {
693  double a = (5.0 + 3.0 * sqrt(5.0)) / 20.0;
694  double b = (5.0 - sqrt(5.0)) / 20.0;
695 
696  (*q_r)[0][0] = a; (*q_r)[0][1] = b; (*q_r)[0][2] = b;
697  (*q_r)[1][0] = b; (*q_r)[1][1] = a; (*q_r)[1][2] = b;
698  (*q_r)[2][0] = b; (*q_r)[2][1] = b; (*q_r)[2][2] = a;
699  (*q_r)[3][0] = b; (*q_r)[3][1] = b; (*q_r)[3][2] = b;
700 
701  (*q_w)[0] = 1.0 / 24.0;
702  (*q_w)[1] = 1.0 / 24.0;
703  (*q_w)[2] = 1.0 / 24.0;
704  (*q_w)[3] = 1.0 / 24.0;
705  }
706  else if (n_quad == 5)
707  {
708  (*q_r)[0][0] = 1.0 / 2.0; (*q_r)[0][1] = 1.0 / 6.0; (*q_r)[0][2] = 1.0 / 6.0;
709  (*q_r)[1][0] = 1.0 / 6.0; (*q_r)[1][1] = 1.0 / 2.0; (*q_r)[1][2] = 1.0 / 6.0;
710  (*q_r)[2][0] = 1.0 / 6.0; (*q_r)[2][1] = 1.0 / 6.0; (*q_r)[2][2] = 1.0 / 2.0;
711  (*q_r)[3][0] = 1.0 / 6.0; (*q_r)[3][1] = 1.0 / 6.0; (*q_r)[3][2] = 1.0 / 6.0;
712  (*q_r)[4][0] = 1.0 / 4.0; (*q_r)[4][1] = 1.0 / 4.0; (*q_r)[4][2] = 1.0 / 4.0;
713 
714  (*q_w)[0] = 9.0 / 20.0;
715  (*q_w)[1] = 9.0 / 20.0;
716  (*q_w)[2] = 9.0 / 20.0;
717  (*q_w)[3] = 9.0 / 20.0;
718  (*q_w)[4] = - 4.0 / 5.0;
719  }
720  else
721  {
722  printf("No quadrature rule for %d points available\n", n_quad);
723  exit(EXIT_FAILURE);
724  }
725  }
726 }
727 
728 void mesh_connectivity(int ***v_coord, int ***t_map, int num_fem, int n_dim)
729 {
730  (*v_coord) = mem_alloc_2D_int(pow(n_dim, 2), n_dim);
731  (*t_map) = mem_alloc_2D_int(num_fem, n_dim + 1);
732 
733  if (n_dim == 2)
734  {
735  (*v_coord)[0][0] = 0; (*v_coord)[0][1] = 0;
736  (*v_coord)[1][0] = 1; (*v_coord)[1][1] = 0;
737  (*v_coord)[2][0] = 0; (*v_coord)[2][1] = 1;
738  (*v_coord)[3][0] = 1; (*v_coord)[3][1] = 1;
739 
740  if (num_fem == 2)
741  {
742  (*t_map)[0][0] = 0; (*t_map)[0][1] = 1; (*t_map)[0][2] = 3;
743  (*t_map)[1][0] = 0; (*t_map)[1][1] = 3; (*t_map)[1][2] = 2;
744  }
745  else if (num_fem == 4)
746  {
747  (*t_map)[0][0] = 1; (*t_map)[0][1] = 2; (*t_map)[0][2] = 0;
748  (*t_map)[1][0] = 3; (*t_map)[1][1] = 0; (*t_map)[1][2] = 1;
749  (*t_map)[2][0] = 0; (*t_map)[2][1] = 3; (*t_map)[2][2] = 2;
750  (*t_map)[3][0] = 2; (*t_map)[3][1] = 1; (*t_map)[3][2] = 3;
751  }
752  else
753  {
754  printf("Wrong number of triangles\n");
755  exit(EXIT_FAILURE);
756  }
757  }
758  else
759  {
760  (*v_coord)[0][0] = 0; (*v_coord)[0][1] = 0; (*v_coord)[0][2] = 0;
761  (*v_coord)[1][0] = 1; (*v_coord)[1][1] = 0; (*v_coord)[1][2] = 0;
762  (*v_coord)[2][0] = 0; (*v_coord)[2][1] = 1; (*v_coord)[2][2] = 0;
763  (*v_coord)[3][0] = 1; (*v_coord)[3][1] = 1; (*v_coord)[3][2] = 0;
764  (*v_coord)[4][0] = 0; (*v_coord)[4][1] = 0; (*v_coord)[4][2] = 1;
765  (*v_coord)[5][0] = 1; (*v_coord)[5][1] = 0; (*v_coord)[5][2] = 1;
766  (*v_coord)[6][0] = 0; (*v_coord)[6][1] = 1; (*v_coord)[6][2] = 1;
767  (*v_coord)[7][0] = 1; (*v_coord)[7][1] = 1; (*v_coord)[7][2] = 1;
768 
769  if (num_fem == 6)
770  {
771  (*t_map)[0][0] = 0; (*t_map)[0][1] = 2; (*t_map)[0][2] = 1; (*t_map)[0][3] = 5;
772  (*t_map)[1][0] = 1; (*t_map)[1][1] = 2; (*t_map)[1][2] = 3; (*t_map)[1][3] = 5;
773  (*t_map)[2][0] = 0; (*t_map)[2][1] = 4; (*t_map)[2][2] = 2; (*t_map)[2][3] = 5;
774  (*t_map)[3][0] = 5; (*t_map)[3][1] = 3; (*t_map)[3][2] = 7; (*t_map)[3][3] = 2;
775  (*t_map)[4][0] = 4; (*t_map)[4][1] = 5; (*t_map)[4][2] = 6; (*t_map)[4][3] = 2;
776  (*t_map)[5][0] = 5; (*t_map)[5][1] = 7; (*t_map)[5][2] = 6; (*t_map)[5][3] = 2;
777  }
778  else if (num_fem == 8)
779  {
780  (*t_map)[0][0] = 0; (*t_map)[0][1] = 2; (*t_map)[0][2] = 1; (*t_map)[0][3] = 4;
781  (*t_map)[1][0] = 1; (*t_map)[1][1] = 0; (*t_map)[1][2] = 3; (*t_map)[1][3] = 5;
782  (*t_map)[2][0] = 2; (*t_map)[2][1] = 6; (*t_map)[2][2] = 3; (*t_map)[2][3] = 0;
783  (*t_map)[3][0] = 3; (*t_map)[3][1] = 2; (*t_map)[3][2] = 7; (*t_map)[3][3] = 1;
784  (*t_map)[4][0] = 4; (*t_map)[4][1] = 5; (*t_map)[4][2] = 6; (*t_map)[4][3] = 0;
785  (*t_map)[5][0] = 5; (*t_map)[5][1] = 7; (*t_map)[5][2] = 4; (*t_map)[5][3] = 1;
786  (*t_map)[6][0] = 6; (*t_map)[6][1] = 7; (*t_map)[6][2] = 2; (*t_map)[6][3] = 4;
787  (*t_map)[7][0] = 7; (*t_map)[7][1] = 3; (*t_map)[7][2] = 6; (*t_map)[7][3] = 5;
788  }
789  else
790  {
791  printf("Wrong number of tetrahedrals\n");
792  exit(EXIT_SUCCESS);
793  }
794  }
795 }
796 
797 void x_map(double **x, double *r, double **x_t, int n_dim, Basis *phi)
798 {
799  int i, d;
800 
801  for (d = 0; d < n_dim; d++)
802  {
803  (*x)[d] = 0.0;
804 
805  for (i = 0; i < n_dim + 1; i++)
806  {
807  (*x)[d] += x_t[d][i] * phi[i](r);
808  }
809  }
810 }
811 
812 void J_xr_map(double ***J_xr, double *r, double **x_t, int n_dim, DBasis *dphi)
813 {
814  int i, j, k;
815  double *deriv = mem_alloc_1D_double(n_dim);
816 
817  for (i = 0; i < n_dim; i++)
818  {
819  for (j = 0; j < n_dim; j++)
820  {
821  (*J_xr)[i][j] = 0.0;
822 
823  for (k = 0; k < n_dim + 1; k++)
824  {
825  dphi[k](&deriv, r);
826 
827  (*J_xr)[i][j] += x_t[i][k] * deriv[j];
828  }
829  }
830  }
831 
832  mem_free_1D_double(&deriv, n_dim);
833 }
834 
835 /* Basis functions and derivatives in 2D */
836 double phi_2D_1(double *r) { return r[0]; }
837 double phi_2D_2(double *r) { return r[1]; }
838 double phi_2D_3(double *r) { return 1.0 - r[0] - r[1]; }
839 void dphi_2D_1(double **dp, double *r) { (*dp)[0] = 1.0; (*dp)[1] = 0.0; }
840 void dphi_2D_2(double **dp, double *r) { (*dp)[0] = 0.0; (*dp)[1] = 1.0; }
841 void dphi_2D_3(double **dp, double *r) { (*dp)[0] = -1.0; (*dp)[1] = -1.0; }
842 
843 /* Basis functions and derivatives in 3D */
844 double phi_3D_1(double *r) { return r[0]; }
845 double phi_3D_2(double *r) { return r[1]; }
846 double phi_3D_3(double *r) { return r[2]; }
847 double phi_3D_4(double *r) { return 1.0 - r[0] - r[1] - r[2]; }
848 void dphi_3D_1(double **dp, double *r) { (*dp)[0] = 1.0; (*dp)[1] = 0.0; (*dp)[2] = 0.0; }
849 void dphi_3D_2(double **dp, double *r) { (*dp)[0] = 0.0; (*dp)[1] = 1.0; (*dp)[2] = 0.0; }
850 void dphi_3D_3(double **dp, double *r) { (*dp)[0] = 0.0; (*dp)[1] = 0.0; (*dp)[2] = 1.0; }
851 void dphi_3D_4(double **dp, double *r) { (*dp)[0] = -1.0; (*dp)[1] = -1.0; (*dp)[2] = -1.0; }
852 
853 /* Math functions */
854 long long maximum(long long a, long long b)
855 {
856  return a > b ? a : b;
857 }
858 
859 double determinant(double** A, int n)
860 {
861  /*
862  * Computes the determinant of a matrix
863  */
864  if (n == 2)
865  {
866  double d_1 = A[0][0] * A[1][1] - A[1][0] * A[0][1];
867 
868  return d_1;
869  }
870  else if (n == 3)
871  {
872  double d_1 = A[0][0] * (A[1][1] * A[2][2] - A[2][1] * A[1][2]);
873  double d_2 = A[0][1] * (A[1][0] * A[2][2] - A[2][0] * A[1][2]);
874  double d_3 = A[0][2] * (A[1][0] * A[2][1] - A[2][0] * A[1][1]);
875 
876  return d_1 - d_2 + d_3;
877  }
878  else if (n == 4)
879  {
880  double d_1 = A[0][0];
881  double d_11 = A[1][1] * (A[2][2] * A[3][3] - A[3][2] * A[2][3]);
882  double d_12 = A[1][2] * (A[2][1] * A[3][3] - A[3][1] * A[2][3]);
883  double d_13 = A[1][3] * (A[2][1] * A[3][2] - A[3][1] * A[2][2]);
884 
885  double d_2 = A[0][1];
886  double d_21 = A[1][0] * (A[2][2] * A[3][3] - A[3][2] * A[2][3]);
887  double d_22 = A[1][2] * (A[2][0] * A[3][3] - A[3][0] * A[2][3]);
888  double d_23 = A[1][3] * (A[2][0] * A[3][2] - A[3][0] * A[2][2]);
889 
890  double d_3 = A[0][2];
891  double d_31 = A[1][0] * (A[2][1] * A[3][3] - A[3][1] * A[2][3]);
892  double d_32 = A[1][1] * (A[2][0] * A[3][3] - A[3][0] * A[2][3]);
893  double d_33 = A[1][3] * (A[2][0] * A[3][1] - A[3][0] * A[2][1]);
894 
895  double d_4 = A[0][3];
896  double d_41 = A[1][0] * (A[2][1] * A[3][2] - A[3][1] * A[2][2]);
897  double d_42 = A[1][1] * (A[2][0] * A[3][2] - A[3][0] * A[2][2]);
898  double d_43 = A[1][2] * (A[2][0] * A[3][1] - A[3][0] * A[2][1]);
899 
900  return d_1 * (d_11 - d_12 + d_13) - d_2 * (d_21 - d_22 + d_23) + d_3 * (d_31 - d_32 + d_33) - d_4 * (d_41 - d_42 + d_43);
901  }
902  else
903  {
904  printf("ERROR: No determinant implementation for matrices of size > 4\n");
905  exit(EXIT_FAILURE);
906  }
907 }
908 
909 void inverse(double*** inv_A, double** A, int n)
910 {
911  /*
912  * Computes the inverse of a matrix
913  */
914  double det_A = determinant(A, n);
915 
916  if (n == 2)
917  {
918  (*inv_A)[0][0] = (1.0 / det_A) * A[1][1];
919  (*inv_A)[0][1] = -(1.0 / det_A) * A[0][1];
920  (*inv_A)[1][0] = -(1.0 / det_A) * A[1][0];
921  (*inv_A)[1][1] = (1.0 / det_A) * A[0][0];
922  }
923  else if (n == 3)
924  {
925  (*inv_A)[0][0] = (1.0 / det_A) * (A[1][1] * A[2][2] - A[2][1] * A[1][2]);
926  (*inv_A)[0][1] = (1.0 / det_A) * (A[0][2] * A[2][1] - A[2][2] * A[0][1]);
927  (*inv_A)[0][2] = (1.0 / det_A) * (A[0][1] * A[1][2] - A[1][1] * A[0][2]);
928  (*inv_A)[1][0] = (1.0 / det_A) * (A[1][2] * A[2][0] - A[2][2] * A[1][0]);
929  (*inv_A)[1][1] = (1.0 / det_A) * (A[0][0] * A[2][2] - A[2][0] * A[0][2]);
930  (*inv_A)[1][2] = (1.0 / det_A) * (A[0][2] * A[1][0] - A[1][2] * A[0][0]);
931  (*inv_A)[2][0] = (1.0 / det_A) * (A[1][0] * A[2][1] - A[2][0] * A[1][1]);
932  (*inv_A)[2][1] = (1.0 / det_A) * (A[0][1] * A[2][0] - A[2][1] * A[0][0]);
933  (*inv_A)[2][2] = (1.0 / det_A) * (A[0][0] * A[1][1] - A[1][0] * A[0][1]);
934  }
935  else
936  {
937  printf("ERROR: No inverse implementation for matrices of size > 3\n");
938  exit(EXIT_FAILURE);
939  }
940 }
941 
942 /* Memory management */
943 int* mem_alloc_1D_int(int n)
944 {
945  int *array = (int*) malloc(n * sizeof(int));
946 
947  return array;
948 }
949 
950 long long* mem_alloc_1D_long(int n)
951 {
952  long long *array = (long long*) malloc(n * sizeof(long long));
953 
954  return array;
955 }
956 
957 double* mem_alloc_1D_double(int n)
958 {
959  double *array = (double*) malloc(n * sizeof(double));
960 
961  return array;
962 }
963 
964 int** mem_alloc_2D_int(int n, int m)
965 {
966  int i;
967  int **array = (int**) malloc(n * sizeof(int*));
968 
969  for (i = 0; i < n; i++)
970  array[i] = (int*) malloc(m * sizeof(int));
971 
972  return array;
973 }
974 
975 double** mem_alloc_2D_double(int n, int m)
976 {
977  int i;
978  double **array = (double**) malloc(n * sizeof(double*));
979 
980  for (i = 0; i < n; i++)
981  array[i] = (double*) malloc(m * sizeof(double));
982 
983  return array;
984 }
985 
986 void mem_free_1D_int(int **array, int n)
987 {
988  free((*array));
989 }
990 
991 void mem_free_1D_long(long long **array, int n)
992 {
993  free((*array));
994 }
995 
996 void mem_free_1D_double(double **array, int n)
997 {
998  free((*array));
999 }
1000 
1001 void mem_free_2D_int(int ***array, int n, int m)
1002 {
1003  int i;
1004 
1005  for (i = 0; i < n; i++)
1006  free((*array)[i]);
1007 
1008  free((*array));
1009 }
1010 
1011 void mem_free_2D_double(double ***array, int n, int m)
1012 {
1013  int i;
1014 
1015  for (i = 0; i < n; i++)
1016  free((*array)[i]);
1017 
1018  free((*array));
1019 }
1020 #else
1021 void fem_amg_setup(const sint *n_x_, const sint *n_y_, const sint *n_z_,
1022  const sint *n_elem_, const sint *n_dim_,
1023  double *x_m_, double *y_m_, double *z_m_,
1024  double *pmask_, double *binv_, const sint *nullspace,
1025  const sint *gshf)
1026 {
1027  fail(1,__FILE__,__LINE__,"please recompile with HYPRE support");
1028 }
1029 void fem_amg_solve(double *z, double *w)
1030 {
1031  fail(1,__FILE__,__LINE__,"please recompile with HYPRE support");
1032 }
1033 #endif
#define fem_amg_solve
Definition: fem_amg_preco.c:9
#define fem_amg_setup
Definition: fem_amg_preco.c:8
void dphi_3D_4(double **, double *)
void dphi_3D_3(double **, double *)
void mem_free_2D_double(double ***, int, int)
void mem_free_1D_double(double **, int)
long long * mem_alloc_1D_long(int)
void mem_free_2D_int(int ***, int, int)
void fem_assembly()
void dphi_2D_2(double **, double *)
void matrix_distribution()
long long maximum(long long, long long)
void J_xr_map(double ***, double *, double **, int, DBasis *)
double phi_3D_2(double *)
double phi_2D_2(double *)
double phi_3D_3(double *)
int * mem_alloc_1D_int(int)
void mesh_connectivity(int ***, int ***, int, int)
void quadrature_rule(double ***, double **, int, int)
double(* Basis)(double *)
Definition: fem_amg_preco.h:1
double phi_3D_1(double *)
void inverse(double ***, double **, int)
int ** mem_alloc_2D_int(int, int)
double * mem_alloc_1D_double(int)
double determinant(double **, int)
void dphi_3D_1(double **, double *)
void mem_free_1D_long(long long **, int)
double phi_3D_4(double *)
double ** mem_alloc_2D_double(int, int)
void dphi_2D_1(double **, double *)
double phi_2D_3(double *)
void dphi_3D_2(double **, double *)
void dphi_2D_3(double **, double *)
void(* DBasis)(double **, double *)
Definition: fem_amg_preco.h:2
void mem_free_1D_int(int **, int)
double phi_2D_1(double *)
void x_map(double **, double *, double **, int, Basis *)
The nomenclature of the interpolating fields saved by Nek into the binary file int_fld is here explained w
Definition: field_list.txt:2
struct comm comm
Definition: fem_amg_preco.h:43