Hello @damianmoz and @annacfs, wrt parallelizing the SOR method, one possible option is to use a "checker-board" approach as described on this page: CS267: Notes for Lectures 15 and 16, Mar 5 and 7, 1996.
The basic idea is to split your array into two groups of non-adjacent elements (in 2D, this ends up looking like the squares of a checkerboard). Then, the new values of each group can be computed in parallel, because they don't rely on the values of the neighboring elements.
I've put together a modified version of your code that uses this approach. It also makes some small adjustments to the array initialization, and simplifies the calc_erro
proc by doing a maximum reduction at the same time as the computation.
I can't be 100% sure that this will match results from your original Fortran code, but hopefully it's a helpful start (at least conceptually) for parallelizing that loop:
use Time;
var runtime: Timer;
// set up domains
config const imax, jmax, kmax = 10;
const domPoisson = {1..imax, 1..jmax, 1..kmax}, // the full domain
dpInner = domPoisson[domPoisson.expand(-1)]; // the full domain, not including boundaries
// set up arrays
var f, tf : [domPoisson] real = 0.0;
// set up other parameters
const Lx = 1.0, Ly = 1.0, Lz = 1.0 : real;
const dx = Lx/imax, dy = Ly/jmax, dz = Lz/kmax : real;
const w = 1.3 : real;
const dx2 = dx*dx, dy2 = dy*dy, dz2 = dz*dz: real;
const beta1 = dx2/dy2,
beta2 = dx2/dz2 : real;
const tol = 1E-6: real;
const NPRIN = 500;
var i, j, k: int;
var itera: int = 0;
var erro : real = 1E0;
var max_f, min_f : real;
// create arrays of x,y,z coordinates so that we don't have to recompute them during iteration:
const x_coords = [i in domPoisson.dim(0)] (i - 0.5) * dx,
y_coords = [j in domPoisson.dim(1)] (j - 0.5) * dy,
z_coords = [k in domPoisson.dim(2)] (k - 0.5) * dz;
// initialize source term:
forall (i, j, k) in domPoisson do
tf[i, j, k] = -3.0 * sin(x_coords[i]) * cos(y_coords[j]) * sin(z_coords[k]);
// initialize boundary conditions:
forall j in domPoisson.dim(1) {
for k in domPoisson.dim(2) {
f(1, j, k) = sin(0.5) * cos(y_coords[j]) * sin(z_coords[k]);
f(imax, j, k) = sin(x_coords[imax]) * cos(y_coords[j]) * sin(z_coords[k]);
}
}
forall i in domPoisson.dim(0) {
for k in domPoisson.dim(2) {
f(i, 1, k) = sin(x_coords[i]) * cos(0.5) * sin(z_coords[k]);
f(i, jmax, k) = sin(x_coords[i]) * cos(y_coords[jmax]) * sin(z_coords[k]);
}
}
forall i in domPoisson.dim(0) {
for j in domPoisson.dim(1) {
f(i, j, 1) = sin(x_coords[1]) * cos(y_coords[j]) * sin(0.5);
f(i, j, kmax) = sin(x_coords[1]) * cos(y_coords[j]) * sin(z_coords[kmax]);
}
}
// temporary copy of 'f'
var f_next = f;
runtime.start();
while erro >= tol {
// compute 'f_next'
sor_parallel(f_next, f, tf, dx2, beta1, beta2, w);
// swap 'f_next' into 'f' for the next iteration
f <=> f_next;
// compute the error residual
erro = calc_erro(f, tf, dx2, dy2, dz2);
itera += 1;
if (mod(itera,NPRIN) == 0) {
writef('%i %6.12dr\n', itera, erro);
}
}
runtime.stop();
escreve(f, imax, jmax, kmax, dx, dy, dz);
max_f = max reduce(f);
min_f = min reduce(f);
writef("%12.8er %12.8er \n", max_f, min_f);
writef(" CPU TIME CHAPEL = %10.16dr \n", runtime.elapsed());
writef("%i %12.8er \n",itera, erro);
writef("------------------------------------------------------------------------------ \n");
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc sor_parallel(ref f_next, const ref f, const ref tf, dx2: real, beta1: real, beta2: real, w: real) {
// update the "red spaces" in the checkerboard domain in parallel
forall (i, j, k) in dpInner {
if ((i+j)%2 == 0 && k%2 == 0) || ((i+j)%2 != 0 && k%2 != 0) {
f_next(i,j,k) = (1.0 - w) * f(i, j, k) + w *
(-dx2*tf(i,j,k) + f(i-1,j,k) + f(i+1,j,k) +
beta1*( f(i,j-1,k) + f(i,j+1,k) ) +
beta2*( f(i,j,k-1) + f(i,j,k+1) )) /
( 2.0*(beta1 + beta2 + 1.0) );
}
}
// update the "black spaces" in the checkerboard domain in parallel
forall (i, j, k) in dpInner {
if ((i+j)%2 != 0 && k%2 == 0) || ((i+j)%2 == 0 && k%2 != 0) {
f_next(i,j,k) = (1.0 - w) * f(i, j, k) + w *
(-dx2*tf(i,j,k) + f_next(i-1,j,k) + f_next(i+1,j,k) +
beta1*( f_next(i,j-1,k) + f_next(i,j+1,k) ) +
beta2*( f_next(i,j,k-1) + f_next(i,j,k+1) )) /
( 2.0*(beta1 + beta2 + 1.0) );
}
}
}
proc calc_erro(f, tf, dx2: real, dy2: real, dz2: real): real {
// do a min-reduction during error calculation,
// so that results don't have to be stored in 'residuo'
var max_error: real;
forall (i, j, k) in dpInner with (max reduce max_error) {
max_error reduce= abs(tf(i,j,k) -
(f(i-1,j,k) - 2.0*f(i,j,k) + f(i+1,j,k))/dx2 -
(f(i,j-1,k) - 2.0*f(i,j,k) + f(i,j+1,k))/dy2 -
(f(i,j,k-1) - 2.0*f(i,j,k) + f(i,j,k+1))/dz2);
}
return max_error;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc escreve(f, imax: int, jmax: int, kmax: int, dx: real, dy: real, dz: real){
use IO;
var erro: real;
const saida = openwriter("saida_chapel_par.csv");
saida.writef(" %s %s %s %s %s %s %s %s %s \n", "x", ",", "y", ",", "z", ",", "f", ",", "e");
for (i, j, k) in domPoisson {
erro = abs( f(i, j, k) - (sin(x_coords[i]) * cos(y_coords[j]) * sin(z_coords[k])) );
saida.writef("%10.12dr %s %10.12dr %s %10.12dr %s %10.12dr %s %10.12dr \n",
x_coords[i], ",", y_coords[j], ",", z_coords[k], ",", f(i,j,k), ",", erro);
}
saida.close();
}