Hello guys,
I'm writing code to solve the navier stokes equations but the poisson problem isn't scaled as I'd like. Can anyone help me optimize the code?
Attached is a brief description and the code: PDF description
I'm using single locale;
CHPL_HOST_PLATFORM: linux64
CHPL_HOST_COMPILER: gnu
CHPL_HOST_CC: gcc
CHPL_HOST_CXX: g++
CHPL_HOST_ARCH: x86_64
CHPL_TARGET_PLATFORM: linux64
CHPL_TARGET_COMPILER: gnu
CHPL_TARGET_CC: gcc
CHPL_TARGET_CXX: g++
CHPL_TARGET_ARCH: x86_64
CHPL_TARGET_CPU: native
CHPL_LOCALE_MODEL: flat
CHPL_GPU_CODEGEN: none
CHPL_COMM: none
CHPL_TASKS: fifo *
CHPL_LAUNCHER: none
CHPL_TIMERS: generic
CHPL_UNWIND: none
CHPL_HOST_MEM: cstdlib
CHPL_MEM: cstdlib *
CHPL_ATOMICS: cstdlib
CHPL_GMP: none
CHPL_HWLOC: none *
CHPL_RE2: none
CHPL_LLVM: unset
CHPL_LLVM_CONFIG: none
CHPL_LLVM_VERSION: 0
CHPL_AUX_FILESYS: none
CHPL_LIB_PIC: none
CHPL_SANITIZE: none
CHPL_SANITIZE_EXE: none
module load chapel/1.27.0
source /modules/apps/chapel/chapel-1.27.0/util/setchplenv.bash && export CHPL_MEM=cstdlib CHPL_HWLOC=none CHPL_TASKS=fifo CHPL_RT_NUM_THREADS_PER_LOCALE=2
cd /lustre/anna2/poisson/chapel/grid130/
chpl --fast eliptico3D_coforall.chpl
time ./eliptico3D_coforall --imax=130 --jmax=130 --kmax=130 --npx=1 --npy=1 --npz=2 > coforall_130_112
chpl --fast eliptico3D_forall.chpl
time ./eliptico3D_forall --imax=130 --jmax=130 --kmax=130 > forall_130_2
Serial
// Finite Difference Solver for the Poisson equation (3D)
// Author: JESUS, Anna && GRION, Livia
// Last Modification: 20/11/2022
// Serial version
use Time;
var runtime: Timer;
config const imax, jmax, kmax : int;
const Lx = 1.0, Ly = 1.0, Lz = 1.0 : real;
const dx = Lx/imax, dy = Ly/jmax, dz = Lz/kmax : real;
const w = 1.3 : real;
const dx2 = dx*dx, dy2 = dy*dy, dz2 = dz*dz: real;
const beta1 = dx2/dy2 : real;
const beta2 = dx2/dz2 : real;
const tol = 1E-6: real;
const NPRIN = 500: int;
var f, tf, residuo : [1..imax, 1..jmax, 1..kmax] real;
var i, j, k: int;
var itera: int;
var x, y, z, erro : real;
var max_f, min_f : real;
runtime.start();
serial{
itera = 0;
residuo = 0.0;
for k in 1..kmax do{
z = (k-0.5)*dz ;
for j in 1..jmax do{
y = (j-0.5)*dy;
for i in 1..imax do{
x = (i-0.5)*dx;
tf(i,j,k) = -3.0*sin(x)*cos(y)*sin(z);
}
}
}
// Direcao - x
for j in 1..jmax do{
y = (j-0.5)*dy;
for k in 1..kmax do{
z = (k-0.5)*dz;
x = 0.5;
f(1,j,k) = sin(x)*cos(y)*sin(z);
x = (imax-0.5)*dx;
f(imax,j,k) = sin(x)*cos(y)*sin(z);
}
}
// Direcao - y
for i in 1..imax do{
x = (i-0.5)*dx;
for k in 1..kmax do{
z = (k-0.5)*dy;
y = 0.5;
f(i,1,k) = sin(x)*cos(y)*sin(z);
y = (jmax-0.5)*dy;
f(i,jmax,k) = sin(x)*cos(y)*sin(z);
}
}
// Direcao - z
for i in 1..imax do{
x = (i-0.5)*dx;
for j in 1..jmax do{
y = (j-0.5)*dy;
z = 0.5;
f(i,j,1) = sin(x)*cos(y)*sin(z);
z = (kmax-0.5)*dz ;
f(i,j,kmax) = sin(x)*cos(y)*sin(z);
}
}
erro = 1E0;
itera = 0;
while (erro >= tol) do{
f = sor_method(f, tf, imax, jmax, kmax, dx2, beta1, beta2, w);
residuo = calc_erro(f, tf, imax, jmax, kmax, dx2, dy2, dz2);
erro = max reduce(residuo);
itera += 1;
if (mod(itera,NPRIN) == 0) then{
writef('%i %6.12dr\n', itera, erro);
}
}
} // fim serial
runtime.stop();
escreve(f, imax, jmax, kmax, dx, dy, dz);
max_f = max reduce(f);
min_f = min reduce(f);
writef("%12.8er %12.8er \n", max_f, min_f);
writef(" CPU TIME CHAPEL = %10.16dr \n",runtime.elapsed());
writef("%i %12.8er \n",itera, erro);
writef("------------------------------------------------------------------------------ \n");
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc sor_method(f, tf, imax: int, jmax: int, kmax: int, dx2: real, beta1: real, beta2: real, w: real){
for i in 2..imax-1 do{
for j in 2..jmax-1 do{
for k in 2..kmax-1 do{
f(i,j,k) = (1.0 - w)*f(i,j,k) +
w * (-dx2*tf(i,j,k) + f(i-1,j,k) + f(i+1,j,k) +
beta1*( f(i,j-1,k) + f(i,j+1,k) ) +
beta2*( f(i,j,k-1) + f(i,j,k+1) ))/ ( 2.0*(beta1 + beta2 + 1.0) ) ;
}
}
}
return f;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc calc_erro(f, tf, imax: int, jmax: int, kmax: int, dx2: real, dy2: real, dz2: real){
var errol : [1..imax,1..jmax, 1..kmax] real;
errol = 0.0;
for i in 2..imax-1 do{
for j in 2..jmax-1 do{
for k in 2..kmax-1 do{
errol(i,j,k) = abs(tf(i,j,k) -
(f(i-1,j,k) - 2.0*f(i,j,k) + f(i+1,j,k))/dx2 -
(f(i,j-1,k) - 2.0*f(i,j,k) + f(i,j+1,k))/dy2 -
(f(i,j,k-1) - 2.0*f(i,j,k) + f(i,j,k+1))/dz2);
}
}
}
return errol;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc escreve(f, imax: int, jmax: int, kmax: int, dx: real, dy: real, dz: real){
use IO;
var erro: [1..imax,1..jmax, 1..kmax] real;
erro = 0.0;
const saida = openwriter("saida_serial.csv");
saida.writef(" %s %s %s %s %s %s %s %s %s \n", "x", ",", "y", ",", "z", ",", "f", ",", "e");
for k in 1..kmax do{
z = (k-0.5)*dz ;
for j in 1..jmax do{
y = (j-0.5)*dy;
for i in 1..imax do{
x = (i-0.5)*dx;
erro(i,j,k) = abs(f(i,j,k) - sin(x)*cos(y)*sin(z));
saida.writef("%10.12dr %s %10.12dr %s %10.12dr %s %10.12dr %s %10.12dr \n",
x, ",", y, ",", z, ",", f(i,j,k), ",", erro(i,j,k));
}
}
}
saida.close();
}
Coforall
// Finite Difference Solver for the Poisson equation (3D)
// Author: JESUS, Anna && GRION, Livia
// Last Modification: 20/11/2022
// Serial version
use Time;
var runtime: Timer;
config const imax, jmax, kmax : int;
config const npx, npy, npz: int;
const ptsx = (imax-2)/npx, ptsy = (jmax-2)/npy, ptsz = (kmax-2)/npz : int;
const Lx = 1.0, Ly = 1.0, Lz = 1.0 : real;
const dx = Lx/imax, dy = Ly/jmax, dz = Lz/kmax : real;
const w = 1.3 : real;
const dx2 = dx*dx, dy2 = dy*dy, dz2 = dz*dz: real;
const beta1 = dx2/dy2 : real;
const beta2 = dx2/dz2 : real;
const tol = 1E-6: real;
const NPRIN = 500;
const Inner = {2..imax-1, 2..jmax-1, 2..kmax-1};
var f, tf, residuo : [1..imax, 1..jmax, 1..kmax] real;
var i, j, k: int;
var itera: int;
var x, y, z, erro : real;
var max_f, min_f : real;
runtime.start();
itera = 0;
residuo = 0.0;
for k in 1..kmax do{
z = (k-0.5)*dz ;
for j in 1..jmax do{
y = (j-0.5)*dy;
for i in 1..imax do{
x = (i-0.5)*dx;
tf(i,j,k) = -3.0*sin(x)*cos(y)*sin(z);
}
}
}
// Direcao - x
for j in 1..jmax do{
y = (j-0.5)*dy;
for k in 1..kmax do{
z = (k-0.5)*dz;
x = 0.5;
f(1,j,k) = sin(x)*cos(y)*sin(z);
x = (imax-0.5)*dx;
f(imax,j,k) = sin(x)*cos(y)*sin(z);
}
}
// Direcao - y
for i in 1..imax do{
x = (i-0.5)*dx;
for k in 1..kmax do{
z = (k-0.5)*dy;
y = 0.5;
f(i,1,k) = sin(x)*cos(y)*sin(z);
y = (jmax-0.5)*dy;
f(i,jmax,k) = sin(x)*cos(y)*sin(z);
}
}
// Direcao - z
for i in 1..imax do{
x = (i-0.5)*dx;
for j in 1..jmax do{
y = (j-0.5)*dy;
z = 0.5;
f(i,j,1) = sin(x)*cos(y)*sin(z);
z = (kmax-0.5)*dz ;
f(i,j,kmax) = sin(x)*cos(y)*sin(z);
}
}
erro = 1E0;
itera = 0;
while (erro >= tol) do{
f = sor_method(f, tf, imax, jmax, kmax, dx2, beta1, beta2, w, ptsx, ptsy, ptsz);
residuo = calc_erro(f, tf, imax, jmax, kmax, dx2, dy2, dz2, ptsx, ptsy, ptsz);
erro = max reduce(residuo);
itera += 1;
if (mod(itera,NPRIN) == 0) then{
writef('%i %6.12dr\n', itera, erro);
}
}
runtime.stop();
escreve(f, imax, jmax, kmax, dx, dy, dz);
max_f = max reduce(f);
min_f = min reduce(f);
writef("%12.8er %12.8er \n", max_f, min_f);
writef(" CPU TIME CHAPEL = %10.16dr \n",runtime.elapsed());
writef("%i %12.8er \n",itera, erro);
writef("------------------------------------------------------------------------------ \n");
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc sor_method(f, tf, imax: int, jmax: int, kmax: int, dx2: real, beta1: real, beta2: real, w: real, ptsx: int, ptsy: int, ptsz: int){
coforall (ii,jj,kk) in {2..imax-1, 2..jmax-1, 2..kmax-1} by (ptsx, ptsy, ptsz) {
for i in ii..ii + ptsx-1 do{
for j in jj..jj + ptsy-1 do{
for k in kk..kk + ptsz-1 do{
f(i,j,k) = (1.0 - w)*f(i,j,k) +
w * (-dx2*tf(i,j,k) + f(i-1,j,k) + f(i+1,j,k) +
beta1*( f(i,j-1,k) + f(i,j+1,k) ) +
beta2*( f(i,j,k-1) + f(i,j,k+1) ))/ ( 2.0*(beta1 + beta2 + 1.0) ) ;
}
}
}
}
return f;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc calc_erro(f, tf, imax: int, jmax: int, kmax: int, dx2: real, dy2: real, dz2: real, ptsx: int, ptsy: int, ptsz: int){
var errol : [1..imax,1..jmax, 1..kmax] real;
errol = 0.0;
coforall (ii,jj,kk) in {2..imax-1, 2..jmax-1, 2..kmax-1} by (ptsx, ptsy, ptsz) {
for i in ii..ii + ptsx-1 do{
for j in jj..jj + ptsy-1 do{
for k in kk..kk + ptsz-1 do{
errol(i,j,k) = abs(tf(i,j,k) -
(f(i-1,j,k) - 2.0*f(i,j,k) + f(i+1,j,k))/dx2 -
(f(i,j-1,k) - 2.0*f(i,j,k) + f(i,j+1,k))/dy2 -
(f(i,j,k-1) - 2.0*f(i,j,k) + f(i,j,k+1))/dz2);
}
}
}
}
return errol;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc escreve(f, imax: int, jmax: int, kmax: int, dx: real, dy: real, dz: real){
use IO;
var erro: [1..imax,1..jmax, 1..kmax] real;
erro = 0.0;
const saida = openwriter("saida_coforall.csv");
saida.writef(" %s %s %s %s %s %s %s %s %s \n", "x", ",", "y", ",", "z", ",", "f", ",", "e");
for k in 1..kmax do{
z = (k-0.5)*dz ;
for j in 1..jmax do{
y = (j-0.5)*dy;
for i in 1..imax do{
x = (i-0.5)*dx;
erro(i,j,k) = abs(f(i,j,k) - sin(x)*cos(y)*sin(z));
saida.writef("%10.12dr %s %10.12dr %s %10.12dr %s %10.12dr %s %10.12dr \n",
x, ",", y, ",", z, ",", f(i,j,k), ",", erro(i,j,k));
}
}
}
saida.close();
}
Forall
// Finite Difference Solver for the Poisson equation (3D)
// Author: JESUS, Anna && GRION, Livia
// Last Modification: 20/10/2022
// Parallel version
use Time;
var runtime: Timer;
config const imax, jmax, kmax : int;
const Lx = 1.0, Ly = 1.0, Lz = 1.0 : real;
const dx = Lx/imax, dy = Ly/jmax, dz = Lz/kmax : real;
const w = 1.3 : real;
const dx2 = dx*dx, dy2 = dy*dy, dz2 = dz*dz: real;
const beta1 = dx2/dy2 : real;
const beta2 = dx2/dz2 : real;
const tol = 1E-6: real;
const Inner = {2..imax-1, 2..jmax-1, 2..kmax-1};
const domPoisson = {1..imax, 1..jmax, 1..kmax};
const NPRIN = 500;
var f, tf, residuo : [1..imax, 1..jmax, 1..kmax] real;
var i, j, k: int;
var itera: int;
var x, y, z, erro : real;
var max_f, min_f : real;
itera = 0;
residuo = 0.0;
runtime.start();
for k in 1..kmax do{
z = (k-0.5)*dz ;
for j in 1..jmax do{
y = (j-0.5)*dy;
for i in 1..imax do{
x = (i-0.5)*dx;
tf(i,j,k) = -3.0*sin(x)*cos(y)*sin(z);
}
}
}
// Direcao - x
for j in 1..jmax do{
y = (j-0.5)*dy;
for k in 1..kmax do{
z = (k-0.5)*dz;
x = 0.5;
f(1,j,k) = sin(x)*cos(y)*sin(z);
x = (imax-0.5)*dx;
f(imax,j,k) = sin(x)*cos(y)*sin(z);
}
}
// Direcao - y
for i in 1..imax do{
x = (i-0.5)*dx;
for k in 1..kmax do{
z = (k-0.5)*dy;
y = 0.5;
f(i,1,k) = sin(x)*cos(y)*sin(z);
y = (jmax-0.5)*dy;
f(i,jmax,k) = sin(x)*cos(y)*sin(z);
}
}
for i in 1..imax do{
x = (i-0.5)*dx;
for j in 1..jmax do{
y = (j-0.5)*dy;
z = 0.5;
f(i,j,1) = sin(x)*cos(y)*sin(z);
z = (kmax-0.5)*dz ;
f(i,j,kmax) = sin(x)*cos(y)*sin(z);
}
}
erro = 1E0;
itera = 0;
while (erro >= tol) do{
f = sor_method(f, tf, dx2, beta1, beta2, w);
residuo = calc_erro(f, tf, dx2, dy2, dz2);
erro = max reduce(residuo);
itera += 1;
if (mod(itera,NPRIN) == 0) then{
writef('%i %6.12dr\n', itera, erro);
}
}
escreve(f, imax, jmax, kmax, dx, dy, dz);
runtime.stop();
max_f = max reduce(f);
min_f = min reduce(f);
writef("%12.8er %12.8er \n", max_f, min_f);
writef(" CPU TIME CHAPEL = %10.16dr \n",runtime.elapsed());
writef("%i %12.8er \n",itera, erro);
writef("------------------------------------------------------------------------------ \n");
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc sor_method(f, tf, dx2: real, beta1: real, beta2: real, w: real){
forall (i,j,k) in Inner do{
f(i,j,k) = (1.0 - w)*f(i,j,k) +
w * (-dx2*tf(i,j,k) + f(i-1,j,k) + f(i+1,j,k) +
beta1*( f(i,j-1,k) + f(i,j+1,k) ) +
beta2*( f(i,j,k-1) + f(i,j,k+1) ))/ ( 2.0*(beta1 + beta2 + 1.0) ) ;
}
return f;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc calc_erro(f, tf, dx2: real, dy2: real, dz2: real){
var errol : [1..imax,1..jmax, 1..kmax] real;
errol = 0.0;
forall (i,j,k) in Inner do{
errol(i,j,k) = abs(tf(i,j,k) -
(f(i-1,j,k) - 2.0*f(i,j,k) + f(i+1,j,k))/dx2 -
(f(i,j-1,k) - 2.0*f(i,j,k) + f(i,j+1,k))/dy2 -
(f(i,j,k-1) - 2.0*f(i,j,k) + f(i,j,k+1))/dz2);
}
return errol;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
proc escreve(f, imax: int, jmax: int, kmax: int, dx: real, dy: real, dz: real){
use IO;
var erro: [1..imax,1..jmax, 1..kmax] real;
const saida = openwriter("saida_chapel_par.csv");
saida.writef(" %s %s %s %s %s %s %s %s %s \n", "x", ",", "y", ",", "z", ",", "f", ",", "e");
for k in 1..kmax do{
z = (k-0.5)*dz ;
for j in 1..jmax do{
y = (j-0.5)*dy;
for i in 1..imax do{
x = (i-0.5)*dx;
erro(i,j,k) = abs(f(i,j,k) - sin(x)*cos(y)*sin(z));
saida.writef("%10.12dr %s %10.12dr %s %10.12dr %s %10.12dr %s %10.12dr \n",
x, ",", y, ",", z, ",", f(i,j,k), ",", erro(i,j,k));
}
}
}
saida.close();
}