19636, "annacfs", "array operations: time", "2022-04-15T17:06:03Z"
Summary
I'm implementing a finite-volume code to solve the Navier-Stokes equations. The objective of this work is to evaluate the Chapel language for fluid mechanics simulations.
Loops over arrays take much more simulation time in Chapel than the equivalent loops in Fortran. Is there a more efficient way to write them in Chapel?
`
For example:
In chapel:
// Setup phase of the solver: declares the grid and physical parameters,
// allocates the field arrays, and assigns the initial values.
// The `runtime` timer is started after the declarations and stopped after the
// last assignment, so it brackets only the assignments in between.
use LinearAlgebra;
use Time;
var runtime: Timer;
param Lx = 1.0, Ly = 1.0 : real; // Domain length in the x and y directions
param Nx = 20, Ny = 20 : int; // Number of cells in the x and y directions
param dx = Lx/Nx, dy = Ly/Ny : real; // Finite-volume cell size in x and y
param u0 = 1.0 : real; // Lid velocity
param vkinem = 1E-2 : real; // Kinematic viscosity (mu/rho)
param rho = 1.0 : real; // Density
param w = 1.940 : real; // Omega of the SOR method
param p : normType; // default: Euclidean norm
param dx2, dy2, beta : real; // auxiliary quantities (assigned below)
param nt = 1000: int; // Number of time steps
param NPRIN = 20: int; // Display the solution
// Variables
// u-type arrays have one extra index in the first dimension (Nx+1 by Ny).
var ustar, unew, un, deltau : [1..Nx+1,1..Ny] real;
// v-type arrays have one extra index in the second dimension (Nx by Ny+1).
var vstar, vnew, vn, deltav : [1..Nx,1..Ny+1] real;
// Remaining fields are Nx by Ny.
var pnew, rhs, div_unew, erro : [1..Nx,1..Ny] real;
var CONVx, VISCx, CONVy, VISCy : [1..Nx,1..Ny] real;
var dt, dt1, dt2, t, t1, t2, tfinal: real;
var tol, divu, eps, dtn: real;
var errou, errov, errop, mu, mv: real;
var i, j, itera, it : int;
runtime.start () ;
dx2 = dx*dx;
dy2 = dy*dy;
beta = dx2/dy2;
itera = 0; // SOR iteration counter
it = 0; // Time-evolution counter
// Initial condition
un = 0.0;
vn = 0.0;
unew = un;
vnew = vn;
pnew = 0.0;
VISCx = 0.0;
CONVx = 0.0;
VISCy = 0.0;
CONVy = 0.0;
// Initialization
errou = 1E0;
errov = 1E0;
erro = 0.0;
t = 0.0;
tfinal = 30.0;
tol = 1E-8;
dt = 1E-3;
// Steady-state condition: norm(du/dt + dv/dt) < eps
eps = 1E-8;
dtn = 1.0;
runtime.stop();
Time: 1.0900E-04
In Fortran:
`
program cavity2D
! Setup phase of the solver: declares the grid and physical parameters and the
! field arrays, then assigns the initial values. cpu_time(t1) marks the start
! of the timed region.
!
! Every real literal carries a "d" exponent. A default-kind literal such as
! 1E-2 is rounded to single precision before it is stored in a double
! precision entity, so the stored value would not be the double-precision
! 0.01. dble() replaces float() for the same reason.
implicit none
double precision, parameter :: Lx = 1.d0, Ly = 1.d0 ! Domain length in the x and y directions
integer, parameter :: Nx = 20, Ny = 20 ! Number of cells in the x and y directions
double precision, parameter :: dx = Lx/dble(Nx), dy = Ly/dble(Ny) ! Finite-volume cell size in x and y
double precision, parameter :: u0 = 1.d0 ! Lid velocity
double precision, parameter :: vkinem = 1.d-2 ! Kinematic viscosity (mu/rho)
double precision, parameter :: rho = 1.d0 ! Density
double precision, parameter :: w = 1.94d0 ! Omega of the SOR method
double precision dx2, dy2, beta ! auxiliary quantities
integer, parameter :: nt = 1000 ! Number of time steps
integer, parameter :: NPRIN = 20 ! Display the solution
! Variables
! u-type arrays are (Nx+1) by Ny, v-type arrays are Nx by (Ny+1),
! the remaining fields are Nx by Ny.
double precision, dimension(Nx+1,Ny) :: ustar, unew, un, deltau
double precision, dimension(Nx,Ny+1) :: vstar, vnew, vn, deltav
double precision, dimension(Nx,Ny) :: pnew, rhs, div_unew, erro
double precision, dimension(Nx,Ny) :: CONVx, CONVy, VISCx, VISCy
double precision dt, dt1, dt2, t, t1, t2, tfinal
double precision tol, divu, eps, dtn
double precision errou, errov, errop, mu, mv
integer i, j, itera, it
character*100 arq
call cpu_time(t1)
dx2 = dx*dx
dy2 = dy*dy
beta = dx2/dy2
itera = 0 ! SOR iteration counter
it = 0 ! Time-evolution counter
! Initial condition
un = 0.d0
vn = 0.d0
unew = un
vnew = vn
pnew = 0.d0
VISCx = 0.d0
CONVx = 0.d0
VISCy = 0.d0
CONVy = 0.d0
! Initialization
errou = 1.d0
errov = 1.d0
erro = 0.d0
t = 0.d0
tfinal = 30.d0
tol = 1.d-8
dt = 1.d-3
! Steady-state condition: norm(du/dt + dv/dt) < eps
eps = 1.d-8
dtn = 1.d0
call cpu_time(t2) `
Time: 1.9000E-05
to be continued...
// Predictor step of the time loop (excerpt). For each (i, j) in
// 2..Nx-1 x 2..Ny-1 it builds the viscous (VISCx, VISCy) and convective
// (CONVx, CONVy) terms from un/vn and advances them to the provisional
// velocities ustar/vstar:
//   u^* = u^n + dt*( (mu/rho)*Lu^n - C(u^n) + (1/rho)*f^n )
//
// NOTE(review): the braces in this excerpt do not balance (`serial {` is never
// closed) and nothing shown here updates `dtn` or `it`, so part of the loop
// body appears to have been omitted from the post - restore it from the full
// source before compiling.
// NOTE(review): `runtime` is not cleared anywhere in this excerpt, so
// runtime.elapsed() reports time accumulated by the timer rather than the cost
// of a single pass - confirm before comparing with a per-iteration figure.
serial{
  while(dtn >= eps) do{ // Loop until steady state
    // Step 1: Predictor
    for i in 2..Nx-1 do{
      for j in 2..Ny-1 do{
        // Second differences of un in y and x.
        VISCx[i+1,j] = (un[i+1,j+1] - 2.0*un[i+1,j] + un[i+1,j-1])/dy2
                     + (un[i+2,j] - 2.0*un[i+1,j] + un[i,j])/dx2;
        // Second differences of vn in y and x (a stray duplicated `+` between
        // the two terms was removed; the value is unchanged).
        VISCy[i,j+1] = (vn[i,j+2] - 2.0*vn[i,j+1] + vn[i,j])/dy2
                     + (vn[i+1,j+1] - 2.0*vn[i,j+1] + vn[i-1,j+1])/dx2;
        CONVx[i+1,j] = (un[i+1,j+1] + un[i+1,j])*(vn[i+1,j+1] + vn[i,j+1])/dy/4.0
                     - (un[i+1,j-1] + un[i+1,j])*(vn[i+1,j] + vn[i,j])/dy/4.0
                     + (un[i+2,j] + un[i+1,j])*(un[i+2,j] + un[i+1,j])/dx/4.0
                     - (un[i,j] + un[i+1,j])*( un[i,j] + un[i+1,j])/dx/4.0;
        CONVy[i,j+1] = (vn[i,j+2] + vn[i,j+1])*(vn[i,j+2] + vn[i,j+1])/dy/4.0
                     - (vn[i,j] + vn[i,j+1])*(vn[i,j] + vn[i,j+1])/dy/4.0
                     + (un[i+1,j+1] + un[i+1,j])*(vn[i+1,j+1] + vn[i,j+1])/dx/4.0
                     - (un[i,j] + un[i,j+1])*(vn[i,j+1] + vn[i-1,j+1])/dx/4.0;
        // Explicit update: old value + dt*(-convection + viscosity*diffusion).
        ustar[i+1,j] = un[i+1,j] + dt*( -CONVx[i+1,j] + vkinem*VISCx[i+1,j] );
        vstar[i,j+1] = vn[i,j+1] + dt*( -CONVy[i,j+1] + vkinem*VISCy[i,j+1] );
      }
    }
  }
  // Report when `it` is a multiple of NPRIN. The format strings are repaired
  // from the garbled "%5.4 Er \ n" form, which is not a valid specifier/escape.
  if ( mod(it, NPRIN) == 0 ) then {
    writef("CPU TIME CHAPEL = %5.4Er\n", runtime.elapsed());
    writef("Viscoso e convectivo\n");
  }
  runtime.stop();
`
Time:
CPU TIME CHAPEL = 1.1000 E -05
Viscoso e convectivo
CPU TIME CHAPEL = 1.0220 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 2.0370 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 3.0460 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 4.0910 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 5.0920 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 6.1020 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 7.1030 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 8.1160 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 9.1330 E -03
Viscoso e convectivo
CPU TIME CHAPEL = 1.0133 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.1143 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.2148 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.3149 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.4147 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.5160 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.6157 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.7152 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.8165 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 1.9168 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 2.0166 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 2.1163 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 2.2166 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 2.3161 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 2.4160 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 2.5158 E -02
Viscoso e convectivo
CPU TIME CHAPEL = 2.6156 E -02
Viscoso e convectivo
In Fortran:
`
! Predictor step of the time loop (excerpt). For each (i, j) in
! 2..Nx-1 x 2..Ny-1 it builds the viscous (VISCx, VISCy) and convective
! (CONVx, CONVy) terms from un/vn and advances them to the provisional
! velocities ustar/vstar:
!   u^* = u^n + dt*( (mu/rho)*Lu^n - C(u^n) + (1/rho)*f^n )
!
! NOTE(review): the "do while" is not closed in this excerpt and nothing shown
! here updates dtn or it, so part of the loop body appears to have been omitted
! from the post - restore it from the full source before compiling.
! NOTE(review): as excerpted, t1 is sampled once before the loop, so t2 - t1
! would grow on every pass; the constant figures reported alongside this
! listing suggest the full source samples t1 inside the loop - confirm.
call cpu_time(t1)
do while (dtn.ge.eps) ! Loop until steady state
  ! Step 1: Predictor
  do j = 2,Ny-1
    do i = 2,Nx-1
      ! Second differences of un in y and x.
      VISCx(i+1,j) = (un(i+1,j+1) - 2.d0*un(i+1,j) + un(i+1,j-1))/dy2 &
                   + (un(i+2,j) - 2.d0*un(i+1,j) + un(i,j))/dx2
      ! Second differences of vn in y and x. The original had "+ &" followed
      ! by "+", i.e. two operators in a row, which is non-standard.
      VISCy(i,j+1) = (vn(i,j+2) - 2.d0*vn(i,j+1) + vn(i,j))/dy2 &
                   + (vn(i+1,j+1) - 2.d0*vn(i,j+1) + vn(i-1,j+1))/dx2
      CONVx(i+1,j) = (un(i+1,j+1) + un(i+1,j))*(vn(i+1,j+1) + vn(i,j+1))/dy/4.d0 &
                   - (un(i+1,j-1) + un(i+1,j))*(vn(i+1,j) + vn(i,j))/dy/4.d0 &
                   + (un(i+2,j) + un(i+1,j))*(un(i+2,j) + un(i+1,j))/dx/4.d0 &
                   - (un(i,j) + un(i+1,j))*( un(i,j) + un(i+1,j))/dx/4.d0
      CONVy(i,j+1) = (vn(i,j+2) + vn(i,j+1))*(vn(i,j+2) + vn(i,j+1))/dy/4.d0 &
                   - (vn(i,j) + vn(i,j+1))*(vn(i,j) + vn(i,j+1))/dy/4.d0 &
                   + (un(i+1,j+1) + un(i+1,j))*(vn(i+1,j+1) + vn(i,j+1))/dx/4.d0 &
                   - (un(i,j) + un(i,j+1))*(vn(i,j+1) + vn(i-1,j+1))/dx/4.d0
      ! Explicit update: old value + dt*(-convection + viscosity*diffusion).
      ustar(i+1,j) = un(i+1,j) + dt*( -CONVx(i+1,j) + vkinem*VISCx(i+1,j) )
      vstar(i,j+1) = vn(i,j+1) + dt*( -CONVy(i,j+1) + vkinem*VISCy(i,j+1) )
    end do
  end do
  call cpu_time(t2)
  ! Report when it is a multiple of NPRIN. Straight quotes and an unbroken
  ! .eq. replace the typographic quotes and ". eq ." of the pasted text.
  if (mod(it, NPRIN) .eq. 0) then
    print '("CPU TIME FORTRAN = ", (ES11.4E2))', t2 - t1
    print '("Viscoso e convectivo")'
  end if
`
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 4.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
CPU TIME FORTRAN = 3.0000 E -06
Viscoso e convectivo
Compile command:
source chplenv.sh
chpl --fast cavity2D.chpl
gfortran -o post cavity2D.f90
Execution command:
./cavity2D
./post
Associated Future Test(s):
Configuration Information
chplenv.sh
`#!/bin/bash
# ====================================================================
# Single-locale Chapel
# ====================================================================
CHPL_HOME=~/chapel-1.24.0
export CHPL_HOME
# Name of the bin subdirectory, as printed by the helper script found
# under CHPL_HOME.
CHPL_BIN_SUBDIR="$("${CHPL_HOME}/util/chplenv/chpl_bin_subdir.py")"
# Append (not prepend) the Chapel directories to the search paths.
export PATH="${PATH}:${CHPL_HOME}/bin/${CHPL_BIN_SUBDIR}"
export MANPATH="${MANPATH}:${CHPL_HOME}/man"
# ====================================================================
# Path for modules used as libraries
# ====================================================================
CHPL_MODULE_PATH=~/modules
export CHPL_MODULE_PATH
# ====================================================================
# Use all available cores
# ====================================================================
export CHPL_RT_NUM_THREADS_PER_LOCALE=MAX_LOGICAL`
Files can be downloaded from:
https://github.com/annacfs/RansChapel/tree/annacfs-cavity2d
cavity2D_chpl.txt
cavity2D_f90.txt
chplenv_sh.txt
@thomasrolinger
@buddha314
@ty1027