From e0fd6e85225588f437194dd5a975fa794beb1618 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Mon, 19 May 2025 21:41:00 -0700 Subject: [PATCH 01/30] periodic ibs,forcing,fft,drag, --- runs/3d_1sphere_periodic/case.py | 154 +++ runs/3d_drag_test/case.py | 145 +++ runs/3d_periodic_ibs_test/case.py | 155 +++ src/common/m_boundary_common.fpp | 149 ++- src/common/m_checker_common.fpp | 5 + src/common/m_constants.fpp | 2 +- src/common/m_mpi_common.fpp | 263 ++++- src/post_process/m_global_parameters.fpp | 8 + src/post_process/m_mpi_proxy.fpp | 16 +- src/post_process/m_start_up.f90 | 3 +- src/pre_process/m_compute_levelset.fpp | 103 ++ src/pre_process/m_data_output.fpp | 84 +- src/pre_process/m_global_parameters.fpp | 8 + src/pre_process/m_initial_condition.fpp | 6 +- src/pre_process/m_mpi_proxy.fpp | 16 +- src/pre_process/m_patches.fpp | 70 +- src/pre_process/m_start_up.fpp | 3 +- src/simulation/m_additional_forcing.fpp | 158 +++ src/simulation/m_checker.fpp | 8 + src/simulation/m_compute_particle_forces.fpp | 72 ++ src/simulation/m_global_parameters.fpp | 24 + src/simulation/m_ibm.fpp | 144 ++- src/simulation/m_mpi_proxy.fpp | 20 +- src/simulation/m_rhs.fpp | 123 +- src/simulation/m_start_up.fpp | 188 ++-- src/simulation/m_time_steppers.fpp | 61 +- src/simulation/m_volume_filtering.fpp | 1049 ++++++++++++++++++ src/simulation/p_main.fpp | 6 + toolchain/mfc/run/case_dicts.py | 12 +- voronoi/gen_voronoi_2D.py | 99 ++ voronoi/gen_voronoi_3D.py | 98 ++ 31 files changed, 3098 insertions(+), 154 deletions(-) create mode 100644 runs/3d_1sphere_periodic/case.py create mode 100644 runs/3d_drag_test/case.py create mode 100644 runs/3d_periodic_ibs_test/case.py create mode 100644 src/simulation/m_additional_forcing.fpp create mode 100644 src/simulation/m_compute_particle_forces.fpp create mode 100644 src/simulation/m_volume_filtering.fpp create mode 100644 voronoi/gen_voronoi_2D.py create mode 100644 voronoi/gen_voronoi_3D.py diff --git a/runs/3d_1sphere_periodic/case.py 
b/runs/3d_1sphere_periodic/case.py new file mode 100644 index 0000000000..857841ad0c --- /dev/null +++ b/runs/3d_1sphere_periodic/case.py @@ -0,0 +1,154 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 # not referenced below; the case viscosity is mu, derived from Re +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 100 +t_save = 10 + +Nx = 63 +Ny = 63 +Nz = 63 + +# immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 0.0, + f"patch_ib({1})%y_centroid": 0.0, + f"patch_ib({1})%z_centroid": 0.0, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patch is necessary: the uniform air medium + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluid: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", +
"avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + #"q_filtered_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": Re, + + # new case additions + "periodic_forcing": "T", + "periodic_ibs": "T", + #"compute_CD_vi": "F", + #"compute_CD_si": "F", + #"fourier_transform_filtering": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + "mu_visc": mu, + + "store_levelset": "F", + "slab_domain_decomposition": "T", + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/3d_drag_test/case.py b/runs/3d_drag_test/case.py new file mode 100644 index 0000000000..2eb50ebc62 --- /dev/null +++ b/runs/3d_drag_test/case.py @@ -0,0 +1,145 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = 
M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 100 +t_save = 1 + +Nx = 99 +Ny = 99 +Nz = 99 + +# immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 0.0, + f"patch_ib({1})%y_centroid": 0.0, + f"patch_ib({1})%z_centroid": 0.0, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patch is necessary: the uniform air medium + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluid: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # ghost cell extrapolation + "bc_x%beg": -3, + "bc_x%end": -3, + "bc_y%beg": -3, + "bc_y%end": -3, + "bc_z%beg": -3, + "bc_z%end": -3, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous":
"T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": Re, + + # new case additions + "compute_CD": "T", + "mu_visc": mu, + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/3d_periodic_ibs_test/case.py b/runs/3d_periodic_ibs_test/case.py new file mode 100644 index 0000000000..9a63a3f4a3 --- /dev/null +++ b/runs/3d_periodic_ibs_test/case.py @@ -0,0 +1,155 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 # not referenced below +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 5 +t_save = 1 + +Nx = 63 +Ny = 63 +Nz = 63 + +# immersed boundary dictionary: patch 1 is centred on the domain corner (5*D = 0.5) and patch 3 on the y = 0.5 face, so both spheres cross the periodic boundaries +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 0.5, + f"patch_ib({1})%y_centroid": 0.5, + f"patch_ib({1})%z_centroid": 0.5, +
f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + + f"patch_ib({2})%geometry": 8, + f"patch_ib({2})%x_centroid": 0.0, + f"patch_ib({2})%y_centroid": 0.0, + f"patch_ib({2})%z_centroid": 0.0, + f"patch_ib({2})%radius": D / 2, + f"patch_ib({2})%slip": "F", + + f"patch_ib({3})%geometry": 8, + f"patch_ib({3})%x_centroid": 0.0, + f"patch_ib({3})%y_centroid": 0.5, + f"patch_ib({3})%z_centroid": 0.25, + f"patch_ib({3})%radius": D / 2, + f"patch_ib({3})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patch is necessary: the uniform air medium + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluid: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 3 patches + "ib": "T", + "num_ibs": 3, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt":
"T", + "E_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": Re, + + # new case additions + "periodic_ibs": "T", + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 55ac2fec82..eb07e7d9eb 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -32,7 +32,8 @@ module m_boundary_common s_populate_variables_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & - s_finalize_boundary_common_module + s_finalize_boundary_common_module, & + s_populate_scalarfield_buffers public :: bc_buffers, bcxb, bcxe, bcyb, bcye, bczb, bcze @@ -238,6 +239,152 @@ contains end subroutine s_populate_variables_buffers + !> The purpose of this procedure is to populate the buffers of any scalar field. Used in unclosed term calculation + subroutine s_populate_scalarfield_buffers(q_temp) + + type(scalar_field), intent(inout) :: q_temp + + ! currently only considering periodic boundary conditions + + ! X-dir + select case (bc_x%beg) + case (-1) ! Periodic BC at beginning + call s_periodic_scalarfield(q_temp, 1, -1) + case default ! 
Processor BC at beginning + call s_mpi_sendrecv_variables_buffers_scalarfield( & + q_temp, 1, -1) + end select + + select case (bc_x%end) + case (-1) ! Periodic BC at end + call s_periodic_scalarfield(q_temp, 1, 1) + case default ! Processor BC at end + call s_mpi_sendrecv_variables_buffers_scalarfield( & + q_temp, 1, 1) + end select + + ! Y-dir + select case (bc_y%beg) + case (-1) ! Periodic BC at beginning + call s_periodic_scalarfield(q_temp, 2, -1) + case default ! Processor BC at beginning + call s_mpi_sendrecv_variables_buffers_scalarfield( & + q_temp, 2, -1) + end select + + select case (bc_y%end) + case (-1) ! Periodic BC at end + call s_periodic_scalarfield(q_temp, 2, 1) + case default ! Processor BC at end + call s_mpi_sendrecv_variables_buffers_scalarfield( & + q_temp, 2, 1) + end select + + ! Z-dir + select case (bc_z%beg) + case (-1) ! Periodic BC at beginning + call s_periodic_scalarfield(q_temp, 3, -1) + case default ! Processor BC at beginning + call s_mpi_sendrecv_variables_buffers_scalarfield( & + q_temp, 3, -1) + end select + + select case (bc_z%end) + case (-1) ! Periodic BC at end + call s_periodic_scalarfield(q_temp, 3, 1) + case default ! 
Processor BC at end + call s_mpi_sendrecv_variables_buffers_scalarfield( & + q_temp, 3, 1) + end select + + end subroutine s_populate_scalarfield_buffers + + subroutine s_periodic_scalarfield(q_temp, bc_dir, bc_loc) + + type(scalar_field), intent(inout) :: q_temp + integer, intent(in) :: bc_dir, bc_loc + + integer :: j, k, l, q, i + + !< x-direction + if (bc_dir == 1) then + if (bc_loc == -1) then !< bc_x%beg + !$acc parallel loop collapse(3) gang vector default(present) + do l = 0, p + do k = 0, n + do j = 1, buff_size + q_temp%sf(-j, k, l) = & + q_temp%sf(m - (j - 1), k, l) + end do + end do + end do + + else !< bc_x%end + !$acc parallel loop collapse(3) gang vector default(present) + do l = 0, p + do k = 0, n + do j = 1, buff_size + q_temp%sf(m + j, k, l) = & + q_temp%sf(j - 1, k, l) + end do + end do + end do + end if + + !< y-direction + elseif (bc_dir == 2) then + if (bc_loc == -1) then !< bc_y%beg + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 1, buff_size + do l = -buff_size, m + buff_size + q_temp%sf(l, -j, k) = & + q_temp%sf(l, n - (j - 1), k) + end do + end do + end do + + else !< bc_y%end + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 1, buff_size + do l = -buff_size, m + buff_size + q_temp%sf(l, n + j, k) = & + q_temp%sf(l, j - 1, k) + end do + end do + end do + end if + + !< z-direction + elseif (bc_dir == 3) then + if (bc_loc == -1) then !< bc_z%beg + !$acc parallel loop collapse(3) gang vector default(present) + do j = 1, buff_size + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + q_temp%sf(k, l, -j) = & + q_temp%sf(k, l, p - (j - 1)) + end do + end do + end do + + else !< bc_z%end + !$acc parallel loop collapse(3) gang vector default(present) + do j = 1, buff_size + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + q_temp%sf(k, l, p + j) = & + q_temp%sf(k, l, j - 1) + end do + end do + end do + end if + + end if + + end subroutine 
s_periodic_scalarfield + subroutine s_ghost_cell_extrapolation(q_prim_vf, pb, mv, bc_dir, bc_loc, k, l) #ifdef _CRAYFTN !DIR$ INLINEALWAYS s_ghost_cell_extrapolation diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp index 7abadf29be..2f6a505001 100644 --- a/src/common/m_checker_common.fpp +++ b/src/common/m_checker_common.fpp @@ -174,6 +174,11 @@ contains @:PROHIBIT(ib .and. n <= 0, "Immersed Boundaries do not work in 1D") @:PROHIBIT(ib .and. (num_ibs <= 0 .or. num_ibs > num_patches_max), "num_ibs must be between 1 and num_patches_max") @:PROHIBIT((.not. ib) .and. num_ibs > 0, "num_ibs is set, but ib is not enabled") + #:for X in ['x', 'y', 'z'] + #:for BOUND in ['beg', 'end'] + @:PROHIBIT(periodic_ibs .and. bc_${X}$%${BOUND}$ /= BC_PERIODIC, "periodic ibs requires periodic BCs, bc_${X}$%${BOUND}$ must = -1") + #:endfor + #:endfor end subroutine s_check_inputs_ibm #endif diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 1d09d98fcf..ac2a614860 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -21,7 +21,7 @@ module m_constants integer, parameter :: fourier_rings = 5 !< Fourier filter ring limit integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation - integer, parameter :: num_patches_max = 10 + integer, parameter :: num_patches_max = 1000 integer, parameter :: num_bc_patches_max = 10 integer, parameter :: pathlen_max = 400 integer, parameter :: nnode = 4 !< Number of QBMM nodes diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 4645e59c13..25cd6fda5d 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -43,6 +43,14 @@ module m_mpi_common integer :: halo_size, nVars !$acc declare create(halo_size, nVars) + real(wp), private, allocatable, dimension(:), target :: buff_send_scalarfield + !! 
This variable is utilized to pack and send the buffer of any scalar field to neighboring processors + + real(wp), private, allocatable, dimension(:), target :: buff_recv_scalarfield + !! This variable is utilized to receive and unpack the buffer of any scalar field from neighboring processors + + !$acc declare create(buff_send_scalarfield, buff_recv_scalarfield) + contains !> The computation of parameters, the allocation of memory, @@ -91,6 +99,18 @@ contains allocate (buff_send(0:halo_size)) allocate (buff_recv(0:ubound(buff_send, 1))) + +#ifdef MFC_SIMULATION + if (fourier_transform_filtering) then + @:ALLOCATE(buff_send_scalarfield(0:-1 + buff_size*1* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (min(m, n, p) + 2*buff_size + 1))) + + @:ALLOCATE(buff_recv_scalarfield(0:ubound(buff_send_scalarfield, 1))) + end if +#endif #endif end subroutine s_initialize_mpi_common_module @@ -232,14 +252,18 @@ contains #ifdef MFC_PRE_PROCESS MPI_IO_IB_DATA%var%sf => ib_markers%sf - MPI_IO_levelset_DATA%var%sf => levelset%sf - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf + if (store_levelset) then + MPI_IO_levelset_DATA%var%sf => levelset%sf + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf + end if #else MPI_IO_IB_DATA%var%sf => ib_markers%sf(0:m, 0:n, 0:p) #ifndef MFC_POST_PROCESS - MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) + if (store_levelset) then + MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) + end if #endif #endif @@ -1071,6 +1095,233 @@ contains end subroutine s_mpi_sendrecv_variables_buffers + !> The goal of this procedure is to populate the buffers of any scalar field quantity + subroutine s_mpi_sendrecv_variables_buffers_scalarfield(q_temp, & + mpi_dir, & + pbc_loc) + + type(scalar_field), 
intent(inout) :: q_temp + integer, intent(in) :: mpi_dir, pbc_loc + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0 + + integer :: pack_offset, unpack_offset + + real(wp), pointer :: p_send, p_recv +#ifdef MFC_MPI + + call nvtxStartRange("RHS-COMM-PACKBUF") + ! One value per cell: counts and strides below use 1, matching the buff_*_scalarfield allocation (not the shared v_size) + + buffer_counts = (/ & + buff_size*1*(n + 1)*(p + 1), & + buff_size*1*(m + 2*buff_size + 1)*(p + 1), & + buff_size*1*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) + recv_tag = f_logical_to_int(pbc_loc == 1) + + dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) + src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) + + grid_dims = (/m, n, p/) + + pack_offset = 0 + if (f_xor(pbc_loc == 1, beg_end_geq_0)) then + pack_offset = grid_dims(mpi_dir) - buff_size + 1 + end if + + unpack_offset = 0 + if (pbc_loc == 1) then + unpack_offset = grid_dims(mpi_dir) + buff_size + 1 + end if + + !
Pack Buffer to Send + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = 0, buff_size - 1 + do i = 1, 1 + r = (i - 1) + 1*(j + buff_size*(k + (n + 1)*l)) + buff_send_scalarfield(r) = q_temp%sf(j + pack_offset, k, l) + end do + end do + end do + end do + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, 1 + do l = 0, p + do k = 0, buff_size - 1 + do j = -buff_size, m + buff_size + r = (i - 1) + 1* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + (k + buff_size*l)) + buff_send_scalarfield(r) = q_temp%sf(j, k + pack_offset, l) + end do + end do + end do + end do + #:else + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, 1 + do l = 0, buff_size - 1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + 1* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)*l)) + buff_send_scalarfield(r) = q_temp%sf(j, k, l + pack_offset) + end do + end do + end do + end do + #:endif + end if + #:endfor + call nvtxEndRange ! Packbuf + + p_send => buff_send_scalarfield(0) + p_recv => buff_recv_scalarfield(0) + + ! Send/Recv +#ifdef MFC_SIMULATION + #:for rdma_mpi in [False, True] + if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then + #:if rdma_mpi + !$acc data attach(p_send, p_recv) + !$acc host_data use_device(p_send, p_recv) + call nvtxStartRange("RHS-COMM-SENDRECV-RDMA") + #:else + call nvtxStartRange("RHS-COMM-DEV2HOST") + !$acc update host(buff_send_scalarfield) + call nvtxEndRange + call nvtxStartRange("RHS-COMM-SENDRECV-NO-RMDA") + #:endif + + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + call nvtxEndRange !
RHS-MPI-SENDRECV-(NO)-RDMA + + #:if rdma_mpi + !$acc end host_data + !$acc end data + !$acc wait + #:else + call nvtxStartRange("RHS-COMM-HOST2DEV") + !$acc update device(buff_recv_scalarfield) + call nvtxEndRange + #:endif + end if + #:endfor +#else + call MPI_SENDRECV( & + p_send, buffer_count, mpi_p, dst_proc, send_tag, & + p_recv, buffer_count, mpi_p, src_proc, recv_tag, & + MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) +#endif + + ! Unpack Received Buffer + call nvtxStartRange("RHS-COMM-UNPACKBUF") + #:for mpi_dir in [1, 2, 3] + if (mpi_dir == ${mpi_dir}$) then + #:if mpi_dir == 1 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do l = 0, p + do k = 0, n + do j = -buff_size, -1 + do i = 1, 1 + r = (i - 1) + 1* & + (j + buff_size*((k + 1) + (n + 1)*l)) + q_temp%sf(j + unpack_offset, k, l) = buff_recv_scalarfield(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_temp%sf(j + unpack_offset, k, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + #:elif mpi_dir == 2 + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, 1 + do l = 0, p + do k = -buff_size, -1 + do j = -buff_size, m + buff_size + r = (i - 1) + 1* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + buff_size*l)) + q_temp%sf(j, k + unpack_offset, l) = buff_recv_scalarfield(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_temp%sf(j, k + unpack_offset, l))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + #:else + !
Unpacking buffer in the z-direction (beg or end side, selected by unpack_offset) + !$acc parallel loop collapse(4) gang vector default(present) private(r) + do i = 1, 1 + do l = -buff_size, -1 + do k = -buff_size, n + buff_size + do j = -buff_size, m + buff_size + r = (i - 1) + 1* & + ((j + buff_size) + (m + 2*buff_size + 1)* & + ((k + buff_size) + (n + 2*buff_size + 1)* & + (l + buff_size))) + q_temp%sf(j, k, l + unpack_offset) = buff_recv_scalarfield(r) +#if defined(__INTEL_COMPILER) + if (ieee_is_nan(q_temp%sf(j, k, l + unpack_offset))) then + print *, "Error", j, k, l, i + error stop "NaN(s) in recv" + end if +#endif + end do + end do + end do + end do + #:endif + end if + #:endfor + call nvtxEndRange + +#endif + + end subroutine s_mpi_sendrecv_variables_buffers_scalarfield + subroutine s_mpi_sendrecv_capilary_variables_buffers(c_divs_vf, mpi_dir, pbc_loc) type(scalar_field), dimension(num_dims + 1), intent(inout) :: c_divs_vf @@ -1299,6 +1550,13 @@ contains #ifdef MFC_MPI deallocate (buff_send, buff_recv) +#ifdef MFC_SIMULATION + ! The scalar-field buffers are only allocated when fourier_transform_filtering is set + if (fourier_transform_filtering) then + @:DEALLOCATE(buff_send_scalarfield) + @:DEALLOCATE(buff_recv_scalarfield) + end if +#endif #endif end subroutine s_finalize_mpi_common_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 00c5f0ec27..9db5321c55 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -319,6 +319,10 @@ module m_global_parameters real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) + logical :: periodic_ibs + logical :: store_levelset + logical :: slab_domain_decomposition + contains !> Assigns default values to user inputs prior to reading @@ -460,6 +464,10 @@ contains ! MHD Bx0 = dflt_real + periodic_ibs = .false. + store_levelset = .true. + slab_domain_decomposition = .false.
+ end subroutine s_assign_default_values_to_user_inputs !> Computation of parameters, allocation procedures, and/or diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 8fc70bfe77..9e368d7fa4 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -171,7 +171,8 @@ contains & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & & 'rkck_adap_dt', 'output_partial_domain', 'relativity', & - & 'cont_damage' ] + & 'cont_damage', 'periodic_ibs', 'store_levelset', & + & 'slab_domain_decomposition' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -297,6 +298,19 @@ contains end do + else if (slab_domain_decomposition) then + if (proc_rank == 0) then + print *, 'slab domain decomposition...' + end if + + ! continuous x and y direction, block decomposition in z + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + if (mod((p+1), num_procs_z) == 0) then + ierr = 0 + end if else ! Initial values of the processor factorization optimization diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 8fece2820b..f04efd75e6 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -84,7 +84,8 @@ subroutine s_read_input_file relax_model, cf_wrt, sigma, adv_n, ib, num_ibs, & cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, rkck_adap_dt, & - sim_data, hyperelasticity, Bx0, relativity, cont_damage + sim_data, hyperelasticity, Bx0, relativity, cont_damage, & + periodic_ibs, store_levelset, slab_domain_decomposition ! 
Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' diff --git a/src/pre_process/m_compute_levelset.fpp b/src/pre_process/m_compute_levelset.fpp index d339157be6..1a7547bb6b 100644 --- a/src/pre_process/m_compute_levelset.fpp +++ b/src/pre_process/m_compute_levelset.fpp @@ -474,6 +474,11 @@ contains real(wp) :: x_centroid, y_centroid, z_centroid real(wp), dimension(3) :: dist_vec + real(wp) :: x_domain_beg, x_domain_end, y_domain_beg, y_domain_end, z_domain_beg, z_domain_end + real(wp) :: x_pcen, y_pcen, z_pcen !< periodically projected centroids of sphere + real(wp), dimension(7, 3) :: dist_vec_per + real(wp), dimension(7) :: dist_per + integer :: i, j, k !< Loop index variables radius = patch_ib(ib_patch_id)%radius @@ -481,6 +486,37 @@ contains y_centroid = patch_ib(ib_patch_id)%y_centroid z_centroid = patch_ib(ib_patch_id)%z_centroid + call s_mpi_allreduce_min(x_domain%beg, x_domain_beg) + call s_mpi_allreduce_max(x_domain%end, x_domain_end) + call s_mpi_allreduce_min(y_domain%beg, y_domain_beg) + call s_mpi_allreduce_max(y_domain%end, y_domain_end) + call s_mpi_allreduce_min(z_domain%beg, z_domain_beg) + call s_mpi_allreduce_max(z_domain%end, z_domain_end) + + if (periodic_ibs) then + if ((x_centroid - x_domain_beg) <= radius) then + x_pcen = x_domain_end + (x_centroid - x_domain_beg) + else if ((x_domain_end - x_centroid) <= radius) then + x_pcen = x_domain_beg - (x_domain_end - x_centroid) + else + x_pcen = x_centroid + end if + if ((y_centroid - y_domain_beg) <= radius) then + y_pcen = y_domain_end + (y_centroid - y_domain_beg) + else if ((y_domain_end - y_centroid) <= radius) then + y_pcen = y_domain_beg - (y_domain_end - y_centroid) + else + y_pcen = y_centroid + end if + if ((z_centroid - z_domain_beg) <= radius) then + z_pcen = z_domain_end + (z_centroid - z_domain_beg) + else if ((z_domain_end - z_centroid) <= radius) then + z_pcen = z_domain_beg - (z_domain_end - z_centroid) + else + z_pcen = z_centroid + end if + end if + 
do i = 0, m do j = 0, n do k = 0, p @@ -488,6 +524,73 @@ contains dist_vec(2) = y_cc(j) - y_centroid dist_vec(3) = z_cc(k) - z_centroid dist = sqrt(sum(dist_vec**2)) + + ! all permutations of periodically projected ib + if (periodic_ibs) then + dist_vec_per(1, 1) = x_cc(i) - x_pcen + dist_vec_per(1, 2) = y_cc(j) - y_pcen + dist_vec_per(1, 3) = z_cc(k) - z_pcen + dist_per(1) = sqrt(sum(dist_vec_per(1, :)**2)) + if (dist_per(1) < dist) then + dist = dist_per(1) + dist_vec = dist_vec_per(1, :) + end if + + dist_vec_per(2, 1) = x_cc(i) - x_pcen + dist_vec_per(2, 2) = y_cc(j) - y_centroid + dist_vec_per(2, 3) = z_cc(k) - z_pcen + dist_per(2) = sqrt(sum(dist_vec_per(2, :)**2)) + if (dist_per(2) < dist) then + dist = dist_per(2) + dist_vec = dist_vec_per(2, :) + end if + + dist_vec_per(3, 1) = x_cc(i) - x_pcen + dist_vec_per(3, 2) = y_cc(j) - y_pcen + dist_vec_per(3, 3) = z_cc(k) - z_centroid + dist_per(3) = sqrt(sum(dist_vec_per(3, :)**2)) + if (dist_per(3) < dist) then + dist = dist_per(3) + dist_vec = dist_vec_per(3, :) + end if + + dist_vec_per(4, 1) = x_cc(i) - x_pcen + dist_vec_per(4, 2) = y_cc(j) - y_centroid + dist_vec_per(4, 3) = z_cc(k) - z_centroid + dist_per(4) = sqrt(sum(dist_vec_per(4, :)**2)) + if (dist_per(4) < dist) then + dist = dist_per(4) + dist_vec = dist_vec_per(4, :) + end if + + dist_vec_per(5, 1) = x_cc(i) - x_centroid + dist_vec_per(5, 2) = y_cc(j) - y_pcen + dist_vec_per(5, 3) = z_cc(k) - z_pcen + dist_per(5) = sqrt(sum(dist_vec_per(5, :)**2)) + if (dist_per(5) < dist) then + dist = dist_per(5) + dist_vec = dist_vec_per(5, :) + end if + + dist_vec_per(6, 1) = x_cc(i) - x_centroid + dist_vec_per(6, 2) = y_cc(j) - y_pcen + dist_vec_per(6, 3) = z_cc(k) - z_centroid + dist_per(6) = sqrt(sum(dist_vec_per(6, :)**2)) + if (dist_per(6) < dist) then + dist = dist_per(6) + dist_vec = dist_vec_per(6, :) + end if + + dist_vec_per(7, 1) = x_cc(i) - x_centroid + dist_vec_per(7, 2) = y_cc(j) - y_centroid + dist_vec_per(7, 3) = z_cc(k) - z_pcen + dist_per(7) = 
sqrt(sum(dist_vec_per(7, :)**2)) + if (dist_per(7) < dist) then + dist = dist_per(7) + dist_vec = dist_vec_per(7, :) + end if + end if + levelset%sf(i, j, k, ib_patch_id) = dist - radius if (dist == 0) then levelset_norm%sf(i, j, k, ib_patch_id, :) = (/1, 0, 0/) diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index 7ae637f034..0030749793 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -217,17 +217,19 @@ contains end if ! Outtputting Levelset Info - file_loc = trim(t_step_dir)//'/levelset.dat' + if (store_levelset) then + file_loc = trim(t_step_dir)//'/levelset.dat' - open (1, FILE=trim(file_loc), FORM='unformatted', STATUS=status) - write (1) levelset%sf - close (1) + open (1, FILE=trim(file_loc), FORM='unformatted', STATUS=status) + write (1) levelset%sf + close (1) - file_loc = trim(t_step_dir)//'/levelset_norm.dat' + file_loc = trim(t_step_dir)//'/levelset_norm.dat' - open (1, FILE=trim(file_loc), FORM='unformatted', STATUS=status) - write (1) levelset_norm%sf - close (1) + open (1, FILE=trim(file_loc), FORM='unformatted', STATUS=status) + write (1) levelset_norm%sf + close (1) + end if ! Outputting Conservative Variables do i = 1, sys_size @@ -774,45 +776,47 @@ contains call MPI_FILE_CLOSE(ifile, ierr) - ! Levelset - write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + if (store_levelset) then + ! Levelset + write (file_loc, '(A)') 'levelset.dat' + file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist .and. 
proc_rank == 0) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - ! Initial displacement to skip at beginning of file - disp = 0 + ! Initial displacement to skip at beginning of file + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelset_DATA%var%sf, data_size*num_ibs, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelset_DATA%var%sf, data_size*num_ibs, & + mpi_p, status, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_CLOSE(ifile, ierr) - ! Levelset Norm - write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + ! Levelset Norm + write (file_loc, '(A)') 'levelset_norm.dat' + file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist .and. proc_rank == 0) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - ! Initial displacement to skip at beginning of file - disp = 0 + ! 
Initial displacement to skip at beginning of file + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size*num_ibs*3, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size*num_ibs*3, & + mpi_p, status, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + end if end if if (ib) then diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 8305f47996..fa8966922c 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -283,6 +283,10 @@ module m_global_parameters !! conditions data to march the solution in the physical computational domain !! to the next time-step. + logical :: periodic_ibs + logical :: store_levelset + logical :: slab_domain_decomposition + contains !> Assigns default values to user inputs prior to reading @@ -550,6 +554,10 @@ contains Bx0 = dflt_real + periodic_ibs = .false. + store_levelset = .true. + slab_domain_decomposition = .false. + end subroutine s_assign_default_values_to_user_inputs !> Computation of parameters, allocation procedures, and/or diff --git a/src/pre_process/m_initial_condition.fpp b/src/pre_process/m_initial_condition.fpp index 0efc1c225d..78f2e73cb5 100644 --- a/src/pre_process/m_initial_condition.fpp +++ b/src/pre_process/m_initial_condition.fpp @@ -92,8 +92,10 @@ contains allocate (ib_markers%sf(0:m, 0:n, 0:p)) - allocate (levelset%sf(0:m, 0:n, 0:p, 1:num_ibs)) - allocate (levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3)) + if (store_levelset) then + allocate (levelset%sf(0:m, 0:n, 0:p, 1:num_ibs)) + allocate (levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3)) + end if if (qbmm .and. .not. 
polytropic) then !Allocate bubble pressure pb and vapor mass mv for non-polytropic qbmm at all quad nodes and R0 bins diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index a0bea3caee..691ba56add 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -70,7 +70,8 @@ contains & 'qbmm', 'file_per_process', 'adv_n', 'ib' , 'cfl_adap_dt', & & 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'hyperelasticity', 'pre_stress', 'elliptic_smoothing', 'viscous',& - & 'bubbles_lagrange', 'bc_io', 'mhd', 'relativity', 'cont_damage' ] + & 'bubbles_lagrange', 'bc_io', 'mhd', 'relativity', 'cont_damage', & + & 'periodic_ibs', 'store_levelset', 'slab_domain_decomposition' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor call MPI_BCAST(fluid_rho(1), num_fluids_max, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) @@ -251,6 +252,19 @@ contains end do + else if (slab_domain_decomposition) then + if (proc_rank == 0) then + print *, 'slab domain decomposition...' + end if + + ! continuous x and y direction, block decomposition in z + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + if (mod((p+1), num_procs_z) == 0) then + ierr = 0 + end if else ! Initial values of the processor factorization optimization diff --git a/src/pre_process/m_patches.fpp b/src/pre_process/m_patches.fpp index c01f91654b..40cb8a86fe 100644 --- a/src/pre_process/m_patches.fpp +++ b/src/pre_process/m_patches.fpp @@ -125,7 +125,9 @@ contains if (patch_ib(i)%geometry == 8) then call s_sphere(i, ib_markers_sf, q_prim_vf, ib) - call s_sphere_levelset(levelset, levelset_norm, i) + if (store_levelset) then + call s_sphere_levelset(levelset, levelset_norm, i) + end if elseif (patch_ib(i)%geometry == 9) then call s_cuboid(i, ib_markers_sf, q_prim_vf, ib) call s_cuboid_levelset(levelset, levelset_norm, i) @@ -1724,6 +1726,9 @@ contains !! Variables to initialize the pressure field that corresponds to the !! 
bubble-collapse test case found in Tiwari et al. (2013) + real(wp) :: x_domain_beg, x_domain_end, y_domain_beg, y_domain_end, z_domain_beg, z_domain_end + real(wp) :: x_pcen, y_pcen, z_pcen + ! Transferring spherical patch's radius, centroid, smoothing patch ! identity and smoothing coefficient information if (present(ib)) then @@ -1749,6 +1754,39 @@ contains ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. + + call s_mpi_allreduce_min(x_domain%beg, x_domain_beg) + call s_mpi_allreduce_max(x_domain%end, x_domain_end) + call s_mpi_allreduce_min(y_domain%beg, y_domain_beg) + call s_mpi_allreduce_max(y_domain%end, y_domain_end) + call s_mpi_allreduce_min(z_domain%beg, z_domain_beg) + call s_mpi_allreduce_max(z_domain%end, z_domain_end) + + ! periodically projected sphere centroid + if (periodic_ibs .and. present(ib)) then + if ((x_centroid - x_domain_beg) <= radius) then + x_pcen = x_domain_end + (x_centroid - x_domain_beg) + else if ((x_domain_end - x_centroid) <= radius) then + x_pcen = x_domain_beg - (x_domain_end - x_centroid) + else + x_pcen = x_centroid + end if + if ((y_centroid - y_domain_beg) <= radius) then + y_pcen = y_domain_end + (y_centroid - y_domain_beg) + else if ((y_domain_end - y_centroid) <= radius) then + y_pcen = y_domain_beg - (y_domain_end - y_centroid) + else + y_pcen = y_centroid + end if + if ((z_centroid - z_domain_beg) <= radius) then + z_pcen = z_domain_end + (z_centroid - z_domain_beg) + else if ((z_domain_end - z_centroid) <= radius) then + z_pcen = z_domain_beg - (z_domain_end - z_centroid) + else + z_pcen = z_centroid + end if + end if + do k = 0, p do j = 0, n do i = 0, m @@ -1788,6 +1826,36 @@ contains @:analytical() end if end if + + if (periodic_ibs .and. present(ib)) then + ! 
check every permutation of the projected cell location + if (((x_cc(i) - x_pcen)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_pcen)**2 <= radius**2) & + .or. ((x_cc(i) - x_pcen)**2 & + + (cart_y - y_centroid)**2 & + + (cart_z - z_centroid)**2 <= radius**2) & + .or. ((x_cc(i) - x_pcen)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_centroid)**2 <= radius**2) & + .or. ((x_cc(i) - x_pcen)**2 & + + (cart_y - y_centroid)**2 & + + (cart_z - z_pcen)**2 <= radius**2) & + .or. ((x_cc(i) - x_centroid)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_centroid)**2 <= radius**2) & + .or. ((x_cc(i) - x_centroid)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_pcen)**2 <= radius**2) & + .or. ((x_cc(i) - x_centroid)**2 & + + (cart_y - y_centroid)**2 & + + (cart_z - z_pcen)**2 <= radius**2)) & + then + + ! Updating the patch identities bookkeeping variable + patch_id_fp(i, j, k) = patch_id + end if + end if end do end do end do diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 8749c22278..71ca6523b2 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -149,7 +149,8 @@ contains n_start_old, surface_tension, hyperelasticity, pre_stress, & rkck_adap_dt, elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & - patch_bc, Bx0, relativity, cont_damage + patch_bc, Bx0, relativity, cont_damage, & + periodic_ibs, store_levelset, slab_domain_decomposition ! 
Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp'
diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp
new file mode 100644
index 0000000000..17765413bd
--- /dev/null
+++ b/src/simulation/m_additional_forcing.fpp
@@ -0,0 +1,217 @@
+#:include 'macros.fpp'
+
+!> @brief Adds the source terms that sustain a mean flow through a fully
+!!        periodic domain containing immersed boundaries: the fluid-phase
+!!        averages of momentum, density and temperature are tracked and
+!!        relaxed toward the freestream reference state, following the
+!!        forcing of Khalloufi and Capecelatro.
+module m_additional_forcing
+
+    use m_derived_types
+
+    use m_global_parameters
+
+    use m_ibm
+
+    use m_mpi_proxy
+
+    use m_volume_filtering
+
+    implicit none
+
+    private; public :: s_initialize_additional_forcing_module, &
+ s_add_periodic_forcing, s_finalize_additional_forcing_module, &
+ s_compute_phase_average, s_compute_periodic_forcing
+
+    ! Ideal-gas constants of the temperature average and forcing; same values
+    ! that were previously written inline as 0.4/287 and 1.4
+    real(wp), parameter :: forcing_gm1_over_R = 0.4_wp/287._wp !< (gamma - 1)/R
+    real(wp), parameter :: forcing_gamma = 1.4_wp !< specific heat ratio
+
+    real(wp), allocatable, dimension(:) :: q_bar !< running time average: 1:3 rho*u, 4 rho, 5 T
+    type(scalar_field), allocatable, dimension(:) :: q_periodic_force !< 1:3 f_u, 4:6 u*f_u, 7 f_rho, 8 f_T
+    real(wp), allocatable, dimension(:) :: q_spatial_avg, q_spatial_avg_glb !< 1:3 rho*u, 4 rho, 5 T
+    real(wp) :: volfrac_phi !< solid volume fraction occupied by the immersed spheres
+    integer :: N_x_total_glb !< number of cells in the global grid
+
+    !$acc declare create(q_bar, q_periodic_force, q_spatial_avg, q_spatial_avg_glb, volfrac_phi, N_x_total_glb)
+
+contains
+
+    !> Allocates the forcing work arrays (when periodic_forcing is set) and
+    !! caches the solid volume fraction and the global cell count.
+    subroutine s_initialize_additional_forcing_module
+        integer :: i
+
+        if (periodic_forcing) then
+            @:ALLOCATE(q_bar(1:5))
+            @:ALLOCATE(q_periodic_force(1:8))
+            do i = 1, 8
+                @:ALLOCATE(q_periodic_force(i)%sf(0:m, 0:n, 0:p))
+                @:ACC_SETUP_SFs(q_periodic_force(i))
+            end do
+            @:ALLOCATE(q_spatial_avg(1:5))
+            @:ALLOCATE(q_spatial_avg_glb(1:5))
+
+            ! s_compute_phase_average reads q_bar before it is ever written,
+            ! so the accumulators must start from zero on host and device
+            q_bar = 0._wp
+            q_spatial_avg = 0._wp
+            q_spatial_avg_glb = 0._wp
+            !$acc update device(q_bar, q_spatial_avg, q_spatial_avg_glb)
+        end if
+
+        ! Uses the radius of patch 1 for every immersed boundary
+        volfrac_phi = num_ibs*4._wp/3._wp*pi*patch_ib(1)%radius**3/ &
+                      ((x_domain%end - x_domain%beg)* &
+                       (y_domain%end - y_domain%beg)* &
+                       (z_domain%end - z_domain%beg))
+        !$acc update device(volfrac_phi)
+
+        N_x_total_glb = (m_glb + 1)*(n_glb + 1)*(p_glb + 1)
+        !$acc update device(N_x_total_glb)
+    end subroutine s_initialize_additional_forcing_module
+
+    !> Adds the periodic forcing terms to the RHS, as detailed in Khalloufi and Capecelatro
+    !! @param rhs_vf right-hand side fields, updated in place
+    subroutine s_add_periodic_forcing(rhs_vf)
+        type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf
+        integer :: i, j, k
+
+        ! NOTE(review): the momentum term is weighted by the indicator twice while
+        ! continuity and energy use it once; kept as written -- confirm it is intended
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    ! continuity
+                    rhs_vf(1)%sf(i, j, k) = rhs_vf(1)%sf(i, j, k) &
+                                            + q_periodic_force(7)%sf(i, j, k)*fluid_indicator_function_I%sf(i, j, k)
+                    ! x momentum
+                    rhs_vf(2)%sf(i, j, k) = rhs_vf(2)%sf(i, j, k) &
+                                            + q_periodic_force(1)%sf(i, j, k)*fluid_indicator_function_I%sf(i, j, k) &
+                                            *fluid_indicator_function_I%sf(i, j, k)
+                    ! energy
+                    rhs_vf(5)%sf(i, j, k) = rhs_vf(5)%sf(i, j, k) &
+                                            + (q_periodic_force(4)%sf(i, j, k) + q_periodic_force(8)%sf(i, j, k)) &
+                                            *fluid_indicator_function_I%sf(i, j, k)
+                end do
+            end do
+        end do
+    end subroutine s_add_periodic_forcing
+
+    !> Updates q_bar, the running time average of the fluid-phase spatial means
+    !! @param q_cons_vf conservative variables
+    !! @param t_step number of samples in the running average, this call included
+    subroutine s_compute_phase_average(q_cons_vf, t_step)
+        type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
+        integer, intent(in) :: t_step
+
+        real(wp) :: sum_mom_x, sum_mom_y, sum_mom_z, sum_rho, sum_T
+        real(wp) :: n_samples
+        integer :: i, j, k
+
+        sum_mom_x = 0._wp
+        sum_mom_y = 0._wp
+        sum_mom_z = 0._wp
+        sum_rho = 0._wp
+        sum_T = 0._wp
+
+        ! local spatial sums; scalar reductions start from the zeros set above,
+        ! so nothing is carried over from a previous call
+        !$acc parallel loop collapse(3) gang vector default(present) &
+        !$acc reduction(+:sum_mom_x, sum_mom_y, sum_mom_z, sum_rho, sum_T)
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    sum_mom_x = sum_mom_x + q_cons_vf(2)%sf(i, j, k)*fluid_indicator_function_I%sf(i, j, k)
+                    sum_mom_y = sum_mom_y + q_cons_vf(3)%sf(i, j, k)*fluid_indicator_function_I%sf(i, j, k)
+                    sum_mom_z = sum_mom_z + q_cons_vf(4)%sf(i, j, k)*fluid_indicator_function_I%sf(i, j, k)
+                    sum_rho = sum_rho + q_cons_vf(1)%sf(i, j, k)*fluid_indicator_function_I%sf(i, j, k)
+
+                    ! T = (gamma - 1)/R*(E/rho - |u|^2/2)
+                    sum_T = sum_T + forcing_gm1_over_R*(q_cons_vf(5)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) &
+                                    - 0.5_wp*((q_cons_vf(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2 &
+                                    + (q_cons_vf(3)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2 &
+                                    + (q_cons_vf(4)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2)) &
+                            *fluid_indicator_function_I%sf(i, j, k)
+                end do
+            end do
+        end do
+
+        q_spatial_avg(1) = sum_mom_x
+        q_spatial_avg(2) = sum_mom_y
+        q_spatial_avg(3) = sum_mom_z
+        q_spatial_avg(4) = sum_rho
+        q_spatial_avg(5) = sum_T
+
+        do i = 1, 5
+            call s_mpi_allreduce_sum(q_spatial_avg(i), q_spatial_avg_glb(i))
+        end do
+
+        ! normalize by the global cell count
+        q_spatial_avg_glb = q_spatial_avg_glb/real(N_x_total_glb, wp)
+
+        ! running time average; the sample count is clamped so that t_step = 0
+        ! cannot divide by zero
+        n_samples = real(max(t_step, 1), wp)
+        q_bar = (q_spatial_avg_glb + (n_samples - 1._wp)*q_bar)/n_samples
+
+        ! s_compute_periodic_forcing reads q_bar inside a device kernel, so the
+        ! device copies are refreshed only after the host-side update is complete
+        !$acc update device(q_spatial_avg, q_spatial_avg_glb, q_bar)
+    end subroutine s_compute_phase_average
+
+    !> Computes the periodic forcing terms described in Khalloufi and Capecelatro
+    !! @param q_cons_vf conservative variables
+    subroutine s_compute_periodic_forcing(q_cons_vf)
+        type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf
+
+        integer :: i, j, k
+
+        ! NOTE(review): f_u(2) and f_u(3) also relax toward rho_inf_ref*u_inf_ref;
+        ! s_add_periodic_forcing only consumes entries 1, 4, 7 and 8 -- confirm targets
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    ! f_u
+                    q_periodic_force(1)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - q_bar(1)/(1._wp - volfrac_phi))/dt
+                    q_periodic_force(2)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - q_bar(2)/(1._wp - volfrac_phi))/dt
+                    q_periodic_force(3)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - q_bar(3)/(1._wp - volfrac_phi))/dt
+
+                    ! u*f_u
+                    q_periodic_force(4)%sf(i, j, k) = q_cons_vf(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) &
+                                                      *q_periodic_force(1)%sf(i, j, k)
+                    q_periodic_force(5)%sf(i, j, k) = q_cons_vf(3)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) &
+                                                      *q_periodic_force(2)%sf(i, j, k)
+                    q_periodic_force(6)%sf(i, j, k) = q_cons_vf(4)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) &
+                                                      *q_periodic_force(3)%sf(i, j, k)
+
+                    ! f_rho
+                    q_periodic_force(7)%sf(i, j, k) = (rho_inf_ref - q_bar(4)/(1._wp - volfrac_phi))/dt
+
+                    ! f_T
+                    q_periodic_force(8)%sf(i, j, k) = (q_cons_vf(1)%sf(i, j, k)/forcing_gamma) &
+                                                      *(T_inf_ref - q_bar(5)/(1._wp - volfrac_phi))/dt
+                end do
+            end do
+        end do
+    end subroutine s_compute_periodic_forcing
+
+    !> Releases the arrays allocated by s_initialize_additional_forcing_module
+    subroutine s_finalize_additional_forcing_module
+        integer :: i
+
+        if (periodic_forcing) then
+            @:DEALLOCATE(q_bar)
+            do i = 1, 8
+                @:DEALLOCATE(q_periodic_force(i)%sf)
+            end do
+            @:DEALLOCATE(q_periodic_force)
+            @:DEALLOCATE(q_spatial_avg)
+            @:DEALLOCATE(q_spatial_avg_glb)
+        end if
+    end subroutine s_finalize_additional_forcing_module
+
+end module m_additional_forcing
diff --git a/src/simulation/m_checker.fpp b/src/simulation/m_checker.fpp index f3fe8eb6d5..04c1076f2a 100644 --- a/src/simulation/m_checker.fpp +++ b/src/simulation/m_checker.fpp @@ -346,6 +346,14 @@ contains subroutine s_check_inputs_misc @:PROHIBIT(probe_wrt .and. fd_order == dflt_int, "fd_order must be specified for probe_wrt") @:PROHIBIT(integral_wrt .and. (.not. bubbles_euler)) + #:for X in ['x', 'y', 'z'] + #:for BOUND in ['beg', 'end'] + @:PROHIBIT(periodic_forcing .and. bc_${X}$%${BOUND}$ /= BC_PERIODIC, & + "Periodic forcing requires all BCs to be periodic") + @:PROHIBIT(fourier_transform_filtering .and. 
bc_${X}$%${BOUND}$ /= BC_PERIODIC, & + "Explicit filtering of flow data requires all BCs to be periodic due to fourier transform") + #:endfor + #:endfor end subroutine s_check_inputs_misc subroutine s_check_inputs_mhd
diff --git a/src/simulation/m_compute_particle_forces.fpp b/src/simulation/m_compute_particle_forces.fpp
new file mode 100644
index 0000000000..fd84657f5f
--- /dev/null
+++ b/src/simulation/m_compute_particle_forces.fpp
@@ -0,0 +1,88 @@
+#:include 'macros.fpp'
+
+!> @brief Sums the x-component of the supplied stress divergence over the cells
+!!        of each immersed boundary and prints the drag coefficient of the
+!!        first one.
+module m_compute_particle_forces
+
+    use m_derived_types
+
+    use m_global_parameters
+
+    use m_ibm
+
+    use m_mpi_proxy
+
+    implicit none
+
+    private; public :: s_initialize_particle_forces_module, &
+ s_compute_drag_coefficient, s_finalize_particle_forces_module
+
+    real(wp), allocatable, dimension(:) :: FD_calc !< local force sums indexed by ib_markers value, 0:num_ibs
+
+    !$acc declare create(FD_calc)
+
+contains
+
+    !> Allocates the force accumulator when compute_CD is set
+    subroutine s_initialize_particle_forces_module
+        if (compute_CD) then
+            @:ALLOCATE(FD_calc(0:num_ibs))
+        end if
+
+    end subroutine s_initialize_particle_forces_module
+
+    !> Accumulates the x-direction force per immersed boundary and prints C_D of boundary 1
+    !! @param div_pres_visc_stress per-cell stress divergence for each momentum component
+    subroutine s_compute_drag_coefficient(div_pres_visc_stress)
+        type(scalar_field), dimension(momxb:momxe), intent(in) :: div_pres_visc_stress
+        real(wp), dimension(0:num_ibs) :: FD_global
+        real(wp) :: drag_coeff
+        integer :: i, j, k
+
+        ! FD_calc is only allocated when compute_CD is set, and the coefficient
+        ! below reads entry 1 and patch_ib(1), which needs at least one immersed
+        ! boundary. compute_CD is broadcast to every rank (num_ibs presumably is
+        ! too), so leaving here should not strand a rank in the reductions.
+        if ((.not. compute_CD) .or. num_ibs < 1) return
+
+        !$acc parallel loop gang vector default(present)
+        do i = 0, num_ibs
+            FD_calc(i) = 0._wp
+        end do
+
+        ! cells with marker 0 accumulate into entry 0; entry q collects the cells tagged q
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    !$acc atomic
+                    FD_calc(ib_markers%sf(i, j, k)) = FD_calc(ib_markers%sf(i, j, k)) &
+                                                      + div_pres_visc_stress(momxb)%sf(i, j, k)*dx(i)*dy(j)*dz(k)
+                end do
+            end do
+        end do
+
+        !$acc update host(FD_calc(:))
+
+        do i = 0, num_ibs
+            call s_mpi_allreduce_sum(FD_calc(i), FD_global(i))
+        end do
+
+        ! C_D = F / (0.5*rho_inf*u_inf^2 * pi*r^2), using the radius of patch 1
+        drag_coeff = FD_global(1)/(0.5_wp*rho_inf_ref*(u_inf_ref**2)*pi*(patch_ib(1)%radius**2))
+        if (proc_rank == 0) then
+            print *, 'C_D: ', drag_coeff
+        end if
+
+    end subroutine s_compute_drag_coefficient
+
+    !> Releases the force accumulator
+    subroutine s_finalize_particle_forces_module
+        if (compute_CD) then
+            @:DEALLOCATE(FD_calc)
+        end if
+
+    end subroutine s_finalize_particle_forces_module
+
+end module m_compute_particle_forces
diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 1bdc9f1d97..0ae8d7763e 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -501,6 +501,19 @@ module m_global_parameters !$acc declare create(tau_star, cont_damage_s, alpha_bar) !> @} + logical :: periodic_ibs + logical :: compute_CD + real(wp) :: mu_visc !< reference viscosity + real(wp) :: u_inf_ref !< reference freestream velocity + real(wp) :: rho_inf_ref !< reference freestream density + real(wp) :: T_inf_ref !< reference freestream temperature + logical :: periodic_forcing + logical :: fourier_transform_filtering + logical :: store_levelset + logical :: slab_domain_decomposition + + !$acc declare create(mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref) + contains !> Assigns default values to the user inputs before reading @@ -776,6 +789,17 @@ contains relativity = .false. #:endif + periodic_ibs = .false. + compute_CD = .false. + mu_visc = dflt_real + u_inf_ref = dflt_real + rho_inf_ref = dflt_real + T_inf_ref = dflt_real + periodic_forcing = .false. + fourier_transform_filtering = .false. + store_levelset = .true. + slab_domain_decomposition = .false. 
+ end subroutine s_assign_default_values_to_user_inputs !> The computation of parameters, the allocation of memory, diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 0c18d17327..c5eb54bd87 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -45,6 +45,9 @@ module m_ibm integer :: num_inner_gps !< Number of ghost points !$acc declare create(gp_layers, num_gps, num_inner_gps) + real(wp) :: x_domain_beg_glb, x_domain_end_glb, y_domain_beg_glb, y_domain_end_glb, z_domain_beg_glb, z_domain_end_glb !< global domain beginning/end + !$acc declare create(x_domain_beg_glb, x_domain_end_glb, y_domain_beg_glb, y_domain_end_glb, z_domain_beg_glb, z_domain_end_glb) + contains !> Allocates memory for the variables in the IBM module @@ -53,10 +56,12 @@ contains if (p > 0) then @:ALLOCATE(ib_markers%sf(-gp_layers:m+gp_layers, & -gp_layers:n+gp_layers, -gp_layers:p+gp_layers)) - @:ALLOCATE(levelset%sf(-gp_layers:m+gp_layers, & - -gp_layers:n+gp_layers, -gp_layers:p+gp_layers, 1:num_ibs)) - @:ALLOCATE(levelset_norm%sf(-gp_layers:m+gp_layers, & - -gp_layers:n+gp_layers, -gp_layers:p+gp_layers, 1:num_ibs, 1:3)) + if (store_levelset) then + @:ALLOCATE(levelset%sf(-gp_layers:m+gp_layers, & + -gp_layers:n+gp_layers, -gp_layers:p+gp_layers, 1:num_ibs)) + @:ALLOCATE(levelset_norm%sf(-gp_layers:m+gp_layers, & + -gp_layers:n+gp_layers, -gp_layers:p+gp_layers, 1:num_ibs, 1:3)) + end if else @:ALLOCATE(ib_markers%sf(-gp_layers:m+gp_layers, & -gp_layers:n+gp_layers, 0:0)) @@ -67,9 +72,11 @@ contains end if @:ACC_SETUP_SFs(ib_markers) - @:ACC_SETUP_SFs(levelset) - @:ACC_SETUP_SFs(levelset_norm) - + if (store_levelset) then + @:ACC_SETUP_SFs(levelset) + @:ACC_SETUP_SFs(levelset_norm) + end if + !$acc enter data copyin(num_gps, num_inner_gps) end subroutine s_initialize_ibm_module @@ -106,6 +113,14 @@ contains call s_compute_interpolation_coeffs(ghost_points) !$acc update device(ghost_points) + call s_mpi_allreduce_min(x_domain%beg, x_domain_beg_glb) + call 
s_mpi_allreduce_max(x_domain%end, x_domain_end_glb) + call s_mpi_allreduce_min(y_domain%beg, y_domain_beg_glb) + call s_mpi_allreduce_max(y_domain%end, y_domain_end_glb) + call s_mpi_allreduce_min(z_domain%beg, z_domain_beg_glb) + call s_mpi_allreduce_max(z_domain%end, z_domain_end_glb) + !$acc update device(x_domain_beg_glb, x_domain_end_glb, y_domain_beg_glb, y_domain_end_glb, z_domain_beg_glb, z_domain_end_glb) + end subroutine s_ibm_setup !> Subroutine that updates the conservative variables at the ghost points @@ -362,6 +377,13 @@ contains integer :: dir integer :: index + real(wp) :: radius, x_centroid, y_centroid, z_centroid + real(wp) :: x_pcen, y_pcen, z_pcen + real(wp) :: dist_calc + real(wp), dimension(3) :: dist_vec + real(wp), dimension(7, 3) :: dist_vec_per + real(wp), dimension(7) :: dist_per + do q = 1, num_gps gp = ghost_points(q) i = gp%loc(1) @@ -377,8 +399,106 @@ contains ! Calculate and store the precise location of the image point patch_id = gp%ib_patch_id - dist = abs(levelset%sf(i, j, k, patch_id)) - norm(:) = levelset_norm%sf(i, j, k, patch_id, :) + if (store_levelset) then + dist = abs(levelset%sf(i, j, k, patch_id)) + norm(:) = levelset_norm%sf(i, j, k, patch_id, :) + else ! 
compute levelset and levelset_norm on the fly + radius = patch_ib(patch_id)%radius + x_centroid = patch_ib(patch_id)%x_centroid + y_centroid = patch_ib(patch_id)%y_centroid + z_centroid = patch_ib(patch_id)%z_centroid + if ((x_centroid - x_domain_beg_glb) <= radius) then + x_pcen = x_domain_end_glb + (x_centroid - x_domain_beg_glb) + else if ((x_domain_end_glb - x_centroid) <= radius) then + x_pcen = x_domain_beg_glb - (x_domain_end_glb - x_centroid) + else + x_pcen = x_centroid + end if + if ((y_centroid - y_domain_beg_glb) <= radius) then + y_pcen = y_domain_end_glb + (y_centroid - y_domain_beg_glb) + else if ((y_domain_end_glb - y_centroid) <= radius) then + y_pcen = y_domain_beg_glb - (y_domain_end_glb - y_centroid) + else + y_pcen = y_centroid + end if + if ((z_centroid - z_domain_beg_glb) <= radius) then + z_pcen = z_domain_end_glb + (z_centroid - z_domain_beg_glb) + else if ((z_domain_end_glb - z_centroid) <= radius) then + z_pcen = z_domain_beg_glb - (z_domain_end_glb - z_centroid) + else + z_pcen = z_centroid + end if + dist_vec(1) = x_cc(i) - x_centroid + dist_vec(2) = y_cc(j) - y_centroid + dist_vec(3) = z_cc(k) - z_centroid + dist_calc = sqrt(sum(dist_vec**2)) + ! 
all permutations of periodically projected ib + if (periodic_ibs) then + dist_vec_per(1, 1) = x_cc(i) - x_pcen + dist_vec_per(1, 2) = y_cc(j) - y_pcen + dist_vec_per(1, 3) = z_cc(k) - z_pcen + dist_per(1) = sqrt(sum(dist_vec_per(1, :)**2)) + if (dist_per(1) < dist_calc) then + dist_calc = dist_per(1) + dist_vec = dist_vec_per(1, :) + end if + dist_vec_per(2, 1) = x_cc(i) - x_pcen + dist_vec_per(2, 2) = y_cc(j) - y_centroid + dist_vec_per(2, 3) = z_cc(k) - z_pcen + dist_per(2) = sqrt(sum(dist_vec_per(2, :)**2)) + if (dist_per(2) < dist_calc) then + dist_calc = dist_per(2) + dist_vec = dist_vec_per(2, :) + end if + dist_vec_per(3, 1) = x_cc(i) - x_pcen + dist_vec_per(3, 2) = y_cc(j) - y_pcen + dist_vec_per(3, 3) = z_cc(k) - z_centroid + dist_per(3) = sqrt(sum(dist_vec_per(3, :)**2)) + if (dist_per(3) < dist_calc) then + dist_calc = dist_per(3) + dist_vec = dist_vec_per(3, :) + end if + dist_vec_per(4, 1) = x_cc(i) - x_pcen + dist_vec_per(4, 2) = y_cc(j) - y_centroid + dist_vec_per(4, 3) = z_cc(k) - z_centroid + dist_per(4) = sqrt(sum(dist_vec_per(4, :)**2)) + if (dist_per(4) < dist_calc) then + dist_calc = dist_per(4) + dist_vec = dist_vec_per(4, :) + end if + dist_vec_per(5, 1) = x_cc(i) - x_centroid + dist_vec_per(5, 2) = y_cc(j) - y_pcen + dist_vec_per(5, 3) = z_cc(k) - z_pcen + dist_per(5) = sqrt(sum(dist_vec_per(5, :)**2)) + if (dist_per(5) < dist_calc) then + dist_calc = dist_per(5) + dist_vec = dist_vec_per(5, :) + end if + dist_vec_per(6, 1) = x_cc(i) - x_centroid + dist_vec_per(6, 2) = y_cc(j) - y_pcen + dist_vec_per(6, 3) = z_cc(k) - z_centroid + dist_per(6) = sqrt(sum(dist_vec_per(6, :)**2)) + if (dist_per(6) < dist_calc) then + dist_calc = dist_per(6) + dist_vec = dist_vec_per(6, :) + end if + dist_vec_per(7, 1) = x_cc(i) - x_centroid + dist_vec_per(7, 2) = y_cc(j) - y_centroid + dist_vec_per(7, 3) = z_cc(k) - z_pcen + dist_per(7) = sqrt(sum(dist_vec_per(7, :)**2)) + if (dist_per(7) < dist_calc) then + dist_calc = dist_per(7) + dist_vec = dist_vec_per(7, 
:) + end if + end if + dist = abs(dist_calc - radius) + if (dist_calc == 0) then + norm(:) = (/1, 0, 0/) + else + norm(:) = dist_vec(:)/dist_calc + end if + end if ! end store_levelset if statement + ghost_points(q)%ip_loc(:) = physical_loc(:) + 2*dist*norm(:) ! Find the closest grid point to the image point @@ -863,8 +983,10 @@ contains subroutine s_finalize_ibm_module() @:DEALLOCATE(ib_markers%sf) - @:DEALLOCATE(levelset%sf) - @:DEALLOCATE(levelset_norm%sf) + if (store_levelset) then + @:DEALLOCATE(levelset%sf) + @:DEALLOCATE(levelset_norm%sf) + end if end subroutine s_finalize_ibm_module diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 33b61a9284..b1f1c28c8c 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -91,7 +91,9 @@ contains & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage' ] + & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage', & + & 'periodic_ibs', 'compute_CD', 'periodic_forcing', 'fourier_transform_filtering', & + & 'store_levelset', 'slab_domain_decomposition' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -130,7 +132,8 @@ contains & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & - & 'tau_star', 'cont_damage_s', 'alpha_bar' ] + & 'tau_star', 'cont_damage_s', 'alpha_bar', 'mu_visc', 'u_inf_ref', & + & 'rho_inf_ref', 'T_inf_ref' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -294,6 +297,19 @@ contains end do + else if (slab_domain_decomposition) then + if (proc_rank == 0) then + print *, 'slab domain decomposition...' + end if + + ! 
continuous x and y direction, block decomposition in z + num_procs_x = 1 + num_procs_y = 1 + num_procs_z = num_procs + ierr = -1 + if (mod((p+1), num_procs_z) == 0) then + ierr = 0 + end if else ! Initial estimate of optimal processor topology diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 6930f3caa7..626aed96ce 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -609,7 +609,7 @@ contains end subroutine s_initialize_rhs_module - subroutine s_compute_rhs(q_cons_vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb, rhs_pb, mv, rhs_mv, t_step, time_avg) + subroutine s_compute_rhs(q_cons_vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb, rhs_pb, mv, rhs_mv, t_step, time_avg, div_pres_visc_stress) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), intent(inout) :: q_T_sf @@ -620,6 +620,7 @@ contains real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv, rhs_mv integer, intent(in) :: t_step real(wp), intent(inout) :: time_avg + type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: div_pres_visc_stress real(wp), dimension(0:m, 0:n, 0:p) :: nbub real(wp) :: t_start, t_finish @@ -809,7 +810,8 @@ contains rhs_vf, & q_cons_qp, & q_prim_qp, & - flux_src_n(id)) + flux_src_n(id), & + div_pres_visc_stress) call nvtxEndRange ! 
RHS additions for hypoelasticity @@ -828,7 +830,8 @@ contains flux_src_n(id)%vf, & dq_prim_dx_qp(1)%vf, & dq_prim_dy_qp(1)%vf, & - dq_prim_dz_qp(1)%vf) + dq_prim_dz_qp(1)%vf, & + div_pres_visc_stress) call nvtxEndRange end if @@ -935,13 +938,14 @@ contains end subroutine s_compute_rhs - subroutine s_compute_advection_source_term(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf) + subroutine s_compute_advection_source_term(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, div_pres_visc_stress) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf type(vector_field), intent(inout) :: q_cons_vf type(vector_field), intent(inout) :: q_prim_vf type(vector_field), intent(inout) :: flux_src_n_vf + type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: div_pres_visc_stress integer :: i, j, k, l, q @@ -994,6 +998,25 @@ contains end do end do + ! particle forces loop, x-dir + if (compute_CD .and. present(div_pres_visc_stress)) then + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + !$acc loop seq + do l = momxb, momxe + div_pres_visc_stress(l)%sf(i, j, k) = 1._wp/dx(i) * & + (flux_n(1)%vf(l)%sf(i-1, j, k) - & + flux_n(1)%vf(l)%sf(i, j, k)) - 0.5_wp/dx(i) * & + (q_cons_vf%vf(2)%sf(i+1, j, k)*q_cons_vf%vf(l)%sf(i+1, j, k)/q_cons_vf%vf(1)%sf(i+1, j, k) - & + q_cons_vf%vf(2)%sf(i-1, j, k)*q_cons_vf%vf(l)%sf(i-1, j, k)/q_cons_vf%vf(1)%sf(i-1, j, k)) + end do + end do + end do + end do + end if + if (model_eqns == 3) then !$acc parallel loop collapse(4) gang vector default(present) do l = 0, p @@ -1104,6 +1127,25 @@ contains end do end do + ! particle forces loop, y-dir + if (compute_CD .and. 
present(div_pres_visc_stress)) then + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + !$acc loop seq + do l = momxb, momxe + div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & + (flux_n(2)%vf(l)%sf(i, j-1, k) - & + flux_n(2)%vf(l)%sf(i, j, k)) - 0.5_wp/dy(j) * & + (q_cons_vf%vf(3)%sf(i, j+1, k)*q_cons_vf%vf(l)%sf(i, j+1, k)/q_cons_vf%vf(1)%sf(i, j+1, k) - & + q_cons_vf%vf(3)%sf(i, j-1, k)*q_cons_vf%vf(l)%sf(i, j-1, k)/q_cons_vf%vf(1)%sf(i, j-1, k)) + end do + end do + end do + end do + end if + if (model_eqns == 3) then !$acc parallel loop collapse(4) gang vector default(present) do l = 0, p @@ -1310,6 +1352,25 @@ contains end do end if + ! particle forces loop, z-dir + if (compute_CD .and. present(div_pres_visc_stress)) then + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + !$acc loop seq + do l = momxb, momxe + div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & + (flux_n(3)%vf(l)%sf(i, j, k-1) - & + flux_n(3)%vf(l)%sf(i, j, k)) - 0.5_wp/dz(k) * & + (q_cons_vf%vf(4)%sf(i, j, k+1)*q_cons_vf%vf(l)%sf(i, j, k+1)/q_cons_vf%vf(1)%sf(i, j, k+1) - & + q_cons_vf%vf(4)%sf(i, j, k-1)*q_cons_vf%vf(l)%sf(i, j, k-1)/q_cons_vf%vf(1)%sf(i, j, k-1)) + end do + end do + end do + end do + end if + if (model_eqns == 3) then !$acc parallel loop collapse(4) gang vector default(present) do l = 0, p @@ -1491,13 +1552,14 @@ contains end subroutine s_compute_advection_source_term subroutine s_compute_additional_physics_rhs(idir, q_prim_vf, rhs_vf, flux_src_n, & - dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf) + dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf, div_pres_visc_stress) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf type(scalar_field), dimension(sys_size), intent(in) :: flux_src_n 
type(scalar_field), dimension(sys_size), intent(in) :: dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf + type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: div_pres_visc_stress integer :: i, j, k, l @@ -1533,6 +1595,23 @@ contains end do end do + ! particle momentum exchange, viscous stress tensor, x-dir + if (compute_CD .and. present(div_pres_visc_stress)) then + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + !$acc loop seq + do l = momxb, momxe + div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dx(i) * & + (flux_src_n(l)%sf(i-1, j, k) - & + flux_src_n(l)%sf(i, j, k)) + end do + end do + end do + end do + end if + elseif (idir == 2) then ! y-direction if (surface_tension) then @@ -1615,6 +1694,23 @@ contains end do end if + ! particle momentum exchange, viscous stress tensor, y-dir + if (compute_CD .and. present(div_pres_visc_stress)) then + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + !$acc loop seq + do l = momxb, momxe + div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & + (flux_src_n(l)%sf(i, j-1, k) - & + flux_src_n(l)%sf(i, j, k)) + end do + end do + end do + end do + end if + ! Applying the geometrical viscous Riemann source fluxes calculated as average ! of values at cell boundaries if (cyl_coord) then @@ -1700,6 +1796,23 @@ contains end do end do + ! particle momentum exchange, viscous stress tensor, z-dir + if (compute_CD .and. 
present(div_pres_visc_stress)) then + !$acc parallel loop collapse(3) gang vector default(present) + do k = 0, p + do j = 0, n + do i = 0, m + !$acc loop seq + do l = momxb, momxe + div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & + (flux_src_n(l)%sf(i, j, k-1) - & + flux_src_n(l)%sf(i, j, k)) + end do + end do + end do + end do + end if + if (grid_geometry == 3) then !$acc parallel loop collapse(3) gang vector default(present) do l = 0, p diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index a45346673c..d2e9e344a3 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -89,6 +89,12 @@ module m_start_up use m_mhd + use m_compute_particle_forces + + use m_additional_forcing + + use m_volume_filtering + implicit none private; public :: s_read_input_file, & @@ -180,7 +186,10 @@ contains bubbles_lagrange, lag_params, & rkck_adap_dt, rkck_tolerance, & hyperelasticity, R0ref, num_bc_patches, Bx0, powell, & - cont_damage, tau_star, cont_damage_s, alpha_bar + cont_damage, tau_star, cont_damage_s, alpha_bar, & + periodic_ibs, compute_CD, mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref, & + periodic_forcing, fourier_transform_filtering, store_levelset, & + slab_domain_decomposition ! Checking that an input file has been provided by the user. If it ! has, then the input file is read in, otherwise, simulation exits. @@ -434,33 +443,35 @@ contains call s_mpi_abort(trim(file_path)//' is missing. Exiting.') end if - ! Read Levelset - write (file_path, '(A)') & - trim(t_step_dir)//'/levelset.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (file_exist) then - open (2, FILE=trim(file_path), & - FORM='unformatted', & - ACTION='read', & - STATUS='old') - read (2) levelset%sf(0:m, 0:n, 0:p, 1:num_ibs); close (2) - ! print*, 'check', STL_levelset(106, 50, 0, 1) - else - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if + if (store_levelset) then + ! 
Read Levelset + write (file_path, '(A)') & + trim(t_step_dir)//'/levelset.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (file_exist) then + open (2, FILE=trim(file_path), & + FORM='unformatted', & + ACTION='read', & + STATUS='old') + read (2) levelset%sf(0:m, 0:n, 0:p, 1:num_ibs); close (2) + ! print*, 'check', STL_levelset(106, 50, 0, 1) + else + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if - ! Read Levelset Norm - write (file_path, '(A)') & - trim(t_step_dir)//'/levelset_norm.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (file_exist) then - open (2, FILE=trim(file_path), & - FORM='unformatted', & - ACTION='read', & - STATUS='old') - read (2) levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3); close (2) - else - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + ! Read Levelset Norm + write (file_path, '(A)') & + trim(t_step_dir)//'/levelset_norm.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (file_exist) then + open (2, FILE=trim(file_path), & + FORM='unformatted', & + ACTION='read', & + STATUS='old') + read (2) levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3); close (2) + else + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if end if do i = 1, num_ibs @@ -693,44 +704,46 @@ contains call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') end if - ! Read Levelset - write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + if (store_levelset) then + ! 
Read Levelset + write (file_loc, '(A)') 'levelset.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') - end if + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if - ! Read Levelset Norm - write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + ! 
Read Levelset Norm + write (file_loc, '(A)') 'levelset_norm.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if end if end if @@ -842,44 +855,46 @@ contains call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') end if - ! Read Levelset - write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + if (store_levelset) then + ! 
Read Levelset + write (file_loc, '(A)') 'levelset.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') - end if + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if - ! Read Levelset Norm - write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + ! 
Read Levelset Norm + write (file_loc, '(A)') 'levelset_norm.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if end if end if @@ -1552,6 +1567,10 @@ contains if (mhd .and. powell) call s_initialize_mhd_powell_module + call s_initialize_particle_forces_module() + call s_initialize_additional_forcing_module() + if (fourier_transform_filtering) call s_initialize_fftw_explicit_filter_module() + end subroutine s_initialize_modules subroutine s_initialize_mpi_domain @@ -1663,6 +1682,9 @@ contains if (ib) then !$acc update device(ib_markers%sf) end if + + !$acc update device(mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref) + end subroutine s_initialize_gpu_vars subroutine s_finalize_modules @@ -1691,6 +1713,10 @@ contains if (bodyForces) call s_finalize_body_forces_module() if (mhd .and. powell) call s_finalize_mhd_powell_module + call s_finalize_particle_forces_module() + call s_finalize_additional_forcing_module() + if (fourier_transform_filtering) call s_finalize_fftw_explicit_filter_module + ! 
Terminating MPI execution environment call s_mpi_finalize() end subroutine s_finalize_modules diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index f8cdb7a7ac..8291e2d9e7 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -46,6 +46,12 @@ module m_time_steppers use m_body_forces + use m_compute_particle_forces + + use m_additional_forcing + + use m_volume_filtering + implicit none type(vector_field), allocatable, dimension(:) :: q_cons_ts !< @@ -79,7 +85,12 @@ module m_time_steppers integer, private :: num_ts !< !! Number of time stages in the time-stepping scheme + type(scalar_field), allocatable, dimension(:) :: div_pres_visc_stress + + type(scalar_field), allocatable, dimension(:) :: q_cons_filtered + !$acc declare create(q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, rhs_ts_rkck, q_prim_ts, rhs_mv, rhs_pb, max_dt) + !$acc declare create(div_pres_visc_stress) contains @@ -355,6 +366,26 @@ contains end do end do + if (compute_CD) then + @:ALLOCATE(div_pres_visc_stress(momxb:momxe)) + do i = momxb, momxe + @:ALLOCATE(div_pres_visc_stress(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(div_pres_visc_stress(i)) + end do + end if + + if (fourier_transform_filtering) then + @:ALLOCATE(q_cons_filtered(1:sys_size)) + do i = 1, sys_size + @:ALLOCATE(q_cons_filtered(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(q_cons_filtered(i)) + end do + end if + end subroutine s_initialize_time_steppers_module !> 1st order TVD RK time-stepping algorithm @@ -670,7 +701,20 @@ contains call nvtxStartRange("TIMESTEP") end if - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg) + if (periodic_forcing) then + call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) + call 
s_compute_periodic_forcing(q_cons_ts(1)%vf) + end if + + call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, div_pres_visc_stress) + + if (compute_CD) then + call s_compute_drag_coefficient(div_pres_visc_stress) + end if + + if (periodic_forcing) then + call s_add_periodic_forcing(rhs_vf) + end if if (run_time_info) then call s_write_run_time_information(q_prim_vf, t_step) @@ -761,6 +805,10 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg) + if (periodic_forcing) then + call s_add_periodic_forcing(rhs_vf) + end if + if (bubbles_lagrange) then call s_compute_EL_coupled_solver(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, stage=2) call s_update_lagrange_tdv_rk(stage=2) @@ -837,6 +885,10 @@ contains ! Stage 3 of 3 call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg) + if (periodic_forcing) then + call s_add_periodic_forcing(rhs_vf) + end if + if (bubbles_lagrange) then call s_compute_EL_coupled_solver(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, stage=3) call s_update_lagrange_tdv_rk(stage=3) @@ -1328,6 +1380,13 @@ contains @:DEALLOCATE(rhs_vf) end if + if (compute_CD) then + do i = momxb, momxe + @:DEALLOCATE(div_pres_visc_stress(i)%sf) + end do + @:DEALLOCATE(div_pres_visc_stress) + end if + ! Writing the footer of and closing the run-time information file if (proc_rank == 0 .and. 
run_time_info) then call s_close_run_time_information_file() diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp new file mode 100644 index 0000000000..6caffaa4c5 --- /dev/null +++ b/src/simulation/m_volume_filtering.fpp @@ -0,0 +1,1049 @@ +#:include 'macros.fpp' + +module m_volume_filtering + + use, intrinsic :: iso_c_binding + + use m_derived_types !< Definitions of the derived types + + use m_global_parameters !< Definitions of the global parameters + + use m_mpi_proxy !< Message passing interface (MPI) module proxy + + use m_ibm + +#ifdef MFC_MPI + use mpi !< Message passing interface (MPI) module +#endif + +#if defined(MFC_OpenACC) && defined(__PGI) + use cufft +#endif + + implicit none + + private; public :: s_initialize_fftw_explicit_filter_module, & + s_apply_fftw_filter_cons, & + s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & + s_initialize_filtered_fluid_indicator_function, & + s_finalize_fftw_explicit_filter_module, & + s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield + +#if !defined(MFC_OpenACC) + include 'fftw3.f03' +#endif + + integer :: ierr + + ! fluid indicator function (1 = fluid, 0 = otherwise) + type(scalar_field), public :: fluid_indicator_function_I + + !$acc declare create(fluid_indicator_function_I) + +#if defined(MFC_OpenACC) + ! GPU plans + integer :: plan_x_fwd_gpu, plan_x_bwd_gpu, plan_y_gpu, plan_z_gpu +#else + ! CPU plans + type(c_ptr) :: plan_x_r2c_fwd, plan_x_c2r_bwd + type(c_ptr) :: plan_y_c2c_fwd, plan_y_c2c_bwd + type(c_ptr) :: plan_z_c2c_fwd, plan_z_c2c_bwd + type(c_ptr) :: plan_x_r2c_kernelG, plan_y_c2c_kernelG, plan_z_c2c_kernelG +#endif + + ! domain size information (global, complex, local) + integer :: Nx, Ny, Nz, NxC, Nyloc, Nzloc + + ! 
1D real and complex vectors for FFT routines
+    real(c_double), allocatable :: data_real_in1d(:)
+    complex(c_double_complex), allocatable :: data_cmplx_out1d(:)
+    complex(c_double_complex), allocatable :: data_cmplx_out1dy(:)
+
+    ! 3D arrays for slab transposes
+    complex(c_double_complex), allocatable :: data_cmplx_slabz(:, :, :), data_cmplx_slaby(:, :, :)
+
+    ! input/output array for FFT routine
+    real(c_double), allocatable :: data_real_3D_slabz(:, :, :)
+
+    ! filtering kernel in physical space
+    real(c_double), allocatable :: real_kernelG_in(:, :, :)
+
+    ! FFT of filtering kernel
+    complex(c_double_complex), allocatable :: cmplx_kernelG1d(:)
+
+    !$acc declare create(Nx, Ny, Nz, NxC, Nyloc, Nzloc)
+    !$acc declare create(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy, data_cmplx_slabz, data_cmplx_slaby, data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d)
+
+contains
+
+    !< Create the FFT plans and work arrays used for explicit filtering of data.
+    !! Sets the module-level sizes (Nx, Ny, Nz, NxC, Nyloc, Nzloc), allocates every
+    !! work array and builds batched 1D plans: real-to-complex in x, complex-to-complex
+    !! in y and z (cuFFT when MFC_OpenACC is defined, FFTW otherwise).
+    !! Aborts if the domain decomposition does not match the slab layout the arrays assume.
+    subroutine s_initialize_fftw_explicit_filter_module
+        integer :: size_n(1), inembed(1), onembed(1)
+
+        !< global sizes
+        Nx = m_glb + 1
+        Ny = n_glb + 1
+        Nz = p_glb + 1
+
+        !< complex size
+        NxC = Nx/2 + 1
+
+        !< local sizes on each processor
+        Nyloc = Ny / num_procs
+        Nzloc = p + 1
+
+        !< The work arrays below are dimensioned (Nx, Ny, Nzloc) / (NxC, Nyloc, Nz) and the slab
+        !< transposes exchange NxC*Nyloc*Nzloc entries with every rank, so each rank must hold the
+        !< full x-y plane and an equal share of the z planes, and Ny must split evenly as well.
+        !< Integer division would otherwise truncate silently and corrupt the exchange.
+        if (m + 1 /= Nx .or. n + 1 /= Ny .or. Nzloc*num_procs /= Nz) then
+            call s_mpi_abort('fourier_transform_filtering requires a z-slab domain decomposition '// &
+                             'with equally sized slabs. Exiting.')
+        end if
+        if (mod(Ny, num_procs) /= 0) then
+            call s_mpi_abort('fourier_transform_filtering requires n+1 to be divisible by the '// &
+                             'number of processors. Exiting.')
+        end if
+
+        !$acc update device(Nx, Ny, Nz, NxC, Nyloc, Nzloc)
+
+        @:ALLOCATE(data_real_in1d(Nx*Ny*Nzloc))
+        @:ALLOCATE(data_cmplx_out1d(NxC*Ny*Nz/num_procs))
+        @:ALLOCATE(data_cmplx_out1dy(NxC*Ny*Nz/num_procs))
+        @:ALLOCATE(cmplx_kernelG1d(NxC*Nyloc*Nz))
+        @:ALLOCATE(real_kernelG_in(Nx, Ny, Nzloc))
+        @:ALLOCATE(data_real_3D_slabz(Nx, Ny, Nzloc))
+        @:ALLOCATE(data_cmplx_slabz(NxC, Ny, Nzloc))
+        @:ALLOCATE(data_cmplx_slaby(NxC, Nyloc, Nz))
+
+#if defined(MFC_OpenACC)
+        !< GPU FFT plans
+        !< X - plans
+        size_n(1) = Nx
+        inembed(1) = Nx
+        onembed(1) = NxC
+        ierr = cufftPlanMany(plan_x_fwd_gpu, 1, size_n, inembed, 1, Nx, onembed, 1, NxC, CUFFT_D2Z, Ny*Nzloc)
+        size_n(1) = Nx
+        inembed(1) = NxC
+        onembed(1) = Nx
+        ierr = cufftPlanMany(plan_x_bwd_gpu, 1, size_n, inembed, 1, NxC, onembed, 1, Nx, CUFFT_Z2D, Ny*Nzloc)
+        !< Y - plans
+        size_n(1) = Ny
+        inembed(1) = Ny
+        onembed(1) = Ny
+        ierr = cufftPlanMany(plan_y_gpu, 1, size_n, inembed, 1, Ny, onembed, 1, Ny, CUFFT_Z2Z, NxC*Nzloc)
+        !< Z - plans
+        size_n(1) = Nz
+        inembed(1) = Nz
+        onembed(1) = Nz
+        ierr = cufftPlanMany(plan_z_gpu, 1, size_n, inembed, 1, Nz, onembed, 1, Nz, CUFFT_Z2Z, NxC*Nyloc)
+#else
+        !< CPU FFT plans
+        !< X - direction plans
+        size_n(1) = Nx
+        inembed(1) = Nx
+        onembed(1) = NxC
+        plan_x_r2c_fwd = fftw_plan_many_dft_r2c(1, size_n, Ny*Nzloc, & ! rank, n, howmany
+                                                data_real_in1d, inembed, 1, Nx, & ! in, inembed, istride, idist
+                                                data_cmplx_out1d, onembed, 1, NxC, & ! out, onembed, ostride, odist
+                                                FFTW_MEASURE) ! flags (r2c plans take no sign argument)
+        size_n(1) = Nx
+        inembed(1) = NxC
+        onembed(1) = Nx
+        plan_x_c2r_bwd = fftw_plan_many_dft_c2r(1, size_n, Ny*Nzloc, &
+                                                data_cmplx_out1d, inembed, 1, NxC, &
+                                                data_real_in1d, onembed, 1, Nx, &
+                                                FFTW_MEASURE)
+        !< Y - direction plans
+        size_n(1) = Ny
+        inembed(1) = Ny
+        onembed(1) = Ny
+        plan_y_c2c_fwd = fftw_plan_many_dft(1, size_n, NxC*Nzloc, &
+                                            data_cmplx_out1dy, inembed, 1, Ny, &
+                                            data_cmplx_out1dy, onembed, 1, Ny, &
+                                            FFTW_FORWARD, FFTW_MEASURE)
+        plan_y_c2c_bwd = fftw_plan_many_dft(1, size_n, NxC*Nzloc, &
+                                            data_cmplx_out1dy, inembed, 1, Ny, &
+                                            data_cmplx_out1dy, onembed, 1, Ny, &
+                                            FFTW_BACKWARD, FFTW_MEASURE)
+        !< Z - direction plans
+        size_n(1) = Nz
+        inembed(1) = Nz
+        onembed(1) = Nz
+        plan_z_c2c_fwd = fftw_plan_many_dft(1, size_n, NxC*Nyloc, &
+                                            data_cmplx_out1d, inembed, 1, Nz, &
+                                            data_cmplx_out1d, onembed, 1, Nz, &
+                                            FFTW_FORWARD, FFTW_MEASURE)
+        plan_z_c2c_bwd = fftw_plan_many_dft(1, size_n, NxC*Nyloc, &
+                                            data_cmplx_out1d, inembed, 1, Nz, &
+                                            data_cmplx_out1d, onembed, 1, Nz, &
+                                            FFTW_BACKWARD, FFTW_MEASURE)
+        ! forward plans for filtering kernel
+        ! X kernel plan
+        size_n(1) = Nx
+        inembed(1) = Nx
+        onembed(1) = NxC
+        plan_x_r2c_kernelG = fftw_plan_many_dft_r2c(1, size_n, Ny*Nzloc, &
+                                                    data_real_in1d, inembed, 1, Nx, &
+                                                    cmplx_kernelG1d, onembed, 1, NxC, &
+                                                    FFTW_MEASURE)
+        ! Y kernel plan
+        size_n(1) = Ny
+        inembed(1) = Ny
+        onembed(1) = Ny
+        plan_y_c2c_kernelG = fftw_plan_many_dft(1, size_n, NxC*Nzloc, &
+                                                data_cmplx_out1dy, inembed, 1, Ny, &
+                                                data_cmplx_out1dy, onembed, 1, Ny, &
+                                                FFTW_FORWARD, FFTW_MEASURE)
+        ! Z kernel plan
+        size_n(1) = Nz
+        inembed(1) = Nz
+        onembed(1) = Nz
+        plan_z_c2c_kernelG = fftw_plan_many_dft(1, size_n, NxC*Nyloc, &
+                                                cmplx_kernelG1d, inembed, 1, Nz, &
+                                                cmplx_kernelG1d, onembed, 1, Nz, &
+                                                FFTW_FORWARD, FFTW_MEASURE)
+#endif
+    end subroutine s_initialize_fftw_explicit_filter_module
+
+    !< initialize the gaussian filtering kernel in real space and then compute its DFT
+    subroutine s_initialize_filtering_kernel
+        real(dp) :: sigma_stddev
+        real(dp) :: Lx, Ly, Lz
+        real(dp) :: x_r, y_r, z_r
+        real(dp) :: r2
+        real(dp) :: G_norm_int, G_norm_int_glb
+        integer :: i, j, k
+
+        ! gaussian filter
+        ! NOTE(review): the filter width is hard-coded; 0.05 presumably is the particle radius
+        ! of the bundled cases (D/2 with D = 0.1) - confirm and consider making it a case input
+        sigma_stddev = 3.0_dp * 0.05_dp
+
+        Lx = x_domain_end_glb - x_domain_beg_glb
+        Ly = y_domain_end_glb - y_domain_beg_glb
+        Lz = z_domain_end_glb - z_domain_beg_glb
+
+        G_norm_int = 0.0_dp
+
+        ! Evaluate the kernel at the periodic (minimum-image) distance of every local cell
+        ! centre from the global domain origin and accumulate its volume integral.
+        ! NOTE(review): distances are measured from the domain corner (a cell face) while the
+        ! samples sit at cell centres, so the discrete kernel looks symmetric about index -1/2
+        ! rather than index 0 - verify that a half-cell shift of the filtered fields is intended
+        !$acc parallel loop collapse(3) gang vector default(present) reduction(+:G_norm_int) copyin(Lx, Ly, Lz, sigma_stddev) private(x_r, y_r, z_r, r2)
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    x_r = min(abs(x_cc(i) - x_domain_beg_glb), Lx - abs(x_cc(i) - x_domain_beg_glb))
+                    y_r = min(abs(y_cc(j) - y_domain_beg_glb), Ly - abs(y_cc(j) - y_domain_beg_glb))
+                    z_r = min(abs(z_cc(k) - z_domain_beg_glb), Lz - abs(z_cc(k) - z_domain_beg_glb))
+
+                    r2 = x_r**2 + y_r**2 + z_r**2
+
+                    real_kernelG_in(i+1, j+1, k+1) = exp(-r2 / (2.0_dp*sigma_stddev**2))
+
+                    G_norm_int = G_norm_int + real_kernelG_in(i+1, j+1, k+1)*dx(i)*dy(j)*dz(k)
+                end do
+            end do
+        end do
+
+        ! sum the local kernel integrals over all ranks
+        call s_mpi_allreduce_sum(G_norm_int, G_norm_int_glb)
+
+        !
FFT of kernel
+        ! normalize kernel so that its volume integral (sum of G*dx*dy*dz) equals one
+        !$acc parallel loop collapse(3) gang vector default(present) copyin(G_norm_int_glb)
+        do i = 1, Nx
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    data_real_3D_slabz(i, j, k) = real_kernelG_in(i, j, k) / G_norm_int_glb
+                end do
+            end do
+        end do
+
+        ! 3D z-slab -> 1D x, y, z
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, Nx
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k)
+                end do
+            end do
+        end do
+
+        ! X FFT
+        ! (cuFFT expects device addresses, hence the host_data region around the call)
+#if defined(MFC_OpenACC)
+        !$acc host_data use_device(data_real_in1d, cmplx_kernelG1d)
+        ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, cmplx_kernelG1d)
+        !$acc end host_data
+#else
+        call fftw_execute_dft_r2c(plan_x_r2c_kernelG, data_real_in1d, cmplx_kernelG1d)
+#endif
+
+        ! 1D x, y, z -> 1D y, x, z (CMPLX)
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, NxC
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = cmplx_kernelG1d(i + (j-1)*NxC + (k-1)*NxC*Ny)
+                end do
+            end do
+        end do
+
+        ! Y FFT
+#if defined(MFC_OpenACC)
+        !$acc host_data use_device(data_cmplx_out1dy)
+        ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD)
+        !$acc end host_data
+#else
+        call fftw_execute_dft(plan_y_c2c_kernelG, data_cmplx_out1dy, data_cmplx_out1dy)
+#endif
+
+        ! 1D y, x, z -> 3D z-slab
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, NxC
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    data_cmplx_slabz(i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC)
+                end do
+            end do
+        end do
+
+        ! transpose z-slab to y-slab
+        call s_mpi_transpose_slabZ2Y
+
+        ! 3D y-slab -> 1D z, x, y
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, NxC
+            do j = 1, Nyloc
+                do k = 1, Nz
+                    cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby(i, j, k)
+                end do
+            end do
+        end do
+
+        ! Z FFT
+#if defined(MFC_OpenACC)
+        !$acc host_data use_device(cmplx_kernelG1d)
+        ierr = cufftExecZ2Z(plan_z_gpu, cmplx_kernelG1d, cmplx_kernelG1d, CUFFT_FORWARD)
+        !$acc end host_data
+#else
+        call fftw_execute_dft(plan_z_c2c_kernelG, cmplx_kernelG1d, cmplx_kernelG1d)
+#endif
+
+        ! normalize FFT
+        ! The kernel was normalised to unit *volume integral*, so the discrete convolution needs
+        ! the cell volume dV = Lx*Ly*Lz/(Nx*Ny*Nz) as quadrature weight. Scaling by 1/(Nx*Ny*Nz)
+        ! alone only gives a unit-gain filter when the domain volume happens to be exactly one.
+        ! The grid size is formed in floating point because Nx*Ny*Nz overflows a default integer
+        ! on large grids.
+        !$acc parallel loop collapse(3) gang vector default(present) copyin(Lx, Ly, Lz)
+        do i = 1, NxC
+            do j = 1, Nyloc
+                do k = 1, Nz
+                    cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * &
+                        (Lx*Ly*Lz) / (real(Nx, dp)*real(Ny, dp)*real(Nz, dp))
+                end do
+            end do
+        end do
+
+        ! return cmplx_kernelG1d: 1D z, x, y
+    end subroutine s_initialize_filtering_kernel
+
+    !< initialize fluid indicator function
+    !! Allocates fluid_indicator_function_I on the local interior cells and sets it to one
+    !! where ib_markers is zero and to zero everywhere else.
+    subroutine s_initialize_fluid_indicator_function
+        integer :: i, j, k
+
+        @:ALLOCATE(fluid_indicator_function_I%sf(0:m, 0:n, 0:p))
+        @:ACC_SETUP_SFs(fluid_indicator_function_I)
+
+        ! define fluid indicator function
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    if (ib_markers%sf(i, j, k) == 0) then
+                        fluid_indicator_function_I%sf(i, j, k) = 1.0_dp
+                    else
+                        fluid_indicator_function_I%sf(i, j, k) = 0.0_dp
+                    end if
+                end do
+            end do
+        end do
+
+    end subroutine s_initialize_fluid_indicator_function
+
+    !< compute the filtered fluid indicator function counterpart
+    !! @param filtered_fluid_indicator_function field that receives the filtered indicator
+    subroutine s_initialize_filtered_fluid_indicator_function(filtered_fluid_indicator_function)
+        type(scalar_field), intent(inout) :: filtered_fluid_indicator_function
+
+        integer :: i, j, k
+
+        !
filter fluid indicator function -> stored in q_cons_vf(advxb)
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, Nx
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    data_real_3D_slabz(i, j, k) = fluid_indicator_function_I%sf(i-1, j-1, k-1)
+                end do
+            end do
+        end do
+
+        call s_mpi_FFT_fwd
+
+        ! convolution theorem: multiply the spectrum by the kernel spectrum
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, NxC
+            do j = 1, Nyloc
+                do k = 1, Nz
+                    data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC)
+                end do
+            end do
+        end do
+
+        call s_mpi_FFT_bwd
+
+        ! undo the scaling of the unnormalised forward/backward transform pair; the grid size is
+        ! formed in floating point because Nx*Ny*Nz overflows a default integer on large grids
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, Nx
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    filtered_fluid_indicator_function%sf(i-1, j-1, k-1) = data_real_3D_slabz(i, j, k) / (real(Nx, dp)*real(Ny, dp)*real(Nz, dp))
+                end do
+            end do
+        end do
+
+    end subroutine s_initialize_filtered_fluid_indicator_function
+
+    !< apply the gaussian filter to the conservative variables and compute their filtered components
+    !! @param q_cons_vf conservative variables to be filtered (read only here)
+    !! @param q_cons_filtered filtered variables; entry advxb is read as the filtered fluid
+    !!        indicator function and must already hold it on entry
+    subroutine s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered)
+        type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
+        type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered
+
+        integer :: l
+
+        ! NOTE(review): the loop stops at sys_size-1, which keeps q_cons_filtered(advxb) intact
+        ! only if advxb == sys_size - presumably a single-fluid assumption; verify for other
+        ! configurations, where entry advxb would be both the indicator and an output
+        do l = 1, sys_size-1
+            call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .true., q_cons_vf(l), q_cons_filtered(l))
+        end do
+
+    end subroutine s_apply_fftw_filter_cons
+
+    !< applies the gaussian filter to an arbitrary scalar field
+    !! @param filtered_fluid_indicator_function filtered fluid indicator used to normalise the result
+    !! @param fluid_quantity .true. weights the input by the fluid indicator, .false. by its complement
+    !! @param q_temp_in field to filter; overwritten with the result when q_temp_out is absent
+    !! @param q_temp_out optional destination for the filtered field
+    subroutine s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, fluid_quantity, q_temp_in, q_temp_out)
+        type(scalar_field), intent(in) :: filtered_fluid_indicator_function
+        type(scalar_field), intent(inout) :: q_temp_in
+        type(scalar_field), intent(inout), optional :: q_temp_out
+
+        logical, intent(in) :: fluid_quantity !< whether or not convolution integral is over V_f or V_p^(i) - integral over fluid volume or particle volume
+
+        integer :: i, j, k
+
+        ! mask the input with the indicator of the requested phase
+        if (fluid_quantity) then
+            !$acc parallel loop collapse(3) gang vector default(present)
+            do i = 0, m
+                do j = 0, n
+                    do k = 0, p
+                        data_real_3D_slabz(i+1, j+1, k+1) = q_temp_in%sf(i, j, k) * fluid_indicator_function_I%sf(i, j, k)
+                    end do
+                end do
+            end do
+        else
+            !$acc parallel loop collapse(3) gang vector default(present)
+            do i = 0, m
+                do j = 0, n
+                    do k = 0, p
+                        data_real_3D_slabz(i+1, j+1, k+1) = q_temp_in%sf(i, j, k) * (1.0_dp - fluid_indicator_function_I%sf(i, j, k))
+                    end do
+                end do
+            end do
+        end if
+
+        call s_mpi_FFT_fwd
+
+        ! convolution theorem: multiply the spectrum by the kernel spectrum
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, NxC
+            do j = 1, Nyloc
+                do k = 1, Nz
+                    data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC)
+                end do
+            end do
+        end do
+
+        call s_mpi_FFT_bwd
+
+        ! rescale the transform pair and divide by the filtered fluid indicator; the grid size is
+        ! formed in floating point because Nx*Ny*Nz overflows a default integer on large grids
+        if (present(q_temp_out)) then
+            !$acc parallel loop collapse(3) gang vector default(present)
+            do i = 0, m
+                do j = 0, n
+                    do k = 0, p
+                        q_temp_out%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx, dp)*real(Ny, dp)*real(Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k))
+                    end do
+                end do
+            end do
+        else
+            !$acc parallel loop collapse(3) gang vector default(present)
+            do i = 0, m
+                do j = 0, n
+                    do k = 0, p
+                        q_temp_in%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx, dp)*real(Ny, dp)*real(Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k))
+                    end do
+                end do
+            end do
+        end if
+
+    end subroutine s_apply_fftw_filter_scalarfield
+
+    !< apply the gaussian filter to the requisite tensors to compute unclosed terms of interest
+    subroutine s_apply_fftw_filter_tensor(pt_Re_stress, R_mu, q_cons_filtered, rhs_rhouu, pImT_filtered)
+        type(vector_field), dimension(1:num_dims), intent(inout) :: pt_Re_stress
+        type(vector_field), dimension(1:num_dims), intent(inout) :: R_mu
+        type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered
+        type(scalar_field), dimension(momxb:momxe), intent(inout) :: rhs_rhouu
+        type(scalar_field), dimension(1:num_dims), intent(inout) ::
pImT_filtered
+
+        integer :: l, q
+
+        ! pseudo turbulent reynolds stress (filtered in place, fluid-phase weighting)
+        do l = 1, num_dims
+            do q = 1, num_dims
+                call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .true., pt_Re_stress(l)%vf(q))
+            end do
+        end do
+
+        ! effective viscosity (filtered in place, fluid-phase weighting)
+        do l = 1, num_dims
+            do q = 1, num_dims
+                call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .true., R_mu(l)%vf(q))
+            end do
+        end do
+
+        ! interphase momentum exchange (complement weighting, written to pImT_filtered)
+        do l = 1, num_dims
+            call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .false., rhs_rhouu(momxb-1+l), pImT_filtered(l))
+        end do
+
+    end subroutine s_apply_fftw_filter_tensor
+
+    !< transpose domain from z-slabs to y-slabs on each processor
+    !! Packs data_cmplx_slabz into one contiguous chunk per destination rank, exchanges the
+    !! chunks with an all-to-all and unpacks them into data_cmplx_slaby.
+    subroutine s_mpi_transpose_slabZ2Y
+        complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:)
+        integer :: dest_rank, src_rank
+        integer :: i, j, k
+
+        allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs))
+        allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs))
+
+        ! sendbuf is written in full on the device, so it only has to be copied back to the host
+        !$acc parallel loop collapse(4) gang vector default(present) copyout(sendbuf)
+        do dest_rank = 0, num_procs-1
+            do k = 1, Nzloc
+                do j = 1, Nyloc
+                    do i = 1, NxC
+                        sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slabz(i, j+dest_rank*Nyloc, k)
+                    end do
+                end do
+            end do
+        end do
+
+#ifdef MFC_MPI
+        call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, &
+                          recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr)
+#else
+        ! serial build: the single rank already owns every chunk
+        recvbuf = sendbuf
+#endif
+
+        ! recvbuf is only read on the device, so it only has to be copied in
+        !$acc parallel loop collapse(4) gang vector default(present) copyin(recvbuf)
+        do src_rank = 0, num_procs-1
+            do k = 1, Nzloc
+                do j = 1, Nyloc
+                    do i = 1, NxC
+                        data_cmplx_slaby(i, j, k+src_rank*Nzloc) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc)
+                    end do
+                end do
+            end do
+        end do
+
+        deallocate(sendbuf, recvbuf)
+    end subroutine s_mpi_transpose_slabZ2Y
+
+    !< transpose domain from y-slabs to z-slabs on each processor
+    !! Inverse of s_mpi_transpose_slabZ2Y: packs data_cmplx_slaby per destination rank,
+    !! exchanges the chunks and unpacks them into data_cmplx_slabz.
+    subroutine s_mpi_transpose_slabY2Z
+        complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:)
+        integer :: dest_rank, src_rank
+        integer :: i, j, k
+
+        allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs))
+        allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs))
+
+        ! sendbuf is written in full on the device, so it only has to be copied back to the host
+        !$acc parallel loop collapse(4) gang vector default(present) copyout(sendbuf)
+        do dest_rank = 0, num_procs-1
+            do k = 1, Nzloc
+                do j = 1, Nyloc
+                    do i = 1, NxC
+                        sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slaby(i, j, k+dest_rank*Nzloc)
+                    end do
+                end do
+            end do
+        end do
+
+#ifdef MFC_MPI
+        call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, &
+                          recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr)
+#else
+        ! serial build: the single rank already owns every chunk
+        recvbuf = sendbuf
+#endif
+
+        ! recvbuf is only read on the device, so it only has to be copied in
+        !$acc parallel loop collapse(4) gang vector default(present) copyin(recvbuf)
+        do src_rank = 0, num_procs-1
+            do k = 1, Nzloc
+                do j = 1, Nyloc
+                    do i = 1, NxC
+                        data_cmplx_slabz(i, j+src_rank*Nyloc, k) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc)
+                    end do
+                end do
+            end do
+        end do
+
+        deallocate(sendbuf, recvbuf)
+    end subroutine s_mpi_transpose_slabY2Z
+
+    !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d
+    subroutine s_mpi_FFT_fwd
+        integer :: i, j, k
+
+        ! 3D z-slab -> 1D x, y, z
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, Nx
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k)
+                end do
+            end do
+        end do
+
+        ! X FFT
+        ! (cuFFT expects device addresses, hence the host_data region around the call)
+#if defined(MFC_OpenACC)
+        !$acc host_data use_device(data_real_in1d, data_cmplx_out1d)
+        ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d)
+        !$acc end host_data
+#else
+        call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d)
+#endif
+
+        ! 1D x, y, z -> 1D y, x, z (CMPLX)
+        !$acc parallel loop collapse(3) gang vector default(present)
+        do i = 1, NxC
+            do j = 1, Ny
+                do k = 1, Nzloc
+                    data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny)
+                end do
+            end do
+        end do
+
+        !
Y FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz(i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + + ! transpose z-slab to y-slab + call s_mpi_transpose_slabZ2Y + + ! 3D y-slab -> 1D z, x, y + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby(i, j, k) + end do + end do + end do + + ! Z FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! return data_cmplx_out1d: 1D z, x, y + end subroutine s_mpi_FFT_fwd + + !< compute inverse FFT, input: data_cmplx_out1d, output: data_real_3D_slabz + subroutine s_mpi_FFT_bwd + integer :: i, j, k + + ! Z inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! 1D z, x, y -> 3D y-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby(i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + + ! transpose y-slab to z-slab + call s_mpi_transpose_slabY2Z + + ! 3D z-slab -> 1D y, x, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz(i, j, k) + end do + end do + end do + + ! 
Y inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 1D x, y, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + + ! X inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + + ! 1D x, y, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + end do + end do + end do + + end subroutine s_mpi_FFT_bwd + + !< setup for calculation of unclosed terms in volume filtered momentum eqn + subroutine s_setup_terms_filtering(q_cons_vf, pt_Re_stress, R_mu) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(vector_field), dimension(1:num_dims), intent(inout) :: pt_Re_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: R_mu + + integer :: i, j, k, l, q + + ! pseudo turbulent reynolds stress setup + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + !$acc loop seq + do l = 1, num_dims + !$acc loop seq + do q = 1, num_dims + pt_Re_stress(l)%vf(q)%sf(i, j, k) = (q_cons_vf(momxb-1+l)%sf(i, j, k) * q_cons_vf(momxb-1+q)%sf(i, j, k)) / q_cons_vf(1)%sf(i, j, k) ! (rho*u x rho*u)/rho = rho*(u x u) + end do + end do + end do + end do + end do + + ! 
set density and momentum buffers +#ifdef MFC_MPI + do i = 1, momxe + call s_populate_scalarfield_buffers(q_cons_vf(i)) + end do +#else + do i = 1, momxe + q_cons_vf(i)%sf(-buff_size:-1, :, :) = q_cons_vf(i)%sf(m-buff_size+1:m, :, :) + q_cons_vf(i)%sf(m+1:m+buff_size, :, :) = q_cons_vf(i)%sf(0:buff_size-1, :, :) + + q_cons_vf(i)%sf(:, -buff_size:-1, :) = q_cons_vf(i)%sf(:, n-buff_size+1:n, :) + q_cons_vf(i)%sf(:, n+1:n+buff_size, :) = q_cons_vf(i)%sf(:, 0:buff_size-1, :) + + q_cons_vf(i)%sf(:, :, -buff_size:-1) = q_cons_vf(i)%sf(:, :, p-buff_size+1:p) + q_cons_vf(i)%sf(:, :, p+1:p+buff_size) = q_cons_vf(i)%sf(:, :, 0:buff_size-1) + end do +#endif + + ! R_mu setup + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + R_mu(1)%vf(1)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) + + R_mu(2)%vf(2)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, 
k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) + + R_mu(3)%vf(3)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)) & + - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) + + R_mu(1)%vf(2)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_vf(1)%sf(i, j, k) & + + (q_cons_vf(momxb+1)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb+1)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_vf(1)%sf(i, j, k)) + + R_mu(2)%vf(1)%sf(i, j, k) = R_mu(1)%vf(2)%sf(i, j, k) + + R_mu(1)%vf(3)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & + + (q_cons_vf(momxb+2)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb+2)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_vf(1)%sf(i, j, k)) + + R_mu(3)%vf(1)%sf(i, j, k) = R_mu(1)%vf(3)%sf(i, j, k) + + R_mu(2)%vf(3)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb+1)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+1)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & + + (q_cons_vf(momxb+2)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+2)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_vf(1)%sf(i, j, k)) + + R_mu(3)%vf(2)%sf(i, j, k) = R_mu(2)%vf(3)%sf(i, j, k) + end do + end do + end do 
+ + end subroutine s_setup_terms_filtering + + subroutine s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, pt_Re_stress, mag_div_Ru) + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered + type(vector_field), dimension(1:num_dims), intent(inout) :: pt_Re_stress + type(scalar_field), intent(inout) :: mag_div_Ru + real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: div_Ru + integer :: i, j, k, l, q + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + !$acc loop seq + do l = 1, num_dims + !$acc loop seq + do q = 1, num_dims + pt_Re_stress(l)%vf(q)%sf(i, j, k) = pt_Re_stress(l)%vf(q)%sf(i, j, k) & + - (q_cons_filtered(momxb-1+l)%sf(i, j, k) * q_cons_filtered(momxb-1+q)%sf(i, j, k) / q_cons_filtered(1)%sf(i, j, k)) + end do + end do + end do + end do + end do + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + !$acc loop seq + do l = 1, num_dims + !$acc loop seq + do q = 1, num_dims + pt_Re_stress(l)%vf(q)%sf(i, j, k) = pt_Re_stress(l)%vf(q)%sf(i, j, k) * q_cons_filtered(advxb)%sf(i, j, k) + end do + end do + end do + end do + end do + + ! 
set boundary buffer zone values +#ifdef MFC_MPI + do l = 1, num_dims + do q = 1, num_dims + call s_populate_scalarfield_buffers(pt_Re_stress(l)%vf(q)) + end do + end do +#else + do l = 1, num_dims + do q = 1, num_dims + pt_Re_stress(l)%vf(q)%sf(-buff_size:-1, :, :) = pt_Re_stress(l)%vf(q)%sf(m-buff_size+1:m, :, :) + pt_Re_stress(l)%vf(q)%sf(m+1:m+buff_size, :, :) = pt_Re_stress(l)%vf(q)%sf(0:buff_size-1, :, :) + + pt_Re_stress(l)%vf(q)%sf(:, -buff_size:-1, :) = pt_Re_stress(l)%vf(q)%sf(:, n-buff_size+1:n, :) + pt_Re_stress(l)%vf(q)%sf(:, n+1:n+buff_size, :) = pt_Re_stress(l)%vf(q)%sf(:, 0:buff_size-1, :) + + pt_Re_stress(l)%vf(q)%sf(:, :, -buff_size:-1) = pt_Re_stress(l)%vf(q)%sf(:, :, p-buff_size+1:p) + pt_Re_stress(l)%vf(q)%sf(:, :, p+1:p+buff_size) = pt_Re_stress(l)%vf(q)%sf(:, :, 0:buff_size-1) + end do + end do +#endif + + ! div(Ru), using CD2 FD scheme + !$acc parallel loop collapse(3) gang vector default(present) copy(div_Ru) + do i = 0, m + do j = 0, n + do k = 0, p + !$acc loop seq + do l = 1, num_dims + div_Ru(l, i, j, k) = (pt_Re_stress(l)%vf(1)%sf(i+1, j, k) - pt_Re_stress(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (pt_Re_stress(l)%vf(2)%sf(i, j+1, k) - pt_Re_stress(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (pt_Re_stress(l)%vf(3)%sf(i, j, k+1) - pt_Re_stress(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) + end do + end do + end do + end do + + !$acc parallel loop collapse(3) gang vector default(present) copyin(div_Ru) + do i = 0, m + do j = 0, n + do k = 0, p + mag_div_Ru%sf(i, j, k) = sqrt(div_Ru(1, i, j, k)**2 + div_Ru(2, i, j, k)**2 + div_Ru(3, i, j, k)**2) + end do + end do + end do + + end subroutine s_compute_pseudo_turbulent_reynolds_stress + + subroutine s_compute_R_mu(q_cons_filtered, R_mu, mag_div_R_mu) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered + type(vector_field), dimension(1:num_dims), intent(inout) :: R_mu + type(scalar_field), intent(inout) :: mag_div_R_mu + real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: 
div_R_mu + + integer :: i, j, k, l, q + + ! set buffers for filtered momentum quantities and density +#ifdef MFC_MPI + do i = 1, momxe + call s_populate_scalarfield_buffers(q_cons_filtered(i)) + end do +#else + do i = 1, momxe + q_cons_filtered(i)%sf(-buff_size:-1, :, :) = q_cons_filtered(i)%sf(m-buff_size+1:m, :, :) + q_cons_filtered(i)%sf(m+1:m+buff_size, :, :) = q_cons_filtered(i)%sf(0:buff_size-1, :, :) + + q_cons_filtered(i)%sf(:, -buff_size:-1, :) = q_cons_filtered(i)%sf(:, n-buff_size+1:n, :) + q_cons_filtered(i)%sf(:, n+1:n+buff_size, :) = q_cons_filtered(i)%sf(:, 0:buff_size-1, :) + + q_cons_filtered(i)%sf(:, :, -buff_size:-1) = q_cons_filtered(i)%sf(:, :, p-buff_size+1:p) + q_cons_filtered(i)%sf(:, :, p+1:p+buff_size) = q_cons_filtered(i)%sf(:, :, 0:buff_size-1) + end do +#endif + + ! calculate R_mu + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + R_mu(1)%vf(1)%sf(i, j, k) = R_mu(1)%vf(1)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) + + R_mu(2)%vf(2)%sf(i, j, k) = R_mu(2)%vf(2)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, 
k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) + + R_mu(3)%vf(3)%sf(i, j, k) = R_mu(3)%vf(3)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)) & + - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) + + R_mu(1)%vf(2)%sf(i, j, k) = R_mu(1)%vf(2)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_filtered(1)%sf(i, j, k) & + + (q_cons_filtered(momxb+1)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb+1)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_filtered(1)%sf(i, j, k)) + + R_mu(2)%vf(1)%sf(i, j, k) = R_mu(1)%vf(2)%sf(i, j, k) + + R_mu(1)%vf(3)%sf(i, j, k) = R_mu(1)%vf(3)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & + + (q_cons_filtered(momxb+2)%sf(i+1, 
j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb+2)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_filtered(1)%sf(i, j, k)) + + R_mu(3)%vf(1)%sf(i, j, k) = R_mu(1)%vf(3)%sf(i, j, k) + + R_mu(2)%vf(3)%sf(i, j, k) = R_mu(2)%vf(3)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb+1)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+1)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & + + (q_cons_filtered(momxb+2)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+2)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_filtered(1)%sf(i, j, k)) + + R_mu(3)%vf(2)%sf(i, j, k) = R_mu(2)%vf(3)%sf(i, j, k) + + end do + end do + end do + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + !$acc loop seq + do l = 1, num_dims + !$acc loop seq + do q = 1, num_dims + R_mu(l)%vf(q)%sf(i, j, k) = R_mu(l)%vf(q)%sf(i, j, k) * q_cons_filtered(advxb)%sf(i, j, k) + end do + end do + end do + end do + end do + + ! set boundary buffer zone values +#ifdef MFC_MPI + do l = 1, num_dims + do q = 1, num_dims + call s_populate_scalarfield_buffers(R_mu(l)%vf(q)) + end do + end do +#else + do l = 1, num_dims + do q = 1, num_dims + R_mu(l)%vf(q)%sf(-buff_size:-1, :, :) = R_mu(l)%vf(q)%sf(m-buff_size+1:m, :, :) + R_mu(l)%vf(q)%sf(m+1:m+buff_size, :, :) = R_mu(l)%vf(q)%sf(0:buff_size-1, :, :) + + R_mu(l)%vf(q)%sf(:, -buff_size:-1, :) = R_mu(l)%vf(q)%sf(:, n-buff_size+1:n, :) + R_mu(l)%vf(q)%sf(:, n+1:n+buff_size, :) = R_mu(l)%vf(q)%sf(:, 0:buff_size-1, :) + + R_mu(l)%vf(q)%sf(:, :, -buff_size:-1) = R_mu(l)%vf(q)%sf(:, :, p-buff_size+1:p) + R_mu(l)%vf(q)%sf(:, :, p+1:p+buff_size) = R_mu(l)%vf(q)%sf(:, :, 0:buff_size-1) + end do + end do +#endif + + ! 
div(R_mu), using CD2 FD scheme + !$acc parallel loop collapse(3) gang vector default(present) copy(div_R_mu) + do i = 0, m + do j = 0, n + do k = 0, p + !$acc loop seq + do l = 1, num_dims + div_R_mu(l, i, j, k) = (R_mu(l)%vf(1)%sf(i+1, j, k) - R_mu(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (R_mu(l)%vf(2)%sf(i, j+1, k) - R_mu(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (R_mu(l)%vf(3)%sf(i, j, k+1) - R_mu(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) + end do + end do + end do + end do + + !$acc parallel loop collapse(3) gang vector default(present) copyin(div_R_mu) + do i = 0, m + do j = 0, n + do k = 0, p + mag_div_R_mu%sf(i, j, k) = sqrt(div_R_mu(1, i, j, k)**2 + div_R_mu(2, i, j, k)**2 + div_R_mu(3, i, j, k)**2) + end do + end do + end do + + end subroutine s_compute_R_mu + + subroutine s_compute_interphase_momentum_exchange_term(pImT_filtered, mag_F_IMET) + type(scalar_field), dimension(1:num_dims), intent(in) :: pImT_filtered + type(scalar_field), intent(inout) :: mag_F_IMET + + integer :: i, j, k, l, q, ii + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + mag_F_IMET%sf(i, j, k) = sqrt(pImT_filtered(1)%sf(i, j, k)**2 & + + pImT_filtered(2)%sf(i, j, k)**2 & + + pImT_filtered(3)%sf(i, j, k)**2) + end do + end do + end do + + end subroutine s_compute_interphase_momentum_exchange_term + + subroutine s_finalize_fftw_explicit_filter_module + @:DEALLOCATE(fluid_indicator_function_I%sf) + + @:DEALLOCATE(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) + @:DEALLOCATE(cmplx_kernelG1d, real_kernelG_in) + @:DEALLOCATE(data_real_3D_slabz, data_cmplx_slabz, data_cmplx_slaby) + +#if defined(MFC_OpenACC) + ierr = cufftDestroy(plan_x_fwd_gpu) + ierr = cufftDestroy(plan_x_bwd_gpu) + ierr = cufftDestroy(plan_y_gpu) + ierr = cufftDestroy(plan_z_gpu) +#else + call fftw_destroy_plan(plan_x_r2c_fwd) + call fftw_destroy_plan(plan_x_c2r_bwd) + call fftw_destroy_plan(plan_y_c2c_fwd) + call fftw_destroy_plan(plan_y_c2c_bwd) + call 
fftw_destroy_plan(plan_z_c2c_fwd) + call fftw_destroy_plan(plan_z_c2c_bwd) + call fftw_destroy_plan(plan_x_r2c_kernelG) + call fftw_destroy_plan(plan_y_c2c_kernelG) + call fftw_destroy_plan(plan_z_c2c_kernelG) +#endif + + end subroutine s_finalize_fftw_explicit_filter_module + +end module m_volume_filtering \ No newline at end of file diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 57dcbfbdf6..7f07744d8b 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -22,6 +22,8 @@ program p_main use m_nvtx + use m_volume_filtering + implicit none integer :: t_step !< Iterator for the time-stepping loop @@ -54,6 +56,10 @@ program p_main call s_initialize_gpu_vars() call nvtxEndRange + call s_initialize_fluid_indicator_function() + if (fourier_transform_filtering) call s_initialize_filtering_kernel() + if (fourier_transform_filtering) call s_initialize_filtered_fluid_indicator_function(q_cons_filtered(advxb)) + ! Setting the time-step iterator to the first time-step if (cfl_dt) then t_step = 0 diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index a2d2ca559b..e425e53b6d 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -61,6 +61,9 @@ def analytic(self): 'Bx0': ParamType.REAL, 'relativity': ParamType.LOG, 'cont_damage': ParamType.LOG, + 'periodic_ibs': ParamType.LOG, + 'store_levelset': ParamType.LOG, + 'slab_domain_decomposition': ParamType.LOG, } PRE_PROCESS = COMMON.copy() @@ -103,7 +106,7 @@ def analytic(self): 'bubbles_lagrange': ParamType.LOG, }) -for ib_id in range(1, 10+1): +for ib_id in range(1, 1000+1): for real_attr, ty in [("geometry", ParamType.INT), ("radius", ParamType.REAL), ("theta", ParamType.REAL), ("slip", ParamType.LOG), ("c", ParamType.REAL), ("p", ParamType.REAL), @@ -297,6 +300,13 @@ def analytic(self): 'tau_star': ParamType.REAL, 'cont_damage_s': ParamType.REAL, 'alpha_bar': ParamType.REAL, + 'compute_CD': ParamType.LOG, + 'mu_visc': 
ParamType.REAL, + 'u_inf_ref': ParamType.REAL, + 'rho_inf_ref': ParamType.REAL, + 'T_inf_ref': ParamType.REAL, + 'periodic_forcing': ParamType.LOG, + 'fourier_transform_filtering': ParamType.LOG, }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', diff --git a/voronoi/gen_voronoi_2D.py b/voronoi/gen_voronoi_2D.py new file mode 100644 index 0000000000..73beb4b8d7 --- /dev/null +++ b/voronoi/gen_voronoi_2D.py @@ -0,0 +1,99 @@ +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import freud + + +# lloyd relaxation +def compute_simplex_centroid(simplex_vertices): # input is indexed [simplex, vertex, coordinate]; returns the per-coordinate vertex means, shape (3, n_simplices) + v1 = simplex_vertices[:, :, 0] # coordinate 0 of every vertex + v2 = simplex_vertices[:, :, 1] # coordinate 1 of every vertex + v3 = simplex_vertices[:, :, 2] # coordinate 2 of every vertex + + v1_mean = np.mean(v1, axis=1) + v2_mean = np.mean(v2, axis=1) + v3_mean = np.mean(v3, axis=1) + + simplex_centroids = np.array([v1_mean, v2_mean, v3_mean]) + + return simplex_centroids + +def compute_simplex_area(simplex_vertices): # triangle areas, one per simplex; input is indexed [simplex, vertex, coordinate] + v1 = simplex_vertices[:, 0, :] # position vector of vertex 0 (axis 1 selects the vertex, not the coordinate) + v2 = simplex_vertices[:, 1, :] # position vector of vertex 1 + v3 = simplex_vertices[:, 2, :] # position vector of vertex 2 + + area = 0.5 * np.linalg.norm( np.cross(v2 - v1, v3 - v1), axis=1 ) + + return area + +def lloyd_relaxation_2d(initial_points, box, w=1.0, iterations=20): # moves each point toward the area-weighted centroid of its Voronoi cell, w scales the step; returns the wrapped relaxed points + points = np.array(initial_points, dtype=float) # work on a copy so the caller's array is not modified in place + + for _ in range(iterations): + voro = freud.locality.Voronoi() + voro_data = voro.compute((box, points)) # tessellate the current positions, not the stale initial ones + vertices = voro_data.polytopes + + for i in range(len(points)): + n = len(vertices[i]) + + simplex_vertices = np.array( [(points[i, :], vertices[i][j-1], vertices[i][j]) for j in range(n)] ) + + simplex_centroids = compute_simplex_centroid(simplex_vertices) + simplex_areas = compute_simplex_area(simplex_vertices) + + centroid = (1/np.sum(simplex_areas)) * (np.sum(simplex_centroids*simplex_areas, axis=1)) + + dist = centroid - points[i, :] + + points[i, :] += w * dist + + points = box.wrap(points) + + return points + +if (__name__ == '__main__'): + print('running 2D...') + + # setup + phi = 0.4 + D = 0.1 + L = 10*D + + N = int( 4*phi*L**2 / 
(np.pi*D**2) ) + print(f'volume fraction phi: {phi}, number of circles: {N}') + + x_i = L/2 * np.random.uniform(-1, 1, N) + y_i = L/2 * np.random.uniform(-1, 1, N) + z_i = L/2 * np.random.uniform(-1, 1, N) * 0 + + initial_points = np.stack((x_i, y_i, z_i), axis=1) + + box = freud.box.Box.square(L) + voro = freud.locality.Voronoi() + + cells = voro.compute((box, initial_points)).polytopes + + # plot initial distribution + plt.figure() + ax = plt.gca() + voro.plot(ax=ax, cmap='RdBu') + ax.scatter(initial_points[:, 0], initial_points[:, 1], s=5, c='k') + plt.show() + plt.close() + + # calculate relaxed points + relaxed_points = lloyd_relaxation_2d(initial_points, box, w=1.5, iterations=25) + voro.compute((box, relaxed_points)) + + # plot relaxed distribution + plt.figure() + ax = plt.gca() + voro.plot(ax=ax, cmap='RdBu') + ax.scatter(relaxed_points[:, 0], relaxed_points[:, 1], s=5, c='k') + plt.show() + plt.close() + + + diff --git a/voronoi/gen_voronoi_3D.py b/voronoi/gen_voronoi_3D.py new file mode 100644 index 0000000000..ce700acb5d --- /dev/null +++ b/voronoi/gen_voronoi_3D.py @@ -0,0 +1,98 @@ +import os +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import freud + + +# lloyd relaxation +def compute_tetrahedron_centroid(tetrahedron_vertices): # mean of the vertex positions (axis 0 runs over the vertices) + + return np.mean(tetrahedron_vertices, axis=0) + +def compute_tetrahedron_volume(tetrahedron_vertices): # |det([v1-v0, v2-v0, v3-v0])| / 6 + v0, v1, v2, v3 = tetrahedron_vertices + matrix = np.vstack([v1 - v0, v2 - v0, v3 - v0]).T + volume = np.abs(np.linalg.det(matrix)) / 6 + + return volume + +def lloyd_relaxation_3d(initial_points, box, w=1, iterations=10): # moves each point toward a volume-weighted centroid built from its Voronoi cell vertices, w scales the step; returns the wrapped relaxed points + points = np.array(initial_points, dtype=float) # work on a copy so the caller's initial_points stays untouched (it is plotted afterwards) + + for _ in range(iterations): + voro = freud.locality.Voronoi() + voro_data = voro.compute((box, points)) + vertices = voro_data.polytopes + + for i in range(len(points)): + n = len(vertices[i]) + + tetrahedra = [] + for j in range(n): + tetrahedra.append([points[i, :], vertices[i][j], vertices[i][(j+1) % n], vertices[i][(j+2) % n]]) + + centroids = 
np.array([compute_tetrahedron_centroid(t) for t in tetrahedra]) + volumes = np.array([compute_tetrahedron_volume(t) for t in tetrahedra]) + + weighted_centroid = np.sum(centroids * volumes[:, np.newaxis], axis=0) + total_volume = np.sum(volumes) + + if total_volume > 1.0e-12: + centroid = weighted_centroid / total_volume + dist = centroid - points[i, :] + + points[i, :] += w * dist + + points = box.wrap(points) + + return points + +if (__name__ == '__main__'): + print('running 3D...') + + # setup + phi = 0.05 + str_phi = '005' + + D = 0.1 + L = 10*D + + output_dir = '../examples/phi'+str_phi + if not os.path.exists(output_dir): + os.makedirs(output_dir) # makedirs also creates missing parent directories + + N_sphere = int( 6*phi*L**3 / (np.pi*D**3) ) + print(f'volume fraction phi: {phi}, number of spheres: {N_sphere}') + print(f'actual phi value: {N_sphere*4/3*np.pi*(D/2)**3/(L**3)}') + + x_i = L/2 * np.random.uniform(-1, 1, N_sphere) + y_i = L/2 * np.random.uniform(-1, 1, N_sphere) + z_i = L/2 * np.random.uniform(-1, 1, N_sphere) + + initial_points = np.stack((x_i, y_i, z_i), axis=1) + box = freud.box.Box.cube(L) + + relaxed_points = lloyd_relaxation_3d(initial_points, box, iterations=30) + print(np.shape(relaxed_points)) + + np.savetxt(output_dir+'/sphere_array_locations.txt', relaxed_points) + + # check no spheres are overlapping + for i in range(N_sphere): + for j in range(N_sphere): + if (i != j): + dist = np.sqrt((relaxed_points[i, 0] - relaxed_points[j, 0])**2 + (relaxed_points[i, 1] - relaxed_points[j, 1])**2 + (relaxed_points[i, 2] - relaxed_points[j, 2])**2) + if (dist <= 1.05*D): + print(f'spheres overlaping, dist={dist}, spheres #: {i}, {j}') + print(f'locations: ({relaxed_points[i, :]}), ({relaxed_points[j, :]})') + + fig = plt.figure(figsize=(10,5)) + ax1 = fig.add_subplot(121, projection='3d') + ax1.scatter(initial_points[:, 0], initial_points[:, 1], initial_points[:, 2], color='blue', s=10) + ax1.set_title('initial points') + ax2 = fig.add_subplot(122, projection='3d') + 
ax2.scatter(relaxed_points[:, 0], relaxed_points[:, 1], relaxed_points[:, 2], color='red', s=10) + ax2.set_title('relaxed points') + plt.show() + plt.close() From bd91e4f08b6eb5545498f84aae007d1e7dfd7b88 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Mon, 9 Jun 2025 21:41:46 -0600 Subject: [PATCH 02/30] statistics computation --- runs/3d_1sphere_filtering/case.py | 154 +++++++++++++ src/common/m_mpi_common.fpp | 34 ++- src/post_process/m_data_input.f90 | 279 ++++++++++++++++++++++- src/post_process/m_global_parameters.fpp | 15 ++ src/post_process/m_mpi_proxy.fpp | 2 +- src/post_process/m_start_up.f90 | 30 ++- src/simulation/m_compute_statistics.fpp | 130 +++++++++++ src/simulation/m_data_output.fpp | 34 ++- src/simulation/m_global_parameters.fpp | 12 + src/simulation/m_rhs.fpp | 12 +- src/simulation/m_start_up.fpp | 14 +- src/simulation/m_time_steppers.fpp | 158 ++++++++++++- src/simulation/m_volume_filtering.fpp | 32 +-- toolchain/mfc/run/case_dicts.py | 1 + 14 files changed, 867 insertions(+), 40 deletions(-) create mode 100644 runs/3d_1sphere_filtering/case.py create mode 100644 src/simulation/m_compute_statistics.fpp diff --git a/runs/3d_1sphere_filtering/case.py b/runs/3d_1sphere_filtering/case.py new file mode 100644 index 0000000000..fa38be1ff0 --- /dev/null +++ b/runs/3d_1sphere_filtering/case.py @@ -0,0 +1,154 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 31 +t_save = 1 + +Nx = 63 +Ny = 63 +Nz = 63 + +# immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 0.0, + f"patch_ib({1})%y_centroid": 
0.0, + f"patch_ib({1})%z_centroid": 0.0, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "q_filtered_wrt": "T", + "parallel_io": "T", + # Patch: Constant Tube filled with air + # Specify the cylindrical air tube grid geometry + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + 
"patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": Re, + + # new case additions + "periodic_forcing": "T", + "periodic_ibs": "T", + "compute_CD": "F", + "fourier_transform_filtering": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + "mu_visc": mu, + + "store_levelset": "F", + "slab_domain_decomposition": "T", + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 25cd6fda5d..8214120fe7 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -153,7 +153,7 @@ contains !! @param levelset closest distance from every cell to the IB !! @param levelset_norm normalized vector from every cell to the closest point to the IB !! 
@param beta Eulerian void fraction from lagrangian bubbles - subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta) + subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta, R_u_stat, R_mu_stat, F_IMET_stat) type(scalar_field), & dimension(sys_size), & @@ -174,6 +174,10 @@ contains type(scalar_field), & intent(in), optional :: beta + type(scalar_field), dimension(2:4), intent(in), optional :: R_u_stat + type(scalar_field), dimension(2:4), intent(in), optional :: R_mu_stat + type(scalar_field), dimension(2:4), intent(in), optional :: F_IMET_stat + integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start @@ -187,6 +191,8 @@ contains if (present(beta)) then alt_sys = sys_size + 1 + else if (present(R_u_stat) .and. present(R_mu_stat) .and. present(F_IMET_stat)) then + alt_sys = sys_size + 9 else alt_sys = sys_size end if @@ -194,6 +200,18 @@ contains do i = 1, sys_size MPI_IO_DATA%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p) end do + + if (present(R_u_stat) .and. present(R_mu_stat) .and. 
present(F_IMET_stat)) then + do i = sys_size+1, sys_size+3 + MPI_IO_DATA%var(i)%sf => R_u_stat(i-sys_size+1)%sf(0:m, 0:n, 0:p) + end do + do i = sys_size+4, sys_size+6 + MPI_IO_DATA%var(i)%sf => R_mu_stat(i-sys_size-2)%sf(0:m, 0:n, 0:p) + end do + do i = sys_size+7, sys_size+9 + MPI_IO_DATA%var(i)%sf => F_IMET_stat(i-sys_size-5)%sf(0:m, 0:n, 0:p) + end do + end if if (present(beta)) then MPI_IO_DATA%var(alt_sys)%sf => beta%sf(0:m, 0:n, 0:p) @@ -1124,7 +1142,7 @@ contains buffer_counts = (/ & buff_size*1*(n + 1)*(p + 1), & buff_size*1*(m + 2*buff_size + 1)*(p + 1), & - buff_size*v_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + buff_size*1*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & /) buffer_count = buffer_counts(mpi_dir) @@ -1166,7 +1184,7 @@ contains do k = 0, n do j = 0, buff_size - 1 do i = 1, 1 - r = (i - 1) + v_size*(j + buff_size*(k + (n + 1)*l)) + r = (i - 1) + 1*(j + buff_size*(k + (n + 1)*l)) buff_send_scalarfield(r) = q_temp%sf(j + pack_offset, k, l) end do end do @@ -1178,7 +1196,7 @@ contains do l = 0, p do k = 0, buff_size - 1 do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & + r = (i - 1) + 1* & ((j + buff_size) + (m + 2*buff_size + 1)* & (k + buff_size*l)) buff_send_scalarfield(r) = q_temp%sf(j, k + pack_offset, l) @@ -1192,7 +1210,7 @@ contains do l = 0, buff_size - 1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & + r = (i - 1) + 1* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)*l)) buff_send_scalarfield(r) = q_temp%sf(j, k, l + pack_offset) @@ -1258,7 +1276,7 @@ contains do k = 0, n do j = -buff_size, -1 do i = 1, 1 - r = (i - 1) + v_size* & + r = (i - 1) + 1* & (j + buff_size*((k + 1) + (n + 1)*l)) q_temp%sf(j + unpack_offset, k, l) = buff_recv_scalarfield(r) #if defined(__INTEL_COMPILER) @@ -1277,7 +1295,7 @@ contains do l = 0, p do k = -buff_size, -1 do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & + r = (i - 1) + 1* & ((j + buff_size) 
+ (m + 2*buff_size + 1)* & ((k + buff_size) + buff_size*l)) q_temp%sf(j, k + unpack_offset, l) = buff_recv_scalarfield(r) @@ -1298,7 +1316,7 @@ contains do l = -buff_size, -1 do k = -buff_size, n + buff_size do j = -buff_size, m + buff_size - r = (i - 1) + v_size* & + r = (i - 1) + 1* & ((j + buff_size) + (m + 2*buff_size + 1)* & ((k + buff_size) + (n + 2*buff_size + 1)* & (l + buff_size))) diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 5385ef7619..69b13707b1 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -29,6 +29,7 @@ module m_data_input s_read_parallel_data_files, & s_populate_grid_variables_buffer_regions, & s_populate_conservative_variables_buffer_regions, & + s_populate_filtered_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -60,6 +61,10 @@ end subroutine s_read_abstract_data_files ! type(scalar_field), public :: ib_markers !< type(integer_field), public :: ib_markers + type(scalar_field), allocatable, dimension(:), public :: R_u_stat + type(scalar_field), allocatable, dimension(:), public :: R_mu_stat + type(scalar_field), allocatable, dimension(:), public :: F_IMET_stat + procedure(s_read_abstract_data_files), pointer :: s_read_data_files => null() contains @@ -296,6 +301,8 @@ subroutine s_read_parallel_data_files(t_step) if (bubbles_lagrange) then alt_sys = sys_size + 1 + else if (q_filtered_wrt) then + alt_sys = sys_size + 9 else alt_sys = sys_size end if @@ -454,7 +461,14 @@ subroutine s_read_parallel_data_files(t_step) ! 
Initialize MPI data I/O if (ib) then - call s_initialize_mpi_data(q_cons_vf, ib_markers) + if (q_filtered_wrt) then + call s_initialize_mpi_data(q_cons_vf, ib_markers, & + R_u_stat=R_u_stat, & + R_mu_stat=R_mu_stat, & + F_IMET_stat=F_IMET_stat) + else + call s_initialize_mpi_data(q_cons_vf, ib_markers) + end if elseif (bubbles_lagrange) then call s_initialize_mpi_data(q_cons_vf, beta=q_particle(1)) else @@ -481,6 +495,18 @@ subroutine s_read_parallel_data_files(t_step) ! Initial displacement to skip at beginning of file disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & + mpi_p, status, ierr) + end do + else if (q_filtered_wrt) then + do i = 1, alt_sys + var_MOK = int(i, MPI_OFFSET_KIND) + + ! Initial displacement to skip at beginning of file + disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & @@ -1302,6 +1328,224 @@ subroutine s_populate_conservative_variables_buffer_regions(q_particle) end subroutine s_populate_conservative_variables_buffer_regions + subroutine s_populate_filtered_variables_buffer_regions(q_particle) + + type(scalar_field), intent(inout), optional :: q_particle + + integer :: i, j, k !< Generic loop iterators + + ! Populating Buffer Regions in the x-direction + + ! 
Periodic BC at the beginning + if (bc_x%beg == BC_PERIODIC) then + + do j = 1, buff_size + if (present(q_particle)) then + q_particle%sf(-j, 0:n, 0:p) = & + q_particle%sf((m + 1) - j, 0:n, 0:p) + else + do i = 2, 4 + R_u_stat(i)%sf(-j, 0:n, 0:p) = & + R_u_stat(i)%sf((m + 1) - j, 0:n, 0:p) + R_mu_stat(i)%sf(-j, 0:n, 0:p) = & + R_mu_stat(i)%sf((m + 1) - j, 0:n, 0:p) + F_IMET_stat(i)%sf(-j, 0:n, 0:p) = & + F_IMET_stat(i)%sf((m + 1) - j, 0:n, 0:p) + end do + end if + end do + + ! Processor BC at the beginning + else + if (present(q_particle)) then + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'beg', 'x', q_particle) + else + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'beg', 'x') + end if + + end if + + ! Perodic BC at the end + if (bc_x%end == BC_PERIODIC) then + + do j = 1, buff_size + if (present(q_particle)) then + q_particle%sf(m + j, 0:n, 0:p) = & + q_particle%sf(j - 1, 0:n, 0:p) + else + do i = 2, 4 + R_u_stat(i)%sf(m + j, 0:n, 0:p) = & + R_u_stat(i)%sf(j - 1, 0:n, 0:p) + R_mu_stat(i)%sf(m + j, 0:n, 0:p) = & + R_mu_stat(i)%sf(j - 1, 0:n, 0:p) + F_IMET_stat(i)%sf(m + j, 0:n, 0:p) = & + F_IMET_stat(i)%sf(j - 1, 0:n, 0:p) + end do + end if + end do + + ! Processor BC at the end + else + + if (present(q_particle)) then + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'end', 'x', q_particle) + else + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'end', 'x') + end if + + end if + + ! END: Populating Buffer Regions in the x-direction + + ! Populating Buffer Regions in the y-direction + + if (n > 0) then + + ! 
Periodic BC at the beginning + if (bc_y%beg == BC_PERIODIC) then + + do j = 1, buff_size + if (present(q_particle)) then + q_particle%sf(:, -j, 0:p) = & + q_particle%sf(:, (n + 1) - j, 0:p) + else + do i = 2, 4 + R_u_stat(i)%sf(:, -j, 0:p) = & + R_u_stat(i)%sf(:, (n + 1) - j, 0:p) + R_mu_stat(i)%sf(:, -j, 0:p) = & + R_mu_stat(i)%sf(:, (n + 1) - j, 0:p) + F_IMET_stat(i)%sf(:, -j, 0:p) = & + F_IMET_stat(i)%sf(:, (n + 1) - j, 0:p) + end do + end if + end do + + ! Processor BC at the beginning + else + if (present(q_particle)) then + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'beg', 'y', q_particle) + else + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'beg', 'y') + end if + + end if + + ! Perodic BC at the end + if (bc_y%end == BC_PERIODIC) then + + do j = 1, buff_size + if (present(q_particle)) then + q_particle%sf(:, n + j, 0:p) = & + q_particle%sf(:, j - 1, 0:p) + else + do i = 2, 4 + R_u_stat(i)%sf(:, n + j, 0:p) = & + R_u_stat(i)%sf(:, j - 1, 0:p) + R_mu_stat(i)%sf(:, n + j, 0:p) = & + R_mu_stat(i)%sf(:, j - 1, 0:p) + F_IMET_stat(i)%sf(:, n + j, 0:p) = & + F_IMET_stat(i)%sf(:, j - 1, 0:p) + end do + end if + end do + + ! Processor BC at the end + else + + if (present(q_particle)) then + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'end', 'y', q_particle) + else + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'end', 'y') + end if + + end if + + ! END: Populating Buffer Regions in the y-direction + + ! Populating Buffer Regions in the z-direction + + if (p > 0) then + + ! 
Periodic BC at the beginning + if (bc_z%beg == BC_PERIODIC) then + + do j = 1, buff_size + if (present(q_particle)) then + q_particle%sf(:, :, -j) = & + q_particle%sf(:, :, (p + 1) - j) + else + do i = 2, 4 + R_u_stat(i)%sf(:, :, -j) = & + R_u_stat(i)%sf(:, :, (p + 1) - j) + R_mu_stat(i)%sf(:, :, -j) = & + R_mu_stat(i)%sf(:, :, (p + 1) - j) + F_IMET_stat(i)%sf(:, :, -j) = & + F_IMET_stat(i)%sf(:, :, (p + 1) - j) + end do + end if + end do + + ! Processor BC at the beginning + else + + if (present(q_particle)) then + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'beg', 'z', q_particle) + else + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'beg', 'z') + end if + + end if + + ! Perodic BC at the end + if (bc_z%end == BC_PERIODIC) then + + do j = 1, buff_size + if (present(q_particle)) then + q_particle%sf(:, :, p + j) = & + q_particle%sf(:, :, j - 1) + else + do i = 2, 4 + R_u_stat(i)%sf(:, :, p + j) = & + R_u_stat(i)%sf(:, :, j - 1) + R_mu_stat(i)%sf(:, :, p + j) = & + R_mu_stat(i)%sf(:, :, j - 1) + F_IMET_stat(i)%sf(:, :, p + j) = & + F_IMET_stat(i)%sf(:, :, j - 1) + end do + end if + end do + + ! Processor BC at the end + else + + if (present(q_particle)) then + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'end', 'z', q_particle) + else + call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & + 'end', 'z') + end if + + end if + + end if + + end if + + ! END: Populating Buffer Regions in the z-direction + + end subroutine s_populate_filtered_variables_buffer_regions + !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module subroutine s_initialize_data_input_module @@ -1315,6 +1559,10 @@ subroutine s_initialize_data_input_module allocate (q_prim_vf(1:sys_size)) if (bubbles_lagrange) allocate (q_particle(1)) + if (q_filtered_wrt) allocate (R_u_stat(2:4)) + if (q_filtered_wrt) allocate (R_mu_stat(2:4)) + if (q_filtered_wrt) allocate (F_IMET_stat(2:4)) + ! 
Allocating the parts of the conservative and primitive variables ! that do require the direct knowledge of the dimensionality of the ! simulation @@ -1352,6 +1600,20 @@ subroutine s_initialize_data_input_module -buff_size:p + buff_size)) end if + if (q_filtered_wrt) then + do i = 2, 4 + allocate (R_u_stat(i)%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + allocate (R_mu_stat(i)%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + allocate (F_IMET_stat(i)%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + end do + end if + ! Simulation is 2D else @@ -1444,6 +1706,21 @@ subroutine s_finalize_data_input_module deallocate (q_T_sf%sf) end if + if (q_filtered_wrt) then + do i = 2, 4 + deallocate (R_u_stat(i)%sf) + end do + deallocate(R_u_stat) + do i = 2, 4 + deallocate (R_mu_stat(i)%sf) + end do + deallocate(R_mu_stat) + do i = 2, 4 + deallocate (F_IMET_stat(i)%sf) + end do + deallocate(F_IMET_stat) + end if + s_read_data_files => null() end subroutine s_finalize_data_input_module diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 9db5321c55..ec6a3ca3f9 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -322,6 +322,7 @@ module m_global_parameters logical :: periodic_ibs logical :: store_levelset logical :: slab_domain_decomposition + logical :: q_filtered_wrt contains @@ -467,6 +468,7 @@ contains periodic_ibs = .false. store_levelset = .true. slab_domain_decomposition = .false. + q_filtered_wrt = .false. 
end subroutine s_assign_default_values_to_user_inputs @@ -778,6 +780,13 @@ contains allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do + else if (q_filtered_wrt) then + allocate (MPI_IO_DATA%view(1:sys_size+9)) + allocate (MPI_IO_DATA%var(1:sys_size+9)) + do i = 1, sys_size+9 + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do else allocate (MPI_IO_DATA%view(1:sys_size)) allocate (MPI_IO_DATA%var(1:sys_size)) @@ -964,6 +973,12 @@ contains if (bubbles_lagrange) MPI_IO_DATA%var(sys_size + 1)%sf => null() + if (q_filtered_wrt) then + do i = sys_size+1, sys_size+9 + MPI_IO_DATA%var(i)%sf => null() + end do + end if + deallocate (MPI_IO_DATA%var) deallocate (MPI_IO_DATA%view) end if diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 9e368d7fa4..357bb326a7 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -172,7 +172,7 @@ contains & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & & 'rkck_adap_dt', 'output_partial_domain', 'relativity', & & 'cont_damage', 'periodic_ibs', 'store_levelset', & - & 'slab_domain_decomposition' ] + & 'slab_domain_decomposition', 'q_filtered_wrt' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index f04efd75e6..8a61c72e07 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -85,7 +85,8 @@ subroutine s_read_input_file cfl_adap_dt, cfl_const_dt, t_save, t_stop, n_start, & cfl_target, surface_tension, bubbles_lagrange, rkck_adap_dt, & sim_data, hyperelasticity, Bx0, relativity, cont_damage, & - periodic_ibs, store_levelset, slab_domain_decomposition + periodic_ibs, store_levelset, slab_domain_decomposition, & + q_filtered_wrt ! 
Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -179,6 +180,7 @@ subroutine s_perform_time_step(t_step) ! Populating the buffer regions of the conservative variables if (buff_size > 0) then call s_populate_conservative_variables_buffer_regions() + if (q_filtered_wrt) call s_populate_filtered_variables_buffer_regions() if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if @@ -323,6 +325,32 @@ subroutine s_save_data(t_step, varname, pres, c, H) end if end do + ! Adding filtered quantities + if (q_filtered_wrt) then + ! filtered cons vars + do i = 2, 4 + q_sf = R_u_stat(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'R_u_stats', i + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + do i = 2, 4 + q_sf = R_mu_stat(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'R_mu_stats', i + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + do i = 2, 4 + q_sf = F_IMET_stat(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'F_IMET_stats', i + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + end if + ! 
Adding the species' concentrations to the formatted database file if (chemistry) then do i = 1, num_species diff --git a/src/simulation/m_compute_statistics.fpp b/src/simulation/m_compute_statistics.fpp new file mode 100644 index 0000000000..bcd6732cf2 --- /dev/null +++ b/src/simulation/m_compute_statistics.fpp @@ -0,0 +1,130 @@ +#:include 'macros.fpp' + +module m_compute_statistics + use m_derived_types + + use m_global_parameters + + use m_mpi_proxy + + implicit none + + private; public :: s_initialize_statistics_module, s_finalize_statistics_module, s_compute_s_order_statistics + + type(scalar_field), allocatable, dimension(:) :: xnbar_stat + + type(scalar_field), allocatable, dimension(:) :: delta_stat + + type(vector_field), allocatable, dimension(:) :: Msn_stat + + !$acc declare create(xnbar_stat, delta_stat, Msn_stat) + +contains + + subroutine s_initialize_statistics_module + integer :: i, j + @:ALLOCATE(xnbar_stat(1:3)) + do i = 1, 3 + @:ALLOCATE(xnbar_stat(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(xnbar_stat(i)) + end do + + @:ALLOCATE(delta_stat(1:3)) + do i = 1, 3 + @:ALLOCATE(delta_stat(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(delta_stat(i)) + end do + + @:ALLOCATE(Msn_stat(1:num_dims)) + do i = 1, 3 + @:ALLOCATE(Msn_stat(i)%vf(2:4)) + end do + do i = 1, 3 + do j = 2, 4 + @:ALLOCATE(Msn_stat(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(Msn_stat(i)) + end do + + end subroutine s_initialize_statistics_module + + subroutine s_compute_s_order_statistics(q_temp, n_step, s_order_stat, id) + type(scalar_field), intent(in) :: q_temp + integer, intent(in) :: n_step + type(scalar_field), dimension(2:4), intent(inout) :: s_order_stat + integer, intent(in) :: id + real(wp) :: ns + integer :: i, j, k, ii + + ns = real(n_step, wp) + + if (n_step == 1) then + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + xnbar_stat(id)%sf(i, j, k) = q_temp%sf(i, j, k) + Msn_stat(id)%vf(2)%sf(i, j, k) = 0.0_wp + 
Msn_stat(id)%vf(3)%sf(i, j, k) = 0.0_wp + Msn_stat(id)%vf(4)%sf(i, j, k) = 0.0_wp + s_order_stat(2)%sf(i, j, k) = 0.0_wp + s_order_stat(3)%sf(i, j, k) = 0.0_wp + s_order_stat(4)%sf(i, j, k) = 0.0_wp + end do + end do + end do + else + !$acc parallel loop collapse(3) gang vector default(present) copyin(ns) + do i = 0, m + do j = 0, n + do k = 0, p + delta_stat(id)%sf(i, j, k) = q_temp%sf(i, j, k) - xnbar_stat(id)%sf(i, j, k) + + xnbar_stat(id)%sf(i, j, k) = xnbar_stat(id)%sf(i, j, k) + delta_stat(id)%sf(i, j, k)/ns + + Msn_stat(id)%vf(4)%sf(i, j, k) = Msn_stat(id)%vf(4)%sf(i, j, k) & + + (delta_stat(id)%sf(i, j, k)**4)*(ns - 1.0_wp)*(ns**2 - 3.0_wp*ns + 3.0_wp)/(ns**3) & + + 6.0_wp*(delta_stat(id)%sf(i, j, k)**2)*Msn_stat(id)%vf(2)%sf(i, j, k)/(ns**2) & + - 4.0_wp*delta_stat(id)%sf(i, j, k)*Msn_stat(id)%vf(3)%sf(i, j, k)/ns + + Msn_stat(id)%vf(3)%sf(i, j, k) = Msn_stat(id)%vf(3)%sf(i, j, k) & + + (delta_stat(id)%sf(i, j, k)**3)*(ns - 1.0_wp)*(ns - 2.0_wp)/(ns**2) & + - 3.0_wp*delta_stat(id)%sf(i, j, k)*Msn_stat(id)%vf(2)%sf(i, j, k)/ns + + Msn_stat(id)%vf(2)%sf(i, j, k) = Msn_stat(id)%vf(2)%sf(i, j, k) & + + (delta_stat(id)%sf(i, j, k)**2)*(ns - 1.0_wp)/ns + + s_order_stat(2)%sf(i, j, k) = Msn_stat(id)%vf(2)%sf(i, j, k)/(ns - 1.0_wp) + + s_order_stat(3)%sf(i, j, k) = sqrt(ns)*Msn_stat(id)%vf(3)%sf(i, j, k)/(Msn_stat(id)%vf(2)%sf(i, j, k)**1.5_wp) + + s_order_stat(4)%sf(i, j, k) = ns*Msn_stat(id)%vf(4)%sf(i, j, k)/(Msn_stat(id)%vf(2)%sf(i, j, k)**2) - 3.0_wp + end do + end do + end do + end if + + end subroutine s_compute_s_order_statistics + + subroutine s_finalize_statistics_module + integer :: i, j + do i = 1, 3 + @:DEALLOCATE(xnbar_stat(i)%sf) + end do + @:DEALLOCATE(xnbar_stat) + + do i = 1, 3 + @:DEALLOCATE(delta_stat(i)%sf) + end do + @:DEALLOCATE(delta_stat) + + do i = 1, 3 + do j = 2, 4 + @:DEALLOCATE(Msn_stat(i)%vf(j)%sf) + end do + @:DEALLOCATE(Msn_stat(i)%vf) + end do + @:DEALLOCATE(Msn_stat) + end subroutine s_finalize_statistics_module + +end module 
m_compute_statistics \ No newline at end of file diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 56b21207ff..63b8fa2b32 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -76,7 +76,7 @@ contains !! @param q_cons_vf Conservative variables !! @param q_prim_vf Primitive variables !! @param t_step Current time step - subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta) + subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta, R_u_stat, R_mu_stat, F_IMET_stat) type(scalar_field), & dimension(sys_size), & @@ -94,10 +94,14 @@ contains type(scalar_field), & intent(inout), optional :: beta + type(scalar_field), dimension(2:4), intent(inout), optional :: R_u_stat + type(scalar_field), dimension(2:4), intent(inout), optional :: R_mu_stat + type(scalar_field), dimension(2:4), intent(inout), optional :: F_IMET_stat + if (.not. parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta) else - call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta) + call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, R_u_stat, R_mu_stat, F_IMET_stat) end if end subroutine s_write_data_files @@ -786,12 +790,15 @@ contains !! @param q_prim_vf Cell-average primitive variables !! @param t_step Current time-step !! 
@param beta Eulerian void fraction from lagrangian bubbles - subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta) + subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, R_u_stat, R_mu_stat, F_IMET_stat) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: t_step type(scalar_field), intent(inout), optional :: beta + type(scalar_field), dimension(2:4), intent(inout), optional :: R_u_stat + type(scalar_field), dimension(2:4), intent(inout), optional :: R_mu_stat + type(scalar_field), dimension(2:4), intent(inout), optional :: F_IMET_stat #ifdef MFC_MPI @@ -813,6 +820,8 @@ contains if (present(beta)) then alt_sys = sys_size + 1 + else if (present(R_u_stat) .and. present(R_mu_stat) .and. present(F_IMET_stat)) then + alt_sys = sys_size + 9 else alt_sys = sys_size end if @@ -896,7 +905,12 @@ contains ! Initialize MPI data I/O if (ib) then - call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm) + if (present(R_u_stat) .and. present(R_mu_stat) .and. present(F_IMET_stat)) then + call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, & + R_u_stat=R_u_stat, R_mu_stat=R_mu_stat, F_IMET_stat=F_IMET_stat) + else + call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm) + end if elseif (present(beta)) then call s_initialize_mpi_data(q_cons_vf, beta=beta) else @@ -951,6 +965,18 @@ contains mpi_p, status, ierr) end do end if + else if (fourier_transform_filtering) then + do i = 1, alt_sys + var_MOK = int(i, MPI_OFFSET_KIND) + + ! 
Initial displacement to skip at beginning of file + disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) + + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & + 'native', mpi_info_int, ierr) + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & + mpi_p, status, ierr) + end do else do i = 1, sys_size !TODO: check if correct (sys_size var_MOK = int(i, MPI_OFFSET_KIND) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 0ae8d7763e..0158af546f 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -1153,6 +1153,9 @@ contains elseif (bubbles_lagrange) then allocate (MPI_IO_DATA%view(1:sys_size + 1)) allocate (MPI_IO_DATA%var(1:sys_size + 1)) + else if (fourier_transform_filtering) then + allocate (MPI_IO_DATA%view(1:sys_size+9)) + allocate (MPI_IO_DATA%var(1:sys_size+9)) else allocate (MPI_IO_DATA%view(1:sys_size)) allocate (MPI_IO_DATA%var(1:sys_size)) @@ -1172,6 +1175,11 @@ contains allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do + else if (fourier_transform_filtering) then + do i = sys_size+1, sys_size+9 + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do end if ! Configuring the WENO average flag that will be used to regulate @@ -1344,6 +1352,10 @@ contains do i = 1, sys_size + 1 MPI_IO_DATA%var(i)%sf => null() end do + else if (fourier_transform_filtering) then + do i = 1, sys_size+9 + MPI_IO_DATA%var(i)%sf => null() + end do else do i = 1, sys_size MPI_IO_DATA%var(i)%sf => null() diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 626aed96ce..884e6a07ad 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -999,7 +999,7 @@ contains end do ! particle forces loop, x-dir - if (compute_CD .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. fourier_transform_filtering) .and. 
present(div_pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n @@ -1128,7 +1128,7 @@ contains end do ! particle forces loop, y-dir - if (compute_CD .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n @@ -1353,7 +1353,7 @@ contains end if ! particle forces loop, z-dir - if (compute_CD .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n @@ -1596,7 +1596,7 @@ contains end do ! particle momentum exchange, viscous stress tensor, x-dir - if (compute_CD .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n @@ -1695,7 +1695,7 @@ contains end if ! particle momentum exchange, viscous stress tensor, y-dir - if (compute_CD .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n @@ -1797,7 +1797,7 @@ contains end do ! particle momentum exchange, viscous stress tensor, z-dir - if (compute_CD .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. fourier_transform_filtering) .and. 
present(div_pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index d2e9e344a3..4c3fd33495 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1420,6 +1420,11 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") + do i = 2, 4 + !$acc update host(R_u_stat(i)%sf) + !$acc update host(R_mu_stat(i)%sf) + !$acc update host(F_IMET_stat(i)%sf) + end do do i = 1, sys_size !$acc update host(q_cons_ts(1)%vf(i)%sf) do l = 0, p @@ -1452,7 +1457,12 @@ contains call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else - call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count) + if (fourier_transform_filtering) then + call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, & + R_u_stat=R_u_stat, R_mu_stat=R_mu_stat, F_IMET_stat=F_IMET_stat) + else + call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count) + end if end if call nvtxEndRange @@ -1571,6 +1581,8 @@ contains call s_initialize_additional_forcing_module() if (fourier_transform_filtering) call s_initialize_fftw_explicit_filter_module() + call s_initialize_statistics_module() + end subroutine s_initialize_modules subroutine s_initialize_mpi_domain diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 8291e2d9e7..343a542e4b 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -52,6 +52,8 @@ module m_time_steppers use m_volume_filtering + use m_compute_statistics + implicit none type(vector_field), allocatable, dimension(:) :: q_cons_ts !< @@ -89,8 +91,22 @@ module m_time_steppers type(scalar_field), allocatable, dimension(:) :: q_cons_filtered + type(vector_field), allocatable, dimension(:) :: pt_Re_stress + type(vector_field), allocatable, dimension(:) :: R_mu 
+ type(scalar_field), allocatable, dimension(:) :: pres_visc_stress_filtered + + type(scalar_field) :: mag_div_Ru + type(scalar_field) :: mag_div_R_mu + type(scalar_field) :: mag_F_IMET + + type(scalar_field), allocatable, dimension(:) :: R_u_stat + type(scalar_field), allocatable, dimension(:) :: R_mu_stat + type(scalar_field), allocatable, dimension(:) :: F_IMET_stat + !$acc declare create(q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, rhs_ts_rkck, q_prim_ts, rhs_mv, rhs_pb, max_dt) - !$acc declare create(div_pres_visc_stress) + !$acc declare create(div_pres_visc_stress, q_cons_filtered, pt_Re_stress, R_mu, pres_visc_stress_filtered) + !$acc declare create(mag_div_Ru, mag_div_R_mu, mag_F_IMET) + !$acc declare create(R_u_stat, R_mu_stat, F_IMET_stat) contains @@ -366,7 +382,7 @@ contains end do end do - if (compute_CD) then + if (compute_CD .or. fourier_transform_filtering) then @:ALLOCATE(div_pres_visc_stress(momxb:momxe)) do i = momxb, momxe @:ALLOCATE(div_pres_visc_stress(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & @@ -384,8 +400,73 @@ contains idwbuff(3)%beg:idwbuff(3)%end)) @:ACC_SETUP_SFs(q_cons_filtered(i)) end do + + @:ALLOCATE(pt_Re_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(pt_Re_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(pt_Re_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(pt_Re_stress(i)) + end do + + @:ALLOCATE(R_mu(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(R_mu(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(R_mu(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(R_mu(i)) + end do + + @:ALLOCATE(pres_visc_stress_filtered(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(pres_visc_stress_filtered(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + 
idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(pres_visc_stress_filtered(i)) + end do + + @:ALLOCATE(mag_div_Ru%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(mag_div_Ru) + + @:ALLOCATE(mag_div_R_mu%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(mag_div_R_mu) + + @:ALLOCATE(mag_F_IMET%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(mag_F_IMET) end if + @:ALLOCATE(R_u_stat(2:4)) + do i = 2, 4 + @:ALLOCATE(R_u_stat(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(R_u_stat(i)) + end do + @:ALLOCATE(R_mu_stat(2:4)) + do i = 2, 4 + @:ALLOCATE(R_mu_stat(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(R_mu_stat(i)) + end do + @:ALLOCATE(F_IMET_stat(2:4)) + do i = 2, 4 + @:ALLOCATE(F_IMET_stat(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(F_IMET_stat(i)) + end do + end subroutine s_initialize_time_steppers_module !> 1st order TVD RK time-stepping algorithm @@ -694,6 +775,8 @@ contains real(wp) :: start, finish + integer :: n_step + ! Stage 1 of 3 if (.not. 
adap_dt) then @@ -708,6 +791,30 @@ contains call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, div_pres_visc_stress) + if (fourier_transform_filtering) then + call s_apply_fftw_filter_cons(q_cons_ts(1)%vf, q_cons_filtered) + call s_setup_terms_filtering(q_cons_ts(1)%vf, pt_Re_stress, R_mu) + call s_apply_fftw_filter_tensor(pt_Re_stress, R_mu, q_cons_filtered, div_pres_visc_stress, pres_visc_stress_filtered) + call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, pt_Re_stress, mag_div_Ru) + call s_compute_R_mu(q_cons_filtered, R_mu, mag_div_R_mu) + call s_compute_interphase_momentum_exchange_term(pres_visc_stress_filtered, mag_F_IMET) + end if + + if (t_step > 5) then + n_step = t_step - 5 + print *, n_step + call s_compute_s_order_statistics(mag_div_Ru, n_step, R_u_stat, 1) + !call s_compute_s_order_statistics(mag_div_R_mu, n_step, R_mu_stat, 2) + !call s_compute_s_order_statistics(mag_F_IMET, n_step, F_IMET_stat, 3) + end if + + + ! R_u_stat(2)%sf(0:m, 0:n, 0:p) = q_cons_filtered(6)%sf(0:m, 0:n, 0:p) + ! R_u_stat(3)%sf(0:m, 0:n, 0:p) = mag_div_Ru%sf(0:m, 0:n, 0:p) + ! R_u_stat(4)%sf(0:m, 0:n, 0:p) = mag_div_R_mu%sf(0:m, 0:n, 0:p) + ! R_mu_stat(2)%sf(0:m, 0:n, 0:p) = mag_F_IMET%sf(0:m, 0:n, 0:p) + + if (compute_CD) then call s_compute_drag_coefficient(div_pres_visc_stress) end if @@ -1380,13 +1487,58 @@ contains @:DEALLOCATE(rhs_vf) end if - if (compute_CD) then + if (compute_CD .or. 
fourier_transform_filtering) then do i = momxb, momxe @:DEALLOCATE(div_pres_visc_stress(i)%sf) end do @:DEALLOCATE(div_pres_visc_stress) end if + if (fourier_transform_filtering) then + do i = 1, sys_size + @:DEALLOCATE(q_cons_filtered(i)%sf) + end do + @:DEALLOCATE(q_cons_filtered) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(pt_Re_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(pt_Re_stress(i)%vf) + end do + @:DEALLOCATE(pt_Re_stress) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(R_mu(i)%vf(j)%sf) + end do + @:DEALLOCATE(R_mu(i)%vf) + end do + @:DEALLOCATE(R_mu) + + do i = 1, num_dims + @:DEALLOCATE(pres_visc_stress_filtered(i)%sf) + end do + @:DEALLOCATE(pres_visc_stress_filtered) + + @:DEALLOCATE(mag_div_Ru%sf) + @:DEALLOCATE(mag_div_R_mu%sf) + @:DEALLOCATE(mag_F_IMET%sf) + end if + + do i = 2, 4 + @:DEALLOCATE(R_u_stat(i)%sf) + end do + @:DEALLOCATE(R_u_stat) + do i = 2, 4 + @:DEALLOCATE(R_mu_stat(i)%sf) + end do + @:DEALLOCATE(R_mu_stat) + do i = 2, 4 + @:DEALLOCATE(F_IMET_stat(i)%sf) + end do + @:DEALLOCATE(F_IMET_stat) + ! Writing the footer of and closing the run-time information file if (proc_rank == 0 .and. 
run_time_info) then call s_close_run_time_information_file() diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 6caffaa4c5..adb102df35 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -12,6 +12,8 @@ module m_volume_filtering use m_ibm + use m_boundary_common + #ifdef MFC_MPI use mpi !< Message passing interface (MPI) module #endif @@ -23,11 +25,11 @@ module m_volume_filtering implicit none private; public :: s_initialize_fftw_explicit_filter_module, & - s_apply_fftw_filter_cons, & - s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & - s_initialize_filtered_fluid_indicator_function, & + s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, s_initialize_filtered_fluid_indicator_function, & s_finalize_fftw_explicit_filter_module, & - s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield + s_apply_fftw_filter_cons, s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield, & + s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & + s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_R_mu, s_compute_interphase_momentum_exchange_term #if !defined(MFC_OpenACC) include 'fftw3.f03' @@ -460,12 +462,12 @@ contains end subroutine s_apply_fftw_filter_scalarfield !< apply the gaussian filter to the requisite tensors to compute unclosed terms of interest - subroutine s_apply_fftw_filter_tensor(pt_Re_stress, R_mu, q_cons_filtered, rhs_rhouu, pImT_filtered) + subroutine s_apply_fftw_filter_tensor(pt_Re_stress, R_mu, q_cons_filtered, div_pres_visc_stress, pres_visc_stress_filtered) type(vector_field), dimension(1:num_dims), intent(inout) :: pt_Re_stress type(vector_field), dimension(1:num_dims), intent(inout) :: R_mu type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered - type(scalar_field), dimension(momxb:momxe), intent(inout) :: rhs_rhouu - type(scalar_field), 
dimension(1:num_dims), intent(inout) :: pImT_filtered + type(scalar_field), dimension(momxb:momxe), intent(inout) :: div_pres_visc_stress + type(scalar_field), dimension(1:num_dims), intent(inout) :: pres_visc_stress_filtered integer :: i, j, k, l, q @@ -485,7 +487,7 @@ contains ! interphase momentum exchange do l = 1, num_dims - call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .false., rhs_rhouu(momxb-1+l), pImT_filtered(l)) + call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .false., div_pres_visc_stress(momxb-1+l), pres_visc_stress_filtered(l)) end do end subroutine s_apply_fftw_filter_tensor @@ -983,8 +985,8 @@ contains !$acc loop seq do l = 1, num_dims div_R_mu(l, i, j, k) = (R_mu(l)%vf(1)%sf(i+1, j, k) - R_mu(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (R_mu(l)%vf(2)%sf(i, j+1, k) - R_mu(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (R_mu(l)%vf(3)%sf(i, j, k+1) - R_mu(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) + + (R_mu(l)%vf(2)%sf(i, j+1, k) - R_mu(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (R_mu(l)%vf(3)%sf(i, j, k+1) - R_mu(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) end do end do end do @@ -1001,8 +1003,8 @@ contains end subroutine s_compute_R_mu - subroutine s_compute_interphase_momentum_exchange_term(pImT_filtered, mag_F_IMET) - type(scalar_field), dimension(1:num_dims), intent(in) :: pImT_filtered + subroutine s_compute_interphase_momentum_exchange_term(pres_visc_stress_filtered, mag_F_IMET) + type(scalar_field), dimension(1:num_dims), intent(in) :: pres_visc_stress_filtered type(scalar_field), intent(inout) :: mag_F_IMET integer :: i, j, k, l, q, ii @@ -1011,9 +1013,9 @@ contains do i = 0, m do j = 0, n do k = 0, p - mag_F_IMET%sf(i, j, k) = sqrt(pImT_filtered(1)%sf(i, j, k)**2 & - + pImT_filtered(2)%sf(i, j, k)**2 & - + pImT_filtered(3)%sf(i, j, k)**2) + mag_F_IMET%sf(i, j, k) = sqrt(pres_visc_stress_filtered(1)%sf(i, j, k)**2 & + + pres_visc_stress_filtered(2)%sf(i, j, k)**2 & + + pres_visc_stress_filtered(3)%sf(i, j, k)**2) end do end do 
end do diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index e425e53b6d..b3a26a65b5 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -446,6 +446,7 @@ def analytic(self): 'surface_tension': ParamType.LOG, 'output_partial_domain': ParamType.LOG, 'bubbles_lagrange': ParamType.LOG, + 'q_filtered_wrt': ParamType.LOG, }) for cmp_id in range(1,3+1): From 3b1572631045452a5c247863ef8a4e8d727ccc58 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Tue, 24 Jun 2025 22:22:02 -0600 Subject: [PATCH 03/30] autocorrelation function calc --- runs/3d_1sphere_filtering/case.py | 1 + runs/phi01/case.py | 160 ++++++++++++++++++++++++ src/simulation/m_additional_forcing.fpp | 3 +- src/simulation/m_compute_statistics.fpp | 48 ++++++- src/simulation/m_global_parameters.fpp | 2 + src/simulation/m_mpi_proxy.fpp | 2 +- src/simulation/m_start_up.fpp | 2 +- src/simulation/m_time_steppers.fpp | 11 +- toolchain/mfc/run/case_dicts.py | 1 + voronoi/gen_voronoi_3D.py | 6 +- 10 files changed, 224 insertions(+), 12 deletions(-) create mode 100644 runs/phi01/case.py diff --git a/runs/3d_1sphere_filtering/case.py b/runs/3d_1sphere_filtering/case.py index fa38be1ff0..9a8c4b1f4c 100644 --- a/runs/3d_1sphere_filtering/case.py +++ b/runs/3d_1sphere_filtering/case.py @@ -147,6 +147,7 @@ "store_levelset": "F", "slab_domain_decomposition": "T", + "compute_autocorrelation": "T", } case_dict.update(ib_dict) diff --git a/runs/phi01/case.py b/runs/phi01/case.py new file mode 100644 index 0000000000..56390a1943 --- /dev/null +++ b/runs/phi01/case.py @@ -0,0 +1,160 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) 
)) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 10 +t_save = 1 + +Nx = 99 +Ny = 99 +Nz = 99 + +# load initial sphere locations +sphere_loc = np.loadtxt('sphere_array_locations.txt') +N_sphere = len(sphere_loc) + +# immersed boundary dictionary +ib_dict = {} +for i in range(N_sphere): + ib_dict.update({ + f"patch_ib({i+1})%geometry": 8, + f"patch_ib({i+1})%x_centroid": sphere_loc[i, 0], + f"patch_ib({i+1})%y_centroid": sphere_loc[i, 1], + f"patch_ib({i+1})%z_centroid": sphere_loc[i, 2], + f"patch_ib({i+1})%radius": D / 2, + f"patch_ib({i+1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": N_sphere, + "viscous": "T", + # 
Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "q_filtered_wrt": "T", + "parallel_io": "T", + # Patch: Constant Tube filled with air + # Specify the cylindrical air tube grid geometry + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": Re, + + # new case additions + "periodic_forcing": "T", + "periodic_ibs": "T", + "compute_CD": "F", + "fourier_transform_filtering": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + "mu_visc": mu, + + "store_levelset": "F", + "slab_domain_decomposition": "T", + "compute_autocorrelation": "T", + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp index 17765413bd..2971ba0e9a 100644 --- a/src/simulation/m_additional_forcing.fpp +++ b/src/simulation/m_additional_forcing.fpp @@ -19,7 +19,8 @@ module m_additional_forcing real(wp), allocatable, dimension(:) :: q_bar ! 1:3 rho*u, 4 rho, 5 T type(scalar_field), allocatable, dimension(:) :: q_periodic_force - real(wp), allocatable, dimension(:) :: q_spatial_avg, q_spatial_avg_glb ! 1:3 rho*u, 4 rho, 5 T + real(wp), allocatable, dimension(:) :: q_spatial_avg + real(wp), allocatable, dimension(:), public :: q_spatial_avg_glb ! 
1:3 rho*u, 4 rho, 5 T real(wp) :: volfrac_phi integer :: N_x_total_glb diff --git a/src/simulation/m_compute_statistics.fpp b/src/simulation/m_compute_statistics.fpp index bcd6732cf2..1721b0706c 100644 --- a/src/simulation/m_compute_statistics.fpp +++ b/src/simulation/m_compute_statistics.fpp @@ -7,9 +7,12 @@ module m_compute_statistics use m_mpi_proxy + use m_additional_forcing + implicit none - private; public :: s_initialize_statistics_module, s_finalize_statistics_module, s_compute_s_order_statistics + private; public :: s_initialize_statistics_module, s_finalize_statistics_module, & + s_compute_s_order_statistics, s_autocorrelation_function type(scalar_field), allocatable, dimension(:) :: xnbar_stat @@ -17,6 +20,10 @@ module m_compute_statistics type(vector_field), allocatable, dimension(:) :: Msn_stat + real(wp), allocatable, dimension(:) :: xm_th + + real(wp), allocatable, dimension(:) :: x_mom_autocorr + !$acc declare create(xnbar_stat, delta_stat, Msn_stat) contains @@ -46,6 +53,11 @@ contains @:ACC_SETUP_VFs(Msn_stat(i)) end do + if (compute_autocorrelation) then + @:ALLOCATE(xm_th(t_step_stop)) + @:ALLOCATE(x_mom_autocorr(t_step_stop)) + end if + end subroutine s_initialize_statistics_module subroutine s_compute_s_order_statistics(q_temp, n_step, s_order_stat, id) @@ -106,6 +118,40 @@ contains end subroutine s_compute_s_order_statistics + subroutine s_autocorrelation_function(n_step, q_cons_vf) + integer, intent(in) :: n_step + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + real(wp) :: q_avg, q_var, test + integer :: i, j, k, s, it + + !$acc update host(q_cons_vf(2)) + xm_th(n_step) = q_cons_vf(2)%sf(m/4, n/4, p/4) + + if (n_step > 1) then + ! compute average + q_avg = sum(xm_th(1:n_step)) / real(n_step, wp) + + ! compute variance + q_var = sum((xm_th(1:n_step) - q_avg)**2) / real(n_step, wp) + + ! 
compute autocorrelation function + do s = 0, n_step - 1 + x_mom_autocorr(s+1) = 0.0_wp + do it = 1, n_step - s + x_mom_autocorr(s+1) = x_mom_autocorr(s+1) + (xm_th(it) - q_avg) * (xm_th(it+s) - q_avg) + end do + x_mom_autocorr(s+1) = x_mom_autocorr(s+1) / ((n_step - s) * q_var) + end do + + print *, q_cons_vf(2)%sf(m/4, n/4, p/4) + print *, 'Autocorrelation at lag 0:', x_mom_autocorr(1) + print *, 'Autocorrelation at lag N/2:', x_mom_autocorr(n_step/2) + print *, 'Autocorrelation at max lag:', x_mom_autocorr(n_step) + + end if + + end subroutine s_autocorrelation_function + subroutine s_finalize_statistics_module integer :: i, j do i = 1, 3 diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 0158af546f..eaea7c04e3 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -511,6 +511,7 @@ module m_global_parameters logical :: fourier_transform_filtering logical :: store_levelset logical :: slab_domain_decomposition + logical :: compute_autocorrelation !$acc declare create(mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref) @@ -799,6 +800,7 @@ contains fourier_transform_filtering = .false. store_levelset = .true. slab_domain_decomposition = .false. + compute_autocorrelation = .false. 
end subroutine s_assign_default_values_to_user_inputs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index b1f1c28c8c..6171f9faf3 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -93,7 +93,7 @@ contains & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage', & & 'periodic_ibs', 'compute_CD', 'periodic_forcing', 'fourier_transform_filtering', & - & 'store_levelset', 'slab_domain_decomposition' ] + & 'store_levelset', 'slab_domain_decomposition', 'compute_autocorrelation' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 4c3fd33495..62b641da19 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -189,7 +189,7 @@ contains cont_damage, tau_star, cont_damage_s, alpha_bar, & periodic_ibs, compute_CD, mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref, & periodic_forcing, fourier_transform_filtering, store_levelset, & - slab_domain_decomposition + slab_domain_decomposition, compute_autocorrelation ! Checking that an input file has been provided by the user. If it ! has, then the input file is read in, otherwise, simulation exits. 
diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 343a542e4b..b88766eccd 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -800,12 +800,13 @@ contains call s_compute_interphase_momentum_exchange_term(pres_visc_stress_filtered, mag_F_IMET) end if - if (t_step > 5) then - n_step = t_step - 5 - print *, n_step + + call s_autocorrelation_function(t_step+1, q_cons_ts(1)%vf) + if (t_step > 10) then + n_step = t_step - 10 call s_compute_s_order_statistics(mag_div_Ru, n_step, R_u_stat, 1) - !call s_compute_s_order_statistics(mag_div_R_mu, n_step, R_mu_stat, 2) - !call s_compute_s_order_statistics(mag_F_IMET, n_step, F_IMET_stat, 3) + call s_compute_s_order_statistics(mag_div_R_mu, n_step, R_mu_stat, 2) + call s_compute_s_order_statistics(mag_F_IMET, n_step, F_IMET_stat, 3) end if diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index b3a26a65b5..a58d29869f 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -307,6 +307,7 @@ def analytic(self): 'T_inf_ref': ParamType.REAL, 'periodic_forcing': ParamType.LOG, 'fourier_transform_filtering': ParamType.LOG, + 'compute_autocorrelation': ParamType.LOG, }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', diff --git a/voronoi/gen_voronoi_3D.py b/voronoi/gen_voronoi_3D.py index ce700acb5d..c56a02fb8e 100644 --- a/voronoi/gen_voronoi_3D.py +++ b/voronoi/gen_voronoi_3D.py @@ -52,13 +52,13 @@ def lloyd_relaxation_3d(initial_points, box, w=1, iterations=10): print('running 3D...') # setup - phi = 0.05 - str_phi = '005' + phi = 0.1 + str_phi = '01' D = 0.1 L = 10*D - output_dir = '../examples/phi'+str_phi + output_dir = '../runs/phi'+str_phi if os.path.exists(output_dir) == False: os.mkdir(output_dir) From aa4a876d3c36f12b2964d5768b38b011e73e5ef5 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Tue, 24 Jun 2025 22:28:44 -0600 Subject: [PATCH 04/30] sphere 
locations for phi=0.1 --- runs/phi01/sphere_array_locations.txt | 190 ++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 runs/phi01/sphere_array_locations.txt diff --git a/runs/phi01/sphere_array_locations.txt b/runs/phi01/sphere_array_locations.txt new file mode 100644 index 0000000000..cb062253cc --- /dev/null +++ b/runs/phi01/sphere_array_locations.txt @@ -0,0 +1,190 @@ +-2.269415855407714844e-01 -1.414051055908203125e-01 3.922535181045532227e-01 +4.000198841094970703e-01 2.981948852539062500e-02 -2.832174301147460938e-01 +-3.220155239105224609e-01 -3.898024559020996094e-01 -3.041059970855712891e-01 +2.814270257949829102e-01 -7.608795166015625000e-02 -1.437755823135375977e-01 +-2.728327512741088867e-01 4.227894544601440430e-01 3.520679473876953125e-02 +-4.947633743286132812e-01 -4.232151508331298828e-01 -2.972397804260253906e-01 +-1.808261871337890625e-02 2.877434492111206055e-01 -2.310247421264648438e-01 +3.818988800048828125e-01 3.529353141784667969e-01 1.727198362350463867e-01 +-2.346787452697753906e-01 2.829644680023193359e-01 1.594238281250000000e-01 +-4.887726306915283203e-01 4.662406444549560547e-02 7.227540016174316406e-02 +-2.048213481903076172e-01 4.885343313217163086e-01 -2.821706533432006836e-01 +-4.693455696105957031e-01 4.566423892974853516e-01 1.360166072845458984e-02 +-2.810692787170410156e-02 3.964089155197143555e-01 2.224528789520263672e-01 +-4.457854032516479492e-01 2.029451131820678711e-01 -2.691650390625000000e-01 +-4.315460920333862305e-01 3.888773918151855469e-02 -4.190684556961059570e-01 +4.569005966186523438e-01 4.780390262603759766e-01 -1.672872304916381836e-01 +4.523042440414428711e-01 2.975084781646728516e-01 -5.123972892761230469e-03 +1.155309677124023438e-01 3.826811313629150391e-01 3.157733678817749023e-01 +-3.529649972915649414e-01 3.223993778228759766e-01 3.534083366394042969e-01 +-1.699209213256835938e-03 -3.757699728012084961e-01 4.251234531402587891e-01 +3.104512691497802734e-01 
3.631212711334228516e-01 3.740961551666259766e-01 +3.886995315551757812e-01 -4.476237297058105469e-01 3.331944942474365234e-01 +-8.131015300750732422e-02 3.511540889739990234e-01 6.623625755310058594e-02 +5.544662475585937500e-03 2.087895870208740234e-01 -4.609942436218261719e-03 +-2.697887420654296875e-01 7.647264003753662109e-02 1.385573148727416992e-01 +-4.056740999221801758e-01 -2.304553985595703125e-03 -2.276074886322021484e-01 +-3.986057043075561523e-01 -8.398652076721191406e-02 8.779549598693847656e-02 +1.455659866333007812e-01 -5.315554141998291016e-02 3.587335348129272461e-01 +-3.624105453491210938e-02 -1.932673454284667969e-01 3.783030509948730469e-01 +2.404289245605468750e-01 2.313592433929443359e-01 -9.129595756530761719e-02 +4.290236234664916992e-01 -2.806437015533447266e-01 -3.928461074829101562e-01 +3.948264122009277344e-01 1.061335802078247070e-01 -1.345469951629638672e-01 +4.199941158294677734e-01 -5.409121513366699219e-02 -4.431722164154052734e-01 +-1.276044845581054688e-01 5.453205108642578125e-02 4.209027290344238281e-01 +2.240920066833496094e-01 5.745470523834228516e-02 -2.274198532104492188e-01 +3.475044965744018555e-01 -1.186680793762207031e-02 3.881464004516601562e-01 +-1.399791240692138672e-02 -5.303645133972167969e-02 3.192350864410400391e-01 +3.149266242980957031e-01 -4.960085153579711914e-01 4.852926731109619141e-01 +1.159789562225341797e-01 7.240676879882812500e-02 -2.081871032714843750e-03 +3.457980155944824219e-01 -4.685097932815551758e-01 1.311070919036865234e-01 +-3.134734630584716797e-01 -1.447633504867553711e-01 2.294397354125976562e-01 +-2.322396039962768555e-01 4.453787803649902344e-01 2.214672565460205078e-01 +7.549452781677246094e-02 2.149226665496826172e-01 1.942512989044189453e-01 +4.877026081085205078e-01 9.565687179565429688e-02 4.446644783020019531e-01 +2.452219724655151367e-01 -1.041567325592041016e-02 -4.420824050903320312e-01 +3.802477121353149414e-01 -2.260215282440185547e-01 -6.829130649566650391e-02 
+4.026585817337036133e-01 -9.730875492095947266e-02 5.328035354614257812e-02 +-1.340943574905395508e-01 -2.988189458847045898e-01 4.915304183959960938e-01 +1.499507427215576172e-01 -1.232669353485107422e-01 -3.215692043304443359e-01 +7.229900360107421875e-02 1.496689319610595703e-01 -1.584017276763916016e-01 +4.887890815734863281e-02 -2.996931076049804688e-01 -6.179094314575195312e-02 +3.264107704162597656e-01 1.829891204833984375e-01 4.166131019592285156e-01 +3.418397903442382812e-01 -3.681684732437133789e-01 -1.888689994812011719e-01 +-1.746954917907714844e-01 3.889560699462890625e-03 2.538719177246093750e-01 +-1.082150936126708984e-01 -1.183983087539672852e-01 -4.667383432388305664e-01 +4.464948177337646484e-02 7.829546928405761719e-02 -4.987317323684692383e-01 +2.724659442901611328e-01 3.989661931991577148e-01 -2.271368503570556641e-01 +2.325954437255859375e-01 2.180564403533935547e-01 7.740092277526855469e-02 +4.475378990173339844e-01 8.053278923034667969e-02 2.720277309417724609e-01 +2.500159740447998047e-01 1.361670494079589844e-01 -4.378540515899658203e-01 +-8.050751686096191406e-02 2.042385339736938477e-01 4.733436107635498047e-01 +6.334328651428222656e-02 3.953868150711059570e-01 -1.099604368209838867e-01 +-6.584823131561279297e-02 4.609835147857666016e-01 -2.351213693618774414e-01 +-3.965889215469360352e-01 2.626715898513793945e-01 -4.403696060180664062e-01 +-4.123499393463134766e-01 4.679954051971435547e-01 -4.630439281463623047e-01 +-3.268948793411254883e-01 -2.706754207611083984e-01 4.083846807479858398e-01 +4.519817829132080078e-01 -4.413130283355712891e-01 -4.950367212295532227e-01 +1.736700534820556641e-01 -4.334635734558105469e-01 3.858578205108642578e-01 +-2.476015090942382812e-01 -8.808064460754394531e-02 -3.171390295028686523e-01 +1.416424512863159180e-01 6.130337715148925781e-03 1.613216400146484375e-01 +1.161313056945800781e-01 -8.472347259521484375e-02 -6.638598442077636719e-02 +6.862294673919677734e-02 7.571196556091308594e-02 
-3.263452053070068359e-01 +-2.883186340332031250e-01 1.637139320373535156e-01 -1.617478132247924805e-01 +4.712302684783935547e-01 -1.252410411834716797e-01 2.302359342575073242e-01 +-3.321516513824462891e-02 -3.931099176406860352e-01 -1.693089008331298828e-01 +4.347057342529296875e-01 3.060367107391357422e-01 -1.781182289123535156e-01 +4.378421306610107422e-01 -2.324944734573364258e-01 4.174745082855224609e-01 +1.022851467132568359e-02 -1.360912322998046875e-01 6.093466281890869141e-02 +1.258683204650878906e-01 -2.447234392166137695e-01 3.956108093261718750e-01 +-1.879813671112060547e-01 3.079674243927001953e-01 3.408046960830688477e-01 +-3.804820775985717773e-01 3.240450620651245117e-01 -1.224457025527954102e-01 +4.557719230651855469e-01 -3.179004192352294922e-01 2.294783592224121094e-01 +-1.324630975723266602e-01 -2.825807332992553711e-01 2.794981002807617188e-02 +-9.088420867919921875e-02 -4.938784837722778320e-01 -4.559993743896484375e-01 +4.321286678314208984e-01 1.908559799194335938e-01 -4.160747528076171875e-01 +4.761004447937011719e-01 -3.449964523315429688e-02 -9.512662887573242188e-02 +-3.295238018035888672e-01 -4.874784946441650391e-01 3.628075122833251953e-01 +-3.269430398941040039e-01 4.961208105087280273e-01 -1.530282497406005859e-01 +1.903204917907714844e-01 4.334928989410400391e-01 1.328067779541015625e-01 +-1.938850879669189453e-01 -3.347861766815185547e-01 3.228425979614257812e-01 +-7.716512680053710938e-02 -1.792883872985839844e-01 -1.214803457260131836e-01 +2.945523262023925781e-01 4.375331401824951172e-01 -4.941940307617187500e-02 +2.805604934692382812e-01 3.923368453979492188e-02 3.594279289245605469e-03 +-3.963446617126464844e-02 4.087066650390625000e-02 1.291446685791015625e-01 +3.017591238021850586e-01 -4.672487974166870117e-01 -3.370153903961181641e-01 +-5.923175811767578125e-02 -1.029053926467895508e-01 -2.954306602478027344e-01 +-4.299471378326416016e-01 1.944204568862915039e-01 1.885912418365478516e-01 +1.226736307144165039e-01 
-4.231331348419189453e-01 -4.431772232055664062e-01 +-1.630305051803588867e-01 1.654865741729736328e-01 1.177084445953369141e-02 +-2.820068597793579102e-01 -1.914020776748657227e-01 4.649567604064941406e-02 +-1.803944110870361328e-01 -5.573785305023193359e-02 3.654372692108154297e-02 +3.560798168182373047e-01 -2.656357288360595703e-01 1.175208091735839844e-01 +4.641888141632080078e-01 3.300178050994873047e-01 4.690952301025390625e-01 +-3.651157617568969727e-01 4.143847227096557617e-01 -3.058776855468750000e-01 +4.892169237136840820e-01 4.351882934570312500e-01 3.436787128448486328e-01 +1.252651214599609375e-02 1.140588521957397461e-01 3.147521018981933594e-01 +2.564185857772827148e-01 4.870939254760742188e-01 2.839587926864624023e-01 +2.440360784530639648e-01 2.740068435668945312e-01 2.384872436523437500e-01 +-1.093761920928955078e-01 2.005448341369628906e-01 2.263984680175781250e-01 +2.751414775848388672e-01 3.257715702056884766e-01 -4.216753244400024414e-01 +-2.344570159912109375e-01 3.708822727203369141e-01 -4.901626110076904297e-01 +3.321342468261718750e-01 2.205178737640380859e-01 -2.770333290100097656e-01 +-3.562602996826171875e-01 2.268432378768920898e-01 3.148293495178222656e-02 +-2.453712224960327148e-01 -3.159594535827636719e-01 -1.403638124465942383e-01 +-4.225530624389648438e-01 1.738572120666503906e-01 4.009822607040405273e-01 +-2.291325330734252930e-01 3.076609373092651367e-01 -2.942405939102172852e-01 +-4.163160324096679688e-01 -1.362502574920654297e-01 -4.328134059906005859e-01 +1.602690219879150391e-01 4.211304187774658203e-01 4.947811365127563477e-01 +1.699512004852294922e-01 -3.455421924591064453e-01 1.857841014862060547e-01 +1.917399168014526367e-01 -2.274444103240966797e-01 -1.499438285827636719e-01 +5.063652992248535156e-02 -7.577204704284667969e-02 -4.671556949615478516e-01 +1.856522560119628906e-01 1.085456609725952148e-01 3.598620891571044922e-01 +2.133283615112304688e-01 -1.748585700988769531e-01 5.385351181030273438e-02 
+1.607365608215332031e-01 2.551939487457275391e-01 -2.725876569747924805e-01 +4.380518198013305664e-01 2.549636363983154297e-01 2.876336574554443359e-01 +-2.457389831542968750e-01 -4.205622673034667969e-01 -4.621033668518066406e-01 +-4.958317279815673828e-01 -4.657427072525024414e-01 1.988265514373779297e-01 +6.845688819885253906e-02 2.681604623794555664e-01 -4.308686256408691406e-01 +-4.200789928436279297e-01 3.732924461364746094e-01 1.710724830627441406e-01 +3.544092178344726562e-02 -3.218197822570800781e-01 8.597135543823242188e-02 +-5.194902420043945312e-02 1.222956180572509766e-02 -5.136704444885253906e-02 +1.391673088073730469e-01 2.476061582565307617e-01 4.182490110397338867e-01 +-1.033620834350585938e-01 3.683781623840332031e-02 -3.891081809997558594e-01 +-4.138703346252441406e-01 -3.311948776245117188e-01 -4.624009132385253906e-01 +-9.261775016784667969e-02 1.478457450866699219e-01 -1.957361698150634766e-01 +2.608032226562500000e-01 -1.573407649993896484e-01 4.948087930679321289e-01 +-1.243667602539062500e-01 -4.962480068206787109e-01 3.667256832122802734e-01 +-4.454655647277832031e-01 -2.705636024475097656e-01 1.070654392242431641e-01 +-4.106376171112060547e-01 -1.618578433990478516e-01 -6.648111343383789062e-02 +3.302078247070312500e-01 -2.219557762145996094e-02 1.648344993591308594e-01 +-1.774271726608276367e-01 3.244402408599853516e-01 -9.372758865356445312e-02 +2.811634540557861328e-01 1.279127597808837891e-01 2.315803766250610352e-01 +2.449696063995361328e-01 -3.595451116561889648e-01 -6.689429283142089844e-03 +5.237126350402832031e-02 -2.531653642654418945e-01 -4.304802417755126953e-01 +-2.635989189147949219e-01 -2.267163991928100586e-01 -4.170490503311157227e-01 +-2.721209526062011719e-01 1.574560403823852539e-01 2.993257045745849609e-01 +-3.956274986267089844e-01 2.191853523254394531e-02 2.550070285797119141e-01 +-1.563029289245605469e-01 -2.704749107360839844e-01 -2.991802692413330078e-01 +7.597208023071289062e-02 -1.699868440628051758e-01 
2.227045297622680664e-01 +-3.653595447540283203e-01 -4.391734600067138672e-01 1.462922096252441406e-01 +1.705410480499267578e-01 -4.559497833251953125e-01 -1.512272357940673828e-01 +-1.343528032302856445e-01 -1.545200347900390625e-01 2.051105499267578125e-01 +5.652284622192382812e-02 -3.860473632812500000e-02 -1.806387901306152344e-01 +7.492136955261230469e-02 -4.894123077392578125e-01 3.830230236053466797e-02 +2.993867397308349609e-01 -3.184000253677368164e-01 2.854095697402954102e-01 +9.030818939208984375e-02 4.506881237030029297e-01 -3.190367221832275391e-01 +1.546680927276611328e-01 -3.337359428405761719e-01 -2.724964618682861328e-01 +1.143584251403808594e-01 3.319869041442871094e-01 3.964900970458984375e-02 +3.128879070281982422e-01 -1.711206436157226562e-01 -2.891231775283813477e-01 +3.134812116622924805e-01 -3.195825815200805664e-01 4.452092647552490234e-01 +-4.257751703262329102e-01 -3.556568622589111328e-01 3.161740303039550781e-01 +-4.604424238204956055e-01 1.566462516784667969e-01 -7.651758193969726562e-02 +-4.535093307495117188e-01 -1.047830581665039062e-01 3.779829740524291992e-01 +-7.651937007904052734e-02 3.510303497314453125e-01 -4.015958309173583984e-01 +-5.069994926452636719e-02 -3.019337654113769531e-01 2.270703315734863281e-01 +4.413585662841796875e-01 3.929922580718994141e-01 -3.560695648193359375e-01 +2.530579566955566406e-01 -3.169052600860595703e-01 -4.228000640869140625e-01 +-6.997537612915039062e-02 1.933835744857788086e-01 -3.526034355163574219e-01 +-1.785504817962646484e-01 1.803159713745117188e-03 -1.765856742858886719e-01 +-1.506757736206054688e-02 3.296717405319213867e-01 4.055316448211669922e-01 +-4.429192543029785156e-01 -3.453509807586669922e-01 -1.209781169891357422e-01 +-2.643674612045288086e-01 1.182488203048706055e-01 -3.157637119293212891e-01 +4.684782028198242188e-02 -4.617999792098999023e-01 2.509958744049072266e-01 +-3.250834941864013672e-01 2.819657325744628906e-03 4.174815416336059570e-01 +-3.355050086975097656e-02 
-4.035353660583496094e-01 -3.605549335479736328e-01 +-3.662085533142089844e-01 -2.316244840621948242e-01 -2.756531238555908203e-01 +-2.576720714569091797e-01 -1.255595684051513672e-02 -4.626390933990478516e-01 +-3.275632858276367188e-01 2.991151809692382812e-02 -4.782438278198242188e-02 +4.056546688079833984e-01 1.594020128250122070e-01 7.798624038696289062e-02 +-2.715262174606323242e-01 -3.173813819885253906e-01 1.938788890838623047e-01 +3.270006179809570312e-02 -2.296169996261596680e-01 -2.338488101959228516e-01 +-1.381781101226806641e-01 -4.450683593750000000e-01 1.390277147293090820e-01 +4.581812620162963867e-01 -4.004166126251220703e-01 5.525112152099609375e-03 +-2.281215190887451172e-01 -1.310509443283081055e-01 -1.401650905609130859e-01 +-2.425242662429809570e-01 1.733251810073852539e-01 -4.973032474517822266e-01 +-1.258821487426757812e-01 -4.724828004837036133e-01 -5.991733074188232422e-02 +4.821944236755371094e-01 -1.722755432128906250e-01 -2.475099563598632812e-01 +2.750682830810546875e-02 4.665797948837280273e-01 4.664119482040405273e-01 +-3.053290843963623047e-01 -3.777220249176025391e-01 2.397775650024414062e-03 +2.908480167388916016e-01 -1.594734191894531250e-01 2.671622037887573242e-01 From d39eca955ce9a2501659bdb50038340d4f9c1ba6 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Mon, 18 Aug 2025 14:33:05 -0500 Subject: [PATCH 05/30] Reorganization for PR --- examples/3D_ibm_sphere_periodic/case.py | 107 ++++++ runs/3d_1sphere_filtering/case.py | 2 +- runs/3d_1sphere_periodic/case.py | 2 +- runs/phi01/case.py | 2 +- src/common/m_mpi_common.fpp | 20 +- src/post_process/m_data_input.f90 | 108 +++--- src/post_process/m_start_up.f90 | 12 +- src/simulation/m_additional_forcing.fpp | 16 +- src/simulation/m_checker.fpp | 2 +- src/simulation/m_compute_particle_forces.fpp | 6 +- src/simulation/m_data_output.fpp | 26 +- src/simulation/m_global_parameters.fpp | 10 +- src/simulation/m_mpi_proxy.fpp | 2 +- src/simulation/m_rhs.fpp | 40 +-- 
src/simulation/m_start_up.fpp | 23 +- src/simulation/m_time_steppers.fpp | 200 +++-------- src/simulation/m_volume_filtering.fpp | 357 +++++++++++++------ src/simulation/p_main.fpp | 3 +- toolchain/mfc/run/case_dicts.py | 2 +- 19 files changed, 552 insertions(+), 388 deletions(-) create mode 100644 examples/3D_ibm_sphere_periodic/case.py diff --git a/examples/3D_ibm_sphere_periodic/case.py b/examples/3D_ibm_sphere_periodic/case.py new file mode 100644 index 0000000000..41938f69fd --- /dev/null +++ b/examples/3D_ibm_sphere_periodic/case.py @@ -0,0 +1,107 @@ +import json +import math + +Mu = 1.84e-05 +gam_a = 1.4 + +D = 0.1 + +# Configuring case dictionary +print( + json.dumps( + { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -2.5 * D, + "y_domain%end": 2.5 * D, + # z direction + "z_domain%beg": -2.5 * D, + "z_domain%end": 2.5 * D, + "cyl_coord": "F", + "m": 99, + "n": 99, + "p": 99, + "dt": 1.0e-6, + "t_step_start": 0, + "t_step_stop": 200, # 3000 + "t_step_save": 10, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-16, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "wave_speeds": 1, + # Periodic BCs + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": 
"T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "parallel_io": "T", + # Patch: Constant Tube filled with air + # Specify the cylindrical air tube grid geometry + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 5 * D, + "patch_icpp(1)%length_z": 5 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": 527.2e00, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": 10918.2549, + "patch_icpp(1)%alpha_rho(1)": 0.2199, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + "patch_ib(1)%geometry": 8, + "patch_ib(1)%x_centroid": -3.0e-3, + "patch_ib(1)%y_centroid": 0.0, + "patch_ib(1)%z_centroid": 0.0, + "patch_ib(1)%radius": D / 2, + "patch_ib(1)%slip": "T", + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": 7535533.2, + } + ) +) diff --git a/runs/3d_1sphere_filtering/case.py b/runs/3d_1sphere_filtering/case.py index 9a8c4b1f4c..0964ea5dd4 100644 --- a/runs/3d_1sphere_filtering/case.py +++ b/runs/3d_1sphere_filtering/case.py @@ -138,7 +138,7 @@ "periodic_forcing": "T", "periodic_ibs": "T", "compute_CD": "F", - "fourier_transform_filtering": "T", + "volume_filtering_momentum_eqn": "T", "u_inf_ref": v1, "rho_inf_ref": rho, diff --git a/runs/3d_1sphere_periodic/case.py b/runs/3d_1sphere_periodic/case.py index 857841ad0c..f4512b5f00 100644 --- a/runs/3d_1sphere_periodic/case.py +++ b/runs/3d_1sphere_periodic/case.py @@ -138,7 +138,7 @@ "periodic_ibs": "T", #"compute_CD_vi": "F", #"compute_CD_si": "F", - #"fourier_transform_filtering": "T", + #"volume_filtering_momentum_eqn": 
"T", "u_inf_ref": v1, "rho_inf_ref": rho, diff --git a/runs/phi01/case.py b/runs/phi01/case.py index 56390a1943..8e7a5bff4b 100644 --- a/runs/phi01/case.py +++ b/runs/phi01/case.py @@ -143,7 +143,7 @@ "periodic_forcing": "T", "periodic_ibs": "T", "compute_CD": "F", - "fourier_transform_filtering": "T", + "volume_filtering_momentum_eqn": "T", "u_inf_ref": v1, "rho_inf_ref": rho, diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 8214120fe7..662d096665 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -101,7 +101,7 @@ contains allocate (buff_recv(0:ubound(buff_send, 1))) #ifdef MFC_SIMULATION - if (fourier_transform_filtering) then + if (volume_filtering_momentum_eqn) then @:ALLOCATE(buff_send_scalarfield(0:-1 + buff_size*1* & & (m + 2*buff_size + 1)* & & (n + 2*buff_size + 1)* & @@ -153,7 +153,7 @@ contains !! @param levelset closest distance from every cell to the IB !! @param levelset_norm normalized vector from every cell to the closest point to the IB !! 
@param beta Eulerian void fraction from lagrangian bubbles - subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta, R_u_stat, R_mu_stat, F_IMET_stat) + subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) type(scalar_field), & dimension(sys_size), & @@ -174,9 +174,9 @@ contains type(scalar_field), & intent(in), optional :: beta - type(scalar_field), dimension(2:4), intent(in), optional :: R_u_stat - type(scalar_field), dimension(2:4), intent(in), optional :: R_mu_stat - type(scalar_field), dimension(2:4), intent(in), optional :: F_IMET_stat + type(scalar_field), dimension(2:4), intent(in), optional :: stat_reynolds_stress + type(scalar_field), dimension(2:4), intent(in), optional :: stat_eff_visc + type(scalar_field), dimension(2:4), intent(in), optional :: stat_int_mom_exch integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start @@ -191,7 +191,7 @@ contains if (present(beta)) then alt_sys = sys_size + 1 - else if (present(R_u_stat) .and. present(R_mu_stat) .and. present(F_IMET_stat)) then + else if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. present(stat_int_mom_exch)) then alt_sys = sys_size + 9 else alt_sys = sys_size @@ -201,15 +201,15 @@ contains MPI_IO_DATA%var(i)%sf => q_cons_vf(i)%sf(0:m, 0:n, 0:p) end do - if (present(R_u_stat) .and. present(R_mu_stat) .and. present(F_IMET_stat)) then + if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. 
present(stat_int_mom_exch)) then do i = sys_size+1, sys_size+3 - MPI_IO_DATA%var(i)%sf => R_u_stat(i-sys_size+1)%sf(0:m, 0:n, 0:p) + MPI_IO_DATA%var(i)%sf => stat_reynolds_stress(i-sys_size+1)%sf(0:m, 0:n, 0:p) end do do i = sys_size+4, sys_size+6 - MPI_IO_DATA%var(i)%sf => R_mu_stat(i-sys_size-2)%sf(0:m, 0:n, 0:p) + MPI_IO_DATA%var(i)%sf => stat_eff_visc(i-sys_size-2)%sf(0:m, 0:n, 0:p) end do do i = sys_size+7, sys_size+9 - MPI_IO_DATA%var(i)%sf => F_IMET_stat(i-sys_size-5)%sf(0:m, 0:n, 0:p) + MPI_IO_DATA%var(i)%sf => stat_int_mom_exch(i-sys_size-5)%sf(0:m, 0:n, 0:p) end do end if diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 69b13707b1..1efc1b97d3 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -61,9 +61,9 @@ end subroutine s_read_abstract_data_files ! type(scalar_field), public :: ib_markers !< type(integer_field), public :: ib_markers - type(scalar_field), allocatable, dimension(:), public :: R_u_stat - type(scalar_field), allocatable, dimension(:), public :: R_mu_stat - type(scalar_field), allocatable, dimension(:), public :: F_IMET_stat + type(scalar_field), allocatable, dimension(:), public :: stat_reynolds_stress + type(scalar_field), allocatable, dimension(:), public :: stat_eff_visc + type(scalar_field), allocatable, dimension(:), public :: stat_int_mom_exch procedure(s_read_abstract_data_files), pointer :: s_read_data_files => null() @@ -463,9 +463,9 @@ subroutine s_read_parallel_data_files(t_step) if (ib) then if (q_filtered_wrt) then call s_initialize_mpi_data(q_cons_vf, ib_markers, & - R_u_stat=R_u_stat, & - R_mu_stat=R_mu_stat, & - F_IMET_stat=F_IMET_stat) + stat_reynolds_stress=stat_reynolds_stress, & + stat_eff_visc=stat_eff_visc, & + stat_int_mom_exch=stat_int_mom_exch) else call s_initialize_mpi_data(q_cons_vf, ib_markers) end if @@ -1345,12 +1345,12 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf((m + 1) - j, 0:n, 0:p) else 
do i = 2, 4 - R_u_stat(i)%sf(-j, 0:n, 0:p) = & - R_u_stat(i)%sf((m + 1) - j, 0:n, 0:p) - R_mu_stat(i)%sf(-j, 0:n, 0:p) = & - R_mu_stat(i)%sf((m + 1) - j, 0:n, 0:p) - F_IMET_stat(i)%sf(-j, 0:n, 0:p) = & - F_IMET_stat(i)%sf((m + 1) - j, 0:n, 0:p) + stat_reynolds_stress(i)%sf(-j, 0:n, 0:p) = & + stat_reynolds_stress(i)%sf((m + 1) - j, 0:n, 0:p) + stat_eff_visc(i)%sf(-j, 0:n, 0:p) = & + stat_eff_visc(i)%sf((m + 1) - j, 0:n, 0:p) + stat_int_mom_exch(i)%sf(-j, 0:n, 0:p) = & + stat_int_mom_exch(i)%sf((m + 1) - j, 0:n, 0:p) end do end if end do @@ -1376,12 +1376,12 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(j - 1, 0:n, 0:p) else do i = 2, 4 - R_u_stat(i)%sf(m + j, 0:n, 0:p) = & - R_u_stat(i)%sf(j - 1, 0:n, 0:p) - R_mu_stat(i)%sf(m + j, 0:n, 0:p) = & - R_mu_stat(i)%sf(j - 1, 0:n, 0:p) - F_IMET_stat(i)%sf(m + j, 0:n, 0:p) = & - F_IMET_stat(i)%sf(j - 1, 0:n, 0:p) + stat_reynolds_stress(i)%sf(m + j, 0:n, 0:p) = & + stat_reynolds_stress(i)%sf(j - 1, 0:n, 0:p) + stat_eff_visc(i)%sf(m + j, 0:n, 0:p) = & + stat_eff_visc(i)%sf(j - 1, 0:n, 0:p) + stat_int_mom_exch(i)%sf(m + j, 0:n, 0:p) = & + stat_int_mom_exch(i)%sf(j - 1, 0:n, 0:p) end do end if end do @@ -1414,12 +1414,12 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, (n + 1) - j, 0:p) else do i = 2, 4 - R_u_stat(i)%sf(:, -j, 0:p) = & - R_u_stat(i)%sf(:, (n + 1) - j, 0:p) - R_mu_stat(i)%sf(:, -j, 0:p) = & - R_mu_stat(i)%sf(:, (n + 1) - j, 0:p) - F_IMET_stat(i)%sf(:, -j, 0:p) = & - F_IMET_stat(i)%sf(:, (n + 1) - j, 0:p) + stat_reynolds_stress(i)%sf(:, -j, 0:p) = & + stat_reynolds_stress(i)%sf(:, (n + 1) - j, 0:p) + stat_eff_visc(i)%sf(:, -j, 0:p) = & + stat_eff_visc(i)%sf(:, (n + 1) - j, 0:p) + stat_int_mom_exch(i)%sf(:, -j, 0:p) = & + stat_int_mom_exch(i)%sf(:, (n + 1) - j, 0:p) end do end if end do @@ -1445,12 +1445,12 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, j - 1, 0:p) else do i = 2, 4 - 
R_u_stat(i)%sf(:, n + j, 0:p) = & - R_u_stat(i)%sf(:, j - 1, 0:p) - R_mu_stat(i)%sf(:, n + j, 0:p) = & - R_mu_stat(i)%sf(:, j - 1, 0:p) - F_IMET_stat(i)%sf(:, n + j, 0:p) = & - F_IMET_stat(i)%sf(:, j - 1, 0:p) + stat_reynolds_stress(i)%sf(:, n + j, 0:p) = & + stat_reynolds_stress(i)%sf(:, j - 1, 0:p) + stat_eff_visc(i)%sf(:, n + j, 0:p) = & + stat_eff_visc(i)%sf(:, j - 1, 0:p) + stat_int_mom_exch(i)%sf(:, n + j, 0:p) = & + stat_int_mom_exch(i)%sf(:, j - 1, 0:p) end do end if end do @@ -1483,12 +1483,12 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, :, (p + 1) - j) else do i = 2, 4 - R_u_stat(i)%sf(:, :, -j) = & - R_u_stat(i)%sf(:, :, (p + 1) - j) - R_mu_stat(i)%sf(:, :, -j) = & - R_mu_stat(i)%sf(:, :, (p + 1) - j) - F_IMET_stat(i)%sf(:, :, -j) = & - F_IMET_stat(i)%sf(:, :, (p + 1) - j) + stat_reynolds_stress(i)%sf(:, :, -j) = & + stat_reynolds_stress(i)%sf(:, :, (p + 1) - j) + stat_eff_visc(i)%sf(:, :, -j) = & + stat_eff_visc(i)%sf(:, :, (p + 1) - j) + stat_int_mom_exch(i)%sf(:, :, -j) = & + stat_int_mom_exch(i)%sf(:, :, (p + 1) - j) end do end if end do @@ -1515,12 +1515,12 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, :, j - 1) else do i = 2, 4 - R_u_stat(i)%sf(:, :, p + j) = & - R_u_stat(i)%sf(:, :, j - 1) - R_mu_stat(i)%sf(:, :, p + j) = & - R_mu_stat(i)%sf(:, :, j - 1) - F_IMET_stat(i)%sf(:, :, p + j) = & - F_IMET_stat(i)%sf(:, :, j - 1) + stat_reynolds_stress(i)%sf(:, :, p + j) = & + stat_reynolds_stress(i)%sf(:, :, j - 1) + stat_eff_visc(i)%sf(:, :, p + j) = & + stat_eff_visc(i)%sf(:, :, j - 1) + stat_int_mom_exch(i)%sf(:, :, p + j) = & + stat_int_mom_exch(i)%sf(:, :, j - 1) end do end if end do @@ -1559,9 +1559,9 @@ subroutine s_initialize_data_input_module allocate (q_prim_vf(1:sys_size)) if (bubbles_lagrange) allocate (q_particle(1)) - if (q_filtered_wrt) allocate (R_u_stat(2:4)) - if (q_filtered_wrt) allocate (R_mu_stat(2:4)) - if (q_filtered_wrt) allocate 
(F_IMET_stat(2:4)) + if (q_filtered_wrt) allocate (stat_reynolds_stress(2:4)) + if (q_filtered_wrt) allocate (stat_eff_visc(2:4)) + if (q_filtered_wrt) allocate (stat_int_mom_exch(2:4)) ! Allocating the parts of the conservative and primitive variables ! that do require the direct knowledge of the dimensionality of the @@ -1602,13 +1602,13 @@ subroutine s_initialize_data_input_module if (q_filtered_wrt) then do i = 2, 4 - allocate (R_u_stat(i)%sf(-buff_size:m + buff_size, & + allocate (stat_reynolds_stress(i)%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & -buff_size:p + buff_size)) - allocate (R_mu_stat(i)%sf(-buff_size:m + buff_size, & + allocate (stat_eff_visc(i)%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & -buff_size:p + buff_size)) - allocate (F_IMET_stat(i)%sf(-buff_size:m + buff_size, & + allocate (stat_int_mom_exch(i)%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & -buff_size:p + buff_size)) end do @@ -1708,17 +1708,17 @@ subroutine s_finalize_data_input_module if (q_filtered_wrt) then do i = 2, 4 - deallocate (R_u_stat(i)%sf) + deallocate (stat_reynolds_stress(i)%sf) end do - deallocate(R_u_stat) + deallocate(stat_reynolds_stress) do i = 2, 4 - deallocate (R_mu_stat(i)%sf) + deallocate (stat_eff_visc(i)%sf) end do - deallocate(R_mu_stat) + deallocate(stat_eff_visc) do i = 2, 4 - deallocate (F_IMET_stat(i)%sf) + deallocate (stat_int_mom_exch(i)%sf) end do - deallocate(F_IMET_stat) + deallocate(stat_int_mom_exch) end if s_read_data_files => null() diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 8a61c72e07..b454764c3e 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -329,22 +329,22 @@ subroutine s_save_data(t_step, varname, pres, c, H) if (q_filtered_wrt) then ! 
filtered cons vars do i = 2, 4 - q_sf = R_u_stat(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) - write (varname, '(A,I0)') 'R_u_stats', i + q_sf = stat_reynolds_stress(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'stat_reynolds_stresss', i call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' end do do i = 2, 4 - q_sf = R_mu_stat(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) - write (varname, '(A,I0)') 'R_mu_stats', i + q_sf = stat_eff_visc(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'stat_eff_viscs', i call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' end do do i = 2, 4 - q_sf = F_IMET_stat(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) - write (varname, '(A,I0)') 'F_IMET_stats', i + q_sf = stat_int_mom_exch(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'stat_int_mom_exchs', i call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp index 2971ba0e9a..cc90cce4ef 100644 --- a/src/simulation/m_additional_forcing.fpp +++ b/src/simulation/m_additional_forcing.fpp @@ -57,9 +57,9 @@ contains do i = 0, m do j = 0, n do k = 0, p - rhs_vf(1)%sf(i, j, k) = rhs_vf(1)%sf(i, j, k) + q_periodic_force(7)%sf(i, j, k) * fluid_indicator_function_I%sf(i, j, k) ! continuity - rhs_vf(2)%sf(i, j, k) = rhs_vf(2)%sf(i, j, k) + q_periodic_force(1)%sf(i, j, k) * fluid_indicator_function_I%sf(i, j, k) * fluid_indicator_function_I%sf(i, j, k) ! x momentum - rhs_vf(5)%sf(i, j, k) = rhs_vf(5)%sf(i, j, k) + (q_periodic_force(4)%sf(i, j, k) + q_periodic_force(8)%sf(i, j, k)) * fluid_indicator_function_I%sf(i, j, k) ! energy + rhs_vf(1)%sf(i, j, k) = rhs_vf(1)%sf(i, j, k) + q_periodic_force(7)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! 
continuity + rhs_vf(2)%sf(i, j, k) = rhs_vf(2)%sf(i, j, k) + q_periodic_force(1)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! x momentum + rhs_vf(5)%sf(i, j, k) = rhs_vf(5)%sf(i, j, k) + (q_periodic_force(4)%sf(i, j, k) + q_periodic_force(8)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) ! energy end do end do end do @@ -80,15 +80,15 @@ contains do i = 0, m do j = 0, n do k = 0, p - q_spatial_avg(4) = q_spatial_avg(4) + q_cons_vf(1)%sf(i, j, k) * fluid_indicator_function_I%sf(i, j, k) + q_spatial_avg(4) = q_spatial_avg(4) + q_cons_vf(1)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) q_spatial_avg(5) = q_spatial_avg(5) + (0.4_wp/287._wp * (q_cons_vf(5)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) & - 0.5_wp * ((q_cons_vf(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2 & + (q_cons_vf(3)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2 & - + (q_cons_vf(4)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2))) * fluid_indicator_function_I%sf(i, j, k) + + (q_cons_vf(4)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2))) * fluid_indicator_function%sf(i, j, k) - q_spatial_avg(1) = q_spatial_avg(1) + (q_cons_vf(2)%sf(i, j, k)) * fluid_indicator_function_I%sf(i, j, k) - q_spatial_avg(2) = q_spatial_avg(2) + (q_cons_vf(3)%sf(i, j, k)) * fluid_indicator_function_I%sf(i, j, k) - q_spatial_avg(3) = q_spatial_avg(3) + (q_cons_vf(4)%sf(i, j, k)) * fluid_indicator_function_I%sf(i, j, k) + q_spatial_avg(1) = q_spatial_avg(1) + (q_cons_vf(2)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) + q_spatial_avg(2) = q_spatial_avg(2) + (q_cons_vf(3)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) + q_spatial_avg(3) = q_spatial_avg(3) + (q_cons_vf(4)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) end do end do end do diff --git a/src/simulation/m_checker.fpp b/src/simulation/m_checker.fpp index 04c1076f2a..d0c2c278ec 100644 --- a/src/simulation/m_checker.fpp +++ b/src/simulation/m_checker.fpp @@ -350,7 +350,7 @@ contains #:for BOUND in ['beg', 'end'] 
@:PROHIBIT(periodic_forcing .and. bc_${X}$%${BOUND}$ /= BC_PERIODIC, & "Periodic forcing requires all BCs to be periodic") - @:PROHIBIT(fourier_transform_filtering .and. bc_${X}$%${BOUND}$ /= BC_PERIODIC, & + @:PROHIBIT(volume_filtering_momentum_eqn .and. bc_${X}$%${BOUND}$ /= BC_PERIODIC, & "Explicit filtering of flow data requires all BCs to be periodic due to fourier transform") #:endfor #:endfor diff --git a/src/simulation/m_compute_particle_forces.fpp b/src/simulation/m_compute_particle_forces.fpp index fd84657f5f..8a1ef5f092 100644 --- a/src/simulation/m_compute_particle_forces.fpp +++ b/src/simulation/m_compute_particle_forces.fpp @@ -27,8 +27,8 @@ contains end subroutine s_initialize_particle_forces_module - subroutine s_compute_drag_coefficient(div_pres_visc_stress) - type(scalar_field), dimension(momxb:momxe), intent(in) :: div_pres_visc_stress + subroutine s_compute_drag_coefficient(pres_visc_stress) + type(scalar_field), dimension(momxb:momxe), intent(in) :: pres_visc_stress real(wp), dimension(0:num_ibs) :: FD_global real(wp) :: drag_coeff integer :: i, j, k @@ -44,7 +44,7 @@ contains do k = 0, p !$acc atomic FD_calc(ib_markers%sf(i, j, k)) = FD_calc(ib_markers%sf(i, j, k)) & - + div_pres_visc_stress(momxb)%sf(i, j, k) * dx(i) * dy(j) * dz(k) + + pres_visc_stress(momxb)%sf(i, j, k) * dx(i) * dy(j) * dz(k) end do end do end do diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 63b8fa2b32..f43cebc798 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -76,7 +76,7 @@ contains !! @param q_cons_vf Conservative variables !! @param q_prim_vf Primitive variables !! 
@param t_step Current time step - subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta, R_u_stat, R_mu_stat, F_IMET_stat) + subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) type(scalar_field), & dimension(sys_size), & @@ -94,14 +94,14 @@ contains type(scalar_field), & intent(inout), optional :: beta - type(scalar_field), dimension(2:4), intent(inout), optional :: R_u_stat - type(scalar_field), dimension(2:4), intent(inout), optional :: R_mu_stat - type(scalar_field), dimension(2:4), intent(inout), optional :: F_IMET_stat + type(scalar_field), dimension(2:4), intent(inout), optional :: stat_reynolds_stress + type(scalar_field), dimension(2:4), intent(inout), optional :: stat_eff_visc + type(scalar_field), dimension(2:4), intent(inout), optional :: stat_int_mom_exch if (.not. parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta) else - call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, R_u_stat, R_mu_stat, F_IMET_stat) + call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) end if end subroutine s_write_data_files @@ -790,15 +790,15 @@ contains !! @param q_prim_vf Cell-average primitive variables !! @param t_step Current time-step !! 
@param beta Eulerian void fraction from lagrangian bubbles - subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, R_u_stat, R_mu_stat, F_IMET_stat) + subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: t_step type(scalar_field), intent(inout), optional :: beta - type(scalar_field), dimension(2:4), intent(inout), optional :: R_u_stat - type(scalar_field), dimension(2:4), intent(inout), optional :: R_mu_stat - type(scalar_field), dimension(2:4), intent(inout), optional :: F_IMET_stat + type(scalar_field), dimension(2:4), intent(inout), optional :: stat_reynolds_stress + type(scalar_field), dimension(2:4), intent(inout), optional :: stat_eff_visc + type(scalar_field), dimension(2:4), intent(inout), optional :: stat_int_mom_exch #ifdef MFC_MPI @@ -820,7 +820,7 @@ contains if (present(beta)) then alt_sys = sys_size + 1 - else if (present(R_u_stat) .and. present(R_mu_stat) .and. present(F_IMET_stat)) then + else if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. present(stat_int_mom_exch)) then alt_sys = sys_size + 9 else alt_sys = sys_size @@ -905,9 +905,9 @@ contains ! Initialize MPI data I/O if (ib) then - if (present(R_u_stat) .and. present(R_mu_stat) .and. present(F_IMET_stat)) then + if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. 
present(stat_int_mom_exch)) then call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, & - R_u_stat=R_u_stat, R_mu_stat=R_mu_stat, F_IMET_stat=F_IMET_stat) + stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, stat_int_mom_exch=stat_int_mom_exch) else call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm) end if @@ -965,7 +965,7 @@ contains mpi_p, status, ierr) end do end if - else if (fourier_transform_filtering) then + else if (volume_filtering_momentum_eqn) then do i = 1, alt_sys var_MOK = int(i, MPI_OFFSET_KIND) diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index eaea7c04e3..a71e17a69d 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -508,7 +508,7 @@ module m_global_parameters real(wp) :: rho_inf_ref !< reference freestream density real(wp) :: T_inf_ref !< reference freestream temperature logical :: periodic_forcing - logical :: fourier_transform_filtering + logical :: volume_filtering_momentum_eqn logical :: store_levelset logical :: slab_domain_decomposition logical :: compute_autocorrelation @@ -797,7 +797,7 @@ contains rho_inf_ref = dflt_real T_inf_ref = dflt_real periodic_forcing = .false. - fourier_transform_filtering = .false. + volume_filtering_momentum_eqn = .false. store_levelset = .true. slab_domain_decomposition = .false. compute_autocorrelation = .false. 
@@ -1155,7 +1155,7 @@ contains elseif (bubbles_lagrange) then allocate (MPI_IO_DATA%view(1:sys_size + 1)) allocate (MPI_IO_DATA%var(1:sys_size + 1)) - else if (fourier_transform_filtering) then + else if (volume_filtering_momentum_eqn) then allocate (MPI_IO_DATA%view(1:sys_size+9)) allocate (MPI_IO_DATA%var(1:sys_size+9)) else @@ -1177,7 +1177,7 @@ contains allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do - else if (fourier_transform_filtering) then + else if (volume_filtering_momentum_eqn) then do i = sys_size+1, sys_size+9 allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() @@ -1354,7 +1354,7 @@ contains do i = 1, sys_size + 1 MPI_IO_DATA%var(i)%sf => null() end do - else if (fourier_transform_filtering) then + else if (volume_filtering_momentum_eqn) then do i = 1, sys_size+9 MPI_IO_DATA%var(i)%sf => null() end do diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 6171f9faf3..c2579cc057 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -92,7 +92,7 @@ contains & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage', & - & 'periodic_ibs', 'compute_CD', 'periodic_forcing', 'fourier_transform_filtering', & + & 'periodic_ibs', 'compute_CD', 'periodic_forcing', 'volume_filtering_momentum_eqn', & & 'store_levelset', 'slab_domain_decomposition', 'compute_autocorrelation' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index 884e6a07ad..eea4a49260 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -609,7 +609,7 @@ contains end subroutine s_initialize_rhs_module - subroutine s_compute_rhs(q_cons_vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb, rhs_pb, mv, rhs_mv, t_step, time_avg, 
div_pres_visc_stress) + subroutine s_compute_rhs(q_cons_vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb, rhs_pb, mv, rhs_mv, t_step, time_avg, pres_visc_stress) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), intent(inout) :: q_T_sf @@ -620,7 +620,7 @@ contains real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv, rhs_mv integer, intent(in) :: t_step real(wp), intent(inout) :: time_avg - type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: div_pres_visc_stress + type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: pres_visc_stress real(wp), dimension(0:m, 0:n, 0:p) :: nbub real(wp) :: t_start, t_finish @@ -811,7 +811,7 @@ contains q_cons_qp, & q_prim_qp, & flux_src_n(id), & - div_pres_visc_stress) + pres_visc_stress) call nvtxEndRange ! RHS additions for hypoelasticity @@ -831,7 +831,7 @@ contains dq_prim_dx_qp(1)%vf, & dq_prim_dy_qp(1)%vf, & dq_prim_dz_qp(1)%vf, & - div_pres_visc_stress) + pres_visc_stress) call nvtxEndRange end if @@ -938,14 +938,14 @@ contains end subroutine s_compute_rhs - subroutine s_compute_advection_source_term(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, div_pres_visc_stress) + subroutine s_compute_advection_source_term(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, pres_visc_stress) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf type(vector_field), intent(inout) :: q_cons_vf type(vector_field), intent(inout) :: q_prim_vf type(vector_field), intent(inout) :: flux_src_n_vf - type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: div_pres_visc_stress + type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: pres_visc_stress integer :: i, j, k, l, q @@ -999,14 +999,14 @@ contains end do ! particle forces loop, x-dir - if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. 
volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n do i = 0, m !$acc loop seq do l = momxb, momxe - div_pres_visc_stress(l)%sf(i, j, k) = 1._wp/dx(i) * & + pres_visc_stress(l)%sf(i, j, k) = 1._wp/dx(i) * & (flux_n(1)%vf(l)%sf(i-1, j, k) - & flux_n(1)%vf(l)%sf(i, j, k)) - 0.5_wp/dx(i) * & (q_cons_vf%vf(2)%sf(i+1, j, k)*q_cons_vf%vf(l)%sf(i+1, j, k)/q_cons_vf%vf(1)%sf(i+1, j, k) - & @@ -1128,14 +1128,14 @@ contains end do ! particle forces loop, y-dir - if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n do i = 0, m !$acc loop seq do l = momxb, momxe - div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & + pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & (flux_n(2)%vf(l)%sf(i, j-1, k) - & flux_n(2)%vf(l)%sf(i, j, k)) - 0.5_wp/dy(j) * & (q_cons_vf%vf(3)%sf(i, j+1, k)*q_cons_vf%vf(l)%sf(i, j+1, k)/q_cons_vf%vf(1)%sf(i, j+1, k) - & @@ -1353,14 +1353,14 @@ contains end if ! particle forces loop, z-dir - if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. volume_filtering_momentum_eqn) .and. 
present(pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n do i = 0, m !$acc loop seq do l = momxb, momxe - div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & + pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & (flux_n(3)%vf(l)%sf(i, j, k-1) - & flux_n(3)%vf(l)%sf(i, j, k)) - 0.5_wp/dz(k) * & (q_cons_vf%vf(4)%sf(i, j, k+1)*q_cons_vf%vf(l)%sf(i, j, k+1)/q_cons_vf%vf(1)%sf(i, j, k+1) - & @@ -1552,14 +1552,14 @@ contains end subroutine s_compute_advection_source_term subroutine s_compute_additional_physics_rhs(idir, q_prim_vf, rhs_vf, flux_src_n, & - dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf, div_pres_visc_stress) + dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf, pres_visc_stress) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf type(scalar_field), dimension(sys_size), intent(in) :: flux_src_n type(scalar_field), dimension(sys_size), intent(in) :: dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf - type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: div_pres_visc_stress + type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: pres_visc_stress integer :: i, j, k, l @@ -1596,14 +1596,14 @@ contains end do ! particle momentum exchange, viscous stress tensor, x-dir - if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. volume_filtering_momentum_eqn) .and. 
present(pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n do i = 0, m !$acc loop seq do l = momxb, momxe - div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dx(i) * & + pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dx(i) * & (flux_src_n(l)%sf(i-1, j, k) - & flux_src_n(l)%sf(i, j, k)) end do @@ -1695,14 +1695,14 @@ contains end if ! particle momentum exchange, viscous stress tensor, y-dir - if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n do i = 0, m !$acc loop seq do l = momxb, momxe - div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & + pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & (flux_src_n(l)%sf(i, j-1, k) - & flux_src_n(l)%sf(i, j, k)) end do @@ -1797,14 +1797,14 @@ contains end do ! particle momentum exchange, viscous stress tensor, z-dir - if ((compute_CD .or. fourier_transform_filtering) .and. present(div_pres_visc_stress)) then + if ((compute_CD .or. volume_filtering_momentum_eqn) .and. 
present(pres_visc_stress)) then !$acc parallel loop collapse(3) gang vector default(present) do k = 0, p do j = 0, n do i = 0, m !$acc loop seq do l = momxb, momxe - div_pres_visc_stress(l)%sf(i, j, k) = div_pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & + pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & (flux_src_n(l)%sf(i, j, k-1) - & flux_src_n(l)%sf(i, j, k)) end do diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 62b641da19..c34bd05321 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -95,6 +95,8 @@ module m_start_up use m_volume_filtering + use m_compute_statistics + implicit none private; public :: s_read_input_file, & @@ -188,7 +190,7 @@ contains hyperelasticity, R0ref, num_bc_patches, Bx0, powell, & cont_damage, tau_star, cont_damage_s, alpha_bar, & periodic_ibs, compute_CD, mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref, & - periodic_forcing, fourier_transform_filtering, store_levelset, & + periodic_forcing, volume_filtering_momentum_eqn, store_levelset, & slab_domain_decomposition, compute_autocorrelation ! Checking that an input file has been provided by the user. If it @@ -1341,6 +1343,11 @@ contains if (relax) call s_infinite_relaxation_k(q_cons_ts(1)%vf) + ! Volume filter flow variables, compute unclosed terms and their statistics + if (volume_filtering_momentum_eqn) then + call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf) + end if + ! 
Time-stepping loop controls t_step = t_step + 1 @@ -1421,9 +1428,9 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") do i = 2, 4 - !$acc update host(R_u_stat(i)%sf) - !$acc update host(R_mu_stat(i)%sf) - !$acc update host(F_IMET_stat(i)%sf) + !$acc update host(stat_reynolds_stress(i)%sf) + !$acc update host(stat_eff_visc(i)%sf) + !$acc update host(stat_int_mom_exch(i)%sf) end do do i = 1, sys_size !$acc update host(q_cons_ts(1)%vf(i)%sf) @@ -1457,9 +1464,9 @@ contains call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else - if (fourier_transform_filtering) then + if (volume_filtering_momentum_eqn) then call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, & - R_u_stat=R_u_stat, R_mu_stat=R_mu_stat, F_IMET_stat=F_IMET_stat) + stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, stat_int_mom_exch=stat_int_mom_exch) else call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count) end if @@ -1579,7 +1586,7 @@ contains call s_initialize_particle_forces_module() call s_initialize_additional_forcing_module() - if (fourier_transform_filtering) call s_initialize_fftw_explicit_filter_module() + if (volume_filtering_momentum_eqn) call s_initialize_fftw_explicit_filter_module() call s_initialize_statistics_module() @@ -1727,7 +1734,7 @@ contains call s_finalize_particle_forces_module() call s_finalize_additional_forcing_module() - if (fourier_transform_filtering) call s_finalize_fftw_explicit_filter_module + if (volume_filtering_momentum_eqn) call s_finalize_fftw_explicit_filter_module ! Terminating MPI execution environment call s_mpi_finalize() diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index b88766eccd..9fdbb519e0 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -87,26 +87,13 @@ module m_time_steppers integer, private :: num_ts !< !! 
Number of time stages in the time-stepping scheme - type(scalar_field), allocatable, dimension(:) :: div_pres_visc_stress - - type(scalar_field), allocatable, dimension(:) :: q_cons_filtered - - type(vector_field), allocatable, dimension(:) :: pt_Re_stress - type(vector_field), allocatable, dimension(:) :: R_mu - type(scalar_field), allocatable, dimension(:) :: pres_visc_stress_filtered - - type(scalar_field) :: mag_div_Ru - type(scalar_field) :: mag_div_R_mu - type(scalar_field) :: mag_F_IMET - - type(scalar_field), allocatable, dimension(:) :: R_u_stat - type(scalar_field), allocatable, dimension(:) :: R_mu_stat - type(scalar_field), allocatable, dimension(:) :: F_IMET_stat + type(scalar_field), allocatable, dimension(:) :: stat_reynolds_stress + type(scalar_field), allocatable, dimension(:) :: stat_eff_visc + type(scalar_field), allocatable, dimension(:) :: stat_int_mom_exch !$acc declare create(q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, rhs_ts_rkck, q_prim_ts, rhs_mv, rhs_pb, max_dt) - !$acc declare create(div_pres_visc_stress, q_cons_filtered, pt_Re_stress, R_mu, pres_visc_stress_filtered) - !$acc declare create(mag_div_Ru, mag_div_R_mu, mag_F_IMET) - !$acc declare create(R_u_stat, R_mu_stat, F_IMET_stat) + + !$acc declare create(stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) contains @@ -382,89 +369,30 @@ contains end do end do - if (compute_CD .or. fourier_transform_filtering) then - @:ALLOCATE(div_pres_visc_stress(momxb:momxe)) + if (compute_CD .or. 
volume_filtering_momentum_eqn) then + @:ALLOCATE(pres_visc_stress(momxb:momxe)) do i = momxb, momxe - @:ALLOCATE(div_pres_visc_stress(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + @:ALLOCATE(pres_visc_stress(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & idwbuff(2)%beg:idwbuff(2)%end, & idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(div_pres_visc_stress(i)) + @:ACC_SETUP_SFs(pres_visc_stress(i)) end do end if - if (fourier_transform_filtering) then - @:ALLOCATE(q_cons_filtered(1:sys_size)) - do i = 1, sys_size - @:ALLOCATE(q_cons_filtered(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(q_cons_filtered(i)) - end do - - @:ALLOCATE(pt_Re_stress(1:num_dims)) - do i = 1, num_dims - @:ALLOCATE(pt_Re_stress(i)%vf(1:num_dims)) - end do - do i = 1, num_dims - do j = 1, num_dims - @:ALLOCATE(pt_Re_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - end do - @:ACC_SETUP_VFs(pt_Re_stress(i)) - end do - - @:ALLOCATE(R_mu(1:num_dims)) - do i = 1, num_dims - @:ALLOCATE(R_mu(i)%vf(1:num_dims)) - end do - do i = 1, num_dims - do j = 1, num_dims - @:ALLOCATE(R_mu(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - end do - @:ACC_SETUP_VFs(R_mu(i)) - end do - - @:ALLOCATE(pres_visc_stress_filtered(1:num_dims)) - do i = 1, num_dims - @:ALLOCATE(pres_visc_stress_filtered(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(pres_visc_stress_filtered(i)) - end do - - @:ALLOCATE(mag_div_Ru%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(mag_div_Ru) - - @:ALLOCATE(mag_div_R_mu%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(mag_div_R_mu) - - 
@:ALLOCATE(mag_F_IMET%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(mag_F_IMET) - end if - - @:ALLOCATE(R_u_stat(2:4)) + @:ALLOCATE(stat_reynolds_stress(2:4)) do i = 2, 4 - @:ALLOCATE(R_u_stat(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(R_u_stat(i)) + @:ALLOCATE(stat_reynolds_stress(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(stat_reynolds_stress(i)) end do - @:ALLOCATE(R_mu_stat(2:4)) + @:ALLOCATE(stat_eff_visc(2:4)) do i = 2, 4 - @:ALLOCATE(R_mu_stat(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(R_mu_stat(i)) + @:ALLOCATE(stat_eff_visc(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(stat_eff_visc(i)) end do - @:ALLOCATE(F_IMET_stat(2:4)) + @:ALLOCATE(stat_int_mom_exch(2:4)) do i = 2, 4 - @:ALLOCATE(F_IMET_stat(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(F_IMET_stat(i)) + @:ALLOCATE(stat_int_mom_exch(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(stat_int_mom_exch(i)) end do end subroutine s_initialize_time_steppers_module @@ -789,35 +717,35 @@ contains call s_compute_periodic_forcing(q_cons_ts(1)%vf) end if - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, div_pres_visc_stress) + call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, pres_visc_stress) - if (fourier_transform_filtering) then - call s_apply_fftw_filter_cons(q_cons_ts(1)%vf, q_cons_filtered) - call s_setup_terms_filtering(q_cons_ts(1)%vf, pt_Re_stress, R_mu) - call s_apply_fftw_filter_tensor(pt_Re_stress, R_mu, q_cons_filtered, div_pres_visc_stress, pres_visc_stress_filtered) - call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, pt_Re_stress, mag_div_Ru) - call s_compute_R_mu(q_cons_filtered, R_mu, mag_div_R_mu) - call s_compute_interphase_momentum_exchange_term(pres_visc_stress_filtered, mag_F_IMET) - end if + ! if (volume_filtering_momentum_eqn) then + ! 
call s_apply_fftw_filter_cons(q_cons_ts(1)%vf, q_cons_filtered) + ! call s_setup_terms_filtering(q_cons_ts(1)%vf, reynolds_stress, eff_visc) + ! call s_apply_fftw_filter_tensor(reynolds_stress, eff_visc, q_cons_filtered, pres_visc_stress, int_mom_exch) + ! call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) + ! call s_compute_eff_visc(q_cons_filtered, eff_visc, mag_eff_visc) + ! call s_compute_interphase_momentum_exchange_term(int_mom_exch, mag_int_mom_exch) + ! end if - call s_autocorrelation_function(t_step+1, q_cons_ts(1)%vf) - if (t_step > 10) then - n_step = t_step - 10 - call s_compute_s_order_statistics(mag_div_Ru, n_step, R_u_stat, 1) - call s_compute_s_order_statistics(mag_div_R_mu, n_step, R_mu_stat, 2) - call s_compute_s_order_statistics(mag_F_IMET, n_step, F_IMET_stat, 3) - end if + ! call s_autocorrelation_function(t_step+1, q_cons_ts(1)%vf) + ! if (t_step > 10) then + ! n_step = t_step - 10 + ! call s_compute_s_order_statistics(mag_reynolds_stress, n_step, stat_reynolds_stress, 1) + ! call s_compute_s_order_statistics(mag_eff_visc, n_step, stat_eff_visc, 2) + ! call s_compute_s_order_statistics(mag_int_mom_exch, n_step, stat_int_mom_exch, 3) + ! end if - ! R_u_stat(2)%sf(0:m, 0:n, 0:p) = q_cons_filtered(6)%sf(0:m, 0:n, 0:p) - ! R_u_stat(3)%sf(0:m, 0:n, 0:p) = mag_div_Ru%sf(0:m, 0:n, 0:p) - ! R_u_stat(4)%sf(0:m, 0:n, 0:p) = mag_div_R_mu%sf(0:m, 0:n, 0:p) - ! R_mu_stat(2)%sf(0:m, 0:n, 0:p) = mag_F_IMET%sf(0:m, 0:n, 0:p) + ! stat_reynolds_stress(2)%sf(0:m, 0:n, 0:p) = q_cons_filtered(6)%sf(0:m, 0:n, 0:p) + ! stat_reynolds_stress(3)%sf(0:m, 0:n, 0:p) = mag_reynolds_stress%sf(0:m, 0:n, 0:p) + ! stat_reynolds_stress(4)%sf(0:m, 0:n, 0:p) = mag_eff_visc%sf(0:m, 0:n, 0:p) + ! 
stat_eff_visc(2)%sf(0:m, 0:n, 0:p) = mag_int_mom_exch%sf(0:m, 0:n, 0:p) if (compute_CD) then - call s_compute_drag_coefficient(div_pres_visc_stress) + call s_compute_drag_coefficient(pres_visc_stress) end if if (periodic_forcing) then @@ -1488,57 +1416,25 @@ contains @:DEALLOCATE(rhs_vf) end if - if (compute_CD .or. fourier_transform_filtering) then + if (compute_CD .or. volume_filtering_momentum_eqn) then do i = momxb, momxe - @:DEALLOCATE(div_pres_visc_stress(i)%sf) + @:DEALLOCATE(pres_visc_stress(i)%sf) end do - @:DEALLOCATE(div_pres_visc_stress) - end if - - if (fourier_transform_filtering) then - do i = 1, sys_size - @:DEALLOCATE(q_cons_filtered(i)%sf) - end do - @:DEALLOCATE(q_cons_filtered) - - do i = 1, num_dims - do j = 1, num_dims - @:DEALLOCATE(pt_Re_stress(i)%vf(j)%sf) - end do - @:DEALLOCATE(pt_Re_stress(i)%vf) - end do - @:DEALLOCATE(pt_Re_stress) - - do i = 1, num_dims - do j = 1, num_dims - @:DEALLOCATE(R_mu(i)%vf(j)%sf) - end do - @:DEALLOCATE(R_mu(i)%vf) - end do - @:DEALLOCATE(R_mu) - - do i = 1, num_dims - @:DEALLOCATE(pres_visc_stress_filtered(i)%sf) - end do - @:DEALLOCATE(pres_visc_stress_filtered) - - @:DEALLOCATE(mag_div_Ru%sf) - @:DEALLOCATE(mag_div_R_mu%sf) - @:DEALLOCATE(mag_F_IMET%sf) + @:DEALLOCATE(pres_visc_stress) end if do i = 2, 4 - @:DEALLOCATE(R_u_stat(i)%sf) + @:DEALLOCATE(stat_reynolds_stress(i)%sf) end do - @:DEALLOCATE(R_u_stat) + @:DEALLOCATE(stat_reynolds_stress) do i = 2, 4 - @:DEALLOCATE(R_mu_stat(i)%sf) + @:DEALLOCATE(stat_eff_visc(i)%sf) end do - @:DEALLOCATE(R_mu_stat) + @:DEALLOCATE(stat_eff_visc) do i = 2, 4 - @:DEALLOCATE(F_IMET_stat(i)%sf) + @:DEALLOCATE(stat_int_mom_exch(i)%sf) end do - @:DEALLOCATE(F_IMET_stat) + @:DEALLOCATE(stat_int_mom_exch) ! Writing the footer of and closing the run-time information file if (proc_rank == 0 .and. 
run_time_info) then diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index adb102df35..6b1e981bfc 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -25,11 +25,11 @@ module m_volume_filtering implicit none private; public :: s_initialize_fftw_explicit_filter_module, & - s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, s_initialize_filtered_fluid_indicator_function, & + s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & s_finalize_fftw_explicit_filter_module, & s_apply_fftw_filter_cons, s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield, & s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & - s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_R_mu, s_compute_interphase_momentum_exchange_term + s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity, s_compute_interphase_momentum_exchange #if !defined(MFC_OpenACC) include 'fftw3.f03' @@ -38,9 +38,25 @@ module m_volume_filtering integer :: ierr ! fluid indicator function (1 = fluid, 0 = otherwise) - type(scalar_field), public :: fluid_indicator_function_I + type(scalar_field), public :: fluid_indicator_function + type(scalar_field) :: filtered_fluid_indicator_function - !$acc declare create(fluid_indicator_function_I) + ! volume filtered conservative variables + type(scalar_field), allocatable, dimension(:) :: q_cons_filtered + + ! unclosed terms in momentum eqn + type(scalar_field), allocatable, dimension(:) :: pres_visc_stress + type(vector_field), allocatable, dimension(:) :: reynolds_stress + type(vector_field), allocatable, dimension(:) :: eff_visc + type(scalar_field), allocatable, dimension(:) :: int_mom_exch + + ! 
magnitude of unclosed terms in momentum eqn + type(scalar_field) :: mag_reynolds_stress + type(scalar_field) :: mag_eff_visc + type(scalar_field) :: mag_int_mom_exch + + !$acc declare create(fluid_indicator_function, filtered_fluid_indicator_function, q_cons_filtered) + !$acc declare create(pres_visc_stress, reynolds_stress, eff_visc, int_mom_exch, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) #if defined(MFC_OpenACC) ! GPU plans @@ -80,8 +96,72 @@ contains !< create fft plans to be used for explicit filtering of data subroutine s_initialize_fftw_explicit_filter_module + integer :: i, j, k integer :: size_n(1), inembed(1), onembed(1) + @:ALLOCATE(fluid_indicator_function%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(fluid_indicator_function) + + @:ALLOCATE(filtered_fluid_indicator_function%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(filtered_fluid_indicator_function) + + @:ALLOCATE(q_cons_filtered(1:sys_size)) + do i = 1, sys_size + @:ALLOCATE(q_cons_filtered(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(q_cons_filtered(i)) + end do + + @:ALLOCATE(reynolds_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(reynolds_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(reynolds_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(reynolds_stress(i)) + end do + + @:ALLOCATE(eff_visc(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(eff_visc(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(eff_visc(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(eff_visc(i)) + end do + + @:ALLOCATE(int_mom_exch(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(int_mom_exch(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + 
idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(int_mom_exch(i)) + end do + + @:ALLOCATE(mag_reynolds_stress%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(mag_reynolds_stress) + + @:ALLOCATE(mag_eff_visc%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(mag_eff_visc) + + @:ALLOCATE(mag_int_mom_exch%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(mag_int_mom_exch) + !< global sizes Nx = m_glb + 1 Ny = n_glb + 1 @@ -320,41 +400,30 @@ contains ! return cmplx_kernelG1d: 1D z, x, y end subroutine s_initialize_filtering_kernel - !< initialize fluid indicator function + !< initialize fluid indicator function and filtered fluid indicator function subroutine s_initialize_fluid_indicator_function integer :: i, j, k - @:ALLOCATE(fluid_indicator_function_I%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(fluid_indicator_function_I) - ! define fluid indicator function !$acc parallel loop collapse(3) gang vector default(present) do i = 0, m do j = 0, n do k = 0, p if (ib_markers%sf(i, j, k) == 0) then - fluid_indicator_function_I%sf(i, j, k) = 1.0_dp + fluid_indicator_function%sf(i, j, k) = 1.0_dp else - fluid_indicator_function_I%sf(i, j, k) = 0.0_dp + fluid_indicator_function%sf(i, j, k) = 0.0_dp end if end do end do end do - end subroutine s_initialize_fluid_indicator_function - - !< compute the filtered fluid indicator function counterpart - subroutine s_initialize_filtered_fluid_indicator_function(filtered_fluid_indicator_function) - type(scalar_field) :: filtered_fluid_indicator_function - - integer :: i, j, k - - ! filter fluid indicator function -> stored in q_cons_vf(advxb) + ! 
filter fluid indicator function !$acc parallel loop collapse(3) gang vector default(present) do i = 1, Nx do j = 1, Ny do k = 1, Nzloc - data_real_3D_slabz(i, j, k) = fluid_indicator_function_I%sf(i-1, j-1, k-1) + data_real_3D_slabz(i, j, k) = fluid_indicator_function%sf(i-1, j-1, k-1) end do end do end do @@ -381,7 +450,7 @@ contains end do end do - end subroutine s_initialize_filtered_fluid_indicator_function + end subroutine s_initialize_fluid_indicator_function !< apply the gaussian filter to the conservative variables and compute their filtered components subroutine s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) @@ -390,12 +459,26 @@ contains integer :: l - do l = 1, sys_size-1 - call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .true., q_cons_vf(l), q_cons_filtered(l)) + do l = contxb, momxe + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(l), q_cons_filtered(l)) end do end subroutine s_apply_fftw_filter_cons + !< calculate the unclosed terms present in the volume filtered momentum equation + subroutine s_volume_filter_momentum_eqn(q_cons_vf) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + integer :: i, j, k + + call s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) + call s_setup_terms_filtering(q_cons_vf, reynolds_stress, eff_visc) + call s_apply_fftw_filter_tensor(reynolds_stress, eff_visc, q_cons_filtered, pres_visc_stress, int_mom_exch) + call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) + call s_compute_effective_viscosity(q_cons_filtered, eff_visc, mag_eff_visc) + call s_compute_interphase_momentum_exchange(int_mom_exch, mag_int_mom_exch) + + end subroutine s_volume_filter_momentum_eqn + !< applies the gaussian filter to an arbitrary scalar field subroutine s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, fluid_quantity, q_temp_in, q_temp_out) type(scalar_field), intent(in) :: 
filtered_fluid_indicator_function @@ -411,7 +494,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - data_real_3D_slabz(i+1, j+1, k+1) = q_temp_in%sf(i, j, k) * fluid_indicator_function_I%sf(i, j, k) + data_real_3D_slabz(i+1, j+1, k+1) = q_temp_in%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) end do end do end do @@ -420,7 +503,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - data_real_3D_slabz(i+1, j+1, k+1) = q_temp_in%sf(i, j, k) * (1.0_dp - fluid_indicator_function_I%sf(i, j, k)) + data_real_3D_slabz(i+1, j+1, k+1) = q_temp_in%sf(i, j, k) * (1.0_dp - fluid_indicator_function%sf(i, j, k)) end do end do end do @@ -462,36 +545,77 @@ contains end subroutine s_apply_fftw_filter_scalarfield !< apply the gaussian filter to the requisite tensors to compute unclosed terms of interest - subroutine s_apply_fftw_filter_tensor(pt_Re_stress, R_mu, q_cons_filtered, div_pres_visc_stress, pres_visc_stress_filtered) - type(vector_field), dimension(1:num_dims), intent(inout) :: pt_Re_stress - type(vector_field), dimension(1:num_dims), intent(inout) :: R_mu + subroutine s_apply_fftw_filter_tensor(reynolds_stress, eff_visc, q_cons_filtered, pres_visc_stress, int_mom_exch) + type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered - type(scalar_field), dimension(momxb:momxe), intent(inout) :: div_pres_visc_stress - type(scalar_field), dimension(1:num_dims), intent(inout) :: pres_visc_stress_filtered + type(scalar_field), dimension(momxb:momxe), intent(inout) :: pres_visc_stress + type(scalar_field), dimension(1:num_dims), intent(inout) :: int_mom_exch integer :: i, j, k, l, q ! 
pseudo turbulent reynolds stress do l = 1, num_dims do q = 1, num_dims - call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .true., pt_Re_stress(l)%vf(q)) + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(l)%vf(q)) end do end do ! effective viscosity do l = 1, num_dims do q = 1, num_dims - call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .true., R_mu(l)%vf(q)) + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., eff_visc(l)%vf(q)) end do end do ! interphase momentum exchange do l = 1, num_dims - call s_apply_fftw_filter_scalarfield(q_cons_filtered(advxb), .false., div_pres_visc_stress(momxb-1+l), pres_visc_stress_filtered(l)) + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .false., pres_visc_stress(momxb-1+l), int_mom_exch(l)) end do end subroutine s_apply_fftw_filter_tensor + ! compute pressure and viscous stress tensors + subroutine s_compute_stress_tensor(q_cons_vf) + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + real(wp) :: dudx, dudy, dudz, dvdx, dvdy, dvdz, dqdx, dwdy, dwdz ! spatial velocity derivatives + integer :: i, j, k + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + ! 
local to each process + dudx = ( q_cons_vf(2)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(2)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dudy = ( q_cons_vf(2)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(2)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dudz = ( q_cons_vf(2)%vf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + dvdx = ( q_cons_vf(3)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(3)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dvdy = ( q_cons_vf(3)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(3)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dvdz = ( q_cons_vf(3)%vf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(3)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + dwdx = ( q_cons_vf(4)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(4)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dwdy = ( q_cons_vf(4)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(4)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dwdz = ( q_cons_vf(4)%vf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(4)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + ! 
viscous stress tensor, tau(row, column) + tau(1)%vf(1) = mu * (4._wp/3._wp * dudx - 2._wp/3._wp * (dvdy + dwdz)) + tau(1)%vf(2) = mu * (dudy + dvdx) + tau(1)%vf(3) = mu * (dudz + dwdx) + tau(2)%vf(1) = mu * (dvdx + dudy) + tau(2)%vf(2) = mu * (4._wp/3._wp * dvdy - 2._wp/3._wp * (dudx + dwdz)) + tau(2)%vf(3) = mu * (dvdz + dwdy) + tau(3)%vf(1) = mu * (dwdx + dudz) + tau(3)%vf(2) = mu * (dwdy + dvdz) + tau(3)%vf(3) = mu * (4._wp/3._wp * dwdz - 2._wp/3._wp * (dudx + dvdy)) + + + end do + end do + end do + + end subroutine s_compute_stress_tensor + !< transpose domain from z-slabs to y-slabs on each processor subroutine s_mpi_transpose_slabZ2Y complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) @@ -708,10 +832,10 @@ contains end subroutine s_mpi_FFT_bwd !< setup for calculation of unclosed terms in volume filtered momentum eqn - subroutine s_setup_terms_filtering(q_cons_vf, pt_Re_stress, R_mu) + subroutine s_setup_terms_filtering(q_cons_vf, reynolds_stress, eff_visc) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(vector_field), dimension(1:num_dims), intent(inout) :: pt_Re_stress - type(vector_field), dimension(1:num_dims), intent(inout) :: R_mu + type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc integer :: i, j, k, l, q @@ -724,7 +848,7 @@ contains do l = 1, num_dims !$acc loop seq do q = 1, num_dims - pt_Re_stress(l)%vf(q)%sf(i, j, k) = (q_cons_vf(momxb-1+l)%sf(i, j, k) * q_cons_vf(momxb-1+q)%sf(i, j, k)) / q_cons_vf(1)%sf(i, j, k) ! (rho*u x rho*u)/rho = rho*(u x u) + reynolds_stress(l)%vf(q)%sf(i, j, k) = (q_cons_vf(momxb-1+l)%sf(i, j, k) * q_cons_vf(momxb-1+q)%sf(i, j, k)) / q_cons_vf(1)%sf(i, j, k) ! (rho*u x rho*u)/rho = rho*(u x u) end do end do end do @@ -749,50 +873,50 @@ contains end do #endif - ! R_mu setup + ! 
effective viscosity setup !$acc parallel loop collapse(3) gang vector default(present) do i = 0, m do j = 0, n do k = 0, p - R_mu(1)%vf(1)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + eff_visc(1)%vf(1)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - R_mu(2)%vf(2)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + eff_visc(2)%vf(2)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - R_mu(3)%vf(3)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)) & + eff_visc(3)%vf(3)%sf(i, j, k) = mu_visc * 
(2._wp*(q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)) & - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - R_mu(1)%vf(2)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_vf(1)%sf(i, j, k) & + eff_visc(1)%vf(2)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_vf(1)%sf(i, j, k) & + (q_cons_vf(momxb+1)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb+1)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_vf(1)%sf(i, j, k)) - R_mu(2)%vf(1)%sf(i, j, k) = R_mu(1)%vf(2)%sf(i, j, k) + eff_visc(2)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(2)%sf(i, j, k) - R_mu(1)%vf(3)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & + eff_visc(1)%vf(3)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & + (q_cons_vf(momxb+2)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb+2)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_vf(1)%sf(i, j, k)) - R_mu(3)%vf(1)%sf(i, j, k) = R_mu(1)%vf(3)%sf(i, j, k) + eff_visc(3)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(3)%sf(i, j, k) - R_mu(2)%vf(3)%sf(i, j, k) = 
mu_visc * ((q_cons_vf(momxb+1)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+1)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & + eff_visc(2)%vf(3)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb+1)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+1)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & + (q_cons_vf(momxb+2)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+2)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_vf(1)%sf(i, j, k)) - R_mu(3)%vf(2)%sf(i, j, k) = R_mu(2)%vf(3)%sf(i, j, k) + eff_visc(3)%vf(2)%sf(i, j, k) = eff_visc(2)%vf(3)%sf(i, j, k) end do end do end do end subroutine s_setup_terms_filtering - subroutine s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, pt_Re_stress, mag_div_Ru) + subroutine s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered - type(vector_field), dimension(1:num_dims), intent(inout) :: pt_Re_stress - type(scalar_field), intent(inout) :: mag_div_Ru + type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress + type(scalar_field), intent(inout) :: mag_reynolds_stress real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: div_Ru integer :: i, j, k, l, q @@ -804,7 +928,7 @@ contains do l = 1, num_dims !$acc loop seq do q = 1, num_dims - pt_Re_stress(l)%vf(q)%sf(i, j, k) = pt_Re_stress(l)%vf(q)%sf(i, j, k) & + reynolds_stress(l)%vf(q)%sf(i, j, k) = reynolds_stress(l)%vf(q)%sf(i, j, k) & - (q_cons_filtered(momxb-1+l)%sf(i, j, k) * q_cons_filtered(momxb-1+q)%sf(i, j, k) / q_cons_filtered(1)%sf(i, j, k)) end do end do @@ -820,7 +944,7 @@ contains do l = 1, num_dims !$acc loop seq do q = 1, num_dims - pt_Re_stress(l)%vf(q)%sf(i, j, k) = pt_Re_stress(l)%vf(q)%sf(i, j, k) * q_cons_filtered(advxb)%sf(i, j, k) + reynolds_stress(l)%vf(q)%sf(i, j, k) = reynolds_stress(l)%vf(q)%sf(i, j, k) * 
filtered_fluid_indicator_function%sf(i, j, k) end do end do end do @@ -831,20 +955,20 @@ contains #ifdef MFC_MPI do l = 1, num_dims do q = 1, num_dims - call s_populate_scalarfield_buffers(pt_Re_stress(l)%vf(q)) + call s_populate_scalarfield_buffers(reynolds_stress(l)%vf(q)) end do end do #else do l = 1, num_dims do q = 1, num_dims - pt_Re_stress(l)%vf(q)%sf(-buff_size:-1, :, :) = pt_Re_stress(l)%vf(q)%sf(m-buff_size+1:m, :, :) - pt_Re_stress(l)%vf(q)%sf(m+1:m+buff_size, :, :) = pt_Re_stress(l)%vf(q)%sf(0:buff_size-1, :, :) + reynolds_stress(l)%vf(q)%sf(-buff_size:-1, :, :) = reynolds_stress(l)%vf(q)%sf(m-buff_size+1:m, :, :) + reynolds_stress(l)%vf(q)%sf(m+1:m+buff_size, :, :) = reynolds_stress(l)%vf(q)%sf(0:buff_size-1, :, :) - pt_Re_stress(l)%vf(q)%sf(:, -buff_size:-1, :) = pt_Re_stress(l)%vf(q)%sf(:, n-buff_size+1:n, :) - pt_Re_stress(l)%vf(q)%sf(:, n+1:n+buff_size, :) = pt_Re_stress(l)%vf(q)%sf(:, 0:buff_size-1, :) + reynolds_stress(l)%vf(q)%sf(:, -buff_size:-1, :) = reynolds_stress(l)%vf(q)%sf(:, n-buff_size+1:n, :) + reynolds_stress(l)%vf(q)%sf(:, n+1:n+buff_size, :) = reynolds_stress(l)%vf(q)%sf(:, 0:buff_size-1, :) - pt_Re_stress(l)%vf(q)%sf(:, :, -buff_size:-1) = pt_Re_stress(l)%vf(q)%sf(:, :, p-buff_size+1:p) - pt_Re_stress(l)%vf(q)%sf(:, :, p+1:p+buff_size) = pt_Re_stress(l)%vf(q)%sf(:, :, 0:buff_size-1) + reynolds_stress(l)%vf(q)%sf(:, :, -buff_size:-1) = reynolds_stress(l)%vf(q)%sf(:, :, p-buff_size+1:p) + reynolds_stress(l)%vf(q)%sf(:, :, p+1:p+buff_size) = reynolds_stress(l)%vf(q)%sf(:, :, 0:buff_size-1) end do end do #endif @@ -856,9 +980,9 @@ contains do k = 0, p !$acc loop seq do l = 1, num_dims - div_Ru(l, i, j, k) = (pt_Re_stress(l)%vf(1)%sf(i+1, j, k) - pt_Re_stress(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (pt_Re_stress(l)%vf(2)%sf(i, j+1, k) - pt_Re_stress(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (pt_Re_stress(l)%vf(3)%sf(i, j, k+1) - pt_Re_stress(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) + div_Ru(l, i, j, k) = 
(reynolds_stress(l)%vf(1)%sf(i+1, j, k) - reynolds_stress(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (reynolds_stress(l)%vf(2)%sf(i, j+1, k) - reynolds_stress(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (reynolds_stress(l)%vf(3)%sf(i, j, k+1) - reynolds_stress(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) end do end do end do @@ -868,18 +992,18 @@ contains do i = 0, m do j = 0, n do k = 0, p - mag_div_Ru%sf(i, j, k) = sqrt(div_Ru(1, i, j, k)**2 + div_Ru(2, i, j, k)**2 + div_Ru(3, i, j, k)**2) + mag_reynolds_stress%sf(i, j, k) = sqrt(div_Ru(1, i, j, k)**2 + div_Ru(2, i, j, k)**2 + div_Ru(3, i, j, k)**2) end do end do end do end subroutine s_compute_pseudo_turbulent_reynolds_stress - subroutine s_compute_R_mu(q_cons_filtered, R_mu, mag_div_R_mu) + subroutine s_compute_effective_viscosity(q_cons_filtered, eff_visc, mag_eff_visc) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered - type(vector_field), dimension(1:num_dims), intent(inout) :: R_mu - type(scalar_field), intent(inout) :: mag_div_R_mu - real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: div_R_mu + type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc + type(scalar_field), intent(inout) :: mag_eff_visc + real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: div_eff_visc integer :: i, j, k, l, q @@ -901,40 +1025,40 @@ contains end do #endif - ! calculate R_mu + ! 
calculate eff_visc !$acc parallel loop collapse(3) gang vector default(present) do i = 0, m do j = 0, n do k = 0, p - R_mu(1)%vf(1)%sf(i, j, k) = R_mu(1)%vf(1)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + eff_visc(1)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(1)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - R_mu(2)%vf(2)%sf(i, j, k) = R_mu(2)%vf(2)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + eff_visc(2)%vf(2)%sf(i, j, k) = eff_visc(2)%vf(2)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, 
k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - R_mu(3)%vf(3)%sf(i, j, k) = R_mu(3)%vf(3)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)) & + eff_visc(3)%vf(3)%sf(i, j, k) = eff_visc(3)%vf(3)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)) & - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - R_mu(1)%vf(2)%sf(i, j, k) = R_mu(1)%vf(2)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_filtered(1)%sf(i, j, k) & + eff_visc(1)%vf(2)%sf(i, j, k) = eff_visc(1)%vf(2)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_filtered(1)%sf(i, j, k) & + (q_cons_filtered(momxb+1)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb+1)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_filtered(1)%sf(i, j, k)) - R_mu(2)%vf(1)%sf(i, j, k) = R_mu(1)%vf(2)%sf(i, j, k) + eff_visc(2)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(2)%sf(i, j, k) - R_mu(1)%vf(3)%sf(i, j, k) = R_mu(1)%vf(3)%sf(i, j, k) - mu_visc 
* ((q_cons_filtered(momxb)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & + eff_visc(1)%vf(3)%sf(i, j, k) = eff_visc(1)%vf(3)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & + (q_cons_filtered(momxb+2)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb+2)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_filtered(1)%sf(i, j, k)) - R_mu(3)%vf(1)%sf(i, j, k) = R_mu(1)%vf(3)%sf(i, j, k) + eff_visc(3)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(3)%sf(i, j, k) - R_mu(2)%vf(3)%sf(i, j, k) = R_mu(2)%vf(3)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb+1)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+1)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & + eff_visc(2)%vf(3)%sf(i, j, k) = eff_visc(2)%vf(3)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb+1)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+1)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & + (q_cons_filtered(momxb+2)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+2)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_filtered(1)%sf(i, j, k)) - R_mu(3)%vf(2)%sf(i, j, k) = R_mu(2)%vf(3)%sf(i, j, k) + eff_visc(3)%vf(2)%sf(i, j, k) = eff_visc(2)%vf(3)%sf(i, j, k) end do end do @@ -948,7 +1072,7 @@ contains do l = 1, num_dims !$acc loop seq do q = 1, num_dims - R_mu(l)%vf(q)%sf(i, j, k) = R_mu(l)%vf(q)%sf(i, j, k) * q_cons_filtered(advxb)%sf(i, j, k) + eff_visc(l)%vf(q)%sf(i, j, k) = eff_visc(l)%vf(q)%sf(i, j, k) * filtered_fluid_indicator_function%sf(i, j, k) end do end do end do @@ -959,53 +1083,53 @@ contains #ifdef MFC_MPI 
do l = 1, num_dims do q = 1, num_dims - call s_populate_scalarfield_buffers(R_mu(l)%vf(q)) + call s_populate_scalarfield_buffers(eff_visc(l)%vf(q)) end do end do #else do l = 1, num_dims do q = 1, num_dims - R_mu(l)%vf(q)%sf(-buff_size:-1, :, :) = R_mu(l)%vf(q)%sf(m-buff_size+1:m, :, :) - R_mu(l)%vf(q)%sf(m+1:m+buff_size, :, :) = R_mu(l)%vf(q)%sf(0:buff_size-1, :, :) + eff_visc(l)%vf(q)%sf(-buff_size:-1, :, :) = eff_visc(l)%vf(q)%sf(m-buff_size+1:m, :, :) + eff_visc(l)%vf(q)%sf(m+1:m+buff_size, :, :) = eff_visc(l)%vf(q)%sf(0:buff_size-1, :, :) - R_mu(l)%vf(q)%sf(:, -buff_size:-1, :) = R_mu(l)%vf(q)%sf(:, n-buff_size+1:n, :) - R_mu(l)%vf(q)%sf(:, n+1:n+buff_size, :) = R_mu(l)%vf(q)%sf(:, 0:buff_size-1, :) + eff_visc(l)%vf(q)%sf(:, -buff_size:-1, :) = eff_visc(l)%vf(q)%sf(:, n-buff_size+1:n, :) + eff_visc(l)%vf(q)%sf(:, n+1:n+buff_size, :) = eff_visc(l)%vf(q)%sf(:, 0:buff_size-1, :) - R_mu(l)%vf(q)%sf(:, :, -buff_size:-1) = R_mu(l)%vf(q)%sf(:, :, p-buff_size+1:p) - R_mu(l)%vf(q)%sf(:, :, p+1:p+buff_size) = R_mu(l)%vf(q)%sf(:, :, 0:buff_size-1) + eff_visc(l)%vf(q)%sf(:, :, -buff_size:-1) = eff_visc(l)%vf(q)%sf(:, :, p-buff_size+1:p) + eff_visc(l)%vf(q)%sf(:, :, p+1:p+buff_size) = eff_visc(l)%vf(q)%sf(:, :, 0:buff_size-1) end do end do #endif - ! div(R_mu), using CD2 FD scheme - !$acc parallel loop collapse(3) gang vector default(present) copy(div_R_mu) + ! 
div(eff_visc), using CD2 FD scheme + !$acc parallel loop collapse(3) gang vector default(present) copy(div_eff_visc) do i = 0, m do j = 0, n do k = 0, p !$acc loop seq do l = 1, num_dims - div_R_mu(l, i, j, k) = (R_mu(l)%vf(1)%sf(i+1, j, k) - R_mu(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (R_mu(l)%vf(2)%sf(i, j+1, k) - R_mu(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (R_mu(l)%vf(3)%sf(i, j, k+1) - R_mu(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) + div_eff_visc(l, i, j, k) = (eff_visc(l)%vf(1)%sf(i+1, j, k) - eff_visc(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & + + (eff_visc(l)%vf(2)%sf(i, j+1, k) - eff_visc(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (eff_visc(l)%vf(3)%sf(i, j, k+1) - eff_visc(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) end do end do end do end do - !$acc parallel loop collapse(3) gang vector default(present) copyin(div_R_mu) + !$acc parallel loop collapse(3) gang vector default(present) copyin(div_eff_visc) do i = 0, m do j = 0, n do k = 0, p - mag_div_R_mu%sf(i, j, k) = sqrt(div_R_mu(1, i, j, k)**2 + div_R_mu(2, i, j, k)**2 + div_R_mu(3, i, j, k)**2) + mag_eff_visc%sf(i, j, k) = sqrt(div_eff_visc(1, i, j, k)**2 + div_eff_visc(2, i, j, k)**2 + div_eff_visc(3, i, j, k)**2) end do end do end do - end subroutine s_compute_R_mu + end subroutine s_compute_effective_viscosity - subroutine s_compute_interphase_momentum_exchange_term(pres_visc_stress_filtered, mag_F_IMET) - type(scalar_field), dimension(1:num_dims), intent(in) :: pres_visc_stress_filtered - type(scalar_field), intent(inout) :: mag_F_IMET + subroutine s_compute_interphase_momentum_exchange(int_mom_exch, mag_int_mom_exch) + type(scalar_field), dimension(1:num_dims), intent(in) :: int_mom_exch + type(scalar_field), intent(inout) :: mag_int_mom_exch integer :: i, j, k, l, q, ii @@ -1013,17 +1137,48 @@ contains do i = 0, m do j = 0, n do k = 0, p - mag_F_IMET%sf(i, j, k) = sqrt(pres_visc_stress_filtered(1)%sf(i, j, k)**2 & - + pres_visc_stress_filtered(2)%sf(i, j, k)**2 & - + 
pres_visc_stress_filtered(3)%sf(i, j, k)**2) + mag_int_mom_exch%sf(i, j, k) = sqrt(int_mom_exch(1)%sf(i, j, k)**2 & + + int_mom_exch(2)%sf(i, j, k)**2 & + + int_mom_exch(3)%sf(i, j, k)**2) end do end do end do - end subroutine s_compute_interphase_momentum_exchange_term + end subroutine s_compute_interphase_momentum_exchange subroutine s_finalize_fftw_explicit_filter_module - @:DEALLOCATE(fluid_indicator_function_I%sf) + @:DEALLOCATE(fluid_indicator_function%sf) + @:DEALLOCATE(filtered_fluid_indicator_function%sf) + + do i = 1, sys_size + @:DEALLOCATE(q_cons_filtered(i)%sf) + end do + @:DEALLOCATE(q_cons_filtered) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(reynolds_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(reynolds_stress(i)%vf) + end do + @:DEALLOCATE(reynolds_stress) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(eff_visc(i)%vf(j)%sf) + end do + @:DEALLOCATE(eff_visc(i)%vf) + end do + @:DEALLOCATE(eff_visc) + + do i = 1, num_dims + @:DEALLOCATE(int_mom_exch(i)%sf) + end do + @:DEALLOCATE(int_mom_exch) + + @:DEALLOCATE(mag_reynolds_stress%sf) + @:DEALLOCATE(mag_eff_visc%sf) + @:DEALLOCATE(mag_int_mom_exch%sf) @:DEALLOCATE(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) @:DEALLOCATE(cmplx_kernelG1d, real_kernelG_in) diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 7f07744d8b..4c3ae9b62b 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -56,9 +56,8 @@ program p_main call s_initialize_gpu_vars() call nvtxEndRange + if (volume_filtering_momentum_eqn) call s_initialize_filtering_kernel() call s_initialize_fluid_indicator_function() - if (fourier_transform_filtering) call s_initialize_filtering_kernel() - if (fourier_transform_filtering) call s_initialize_filtered_fluid_indicator_function(q_cons_filtered(advxb)) ! 
Setting the time-step iterator to the first time-step if (cfl_dt) then diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index a58d29869f..212d7a6cb6 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -306,7 +306,7 @@ def analytic(self): 'rho_inf_ref': ParamType.REAL, 'T_inf_ref': ParamType.REAL, 'periodic_forcing': ParamType.LOG, - 'fourier_transform_filtering': ParamType.LOG, + 'volume_filtering_momentum_eqn': ParamType.LOG, 'compute_autocorrelation': ParamType.LOG, }) From c35d5778c64ef7ca010b2ed7e0e6e2e67c2f698a Mon Sep 17 00:00:00 2001 From: conradd3 Date: Mon, 25 Aug 2025 11:33:52 -0500 Subject: [PATCH 06/30] statistics verification -> test gpus --- runs/3d_1sphere_filtering/case.py | 9 +- runs/3d_1sphere_periodic/case.py | 1 - runs/3d_drag_test/case.py | 1 - .../{ => centered}/case.py | 35 +- runs/3d_periodic_ibs_test/off-center/case.py | 146 +++ runs/phi01/case.py | 1 - src/common/m_mpi_common.fpp | 6 +- src/simulation/m_compute_statistics.fpp | 258 +++--- src/simulation/m_global_parameters.fpp | 6 +- src/simulation/m_mpi_proxy.fpp | 4 +- src/simulation/m_rhs.fpp | 127 +-- src/simulation/m_start_up.fpp | 38 +- src/simulation/m_time_steppers.fpp | 100 +- src/simulation/m_volume_filtering.fpp | 865 ++++++++++-------- src/simulation/p_main.fpp | 5 +- toolchain/mfc/run/case_dicts.py | 2 +- 16 files changed, 832 insertions(+), 772 deletions(-) rename runs/3d_periodic_ibs_test/{ => centered}/case.py (82%) create mode 100644 runs/3d_periodic_ibs_test/off-center/case.py diff --git a/runs/3d_1sphere_filtering/case.py b/runs/3d_1sphere_filtering/case.py index 0964ea5dd4..c6d138c110 100644 --- a/runs/3d_1sphere_filtering/case.py +++ b/runs/3d_1sphere_filtering/case.py @@ -25,8 +25,8 @@ #print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow dt = 4.0E-06 -Nt = 31 -t_save = 1 +Nt = 1000 +t_save = 10 Nx = 63 Ny = 63 @@ -65,6 +65,7 @@ "t_step_start": 0, "t_step_stop": Nt, # 3000 
"t_step_save": t_save, # 10 + "t_step_stat_start": 100, # Simulation Algorithm Parameters # Only one patches are necessary, the air tube "num_patches": 1, @@ -132,18 +133,16 @@ # Fluids Physical Parameters "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) "fluid_pp(1)%pi_inf": 0, - "fluid_pp(1)%Re(1)": Re, + "fluid_pp(1)%Re(1)": 1.0 / mu, # new case additions "periodic_forcing": "T", "periodic_ibs": "T", - "compute_CD": "F", "volume_filtering_momentum_eqn": "T", "u_inf_ref": v1, "rho_inf_ref": rho, "T_inf_ref": T, - "mu_visc": mu, "store_levelset": "F", "slab_domain_decomposition": "T", diff --git a/runs/3d_1sphere_periodic/case.py b/runs/3d_1sphere_periodic/case.py index f4512b5f00..d8760b7909 100644 --- a/runs/3d_1sphere_periodic/case.py +++ b/runs/3d_1sphere_periodic/case.py @@ -143,7 +143,6 @@ "u_inf_ref": v1, "rho_inf_ref": rho, "T_inf_ref": T, - "mu_visc": mu, "store_levelset": "F", "slab_domain_decomposition": "T", diff --git a/runs/3d_drag_test/case.py b/runs/3d_drag_test/case.py index 2eb50ebc62..9a78272a88 100644 --- a/runs/3d_drag_test/case.py +++ b/runs/3d_drag_test/case.py @@ -134,7 +134,6 @@ # new case additions "compute_CD": "T", - "mu_visc": mu, "u_inf_ref": v1, "rho_inf_ref": rho, "T_inf_ref": T, diff --git a/runs/3d_periodic_ibs_test/case.py b/runs/3d_periodic_ibs_test/centered/case.py similarity index 82% rename from runs/3d_periodic_ibs_test/case.py rename to runs/3d_periodic_ibs_test/centered/case.py index 9a63a3f4a3..a3199b3933 100644 --- a/runs/3d_periodic_ibs_test/case.py +++ b/runs/3d_periodic_ibs_test/centered/case.py @@ -15,7 +15,7 @@ M = 1.2 Re = 1500.0 -v1 = M*(gam_a*P/rho)**(1.0/2.0) +v1 = M*np.sqrt((gam_a*P/rho)) mu = rho*v1*D/Re # dynamic viscosity for current case @@ -25,8 +25,8 @@ #print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow dt = 4.0E-06 -Nt = 5 -t_save = 1 +Nt = 100 +t_save = 10 Nx = 63 Ny = 63 @@ -36,25 +36,11 @@ ib_dict = {} ib_dict.update({ f"patch_ib({1})%geometry": 8, - 
f"patch_ib({1})%x_centroid": 0.5, - f"patch_ib({1})%y_centroid": 0.5, - f"patch_ib({1})%z_centroid": 0.5, + f"patch_ib({1})%x_centroid": 0.0, + f"patch_ib({1})%y_centroid": 0.0, + f"patch_ib({1})%z_centroid": 0.0, f"patch_ib({1})%radius": D / 2, f"patch_ib({1})%slip": "F", - - f"patch_ib({2})%geometry": 8, - f"patch_ib({2})%x_centroid": 0.0, - f"patch_ib({2})%y_centroid": 0.0, - f"patch_ib({2})%z_centroid": 0.0, - f"patch_ib({2})%radius": D / 2, - f"patch_ib({2})%slip": "F", - - f"patch_ib({3})%geometry": 8, - f"patch_ib({3})%x_centroid": 0.0, - f"patch_ib({3})%y_centroid": 0.5, - f"patch_ib({3})%z_centroid": 0.25, - f"patch_ib({3})%radius": D / 2, - f"patch_ib({3})%slip": "F", }) # Configuring case dictionary @@ -116,7 +102,7 @@ "bc_z%end": -1, # Set IB to True and add 1 patch "ib": "T", - "num_ibs": 3, + "num_ibs": 1, "viscous": "T", # Formatted Database Files Structure Parameters "format": 1, @@ -144,10 +130,15 @@ # Fluids Physical Parameters "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) "fluid_pp(1)%pi_inf": 0, - "fluid_pp(1)%Re(1)": Re, + "fluid_pp(1)%Re(1)": 1.0 / mu, # new case additions + "periodic_forcing": "T", "periodic_ibs": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, } case_dict.update(ib_dict) diff --git a/runs/3d_periodic_ibs_test/off-center/case.py b/runs/3d_periodic_ibs_test/off-center/case.py new file mode 100644 index 0000000000..ecd3d7c9f5 --- /dev/null +++ b/runs/3d_periodic_ibs_test/off-center/case.py @@ -0,0 +1,146 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 100 +t_save = 10 + +Nx = 63 +Ny = 63 +Nz = 63 + +# 
immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 15.0 * D, + f"patch_ib({1})%y_centroid": 15.0 * D, + f"patch_ib({1})%z_centroid": 15.0 * D, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": 5.0 * D, + "x_domain%end": 15.0 * D, + # y direction + "y_domain%beg": 5.0 * D, + "y_domain%end": 15.0 * D, + # z direction + "z_domain%beg": 5.0 * D, + "z_domain%end": 15.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 10.0*D, + # Uniform medium density, centroid is at the center of the domain + 
"patch_icpp(1)%y_centroid": 10.0*D, + "patch_icpp(1)%z_centroid": 10.0*D, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": 1.0 / mu, + + # new case additions + "periodic_forcing": "T", + "periodic_ibs": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/phi01/case.py b/runs/phi01/case.py index 8e7a5bff4b..1d5b26d462 100644 --- a/runs/phi01/case.py +++ b/runs/phi01/case.py @@ -148,7 +148,6 @@ "u_inf_ref": v1, "rho_inf_ref": rho, "T_inf_ref": T, - "mu_visc": mu, "store_levelset": "F", "slab_domain_decomposition": "T", diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 662d096665..d3dcab1ac7 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -1569,8 +1569,10 @@ contains #ifdef MFC_MPI deallocate (buff_send, buff_recv) #ifdef MFC_SIMULATION - @:DEALLOCATE(buff_send_scalarfield) - @:DEALLOCATE(buff_recv_scalarfield) + if (volume_filtering_momentum_eqn) then + @:DEALLOCATE(buff_send_scalarfield) + @:DEALLOCATE(buff_recv_scalarfield) + end if #endif #endif diff --git a/src/simulation/m_compute_statistics.fpp b/src/simulation/m_compute_statistics.fpp index 1721b0706c..93b8d6502d 100644 --- a/src/simulation/m_compute_statistics.fpp +++ b/src/simulation/m_compute_statistics.fpp @@ -12,165 +12,169 @@ module m_compute_statistics implicit none private; public :: s_initialize_statistics_module, s_finalize_statistics_module, & - s_compute_s_order_statistics, 
s_autocorrelation_function + s_compute_statistics_momentum_unclosed_terms, s_update_statistics, & + s_compute_234_order_statistics + + ! terms for computing 1st, 2nd, 3rd, and 4th order statistical moments + type(scalar_field), allocatable, dimension(:) :: Msn_reynolds_stress + type(scalar_field), allocatable, dimension(:) :: Msn_eff_visc + type(scalar_field), allocatable, dimension(:) :: Msn_int_mom_exch - type(scalar_field), allocatable, dimension(:) :: xnbar_stat + ! 2nd, 3rd, and 4th statistical moments for unclosed terms in volume filtered momentum equation + type(scalar_field), allocatable, dimension(:), public :: stat_reynolds_stress + type(scalar_field), allocatable, dimension(:), public :: stat_eff_visc + type(scalar_field), allocatable, dimension(:), public :: stat_int_mom_exch - type(scalar_field), allocatable, dimension(:) :: delta_stat + !$acc declare create(Msn_reynolds_stress, Msn_eff_visc, Msn_int_mom_exch) - type(vector_field), allocatable, dimension(:) :: Msn_stat - - real(wp), allocatable, dimension(:) :: xm_th - - real(wp), allocatable, dimension(:) :: x_mom_autocorr - - !$acc declare create(xnbar_stat, delta_stat, Msn_stat) + !$acc declare create(stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) contains subroutine s_initialize_statistics_module - integer :: i, j - @:ALLOCATE(xnbar_stat(1:3)) - do i = 1, 3 - @:ALLOCATE(xnbar_stat(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(xnbar_stat(i)) + integer :: i + + @:ALLOCATE(Msn_reynolds_stress(1:4)) + do i = 1, 4 + @:ALLOCATE(Msn_reynolds_stress(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(Msn_reynolds_stress(i)) end do - @:ALLOCATE(delta_stat(1:3)) - do i = 1, 3 - @:ALLOCATE(delta_stat(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(delta_stat(i)) + @:ALLOCATE(Msn_eff_visc(1:4)) + do i = 1, 4 + @:ALLOCATE(Msn_eff_visc(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(Msn_eff_visc(i)) end do - @:ALLOCATE(Msn_stat(1:num_dims)) - do i = 1, 3 - @:ALLOCATE(Msn_stat(i)%vf(2:4)) + @:ALLOCATE(Msn_int_mom_exch(1:4)) + do i = 1, 4 
+ @:ALLOCATE(Msn_int_mom_exch(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(Msn_int_mom_exch(i)) end do - do i = 1, 3 - do j = 2, 4 - @:ALLOCATE(Msn_stat(i)%vf(j)%sf(0:m, 0:n, 0:p)) - end do - @:ACC_SETUP_VFs(Msn_stat(i)) + + @:ALLOCATE(stat_reynolds_stress(2:4)) + do i = 2, 4 + @:ALLOCATE(stat_reynolds_stress(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(stat_reynolds_stress(i)) end do - if (compute_autocorrelation) then - @:ALLOCATE(xm_th(t_step_stop)) - @:ALLOCATE(x_mom_autocorr(t_step_stop)) - end if + @:ALLOCATE(stat_eff_visc(2:4)) + do i = 2, 4 + @:ALLOCATE(stat_eff_visc(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(stat_eff_visc(i)) + end do + + @:ALLOCATE(stat_int_mom_exch(2:4)) + do i = 2, 4 + @:ALLOCATE(stat_int_mom_exch(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(stat_int_mom_exch(i)) + end do end subroutine s_initialize_statistics_module - subroutine s_compute_s_order_statistics(q_temp, n_step, s_order_stat, id) - type(scalar_field), intent(in) :: q_temp + subroutine s_compute_statistics_momentum_unclosed_terms(n_step, reynolds_stress, eff_visc, int_mom_exch) + type(scalar_field), intent(in) :: reynolds_stress + type(scalar_field), intent(in) :: eff_visc + type(scalar_field), intent(in) :: int_mom_exch + integer, intent(in) :: n_step - type(scalar_field), dimension(2:4), intent(inout) :: s_order_stat - integer, intent(in) :: id - real(wp) :: ns - integer :: i, j, k, ii + real(wp) :: ns ns = real(n_step, wp) - if (n_step == 1) then - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - xnbar_stat(id)%sf(i, j, k) = q_temp%sf(i, j, k) - Msn_stat(id)%vf(2)%sf(i, j, k) = 0.0_wp - Msn_stat(id)%vf(3)%sf(i, j, k) = 0.0_wp - Msn_stat(id)%vf(4)%sf(i, j, k) = 0.0_wp - s_order_stat(2)%sf(i, j, k) = 0.0_wp - s_order_stat(3)%sf(i, j, k) = 0.0_wp - s_order_stat(4)%sf(i, j, k) = 0.0_wp - end do - end do - end do - else - !$acc parallel loop collapse(3) gang vector default(present) copyin(ns) - do i = 0, m - do j = 0, n - do k = 0, p - 
delta_stat(id)%sf(i, j, k) = q_temp%sf(i, j, k) - xnbar_stat(id)%sf(i, j, k) - - xnbar_stat(id)%sf(i, j, k) = xnbar_stat(id)%sf(i, j, k) + delta_stat(id)%sf(i, j, k)/ns - - Msn_stat(id)%vf(4)%sf(i, j, k) = Msn_stat(id)%vf(4)%sf(i, j, k) & - + (delta_stat(id)%sf(i, j, k)**4)*(ns - 1.0_wp)*(ns**2 - 3.0_wp*ns + 3.0_wp)/(ns**3) & - + 6.0_wp*(delta_stat(id)%sf(i, j, k)**2)*Msn_stat(id)%vf(2)%sf(i, j, k)/(ns**2) & - - 4.0_wp*delta_stat(id)%sf(i, j, k)*Msn_stat(id)%vf(3)%sf(i, j, k)/ns - - Msn_stat(id)%vf(3)%sf(i, j, k) = Msn_stat(id)%vf(3)%sf(i, j, k) & - + (delta_stat(id)%sf(i, j, k)**3)*(ns - 1.0_wp)*(ns - 2.0_wp)/(ns**2) & - - 3.0_wp*delta_stat(id)%sf(i, j, k)*Msn_stat(id)%vf(2)%sf(i, j, k)/ns - - Msn_stat(id)%vf(2)%sf(i, j, k) = Msn_stat(id)%vf(2)%sf(i, j, k) & - + (delta_stat(id)%sf(i, j, k)**2)*(ns - 1.0_wp)/ns - - s_order_stat(2)%sf(i, j, k) = Msn_stat(id)%vf(2)%sf(i, j, k)/(ns - 1.0_wp) - - s_order_stat(3)%sf(i, j, k) = sqrt(ns)*Msn_stat(id)%vf(3)%sf(i, j, k)/(Msn_stat(id)%vf(2)%sf(i, j, k)**1.5_wp) - - s_order_stat(4)%sf(i, j, k) = ns*Msn_stat(id)%vf(4)%sf(i, j, k)/(Msn_stat(id)%vf(2)%sf(i, j, k)**2) - 3.0_wp - end do - end do - end do + ! update M1, M2, M3, M4 + call s_update_statistics(ns, reynolds_stress, Msn_reynolds_stress) + call s_update_statistics(ns, eff_visc, Msn_eff_visc) + call s_update_statistics(ns, int_mom_exch, Msn_int_mom_exch) + + ! 
compute 2nd, 3rd, 4th order statistical moments + if (n_step > 3) then + call s_compute_234_order_statistics(ns, Msn_reynolds_stress, stat_reynolds_stress) + call s_compute_234_order_statistics(ns, Msn_eff_visc, stat_eff_visc) + call s_compute_234_order_statistics(ns, Msn_int_mom_exch, stat_int_mom_exch) end if - end subroutine s_compute_s_order_statistics + end subroutine s_compute_statistics_momentum_unclosed_terms - subroutine s_autocorrelation_function(n_step, q_cons_vf) - integer, intent(in) :: n_step - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - real(wp) :: q_avg, q_var, test - integer :: i, j, k, s, it - - !$acc update host(q_cons_vf(2)) - xm_th(n_step) = q_cons_vf(2)%sf(m/4, n/4, p/4) - - if (n_step > 1) then - ! compute average - q_avg = sum(xm_th(1:n_step)) / real(n_step, wp) - - ! compute variance - q_var = sum((xm_th(1:n_step) - q_avg)**2) / real(n_step, wp) - - ! compute autocorrelation function - do s = 0, n_step - 1 - x_mom_autocorr(s+1) = 0.0_wp - do it = 1, n_step - s - x_mom_autocorr(s+1) = x_mom_autocorr(s+1) + (xm_th(it) - q_avg) * (xm_th(it+s) - q_avg) - end do - x_mom_autocorr(s+1) = x_mom_autocorr(s+1) / ((n_step - s) * q_var) - end do - - print *, q_cons_vf(2)%sf(m/4, n/4, p/4) - print *, 'Autocorrelation at lag 0:', x_mom_autocorr(1) - print *, 'Autocorrelation at lag N/2:', x_mom_autocorr(n_step/2) - print *, 'Autocorrelation at max lag:', x_mom_autocorr(n_step) - - end if + subroutine s_update_statistics(ns, q_temp, Msn) + type(scalar_field), intent(in) :: q_temp + type(scalar_field), dimension(1:4), intent(inout) :: Msn + + real(wp), intent(in) :: ns + real(wp) :: delta, delta_n, delta_n2, delta_f + integer :: i, j, k + + !$acc parallel loop collapse(3) gang vector default(present) copyin(ns) private(delta, delta_n, delta_n2, delta_f) + do i = 0, m + do j = 0, n + do k = 0, p + delta = q_temp%sf(i, j, k) - Msn(1)%sf(i, j, k) + delta_n = delta / ns + delta_n2 = delta_n**2 + delta_f = delta * delta_n * (ns - 1._wp) + 
+ Msn(1)%sf(i, j, k) = Msn(1)%sf(i, j, k) + delta_n + Msn(4)%sf(i, j, k) = Msn(4)%sf(i, j, k) + delta_f * delta_n2 * (ns**2 - 3._wp*ns + 3._wp) + 6._wp * delta_n2 * Msn(2)%sf(i, j, k) - 4._wp * delta_n * Msn(3)%sf(i, j, k) + Msn(3)%sf(i, j, k) = Msn(3)%sf(i, j, k) + delta_f * delta_n * (ns - 2._wp) - 3._wp * delta_n * Msn(2)%sf(i, j, k) + Msn(2)%sf(i, j, k) = Msn(2)%sf(i, j, k) + delta_f + end do + end do + end do + + end subroutine s_update_statistics + + subroutine s_compute_234_order_statistics(ns, Msn, q_stat) + type(scalar_field), dimension(1:4), intent(in) :: Msn + type(scalar_field), dimension(2:4), intent(inout) :: q_stat + + real(wp), intent(in) :: ns + integer :: i, j, k + + !$acc parallel loop collapse(3) gang vector default(present) copyin(ns) + do i = 0, m + do j = 0, n + do k = 0, p + q_stat(2)%sf(i, j, k) = Msn(2)%sf(i, j, k) / (ns - 1._wp) + q_stat(3)%sf(i, j, k) = sqrt(ns - 1._wp) / (ns - 2._wp) * ns * Msn(3)%sf(i, j, k) / (Msn(2)%sf(i, j, k)**1.5) + q_stat(4)%sf(i, j, k) = (ns - 1._wp) / ((ns - 2._wp) * (ns - 3._wp)) * ((ns + 1._wp) * (ns * Msn(4)%sf(i, j, k) / (Msn(2)%sf(i, j, k)**2) - 3._wp) + 6._wp) + end do + end do + end do - end subroutine s_autocorrelation_function + end subroutine s_compute_234_order_statistics subroutine s_finalize_statistics_module integer :: i, j - do i = 1, 3 - @:DEALLOCATE(xnbar_stat(i)%sf) + do i = 1, 4 + @:DEALLOCATE(Msn_reynolds_stress(i)%sf) + end do + @:DEALLOCATE(Msn_reynolds_stress) + + do i = 1, 4 + @:DEALLOCATE(Msn_eff_visc(i)%sf) + end do + @:DEALLOCATE(Msn_eff_visc) + + do i = 1, 4 + @:DEALLOCATE(Msn_int_mom_exch(i)%sf) end do - @:DEALLOCATE(xnbar_stat) + @:DEALLOCATE(Msn_int_mom_exch) - do i = 1, 3 - @:DEALLOCATE(delta_stat(i)%sf) + do i = 2, 4 + @:DEALLOCATE(stat_reynolds_stress(i)%sf) end do - @:DEALLOCATE(delta_stat) + @:DEALLOCATE(stat_reynolds_stress) - do i = 1, 3 - do j = 2, 4 - @:DEALLOCATE(Msn_stat(i)%vf(j)%sf) - end do - @:DEALLOCATE(Msn_stat(i)%vf) + do i = 2, 4 + 
@:DEALLOCATE(stat_eff_visc(i)%sf) end do - @:DEALLOCATE(Msn_stat) + @:DEALLOCATE(stat_eff_visc) + + do i = 2, 4 + @:DEALLOCATE(stat_int_mom_exch(i)%sf) + end do + @:DEALLOCATE(stat_int_mom_exch) + end subroutine s_finalize_statistics_module -end module m_compute_statistics \ No newline at end of file +end module m_compute_statistics diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index a71e17a69d..bcd8c74dec 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -503,7 +503,6 @@ module m_global_parameters logical :: periodic_ibs logical :: compute_CD - real(wp) :: mu_visc !< reference viscosity real(wp) :: u_inf_ref !< reference freestream velocity real(wp) :: rho_inf_ref !< reference freestream density real(wp) :: T_inf_ref !< reference freestream temperature @@ -512,8 +511,9 @@ module m_global_parameters logical :: store_levelset logical :: slab_domain_decomposition logical :: compute_autocorrelation + integer :: t_step_stat_start - !$acc declare create(mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref) + !$acc declare create(u_inf_ref, rho_inf_ref, T_inf_ref) contains @@ -792,7 +792,6 @@ contains periodic_ibs = .false. compute_CD = .false. - mu_visc = dflt_real u_inf_ref = dflt_real rho_inf_ref = dflt_real T_inf_ref = dflt_real @@ -801,6 +800,7 @@ contains store_levelset = .true. slab_domain_decomposition = .false. compute_autocorrelation = .false. 
+ t_step_stat_start = dflt_int end subroutine s_assign_default_values_to_user_inputs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index c2579cc057..730f5ead50 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -132,8 +132,8 @@ contains & 'x_domain%beg', 'x_domain%end', 'y_domain%beg', 'y_domain%end', & & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & - & 'tau_star', 'cont_damage_s', 'alpha_bar', 'mu_visc', 'u_inf_ref', & - & 'rho_inf_ref', 'T_inf_ref' ] + & 'tau_star', 'cont_damage_s', 'alpha_bar', 'u_inf_ref', & + & 'rho_inf_ref', 'T_inf_ref', 't_step_stat_start' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index eea4a49260..fd25b1c017 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -61,6 +61,8 @@ module m_rhs use m_mhd + use m_additional_forcing + implicit none private; public :: s_initialize_rhs_module, & @@ -609,7 +611,7 @@ contains end subroutine s_initialize_rhs_module - subroutine s_compute_rhs(q_cons_vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb, rhs_pb, mv, rhs_mv, t_step, time_avg, pres_visc_stress) + subroutine s_compute_rhs(q_cons_vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb, rhs_pb, mv, rhs_mv, t_step, time_avg) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(scalar_field), intent(inout) :: q_T_sf @@ -620,7 +622,6 @@ contains real(wp), dimension(idwbuff(1)%beg:, idwbuff(2)%beg:, idwbuff(3)%beg:, 1:, 1:), intent(inout) :: mv, rhs_mv integer, intent(in) :: t_step real(wp), intent(inout) :: time_avg - type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: pres_visc_stress real(wp), dimension(0:m, 0:n, 0:p) :: nbub real(wp) :: t_start, t_finish @@ -810,8 +811,7 @@ contains rhs_vf, & q_cons_qp, & q_prim_qp, & - flux_src_n(id), & - pres_visc_stress) + 
flux_src_n(id)) call nvtxEndRange ! RHS additions for hypoelasticity @@ -830,8 +830,7 @@ contains flux_src_n(id)%vf, & dq_prim_dx_qp(1)%vf, & dq_prim_dy_qp(1)%vf, & - dq_prim_dz_qp(1)%vf, & - pres_visc_stress) + dq_prim_dz_qp(1)%vf) call nvtxEndRange end if @@ -911,6 +910,8 @@ contains if (cont_damage) call s_compute_damage_state(q_cons_qp%vf, rhs_vf) + if (periodic_forcing) call s_add_periodic_forcing(rhs_vf) + ! END: Additional physics and source terms if (run_time_info .or. probe_wrt .or. ib .or. bubbles_lagrange) then @@ -938,14 +939,13 @@ contains end subroutine s_compute_rhs - subroutine s_compute_advection_source_term(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf, pres_visc_stress) + subroutine s_compute_advection_source_term(idir, rhs_vf, q_cons_vf, q_prim_vf, flux_src_n_vf) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(inout) :: rhs_vf type(vector_field), intent(inout) :: q_cons_vf type(vector_field), intent(inout) :: q_prim_vf type(vector_field), intent(inout) :: flux_src_n_vf - type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: pres_visc_stress integer :: i, j, k, l, q @@ -998,25 +998,6 @@ contains end do end do - ! particle forces loop, x-dir - if ((compute_CD .or. volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - !$acc loop seq - do l = momxb, momxe - pres_visc_stress(l)%sf(i, j, k) = 1._wp/dx(i) * & - (flux_n(1)%vf(l)%sf(i-1, j, k) - & - flux_n(1)%vf(l)%sf(i, j, k)) - 0.5_wp/dx(i) * & - (q_cons_vf%vf(2)%sf(i+1, j, k)*q_cons_vf%vf(l)%sf(i+1, j, k)/q_cons_vf%vf(1)%sf(i+1, j, k) - & - q_cons_vf%vf(2)%sf(i-1, j, k)*q_cons_vf%vf(l)%sf(i-1, j, k)/q_cons_vf%vf(1)%sf(i-1, j, k)) - end do - end do - end do - end do - end if - if (model_eqns == 3) then !$acc parallel loop collapse(4) gang vector default(present) do l = 0, p @@ -1127,25 +1108,6 @@ contains end do end do - ! 
particle forces loop, y-dir - if ((compute_CD .or. volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - !$acc loop seq - do l = momxb, momxe - pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & - (flux_n(2)%vf(l)%sf(i, j-1, k) - & - flux_n(2)%vf(l)%sf(i, j, k)) - 0.5_wp/dy(j) * & - (q_cons_vf%vf(3)%sf(i, j+1, k)*q_cons_vf%vf(l)%sf(i, j+1, k)/q_cons_vf%vf(1)%sf(i, j+1, k) - & - q_cons_vf%vf(3)%sf(i, j-1, k)*q_cons_vf%vf(l)%sf(i, j-1, k)/q_cons_vf%vf(1)%sf(i, j-1, k)) - end do - end do - end do - end do - end if - if (model_eqns == 3) then !$acc parallel loop collapse(4) gang vector default(present) do l = 0, p @@ -1352,25 +1314,6 @@ contains end do end if - ! particle forces loop, z-dir - if ((compute_CD .or. volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - !$acc loop seq - do l = momxb, momxe - pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & - (flux_n(3)%vf(l)%sf(i, j, k-1) - & - flux_n(3)%vf(l)%sf(i, j, k)) - 0.5_wp/dz(k) * & - (q_cons_vf%vf(4)%sf(i, j, k+1)*q_cons_vf%vf(l)%sf(i, j, k+1)/q_cons_vf%vf(1)%sf(i, j, k+1) - & - q_cons_vf%vf(4)%sf(i, j, k-1)*q_cons_vf%vf(l)%sf(i, j, k-1)/q_cons_vf%vf(1)%sf(i, j, k-1)) - end do - end do - end do - end do - end if - if (model_eqns == 3) then !$acc parallel loop collapse(4) gang vector default(present) do l = 0, p @@ -1552,14 +1495,13 @@ contains end subroutine s_compute_advection_source_term subroutine s_compute_additional_physics_rhs(idir, q_prim_vf, rhs_vf, flux_src_n, & - dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf, pres_visc_stress) + dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf) integer, intent(in) :: idir type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf type(scalar_field), 
dimension(sys_size), intent(inout) :: rhs_vf type(scalar_field), dimension(sys_size), intent(in) :: flux_src_n type(scalar_field), dimension(sys_size), intent(in) :: dq_prim_dx_vf, dq_prim_dy_vf, dq_prim_dz_vf - type(scalar_field), dimension(momxb:momxe), optional, intent(inout) :: pres_visc_stress integer :: i, j, k, l @@ -1595,23 +1537,6 @@ contains end do end do - ! particle momentum exchange, viscous stress tensor, x-dir - if ((compute_CD .or. volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - !$acc loop seq - do l = momxb, momxe - pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dx(i) * & - (flux_src_n(l)%sf(i-1, j, k) - & - flux_src_n(l)%sf(i, j, k)) - end do - end do - end do - end do - end if - elseif (idir == 2) then ! y-direction if (surface_tension) then @@ -1694,23 +1619,6 @@ contains end do end if - ! particle momentum exchange, viscous stress tensor, y-dir - if ((compute_CD .or. volume_filtering_momentum_eqn) .and. present(pres_visc_stress)) then - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - !$acc loop seq - do l = momxb, momxe - pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dy(j) * & - (flux_src_n(l)%sf(i, j-1, k) - & - flux_src_n(l)%sf(i, j, k)) - end do - end do - end do - end do - end if - ! Applying the geometrical viscous Riemann source fluxes calculated as average ! of values at cell boundaries if (cyl_coord) then @@ -1796,23 +1704,6 @@ contains end do end do - ! particle momentum exchange, viscous stress tensor, z-dir - if ((compute_CD .or. volume_filtering_momentum_eqn) .and. 
present(pres_visc_stress)) then - !$acc parallel loop collapse(3) gang vector default(present) - do k = 0, p - do j = 0, n - do i = 0, m - !$acc loop seq - do l = momxb, momxe - pres_visc_stress(l)%sf(i, j, k) = pres_visc_stress(l)%sf(i, j, k) + 1._wp/dz(k) * & - (flux_src_n(l)%sf(i, j, k-1) - & - flux_src_n(l)%sf(i, j, k)) - end do - end do - end do - end do - end if - if (grid_geometry == 3) then !$acc parallel loop collapse(3) gang vector default(present) do l = 0, p diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index c34bd05321..5ac4e4dad6 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -189,9 +189,9 @@ contains rkck_adap_dt, rkck_tolerance, & hyperelasticity, R0ref, num_bc_patches, Bx0, powell, & cont_damage, tau_star, cont_damage_s, alpha_bar, & - periodic_ibs, compute_CD, mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref, & + periodic_ibs, compute_CD, u_inf_ref, rho_inf_ref, T_inf_ref, & periodic_forcing, volume_filtering_momentum_eqn, store_levelset, & - slab_domain_decomposition, compute_autocorrelation + slab_domain_decomposition, compute_autocorrelation, t_step_stat_start ! Checking that an input file has been provided by the user. If it ! has, then the input file is read in, otherwise, simulation exits. @@ -1346,6 +1346,29 @@ contains ! Volume filter flow variables, compute unclosed terms and their statistics if (volume_filtering_momentum_eqn) then call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf) + + if (t_step > t_step_stat_start) then + call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) + + ! write(100, *) mag_reynolds_stress%sf(10, 10, 10) + ! write(101, *) stat_reynolds_stress(2)%sf(10, 10, 10), stat_reynolds_stress(3)%sf(10, 10, 10), stat_reynolds_stress(4)%sf(10, 10, 10) + end if + + ! TEMPORARY, for v+v + ! if (t_step == 1) then + ! 
open(unit=100, file='dat_reynolds_stress.txt', status='replace', action='write') + ! open(unit=101, file='stat_reynolds_stress.txt', status='replace', action='write') + ! end if + ! if (t_step == 999) then + ! close(100) + ! close(101) + ! end if + + end if + + if (periodic_forcing) then + call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) + call s_compute_periodic_forcing(q_cons_ts(1)%vf) end if ! Time-stepping loop controls @@ -1585,10 +1608,11 @@ contains if (mhd .and. powell) call s_initialize_mhd_powell_module call s_initialize_particle_forces_module() - call s_initialize_additional_forcing_module() - if (volume_filtering_momentum_eqn) call s_initialize_fftw_explicit_filter_module() - - call s_initialize_statistics_module() + if (periodic_forcing) call s_initialize_additional_forcing_module() + if (volume_filtering_momentum_eqn) then + call s_initialize_fftw_explicit_filter_module() + call s_initialize_statistics_module() + end if end subroutine s_initialize_modules @@ -1702,7 +1726,7 @@ contains !$acc update device(ib_markers%sf) end if - !$acc update device(mu_visc, u_inf_ref, rho_inf_ref, T_inf_ref) + !$acc update device(u_inf_ref, rho_inf_ref, T_inf_ref) end subroutine s_initialize_gpu_vars diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 9fdbb519e0..5132efbb23 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -87,14 +87,8 @@ module m_time_steppers integer, private :: num_ts !< !! 
Number of time stages in the time-stepping scheme - type(scalar_field), allocatable, dimension(:) :: stat_reynolds_stress - type(scalar_field), allocatable, dimension(:) :: stat_eff_visc - type(scalar_field), allocatable, dimension(:) :: stat_int_mom_exch - !$acc declare create(q_cons_ts, q_prim_vf, q_T_sf, rhs_vf, rhs_ts_rkck, q_prim_ts, rhs_mv, rhs_pb, max_dt) - !$acc declare create(stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) - contains !> The computation of parameters, the allocation of memory, @@ -369,32 +363,6 @@ contains end do end do - if (compute_CD .or. volume_filtering_momentum_eqn) then - @:ALLOCATE(pres_visc_stress(momxb:momxe)) - do i = momxb, momxe - @:ALLOCATE(pres_visc_stress(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(pres_visc_stress(i)) - end do - end if - - @:ALLOCATE(stat_reynolds_stress(2:4)) - do i = 2, 4 - @:ALLOCATE(stat_reynolds_stress(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(stat_reynolds_stress(i)) - end do - @:ALLOCATE(stat_eff_visc(2:4)) - do i = 2, 4 - @:ALLOCATE(stat_eff_visc(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(stat_eff_visc(i)) - end do - @:ALLOCATE(stat_int_mom_exch(2:4)) - do i = 2, 4 - @:ALLOCATE(stat_int_mom_exch(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(stat_int_mom_exch(i)) - end do - end subroutine s_initialize_time_steppers_module !> 1st order TVD RK time-stepping algorithm @@ -712,45 +680,7 @@ contains call nvtxStartRange("TIMESTEP") end if - if (periodic_forcing) then - call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) - call s_compute_periodic_forcing(q_cons_ts(1)%vf) - end if - - call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg, pres_visc_stress) - - ! if (volume_filtering_momentum_eqn) then - ! call s_apply_fftw_filter_cons(q_cons_ts(1)%vf, q_cons_filtered) - ! call s_setup_terms_filtering(q_cons_ts(1)%vf, reynolds_stress, eff_visc) - ! 
call s_apply_fftw_filter_tensor(reynolds_stress, eff_visc, q_cons_filtered, pres_visc_stress, int_mom_exch) - ! call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) - ! call s_compute_eff_visc(q_cons_filtered, eff_visc, mag_eff_visc) - ! call s_compute_interphase_momentum_exchange_term(int_mom_exch, mag_int_mom_exch) - ! end if - - - ! call s_autocorrelation_function(t_step+1, q_cons_ts(1)%vf) - ! if (t_step > 10) then - ! n_step = t_step - 10 - ! call s_compute_s_order_statistics(mag_reynolds_stress, n_step, stat_reynolds_stress, 1) - ! call s_compute_s_order_statistics(mag_eff_visc, n_step, stat_eff_visc, 2) - ! call s_compute_s_order_statistics(mag_int_mom_exch, n_step, stat_int_mom_exch, 3) - ! end if - - - ! stat_reynolds_stress(2)%sf(0:m, 0:n, 0:p) = q_cons_filtered(6)%sf(0:m, 0:n, 0:p) - ! stat_reynolds_stress(3)%sf(0:m, 0:n, 0:p) = mag_reynolds_stress%sf(0:m, 0:n, 0:p) - ! stat_reynolds_stress(4)%sf(0:m, 0:n, 0:p) = mag_eff_visc%sf(0:m, 0:n, 0:p) - ! stat_eff_visc(2)%sf(0:m, 0:n, 0:p) = mag_int_mom_exch%sf(0:m, 0:n, 0:p) - - - if (compute_CD) then - call s_compute_drag_coefficient(pres_visc_stress) - end if - - if (periodic_forcing) then - call s_add_periodic_forcing(rhs_vf) - end if + call s_compute_rhs(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(1)%sf, rhs_pb, mv_ts(1)%sf, rhs_mv, t_step, time_avg) if (run_time_info) then call s_write_run_time_information(q_prim_vf, t_step) @@ -841,10 +771,6 @@ contains call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg) - if (periodic_forcing) then - call s_add_periodic_forcing(rhs_vf) - end if - if (bubbles_lagrange) then call s_compute_EL_coupled_solver(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, stage=2) call s_update_lagrange_tdv_rk(stage=2) @@ -921,10 +847,6 @@ contains ! 
Stage 3 of 3 call s_compute_rhs(q_cons_ts(2)%vf, q_T_sf, q_prim_vf, bc_type, rhs_vf, pb_ts(2)%sf, rhs_pb, mv_ts(2)%sf, rhs_mv, t_step, time_avg) - if (periodic_forcing) then - call s_add_periodic_forcing(rhs_vf) - end if - if (bubbles_lagrange) then call s_compute_EL_coupled_solver(q_cons_ts(2)%vf, q_prim_vf, rhs_vf, stage=3) call s_update_lagrange_tdv_rk(stage=3) @@ -1416,26 +1338,6 @@ contains @:DEALLOCATE(rhs_vf) end if - if (compute_CD .or. volume_filtering_momentum_eqn) then - do i = momxb, momxe - @:DEALLOCATE(pres_visc_stress(i)%sf) - end do - @:DEALLOCATE(pres_visc_stress) - end if - - do i = 2, 4 - @:DEALLOCATE(stat_reynolds_stress(i)%sf) - end do - @:DEALLOCATE(stat_reynolds_stress) - do i = 2, 4 - @:DEALLOCATE(stat_eff_visc(i)%sf) - end do - @:DEALLOCATE(stat_eff_visc) - do i = 2, 4 - @:DEALLOCATE(stat_int_mom_exch(i)%sf) - end do - @:DEALLOCATE(stat_int_mom_exch) - ! Writing the footer of and closing the run-time information file if (proc_rank == 0 .and. run_time_info) then call s_close_run_time_information_file() diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 6b1e981bfc..fa44071328 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -14,6 +14,8 @@ module m_volume_filtering use m_boundary_common + use m_nvtx + #ifdef MFC_MPI use mpi !< Message passing interface (MPI) module #endif @@ -26,8 +28,9 @@ module m_volume_filtering private; public :: s_initialize_fftw_explicit_filter_module, & s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & - s_finalize_fftw_explicit_filter_module, & - s_apply_fftw_filter_cons, s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield, & + s_initialize_filtered_fluid_indicator_function, s_finalize_fftw_explicit_filter_module, & + s_apply_fftw_filter_cons, s_volume_filter_momentum_eqn, s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield, & + s_compute_viscous_stress_tensor, s_compute_stress_tensor, 
s_compute_divergence_stress_tensor, & s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity, s_compute_interphase_momentum_exchange @@ -39,24 +42,34 @@ module m_volume_filtering ! fluid indicator function (1 = fluid, 0 = otherwise) type(scalar_field), public :: fluid_indicator_function - type(scalar_field) :: filtered_fluid_indicator_function + type(scalar_field), public :: filtered_fluid_indicator_function ! volume filtered conservative variables type(scalar_field), allocatable, dimension(:) :: q_cons_filtered - ! unclosed terms in momentum eqn - type(scalar_field), allocatable, dimension(:) :: pres_visc_stress + ! viscous and pressure+viscous stress tensors + type(vector_field), allocatable, dimension(:) :: visc_stress + type(vector_field), allocatable, dimension(:) :: pres_visc_stress + + ! divergence of stress tensor + type(scalar_field), allocatable, dimension(:) :: div_pres_visc_stress + + ! unclosed terms in volume filtered momentum equation type(vector_field), allocatable, dimension(:) :: reynolds_stress type(vector_field), allocatable, dimension(:) :: eff_visc type(scalar_field), allocatable, dimension(:) :: int_mom_exch - ! magnitude of unclosed terms in momentum eqn - type(scalar_field) :: mag_reynolds_stress - type(scalar_field) :: mag_eff_visc - type(scalar_field) :: mag_int_mom_exch + ! 
magnitude of unclosed terms in momentum equation + type(scalar_field), public :: mag_reynolds_stress + type(scalar_field), public :: mag_eff_visc + type(scalar_field), public :: mag_int_mom_exch + + real(wp), allocatable, dimension(:, :) :: Res !$acc declare create(fluid_indicator_function, filtered_fluid_indicator_function, q_cons_filtered) - !$acc declare create(pres_visc_stress, reynolds_stress, eff_visc, int_mom_exch, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) + !$acc declare create(visc_stress, pres_visc_stress, div_pres_visc_stress) + !$acc declare create(reynolds_stress, eff_visc, int_mom_exch, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) + !$acc declare create(Res) #if defined(MFC_OpenACC) ! GPU plans @@ -98,12 +111,6 @@ contains subroutine s_initialize_fftw_explicit_filter_module integer :: i, j, k integer :: size_n(1), inembed(1), onembed(1) - - @:ALLOCATE(fluid_indicator_function%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(fluid_indicator_function) - - @:ALLOCATE(filtered_fluid_indicator_function%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(filtered_fluid_indicator_function) @:ALLOCATE(q_cons_filtered(1:sys_size)) do i = 1, sys_size @@ -113,18 +120,52 @@ contains @:ACC_SETUP_SFs(q_cons_filtered(i)) end do + @:ALLOCATE(visc_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(visc_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(visc_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(visc_stress(i)) + end do + + @:ALLOCATE(pres_visc_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(pres_visc_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(pres_visc_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(pres_visc_stress(i)) + end do + + @:ALLOCATE(div_pres_visc_stress(1:num_dims)) 
+ do i = 1, num_dims + @:ALLOCATE(div_pres_visc_stress(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(div_pres_visc_stress(i)) + end do + @:ALLOCATE(reynolds_stress(1:num_dims)) - do i = 1, num_dims - @:ALLOCATE(reynolds_stress(i)%vf(1:num_dims)) - end do - do i = 1, num_dims - do j = 1, num_dims - @:ALLOCATE(reynolds_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - end do - @:ACC_SETUP_VFs(reynolds_stress(i)) + do i = 1, num_dims + @:ALLOCATE(reynolds_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(reynolds_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) end do + @:ACC_SETUP_VFs(reynolds_stress(i)) + end do @:ALLOCATE(eff_visc(1:num_dims)) do i = 1, num_dims @@ -162,6 +203,19 @@ contains idwbuff(3)%beg:idwbuff(3)%end)) @:ACC_SETUP_SFs(mag_int_mom_exch) + if (viscous) then + @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) + end if + + if (viscous) then + do i = 1, 2 + do j = 1, Re_size(i) + Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) + end do + end do + !$acc update device(Res, Re_idx, Re_size) + end if + !< global sizes Nx = m_glb + 1 Ny = n_glb + 1 @@ -404,6 +458,9 @@ contains subroutine s_initialize_fluid_indicator_function integer :: i, j, k + @:ALLOCATE(fluid_indicator_function%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(fluid_indicator_function) + ! define fluid indicator function !$acc parallel loop collapse(3) gang vector default(present) do i = 0, m @@ -417,6 +474,14 @@ contains end do end do end do + + end subroutine s_initialize_fluid_indicator_function + + subroutine s_initialize_filtered_fluid_indicator_function + integer :: i, j, k + + @:ALLOCATE(filtered_fluid_indicator_function%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(filtered_fluid_indicator_function) ! 
filter fluid indicator function !$acc parallel loop collapse(3) gang vector default(present) @@ -450,32 +515,24 @@ contains end do end do - end subroutine s_initialize_fluid_indicator_function - - !< apply the gaussian filter to the conservative variables and compute their filtered components - subroutine s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered - - integer :: l - - do l = contxb, momxe - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(l), q_cons_filtered(l)) - end do - - end subroutine s_apply_fftw_filter_cons + end subroutine s_initialize_filtered_fluid_indicator_function !< calculate the unclosed terms present in the volume filtered momentum equation subroutine s_volume_filter_momentum_eqn(q_cons_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: i, j, k + call nvtxStartRange("FILTER-CONSERVATIVE-VARIABLES") call s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) - call s_setup_terms_filtering(q_cons_vf, reynolds_stress, eff_visc) - call s_apply_fftw_filter_tensor(reynolds_stress, eff_visc, q_cons_filtered, pres_visc_stress, int_mom_exch) + call nvtxEndRange + + call nvtxStartRange("COMPUTE-MOMENTUM-UNCLOSED-TERMS") + call s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) + call s_apply_fftw_filter_tensor(reynolds_stress, visc_stress, eff_visc, div_pres_visc_stress, int_mom_exch) call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) - call s_compute_effective_viscosity(q_cons_filtered, eff_visc, mag_eff_visc) + call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, mag_eff_visc) call s_compute_interphase_momentum_exchange(int_mom_exch, mag_int_mom_exch) + call nvtxEndRange end subroutine 
s_volume_filter_momentum_eqn @@ -544,372 +601,196 @@ contains end subroutine s_apply_fftw_filter_scalarfield + !< apply the gaussian filter to the conservative variables and compute their filtered components + subroutine s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered + + integer :: i + + do i = contxb, momxe + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) + end do + + end subroutine s_apply_fftw_filter_cons + !< apply the gaussian filter to the requisite tensors to compute unclosed terms of interest - subroutine s_apply_fftw_filter_tensor(reynolds_stress, eff_visc, q_cons_filtered, pres_visc_stress, int_mom_exch) + subroutine s_apply_fftw_filter_tensor(reynolds_stress, visc_stress, eff_visc, div_pres_visc_stress, int_mom_exch) type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered - type(scalar_field), dimension(momxb:momxe), intent(inout) :: pres_visc_stress + type(scalar_field), dimension(1:num_dims), intent(inout) :: div_pres_visc_stress type(scalar_field), dimension(1:num_dims), intent(inout) :: int_mom_exch - integer :: i, j, k, l, q + integer :: i, j ! pseudo turbulent reynolds stress - do l = 1, num_dims - do q = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(l)%vf(q)) + do i = 1, num_dims + do j = 1, num_dims + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(i)%vf(j)) end do end do ! 
effective viscosity - do l = 1, num_dims - do q = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., eff_visc(l)%vf(q)) + do i = 1, num_dims + do j = 1, num_dims + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., visc_stress(i)%vf(j), eff_visc(i)%vf(j)) end do end do ! interphase momentum exchange - do l = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .false., pres_visc_stress(momxb-1+l), int_mom_exch(l)) + do i = 1, num_dims + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .false., div_pres_visc_stress(i), int_mom_exch(i)) end do end subroutine s_apply_fftw_filter_tensor - ! compute pressure and viscous stress tensors - subroutine s_compute_stress_tensor(q_cons_vf) + ! compute viscous stress tensor + subroutine s_compute_viscous_stress_tensor(visc_stress, q_cons_vf) + type(vector_field), dimension(num_dims), intent(inout) :: visc_stress type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - real(wp) :: dudx, dudy, dudz, dvdx, dvdy, dvdz, dqdx, dwdy, dwdz ! spatial velocity derivatives + real(wp) :: dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz ! spatial velocity derivatives integer :: i, j, k - !$acc parallel loop collapse(3) gang vector default(present) + !$acc parallel loop collapse(3) gang vector default(present) private(dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz) do i = 0, m do j = 0, n do k = 0, p - ! local to each process + ! 
velocity gradients, local to each process dudx = ( q_cons_vf(2)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(2)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) dudy = ( q_cons_vf(2)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(2)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) - dudz = ( q_cons_vf(2)%vf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + dudz = ( q_cons_vf(2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) dvdx = ( q_cons_vf(3)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(3)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) dvdy = ( q_cons_vf(3)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(3)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) - dvdz = ( q_cons_vf(3)%vf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(3)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + dvdz = ( q_cons_vf(3)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(3)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) dwdx = ( q_cons_vf(4)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(4)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) dwdy = ( q_cons_vf(4)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(4)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) - dwdz = ( q_cons_vf(4)%vf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(4)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) - - ! 
viscous stress tensor, tau(row, column) - tau(1)%vf(1) = mu * (4._wp/3._wp * dudx - 2._wp/3._wp * (dvdy + dwdz)) - tau(1)%vf(2) = mu * (dudy + dvdx) - tau(1)%vf(3) = mu * (dudz + dwdx) - tau(2)%vf(1) = mu * (dvdx + dudy) - tau(2)%vf(2) = mu * (4._wp/3._wp * dvdy - 2._wp/3._wp * (dudx + dwdz)) - tau(2)%vf(3) = mu * (dvdz + dwdy) - tau(3)%vf(1) = mu * (dwdx + dudz) - tau(3)%vf(2) = mu * (dwdy + dvdz) - tau(3)%vf(3) = mu * (4._wp/3._wp * dwdz - 2._wp/3._wp * (dudx + dvdy)) - - + dwdz = ( q_cons_vf(4)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(4)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + ! viscous stress tensor, visc_stress(row, column) + visc_stress(1)%vf(1)%sf(i, j, k) = (4._wp/3._wp * dudx - 2._wp/3._wp * (dvdy + dwdz)) / Res(1, 1) + visc_stress(1)%vf(2)%sf(i, j, k) = (dudy + dvdx) / Res(1, 1) + visc_stress(1)%vf(3)%sf(i, j, k) = (dudz + dwdx) / Res(1, 1) + visc_stress(2)%vf(1)%sf(i, j, k) = (dvdx + dudy) / Res(1, 1) + visc_stress(2)%vf(2)%sf(i, j, k) = (4._wp/3._wp * dvdy - 2._wp/3._wp * (dudx + dwdz)) / Res(1, 1) + visc_stress(2)%vf(3)%sf(i, j, k) = (dvdz + dwdy) / Res(1, 1) + visc_stress(3)%vf(1)%sf(i, j, k) = (dwdx + dudz) / Res(1, 1) + visc_stress(3)%vf(2)%sf(i, j, k) = (dwdy + dvdz) / Res(1, 1) + visc_stress(3)%vf(3)%sf(i, j, k) = (4._wp/3._wp * dwdz - 2._wp/3._wp * (dudx + dvdy)) / Res(1, 1) end do end do end do - end subroutine s_compute_stress_tensor - - !< transpose domain from z-slabs to y-slabs on each processor - subroutine s_mpi_transpose_slabZ2Y - complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) - integer :: dest_rank, src_rank + end subroutine s_compute_viscous_stress_tensor + + subroutine s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf) + type(vector_field), dimension(num_dims), intent(inout) :: pres_visc_stress + type(vector_field), dimension(num_dims), intent(in) :: visc_stress + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + real(wp) :: pressure integer :: 
i, j, k - allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs)) - allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs)) - - !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf) - do dest_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slabz(i, j+dest_rank*Nyloc, k) - end do - end do - end do - end do - - call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) - - !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf) - do src_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - data_cmplx_slaby(i, j, k+src_rank*Nzloc) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) - end do - end do + !$acc parallel loop collapse(3) gang vector default(present) private(pressure) + do i = 0, m + do j = 0, n + do k = 0, p + pressure = (q_cons_vf(E_idx)%sf(i, j, k) - 0.5_wp * (q_cons_vf(momxb)%sf(i, j, k)**2 + q_cons_vf(momxb+1)%sf(i, j, k)**2 + q_cons_vf(momxb+2)%sf(i, j, k)**2) & + / q_cons_vf(contxb)%sf(i, j, k) - pi_infs(1) - qvs(1)) / (gammas(1)) + + pres_visc_stress(1)%vf(1)%sf(i, j, k) = pressure - visc_stress(1)%vf(1)%sf(i, j, k) + pres_visc_stress(1)%vf(2)%sf(i, j, k) = - visc_stress(1)%vf(2)%sf(i, j, k) + pres_visc_stress(1)%vf(3)%sf(i, j, k) = - visc_stress(1)%vf(3)%sf(i, j, k) + pres_visc_stress(2)%vf(1)%sf(i, j, k) = - visc_stress(2)%vf(1)%sf(i, j, k) + pres_visc_stress(2)%vf(2)%sf(i, j, k) = pressure - visc_stress(2)%vf(2)%sf(i, j, k) + pres_visc_stress(2)%vf(3)%sf(i, j, k) = - visc_stress(2)%vf(3)%sf(i, j, k) + pres_visc_stress(3)%vf(1)%sf(i, j, k) = - visc_stress(3)%vf(1)%sf(i, j, k) + pres_visc_stress(3)%vf(2)%sf(i, j, k) = - visc_stress(3)%vf(2)%sf(i, j, k) + pres_visc_stress(3)%vf(3)%sf(i, j, k) = pressure - visc_stress(3)%vf(3)%sf(i, j, k) + end do end do - end do + end do - deallocate(sendbuf, recvbuf) - end 
subroutine s_mpi_transpose_slabZ2Y + end subroutine s_compute_stress_tensor - !< transpose domain from y-slabs to z-slabs on each processor - subroutine s_mpi_transpose_slabY2Z - complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) - integer :: dest_rank, src_rank + !< compute the divergence of the pressure-viscous stress tensor + subroutine s_compute_divergence_stress_tensor(div_stress_tensor, stress_tensor) + type(scalar_field), dimension(num_dims), intent(inout) :: div_stress_tensor + type(vector_field), dimension(num_dims), intent(in) :: stress_tensor integer :: i, j, k - allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs)) - allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs)) + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + div_stress_tensor(1)%sf(i, j, k) = (stress_tensor(1)%vf(1)%sf(i+1, j, k) - stress_tensor(1)%vf(1)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & + + (stress_tensor(2)%vf(1)%sf(i, j+1, k) - stress_tensor(2)%vf(1)%sf(i, j-1, k)) / (dy(j-1) + dy(j+1)) & + + (stress_tensor(3)%vf(1)%sf(i, j, k+1) - stress_tensor(3)%vf(1)%sf(i, j, k-1)) / (dz(k-1) + dz(k+1)) - !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf) - do dest_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slaby(i, j, k+dest_rank*Nzloc) - end do + div_stress_tensor(2)%sf(i, j, k) = (stress_tensor(1)%vf(2)%sf(i+1, j, k) - stress_tensor(1)%vf(2)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & + + (stress_tensor(2)%vf(2)%sf(i, j+1, k) - stress_tensor(2)%vf(2)%sf(i, j-1, k)) / (dy(j-1) + dy(j+1)) & + + (stress_tensor(3)%vf(2)%sf(i, j, k+1) - stress_tensor(3)%vf(2)%sf(i, j, k-1)) / (dz(k-1) + dz(k+1)) + + div_stress_tensor(3)%sf(i, j, k) = (stress_tensor(1)%vf(3)%sf(i+1, j, k) - stress_tensor(1)%vf(3)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & + + (stress_tensor(2)%vf(3)%sf(i, j+1, k) - stress_tensor(2)%vf(3)%sf(i, j-1, k)) / 
(dy(j-1) + dy(j+1)) & + + (stress_tensor(3)%vf(3)%sf(i, j, k+1) - stress_tensor(3)%vf(3)%sf(i, j ,k-1)) / (dz(k-1) + dz(k+1)) end do end do end do - call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + end subroutine s_compute_divergence_stress_tensor - !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf) - do src_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - data_cmplx_slabz(i, j+src_rank*Nyloc, k) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) - end do - end do - end do - end do - - deallocate(sendbuf, recvbuf) - end subroutine s_mpi_transpose_slabY2Z + !< setup for calculation of unclosed terms in volume filtered momentum eqn + subroutine s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: pres_visc_stress + type(scalar_field), dimension(1:num_dims), intent(inout) :: div_pres_visc_stress - !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d - subroutine s_mpi_FFT_fwd - integer :: i, j, k + integer :: i, j, k, l, q - ! 3D z-slab -> 1D x, y, z + ! pseudo turbulent reynolds stress setup !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, Nx - do j = 1, Ny - do k = 1, Nzloc - data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) - end do + do i = 0, m + do j = 0, n + do k = 0, p + !$acc loop seq + do l = 1, num_dims + !$acc loop seq + do q = 1, num_dims + reynolds_stress(l)%vf(q)%sf(i, j, k) = (q_cons_vf(momxb-1+l)%sf(i, j, k) * q_cons_vf(momxb-1+q)%sf(i, j, k)) / q_cons_vf(1)%sf(i, j, k) ! 
(rho*u x rho*u)/rho = rho*(u x u) + end do + end do + end do end do end do - ! X FFT -#if defined(MFC_OpenACC) - ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) -#else - call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) -#endif - - ! 1D x, y, z -> 1D y, x, z (CMPLX) - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) - end do - end do + ! set density and momentum buffers +#ifdef MFC_MPI + do i = 1, momxe + call s_populate_scalarfield_buffers(q_cons_vf(i)) end do - - ! Y FFT -#if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) #else - call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) -#endif - - ! 1D y, x, z -> 3D z-slab - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_slabz(i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) - end do - end do - end do + do i = 1, momxe + q_cons_vf(i)%sf(-buff_size:-1, :, :) = q_cons_vf(i)%sf(m-buff_size+1:m, :, :) + q_cons_vf(i)%sf(m+1:m+buff_size, :, :) = q_cons_vf(i)%sf(0:buff_size-1, :, :) - ! transpose z-slab to y-slab - call s_mpi_transpose_slabZ2Y + q_cons_vf(i)%sf(:, -buff_size:-1, :) = q_cons_vf(i)%sf(:, n-buff_size+1:n, :) + q_cons_vf(i)%sf(:, n+1:n+buff_size, :) = q_cons_vf(i)%sf(:, 0:buff_size-1, :) - ! 3D y-slab -> 1D z, x, y - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Nyloc - do k = 1, Nz - data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby(i, j, k) - end do - end do + q_cons_vf(i)%sf(:, :, -buff_size:-1) = q_cons_vf(i)%sf(:, :, p-buff_size+1:p) + q_cons_vf(i)%sf(:, :, p+1:p+buff_size) = q_cons_vf(i)%sf(:, :, 0:buff_size-1) end do - - ! 
Z FFT -#if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) -#else - call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) #endif + + ! effective viscosity setup, return viscous stress tensor + call s_compute_viscous_stress_tensor(visc_stress, q_cons_vf) - ! return data_cmplx_out1d: 1D z, x, y - end subroutine s_mpi_FFT_fwd + call s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf) - !< compute inverse FFT, input: data_cmplx_out1d, output: data_real_3D_slabz - subroutine s_mpi_FFT_bwd - integer :: i, j, k - - ! Z inv FFT -#if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) -#else - call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) -#endif - - ! 1D z, x, y -> 3D y-slab - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Nyloc - do k = 1, Nz - data_cmplx_slaby(i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) - end do - end do - end do - - ! transpose y-slab to z-slab - call s_mpi_transpose_slabY2Z - - ! 3D z-slab -> 1D y, x, z - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz(i, j, k) - end do - end do - end do - - ! Y inv FFT -#if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) -#else - call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) -#endif - - ! 1D y, x, z -> 1D x, y, z - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) - end do - end do - end do - - ! 
X inv FFT -#if defined(MFC_OpenACC) - ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) -#else - call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) -#endif - - ! 1D x, y, z -> 3D z-slab - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, Nx - do j = 1, Ny - do k = 1, Nzloc - data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) - end do - end do - end do - - end subroutine s_mpi_FFT_bwd - - !< setup for calculation of unclosed terms in volume filtered momentum eqn - subroutine s_setup_terms_filtering(q_cons_vf, reynolds_stress, eff_visc) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress - type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc - - integer :: i, j, k, l, q - - ! pseudo turbulent reynolds stress setup - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - !$acc loop seq - do l = 1, num_dims - !$acc loop seq - do q = 1, num_dims - reynolds_stress(l)%vf(q)%sf(i, j, k) = (q_cons_vf(momxb-1+l)%sf(i, j, k) * q_cons_vf(momxb-1+q)%sf(i, j, k)) / q_cons_vf(1)%sf(i, j, k) ! (rho*u x rho*u)/rho = rho*(u x u) - end do - end do - end do - end do - end do - - ! set density and momentum buffers -#ifdef MFC_MPI - do i = 1, momxe - call s_populate_scalarfield_buffers(q_cons_vf(i)) - end do -#else - do i = 1, momxe - q_cons_vf(i)%sf(-buff_size:-1, :, :) = q_cons_vf(i)%sf(m-buff_size+1:m, :, :) - q_cons_vf(i)%sf(m+1:m+buff_size, :, :) = q_cons_vf(i)%sf(0:buff_size-1, :, :) - - q_cons_vf(i)%sf(:, -buff_size:-1, :) = q_cons_vf(i)%sf(:, n-buff_size+1:n, :) - q_cons_vf(i)%sf(:, n+1:n+buff_size, :) = q_cons_vf(i)%sf(:, 0:buff_size-1, :) - - q_cons_vf(i)%sf(:, :, -buff_size:-1) = q_cons_vf(i)%sf(:, :, p-buff_size+1:p) - q_cons_vf(i)%sf(:, :, p+1:p+buff_size) = q_cons_vf(i)%sf(:, :, 0:buff_size-1) - end do -#endif - - ! 
effective viscosity setup - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - eff_visc(1)%vf(1)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - - eff_visc(2)%vf(2)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - - eff_visc(3)%vf(3)%sf(i, j, k) = mu_visc * (2._wp*(q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)) & - - 2._wp/3._wp*((q_cons_vf(momxb)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (q_cons_vf(momxb+1)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+1)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (q_cons_vf(momxb+2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+2)%sf(i, j, 
k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - - eff_visc(1)%vf(2)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_vf(1)%sf(i, j, k) & - + (q_cons_vf(momxb+1)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb+1)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_vf(1)%sf(i, j, k)) - - eff_visc(2)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(2)%sf(i, j, k) - - eff_visc(1)%vf(3)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & - + (q_cons_vf(momxb+2)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(momxb+2)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_vf(1)%sf(i, j, k)) - - eff_visc(3)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(3)%sf(i, j, k) - - eff_visc(2)%vf(3)%sf(i, j, k) = mu_visc * ((q_cons_vf(momxb+1)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(momxb+1)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_vf(1)%sf(i, j, k) & - + (q_cons_vf(momxb+2)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(momxb+2)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_vf(1)%sf(i, j, k)) - - eff_visc(3)%vf(2)%sf(i, j, k) = eff_visc(2)%vf(3)%sf(i, j, k) - end do - end do - end do + call s_compute_divergence_stress_tensor(div_pres_visc_stress, pres_visc_stress) end subroutine s_setup_terms_filtering @@ -929,7 +810,7 @@ contains !$acc loop seq do q = 1, num_dims reynolds_stress(l)%vf(q)%sf(i, j, k) = reynolds_stress(l)%vf(q)%sf(i, j, k) & - - (q_cons_filtered(momxb-1+l)%sf(i, j, k) * q_cons_filtered(momxb-1+q)%sf(i, j, k) / q_cons_filtered(1)%sf(i, j, k)) + - (q_cons_filtered(momxb-1+l)%sf(i, j, k) * q_cons_filtered(momxb-1+q)%sf(i, j, k) / q_cons_filtered(1)%sf(i, j, k)) end do end do end do @@ -999,9 +880,10 @@ contains end subroutine 
s_compute_pseudo_turbulent_reynolds_stress - subroutine s_compute_effective_viscosity(q_cons_filtered, eff_visc, mag_eff_visc) + subroutine s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, mag_eff_visc) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc + type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress type(scalar_field), intent(inout) :: mag_eff_visc real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: div_eff_visc @@ -1025,41 +907,21 @@ contains end do #endif + ! calculate stress tensor with filtered quantities + call s_compute_viscous_stress_tensor(visc_stress, q_cons_filtered) + ! calculate eff_visc !$acc parallel loop collapse(3) gang vector default(present) do i = 0, m do j = 0, n do k = 0, p - eff_visc(1)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(1)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - - eff_visc(2)%vf(2)%sf(i, j, k) = eff_visc(2)%vf(2)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, 
k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - - eff_visc(3)%vf(3)%sf(i, j, k) = eff_visc(3)%vf(3)%sf(i, j, k) - mu_visc * (2._wp*(q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)) & - - 2._wp/3._wp*((q_cons_filtered(momxb)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (q_cons_filtered(momxb+1)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+1)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (q_cons_filtered(momxb+2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k)))) - - eff_visc(1)%vf(2)%sf(i, j, k) = eff_visc(1)%vf(2)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_filtered(1)%sf(i, j, k) & - + (q_cons_filtered(momxb+1)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb+1)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_filtered(1)%sf(i, j, k)) - - eff_visc(2)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(2)%sf(i, j, k) - - eff_visc(1)%vf(3)%sf(i, j, k) = eff_visc(1)%vf(3)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & - + (q_cons_filtered(momxb+2)%sf(i+1, j, 
k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(momxb+2)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k))/(2._wp*dx(i))/q_cons_filtered(1)%sf(i, j, k)) - - eff_visc(3)%vf(1)%sf(i, j, k) = eff_visc(1)%vf(3)%sf(i, j, k) - - eff_visc(2)%vf(3)%sf(i, j, k) = eff_visc(2)%vf(3)%sf(i, j, k) - mu_visc * ((q_cons_filtered(momxb+1)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(momxb+1)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1))/(2._wp*dz(k))/q_cons_filtered(1)%sf(i, j, k) & - + (q_cons_filtered(momxb+2)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(momxb+2)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k))/(2._wp*dy(j))/q_cons_filtered(1)%sf(i, j, k)) - - eff_visc(3)%vf(2)%sf(i, j, k) = eff_visc(2)%vf(3)%sf(i, j, k) - + !$acc loop seq + do l = 1, num_dims + !$acc loop seq + do q = 1, num_dims + eff_visc(l)%vf(q)%sf(i, j, k) = eff_visc(l)%vf(q)%sf(i, j, k) - visc_stress(l)%vf(q)%sf(i, j, k) + end do + end do end do end do end do @@ -1109,9 +971,9 @@ contains !$acc loop seq do l = 1, num_dims div_eff_visc(l, i, j, k) = (eff_visc(l)%vf(1)%sf(i+1, j, k) - eff_visc(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (eff_visc(l)%vf(2)%sf(i, j+1, k) - eff_visc(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (eff_visc(l)%vf(3)%sf(i, j, k+1) - eff_visc(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) - end do + + (eff_visc(l)%vf(2)%sf(i, j+1, k) - eff_visc(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & + + (eff_visc(l)%vf(3)%sf(i, j, k+1) - eff_visc(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) + end do end do end do end do @@ -1138,15 +1000,233 @@ contains do j = 0, n do k = 0, p mag_int_mom_exch%sf(i, j, k) = sqrt(int_mom_exch(1)%sf(i, j, k)**2 & - + int_mom_exch(2)%sf(i, j, k)**2 & - + int_mom_exch(3)%sf(i, j, k)**2) + + int_mom_exch(2)%sf(i, j, k)**2 & + + int_mom_exch(3)%sf(i, j, k)**2) end do end do end do end subroutine s_compute_interphase_momentum_exchange + + !< transpose domain from z-slabs to y-slabs on each processor + subroutine s_mpi_transpose_slabZ2Y + 
complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) + integer :: dest_rank, src_rank + integer :: i, j, k + + allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs)) + allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs)) + + !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf) + do dest_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slabz(i, j+dest_rank*Nyloc, k) + end do + end do + end do + end do + + call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + + !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf) + do src_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + data_cmplx_slaby(i, j, k+src_rank*Nzloc) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + + deallocate(sendbuf, recvbuf) + end subroutine s_mpi_transpose_slabZ2Y + + !< transpose domain from y-slabs to z-slabs on each processor + subroutine s_mpi_transpose_slabY2Z + complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) + integer :: dest_rank, src_rank + integer :: i, j, k + + allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs)) + allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs)) + + !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf) + do dest_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slaby(i, j, k+dest_rank*Nzloc) + end do + end do + end do + end do + + call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + + !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf) + do src_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + 
data_cmplx_slabz(i, j+src_rank*Nyloc, k) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + + deallocate(sendbuf, recvbuf) + end subroutine s_mpi_transpose_slabY2Z + + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d + subroutine s_mpi_FFT_fwd + integer :: i, j, k + + ! 3D z-slab -> 1D x, y, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do + + ! X FFT +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + + ! 1D x, y, z -> 1D y, x, z (CMPLX) + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) + end do + end do + end do + + ! Y FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz(i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + + ! transpose z-slab to y-slab + call s_mpi_transpose_slabZ2Y + + ! 3D y-slab -> 1D z, x, y + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby(i, j, k) + end do + end do + end do + + ! 
Z FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! return data_cmplx_out1d: 1D z, x, y + end subroutine s_mpi_FFT_fwd + + !< compute inverse FFT, input: data_cmplx_out1d, output: data_real_3D_slabz + subroutine s_mpi_FFT_bwd + integer :: i, j, k + + ! Z inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! 1D z, x, y -> 3D y-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby(i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + + ! transpose y-slab to z-slab + call s_mpi_transpose_slabY2Z + + ! 3D z-slab -> 1D y, x, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz(i, j, k) + end do + end do + end do + + ! Y inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 1D x, y, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + + ! X inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + + ! 
1D x, y, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + end do + end do + end do + + end subroutine s_mpi_FFT_bwd + subroutine s_finalize_fftw_explicit_filter_module + integer :: i, j + @:DEALLOCATE(fluid_indicator_function%sf) @:DEALLOCATE(filtered_fluid_indicator_function%sf) @@ -1155,6 +1235,27 @@ contains end do @:DEALLOCATE(q_cons_filtered) + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(visc_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(visc_stress(i)%vf) + end do + @:DEALLOCATE(visc_stress) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(pres_visc_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(pres_visc_stress(i)%vf) + end do + @:DEALLOCATE(pres_visc_stress) + + do i = 1, num_dims + @:DEALLOCATE(div_pres_visc_stress(i)%sf) + end do + @:DEALLOCATE(div_pres_visc_stress) + do i = 1, num_dims do j = 1, num_dims @:DEALLOCATE(reynolds_stress(i)%vf(j)%sf) diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 4c3ae9b62b..80b3e4ecf0 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -56,8 +56,11 @@ program p_main call s_initialize_gpu_vars() call nvtxEndRange - if (volume_filtering_momentum_eqn) call s_initialize_filtering_kernel() call s_initialize_fluid_indicator_function() + if (volume_filtering_momentum_eqn) then + call s_initialize_filtering_kernel() + call s_initialize_filtered_fluid_indicator_function() + end if ! 
Setting the time-step iterator to the first time-step if (cfl_dt) then diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 212d7a6cb6..2b46a4cb05 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -301,13 +301,13 @@ def analytic(self): 'cont_damage_s': ParamType.REAL, 'alpha_bar': ParamType.REAL, 'compute_CD': ParamType.LOG, - 'mu_visc': ParamType.REAL, 'u_inf_ref': ParamType.REAL, 'rho_inf_ref': ParamType.REAL, 'T_inf_ref': ParamType.REAL, 'periodic_forcing': ParamType.LOG, 'volume_filtering_momentum_eqn': ParamType.LOG, 'compute_autocorrelation': ParamType.LOG, + 't_step_stat_start': ParamType.INT, }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', From e004d229ac09a49f4d5e1fed81917c04f51540b2 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Wed, 27 Aug 2025 12:46:55 -0500 Subject: [PATCH 07/30] gpu bug fix with periodic forcing --- runs/3d_1sphere_periodic/case.py | 4 +- runs/3d_drag_test/case.py | 2 +- runs/phi01/case.py | 19 +++- src/common/m_mpi_common.fpp | 20 ++-- src/post_process/m_data_input.f90 | 26 ++--- src/post_process/m_start_up.f90 | 6 +- src/simulation/m_additional_forcing.fpp | 110 ++++++++----------- src/simulation/m_compute_particle_forces.fpp | 10 +- src/simulation/m_compute_statistics.fpp | 23 ++-- src/simulation/m_data_output.fpp | 14 +-- src/simulation/m_global_parameters.fpp | 8 +- src/simulation/m_mpi_proxy.fpp | 6 +- src/simulation/m_start_up.fpp | 79 +++++++------ src/simulation/m_volume_filtering.fpp | 51 +++++++-- toolchain/mfc/run/case_dicts.py | 3 +- toolchain/templates/delta.mako | 2 +- 16 files changed, 204 insertions(+), 179 deletions(-) diff --git a/runs/3d_1sphere_periodic/case.py b/runs/3d_1sphere_periodic/case.py index d8760b7909..05f7efc429 100644 --- a/runs/3d_1sphere_periodic/case.py +++ b/runs/3d_1sphere_periodic/case.py @@ -136,8 +136,8 @@ # new case additions "periodic_forcing": "T", "periodic_ibs": "T", - 
#"compute_CD_vi": "F", - #"compute_CD_si": "F", + #"compute_particle_drag_vi": "F", + #"compute_particle_drag_si": "F", #"volume_filtering_momentum_eqn": "T", "u_inf_ref": v1, diff --git a/runs/3d_drag_test/case.py b/runs/3d_drag_test/case.py index 9a78272a88..00eb6a3c30 100644 --- a/runs/3d_drag_test/case.py +++ b/runs/3d_drag_test/case.py @@ -133,7 +133,7 @@ "fluid_pp(1)%Re(1)": Re, # new case additions - "compute_CD": "T", + "compute_particle_drag": "T", "u_inf_ref": v1, "rho_inf_ref": rho, "T_inf_ref": T, diff --git a/runs/phi01/case.py b/runs/phi01/case.py index 1d5b26d462..3034d52b6d 100644 --- a/runs/phi01/case.py +++ b/runs/phi01/case.py @@ -2,6 +2,15 @@ import math import numpy as np +''' +need to store +full stats of unclosed term tensors (1, 2, 3, 4) - only at end time +stats of flow quantities - only at end time +flow quantities +filtered fluid indicator function +drag force on each particle +''' + Mu = 1.84e-05 gam_a = 1.4 R = 287.0 @@ -25,8 +34,8 @@ #print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow dt = 4.0E-06 -Nt = 10 -t_save = 1 +Nt = 200 +t_save = 10 Nx = 99 Ny = 99 @@ -70,6 +79,7 @@ "t_step_start": 0, "t_step_stop": Nt, # 3000 "t_step_save": t_save, # 10 + "t_step_stat_start": 50, # Simulation Algorithm Parameters # Only one patches are necessary, the air tube "num_patches": 1, @@ -137,13 +147,13 @@ # Fluids Physical Parameters "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) "fluid_pp(1)%pi_inf": 0, - "fluid_pp(1)%Re(1)": Re, + "fluid_pp(1)%Re(1)": 1.0 / mu, # new case additions "periodic_forcing": "T", "periodic_ibs": "T", - "compute_CD": "F", "volume_filtering_momentum_eqn": "T", + "filter_width": 3.0*D/2, "u_inf_ref": v1, "rho_inf_ref": rho, @@ -151,7 +161,6 @@ "store_levelset": "F", "slab_domain_decomposition": "T", - "compute_autocorrelation": "T", } case_dict.update(ib_dict) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index d3dcab1ac7..c352412c75 100644 --- 
a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -174,9 +174,9 @@ contains type(scalar_field), & intent(in), optional :: beta - type(scalar_field), dimension(2:4), intent(in), optional :: stat_reynolds_stress - type(scalar_field), dimension(2:4), intent(in), optional :: stat_eff_visc - type(scalar_field), dimension(2:4), intent(in), optional :: stat_int_mom_exch + type(scalar_field), dimension(1:4), intent(in), optional :: stat_reynolds_stress + type(scalar_field), dimension(1:4), intent(in), optional :: stat_eff_visc + type(scalar_field), dimension(1:4), intent(in), optional :: stat_int_mom_exch integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start @@ -192,7 +192,7 @@ contains if (present(beta)) then alt_sys = sys_size + 1 else if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. present(stat_int_mom_exch)) then - alt_sys = sys_size + 9 + alt_sys = sys_size + 12 else alt_sys = sys_size end if @@ -202,14 +202,14 @@ contains end do if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. 
present(stat_int_mom_exch)) then - do i = sys_size+1, sys_size+3 - MPI_IO_DATA%var(i)%sf => stat_reynolds_stress(i-sys_size+1)%sf(0:m, 0:n, 0:p) + do i = sys_size+1, sys_size+4 + MPI_IO_DATA%var(i)%sf => stat_reynolds_stress(i-sys_size)%sf(0:m, 0:n, 0:p) end do - do i = sys_size+4, sys_size+6 - MPI_IO_DATA%var(i)%sf => stat_eff_visc(i-sys_size-2)%sf(0:m, 0:n, 0:p) + do i = sys_size+5, sys_size+8 + MPI_IO_DATA%var(i)%sf => stat_eff_visc(i-sys_size-4)%sf(0:m, 0:n, 0:p) end do - do i = sys_size+7, sys_size+9 - MPI_IO_DATA%var(i)%sf => stat_int_mom_exch(i-sys_size-5)%sf(0:m, 0:n, 0:p) + do i = sys_size+9, sys_size+12 + MPI_IO_DATA%var(i)%sf => stat_int_mom_exch(i-sys_size-8)%sf(0:m, 0:n, 0:p) end do end if diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 1efc1b97d3..d778eeb7fa 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -1344,7 +1344,7 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(-j, 0:n, 0:p) = & q_particle%sf((m + 1) - j, 0:n, 0:p) else - do i = 2, 4 + do i = 1, 4 stat_reynolds_stress(i)%sf(-j, 0:n, 0:p) = & stat_reynolds_stress(i)%sf((m + 1) - j, 0:n, 0:p) stat_eff_visc(i)%sf(-j, 0:n, 0:p) = & @@ -1375,7 +1375,7 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(m + j, 0:n, 0:p) = & q_particle%sf(j - 1, 0:n, 0:p) else - do i = 2, 4 + do i = 1, 4 stat_reynolds_stress(i)%sf(m + j, 0:n, 0:p) = & stat_reynolds_stress(i)%sf(j - 1, 0:n, 0:p) stat_eff_visc(i)%sf(m + j, 0:n, 0:p) = & @@ -1413,7 +1413,7 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, -j, 0:p) = & q_particle%sf(:, (n + 1) - j, 0:p) else - do i = 2, 4 + do i = 1, 4 stat_reynolds_stress(i)%sf(:, -j, 0:p) = & stat_reynolds_stress(i)%sf(:, (n + 1) - j, 0:p) stat_eff_visc(i)%sf(:, -j, 0:p) = & @@ -1444,7 +1444,7 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, n + j, 0:p) = & 
q_particle%sf(:, j - 1, 0:p) else - do i = 2, 4 + do i = 1, 4 stat_reynolds_stress(i)%sf(:, n + j, 0:p) = & stat_reynolds_stress(i)%sf(:, j - 1, 0:p) stat_eff_visc(i)%sf(:, n + j, 0:p) = & @@ -1482,7 +1482,7 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, :, -j) = & q_particle%sf(:, :, (p + 1) - j) else - do i = 2, 4 + do i = 1, 4 stat_reynolds_stress(i)%sf(:, :, -j) = & stat_reynolds_stress(i)%sf(:, :, (p + 1) - j) stat_eff_visc(i)%sf(:, :, -j) = & @@ -1514,7 +1514,7 @@ subroutine s_populate_filtered_variables_buffer_regions(q_particle) q_particle%sf(:, :, p + j) = & q_particle%sf(:, :, j - 1) else - do i = 2, 4 + do i = 1, 4 stat_reynolds_stress(i)%sf(:, :, p + j) = & stat_reynolds_stress(i)%sf(:, :, j - 1) stat_eff_visc(i)%sf(:, :, p + j) = & @@ -1559,9 +1559,9 @@ subroutine s_initialize_data_input_module allocate (q_prim_vf(1:sys_size)) if (bubbles_lagrange) allocate (q_particle(1)) - if (q_filtered_wrt) allocate (stat_reynolds_stress(2:4)) - if (q_filtered_wrt) allocate (stat_eff_visc(2:4)) - if (q_filtered_wrt) allocate (stat_int_mom_exch(2:4)) + if (q_filtered_wrt) allocate (stat_reynolds_stress(1:4)) + if (q_filtered_wrt) allocate (stat_eff_visc(1:4)) + if (q_filtered_wrt) allocate (stat_int_mom_exch(1:4)) ! Allocating the parts of the conservative and primitive variables ! 
that do require the direct knowledge of the dimensionality of the @@ -1601,7 +1601,7 @@ subroutine s_initialize_data_input_module end if if (q_filtered_wrt) then - do i = 2, 4 + do i = 1, 4 allocate (stat_reynolds_stress(i)%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & -buff_size:p + buff_size)) @@ -1707,15 +1707,15 @@ subroutine s_finalize_data_input_module end if if (q_filtered_wrt) then - do i = 2, 4 + do i = 1, 4 deallocate (stat_reynolds_stress(i)%sf) end do deallocate(stat_reynolds_stress) - do i = 2, 4 + do i = 1, 4 deallocate (stat_eff_visc(i)%sf) end do deallocate(stat_eff_visc) - do i = 2, 4 + do i = 1, 4 deallocate (stat_int_mom_exch(i)%sf) end do deallocate(stat_int_mom_exch) diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index b454764c3e..481181064c 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -328,21 +328,21 @@ subroutine s_save_data(t_step, varname, pres, c, H) ! Adding filtered quantities if (q_filtered_wrt) then ! 
filtered cons vars - do i = 2, 4 + do i = 1, 4 q_sf = stat_reynolds_stress(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) write (varname, '(A,I0)') 'stat_reynolds_stresss', i call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' end do - do i = 2, 4 + do i = 1, 4 q_sf = stat_eff_visc(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) write (varname, '(A,I0)') 'stat_eff_viscs', i call s_write_variable_to_formatted_database_file(varname, t_step) varname(:) = ' ' end do - do i = 2, 4 + do i = 1, 4 q_sf = stat_int_mom_exch(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) write (varname, '(A,I0)') 'stat_int_mom_exchs', i call s_write_variable_to_formatted_database_file(varname, t_step) diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp index cc90cce4ef..c69ab97db1 100644 --- a/src/simulation/m_additional_forcing.fpp +++ b/src/simulation/m_additional_forcing.fpp @@ -17,29 +17,25 @@ module m_additional_forcing s_add_periodic_forcing, s_finalize_additional_forcing_module, & s_compute_phase_average, s_compute_periodic_forcing; - real(wp), allocatable, dimension(:) :: q_bar ! 1:3 rho*u, 4 rho, 5 T type(scalar_field), allocatable, dimension(:) :: q_periodic_force - real(wp), allocatable, dimension(:) :: q_spatial_avg - real(wp), allocatable, dimension(:), public :: q_spatial_avg_glb ! 
1:3 rho*u, 4 rho, 5 T real(wp) :: volfrac_phi integer :: N_x_total_glb + real(wp) :: spatial_rho, spatial_u + real(wp) :: phase_rho, phase_u - !$acc declare create(q_bar, q_periodic_force, q_spatial_avg, q_spatial_avg_glb, volfrac_phi, N_x_total_glb) + !$acc declare create(q_periodic_force, volfrac_phi, N_x_total_glb) + !$acc declare create(spatial_rho, spatial_u, phase_rho, phase_u) contains subroutine s_initialize_additional_forcing_module integer :: i - if (periodic_forcing) then - @:ALLOCATE(q_bar(1:5)) - @:ALLOCATE(q_periodic_force(1:8)) - do i = 1, 8 - @:ALLOCATE(q_periodic_force(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(q_periodic_force(i)) - end do - @:ALLOCATE(q_spatial_avg(1:5)) - @:ALLOCATE(q_spatial_avg_glb(1:5)) - end if + + @:ALLOCATE(q_periodic_force(1:3)) + do i = 1, 3 + @:ALLOCATE(q_periodic_force(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(q_periodic_force(i)) + end do volfrac_phi = num_ibs * 4._wp/3._wp * pi * patch_ib(1)%radius**3 / ((x_domain%end - x_domain%beg)*(y_domain%end - y_domain%beg)*(z_domain%end - z_domain%beg)) !$acc update device(volfrac_phi) @@ -57,60 +53,52 @@ contains do i = 0, m do j = 0, n do k = 0, p - rhs_vf(1)%sf(i, j, k) = rhs_vf(1)%sf(i, j, k) + q_periodic_force(7)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! continuity - rhs_vf(2)%sf(i, j, k) = rhs_vf(2)%sf(i, j, k) + q_periodic_force(1)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! x momentum - rhs_vf(5)%sf(i, j, k) = rhs_vf(5)%sf(i, j, k) + (q_periodic_force(4)%sf(i, j, k) + q_periodic_force(8)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) ! energy + rhs_vf(1)%sf(i, j, k) = rhs_vf(1)%sf(i, j, k) + q_periodic_force(1)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! continuity + rhs_vf(2)%sf(i, j, k) = rhs_vf(2)%sf(i, j, k) + q_periodic_force(2)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! 
x momentum + rhs_vf(5)%sf(i, j, k) = rhs_vf(5)%sf(i, j, k) + q_periodic_force(3)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! energy end do end do end do end subroutine s_add_periodic_forcing + !< compute the space and time average of quantities subroutine s_compute_phase_average(q_cons_vf, t_step) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: t_step + real(wp) :: spatial_rho_glb, spatial_u_glb integer :: i, j, k - !$acc loop seq - do i = 1, 5 - q_spatial_avg(i) = 0._wp - end do + ! zero spatial averages + spatial_rho = 0._wp + spatial_u = 0._wp + !$acc update device(spatial_rho, spatial_u) - ! spatial average - !$acc parallel loop collapse(3) gang vector default(present) reduction(+:q_spatial_avg(:)) + ! compute spatial averages + !$acc parallel loop collapse(3) gang vector default(present) reduction(+:spatial_rho, spatial_u) do i = 0, m do j = 0, n do k = 0, p - q_spatial_avg(4) = q_spatial_avg(4) + q_cons_vf(1)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) - q_spatial_avg(5) = q_spatial_avg(5) + (0.4_wp/287._wp * (q_cons_vf(5)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) & - - 0.5_wp * ((q_cons_vf(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2 & - + (q_cons_vf(3)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2 & - + (q_cons_vf(4)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k))**2))) * fluid_indicator_function%sf(i, j, k) - - q_spatial_avg(1) = q_spatial_avg(1) + (q_cons_vf(2)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) - q_spatial_avg(2) = q_spatial_avg(2) + (q_cons_vf(3)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) - q_spatial_avg(3) = q_spatial_avg(3) + (q_cons_vf(4)%sf(i, j, k)) * fluid_indicator_function%sf(i, j, k) + spatial_rho = spatial_rho + q_cons_vf(1)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! rho + spatial_u = spatial_u + q_cons_vf(2)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! 
u end do end do end do - !$acc update host(q_spatial_avg(:)) + !$acc update host(spatial_rho, spatial_u) - do i = 1, 5 - call s_mpi_allreduce_sum(q_spatial_avg(i), q_spatial_avg_glb(i)) - end do + ! reduction sum across entire domain + call s_mpi_allreduce_sum(spatial_rho, spatial_rho_glb) + call s_mpi_allreduce_sum(spatial_u, spatial_u_glb) - !$acc update device(q_spatial_avg_glb(:)) + ! compute phase averages + phase_rho = phase_rho + (spatial_rho_glb / real(N_x_total_glb, wp) - phase_rho) / real(t_step, wp) + phase_u = phase_u + (spatial_u_glb / real(N_x_total_glb, wp) - phase_u) / real(t_step, wp) + !$acc update device(phase_rho, phase_u) - !$acc loop seq - do i = 1, 5 - q_spatial_avg_glb(i) = q_spatial_avg_glb(i) / real(N_x_total_glb, wp) - end do + if (proc_rank == 0) then + print *, t_step, 'rho', phase_rho, 'rho*u', phase_u + end if - ! time average - !$acc loop seq - do i = 1, 5 - q_bar(i) = ( (q_spatial_avg_glb(i) + (t_step - 1._wp)*q_bar(i)) / t_step ) - end do end subroutine s_compute_phase_average !< computes the periodic forcing terms described in Khalloufi and Capecelatro @@ -123,21 +111,14 @@ contains do i = 0, m do j = 0, n do k = 0, p + ! f_rho + q_periodic_force(1)%sf(i, j, k) = (rho_inf_ref - phase_rho/(1._wp - volfrac_phi)) / dt + ! f_u - q_periodic_force(1)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - q_bar(1)/(1._wp - volfrac_phi)) / dt - q_periodic_force(2)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - q_bar(2)/(1._wp - volfrac_phi)) / dt - q_periodic_force(3)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - q_bar(3)/(1._wp - volfrac_phi)) / dt + q_periodic_force(2)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - phase_u/(1._wp - volfrac_phi)) / dt ! 
u*f_u - q_periodic_force(4)%sf(i, j, k) = q_cons_vf(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) * q_periodic_force(1)%sf(i, j, k) - q_periodic_force(5)%sf(i, j, k) = q_cons_vf(3)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) * q_periodic_force(2)%sf(i, j, k) - q_periodic_force(6)%sf(i, j, k) = q_cons_vf(4)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) * q_periodic_force(3)%sf(i, j, k) - - ! f_rho - q_periodic_force(7)%sf(i, j, k) = (rho_inf_ref - q_bar(4)/(1._wp - volfrac_phi)) / dt - - ! f_T - q_periodic_force(8)%sf(i, j, k) = (q_cons_vf(1)%sf(i, j, k) / 1.4_wp) * (T_inf_ref - q_bar(5)/(1._wp - volfrac_phi)) / dt + q_periodic_force(3)%sf(i, j, k) = q_cons_vf(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) * q_periodic_force(2)%sf(i, j, k) end do end do end do @@ -145,15 +126,10 @@ contains subroutine s_finalize_additional_forcing_module integer :: i - if (periodic_forcing) then - @:DEALLOCATE(q_bar) - do i = 1, 8 - @:DEALLOCATE(q_periodic_force(i)%sf) - end do - @:DEALLOCATE(q_periodic_force) - @:DEALLOCATE(q_spatial_avg) - @:DEALLOCATE(q_spatial_avg_glb) - end if + do i = 1, 3 + @:DEALLOCATE(q_periodic_force(i)%sf) + end do + @:DEALLOCATE(q_periodic_force) end subroutine s_finalize_additional_forcing_module end module m_additional_forcing \ No newline at end of file diff --git a/src/simulation/m_compute_particle_forces.fpp b/src/simulation/m_compute_particle_forces.fpp index 8a1ef5f092..9a2dbe8f09 100644 --- a/src/simulation/m_compute_particle_forces.fpp +++ b/src/simulation/m_compute_particle_forces.fpp @@ -9,6 +9,8 @@ module m_compute_particle_forces use m_mpi_proxy + use m_volume_filtering + implicit none private; public :: s_initialize_particle_forces_module, & @@ -21,9 +23,7 @@ module m_compute_particle_forces contains subroutine s_initialize_particle_forces_module - if (compute_CD) then - @:ALLOCATE(FD_calc(0:num_ibs)) - end if + @:ALLOCATE(FD_calc(0:num_ibs)) end subroutine s_initialize_particle_forces_module @@ -63,9 +63,7 @@ contains end subroutine s_compute_drag_coefficient 
subroutine s_finalize_particle_forces_module - if (compute_CD) then - @:DEALLOCATE(FD_calc) - end if + @:DEALLOCATE(FD_calc) end subroutine s_finalize_particle_forces_module diff --git a/src/simulation/m_compute_statistics.fpp b/src/simulation/m_compute_statistics.fpp index 93b8d6502d..9d574c72f7 100644 --- a/src/simulation/m_compute_statistics.fpp +++ b/src/simulation/m_compute_statistics.fpp @@ -9,6 +9,8 @@ module m_compute_statistics use m_additional_forcing + use m_nvtx + implicit none private; public :: s_initialize_statistics_module, s_finalize_statistics_module, & @@ -52,20 +54,20 @@ contains @:ACC_SETUP_SFs(Msn_int_mom_exch(i)) end do - @:ALLOCATE(stat_reynolds_stress(2:4)) - do i = 2, 4 + @:ALLOCATE(stat_reynolds_stress(1:4)) + do i = 1, 4 @:ALLOCATE(stat_reynolds_stress(i)%sf(0:m, 0:n, 0:p)) @:ACC_SETUP_SFs(stat_reynolds_stress(i)) end do - @:ALLOCATE(stat_eff_visc(2:4)) - do i = 2, 4 + @:ALLOCATE(stat_eff_visc(1:4)) + do i = 1, 4 @:ALLOCATE(stat_eff_visc(i)%sf(0:m, 0:n, 0:p)) @:ACC_SETUP_SFs(stat_eff_visc(i)) end do - @:ALLOCATE(stat_int_mom_exch(2:4)) - do i = 2, 4 + @:ALLOCATE(stat_int_mom_exch(1:4)) + do i = 1, 4 @:ALLOCATE(stat_int_mom_exch(i)%sf(0:m, 0:n, 0:p)) @:ACC_SETUP_SFs(stat_int_mom_exch(i)) end do @@ -125,7 +127,7 @@ contains subroutine s_compute_234_order_statistics(ns, Msn, q_stat) type(scalar_field), dimension(1:4), intent(in) :: Msn - type(scalar_field), dimension(2:4), intent(inout) :: q_stat + type(scalar_field), dimension(1:4), intent(inout) :: q_stat real(wp), intent(in) :: ns integer :: i, j, k @@ -134,6 +136,7 @@ contains do i = 0, m do j = 0, n do k = 0, p + q_stat(1)%sf(i, j, k) = Msn(1)%sf(i, j, k) q_stat(2)%sf(i, j, k) = Msn(2)%sf(i, j, k) / (ns - 1._wp) q_stat(3)%sf(i, j, k) = sqrt(ns - 1._wp) / (ns - 2._wp) * ns * Msn(3)%sf(i, j, k) / (Msn(2)%sf(i, j, k)**1.5) q_stat(4)%sf(i, j, k) = (ns - 1._wp) / ((ns - 2._wp) * (ns - 3._wp)) * ((ns + 1._wp) * (ns * Msn(4)%sf(i, j, k) / (Msn(2)%sf(i, j, k)**2) - 3._wp) + 6._wp) @@ -160,17 
+163,17 @@ contains end do @:DEALLOCATE(Msn_int_mom_exch) - do i = 2, 4 + do i = 1, 4 @:DEALLOCATE(stat_reynolds_stress(i)%sf) end do @:DEALLOCATE(stat_reynolds_stress) - do i = 2, 4 + do i = 1, 4 @:DEALLOCATE(stat_eff_visc(i)%sf) end do @:DEALLOCATE(stat_eff_visc) - do i = 2, 4 + do i = 1, 4 @:DEALLOCATE(stat_int_mom_exch(i)%sf) end do @:DEALLOCATE(stat_int_mom_exch) diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index f43cebc798..fd783bef1f 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -94,9 +94,9 @@ contains type(scalar_field), & intent(inout), optional :: beta - type(scalar_field), dimension(2:4), intent(inout), optional :: stat_reynolds_stress - type(scalar_field), dimension(2:4), intent(inout), optional :: stat_eff_visc - type(scalar_field), dimension(2:4), intent(inout), optional :: stat_int_mom_exch + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_reynolds_stress + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_eff_visc + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_int_mom_exch if (.not. 
parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta) @@ -796,9 +796,9 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: t_step type(scalar_field), intent(inout), optional :: beta - type(scalar_field), dimension(2:4), intent(inout), optional :: stat_reynolds_stress - type(scalar_field), dimension(2:4), intent(inout), optional :: stat_eff_visc - type(scalar_field), dimension(2:4), intent(inout), optional :: stat_int_mom_exch + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_reynolds_stress + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_eff_visc + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_int_mom_exch #ifdef MFC_MPI @@ -821,7 +821,7 @@ contains if (present(beta)) then alt_sys = sys_size + 1 else if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. present(stat_int_mom_exch)) then - alt_sys = sys_size + 9 + alt_sys = sys_size + 12 else alt_sys = sys_size end if diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index bcd8c74dec..db2eb1d298 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -502,7 +502,7 @@ module m_global_parameters !> @} logical :: periodic_ibs - logical :: compute_CD + logical :: compute_particle_drag real(wp) :: u_inf_ref !< reference freestream velocity real(wp) :: rho_inf_ref !< reference freestream density real(wp) :: T_inf_ref !< reference freestream temperature @@ -512,8 +512,9 @@ module m_global_parameters logical :: slab_domain_decomposition logical :: compute_autocorrelation integer :: t_step_stat_start + real(wp) :: filter_width - !$acc declare create(u_inf_ref, rho_inf_ref, T_inf_ref) + !$acc declare create(u_inf_ref, rho_inf_ref, T_inf_ref, filter_width) contains @@ -791,7 +792,7 @@ contains #:endif periodic_ibs = .false. - compute_CD = .false. 
+ compute_particle_drag = .false. u_inf_ref = dflt_real rho_inf_ref = dflt_real T_inf_ref = dflt_real @@ -801,6 +802,7 @@ contains slab_domain_decomposition = .false. compute_autocorrelation = .false. t_step_stat_start = dflt_int + filter_width = dflt_real end subroutine s_assign_default_values_to_user_inputs diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 730f5ead50..bb359a4bed 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -75,7 +75,7 @@ contains & 'bc_y%beg', 'bc_y%end', 'bc_z%beg', 'bc_z%end', 'fd_order', & & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & & 'R0_type', 'num_source', 'relax_model', 'num_ibs', 'n_start', & - & 'num_bc_patches'] + & 'num_bc_patches', 't_step_stat_start'] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -92,7 +92,7 @@ contains & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage', & - & 'periodic_ibs', 'compute_CD', 'periodic_forcing', 'volume_filtering_momentum_eqn', & + & 'periodic_ibs', 'compute_particle_drag', 'periodic_forcing', 'volume_filtering_momentum_eqn', & & 'store_levelset', 'slab_domain_decomposition', 'compute_autocorrelation' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -133,7 +133,7 @@ contains & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & & 'tau_star', 'cont_damage_s', 'alpha_bar', 'u_inf_ref', & - & 'rho_inf_ref', 'T_inf_ref', 't_step_stat_start' ] + & 'rho_inf_ref', 'T_inf_ref', 'filter_width' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 5ac4e4dad6..c08a2ce7eb 100644 --- a/src/simulation/m_start_up.fpp +++ 
b/src/simulation/m_start_up.fpp @@ -189,9 +189,10 @@ contains rkck_adap_dt, rkck_tolerance, & hyperelasticity, R0ref, num_bc_patches, Bx0, powell, & cont_damage, tau_star, cont_damage_s, alpha_bar, & - periodic_ibs, compute_CD, u_inf_ref, rho_inf_ref, T_inf_ref, & + periodic_ibs, compute_particle_drag, u_inf_ref, rho_inf_ref, T_inf_ref, & periodic_forcing, volume_filtering_momentum_eqn, store_levelset, & - slab_domain_decomposition, compute_autocorrelation, t_step_stat_start + slab_domain_decomposition, compute_autocorrelation, t_step_stat_start, & + filter_width ! Checking that an input file has been provided by the user. If it ! has, then the input file is read in, otherwise, simulation exits. @@ -1320,6 +1321,42 @@ contains call s_compute_derived_variables(t_step) + ! ! Volume filter flow variables, compute unclosed terms and their statistics + ! if (volume_filtering_momentum_eqn) then + ! if (t_step > t_step_stat_start) then + ! call nvtxStartRange('VOLUME-FILTER-MOMENTUM-EQUATION') + ! call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf) + ! call nvtxEndRange + + ! call nvtxStartRange('COMPUTE-STATISTICS') + ! call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) + ! call nvtxEndRange + + ! ! write(100, *) mag_reynolds_stress%sf(10, 10, 10) + ! ! write(101, *) stat_reynolds_stress(2)%sf(10, 10, 10), stat_reynolds_stress(3)%sf(10, 10, 10), stat_reynolds_stress(4)%sf(10, 10, 10) + ! end if + + ! ! TEMPORARY, for v+v + ! ! if (t_step == 1) then + ! ! open(unit=100, file='dat_reynolds_stress.txt', status='replace', action='write') + ! ! open(unit=101, file='stat_reynolds_stress.txt', status='replace', action='write') + ! ! end if + ! ! if (t_step == 999) then + ! ! close(100) + ! ! close(101) + ! ! end if + + ! call nvtxStartRange("COMPUTE-PARTICLE-FORCES") + ! call s_compute_particle_forces() + ! call nvtxEndRange + ! end if + + ! if (periodic_forcing) then + ! 
call nvtxStartRange("COMPUTE-PERIODIC-FORCING") + ! call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) + ! call s_compute_periodic_forcing(q_cons_ts(1)%vf) + ! call nvtxEndRange + ! end if #ifdef DEBUG print *, 'Computed derived vars' @@ -1343,34 +1380,6 @@ contains if (relax) call s_infinite_relaxation_k(q_cons_ts(1)%vf) - ! Volume filter flow variables, compute unclosed terms and their statistics - if (volume_filtering_momentum_eqn) then - call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf) - - if (t_step > t_step_stat_start) then - call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) - - ! write(100, *) mag_reynolds_stress%sf(10, 10, 10) - ! write(101, *) stat_reynolds_stress(2)%sf(10, 10, 10), stat_reynolds_stress(3)%sf(10, 10, 10), stat_reynolds_stress(4)%sf(10, 10, 10) - end if - - ! TEMPORARY, for v+v - ! if (t_step == 1) then - ! open(unit=100, file='dat_reynolds_stress.txt', status='replace', action='write') - ! open(unit=101, file='stat_reynolds_stress.txt', status='replace', action='write') - ! end if - ! if (t_step == 999) then - ! close(100) - ! close(101) - ! end if - - end if - - if (periodic_forcing) then - call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) - call s_compute_periodic_forcing(q_cons_ts(1)%vf) - end if - ! Time-stepping loop controls t_step = t_step + 1 @@ -1450,7 +1459,7 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") - do i = 2, 4 + do i = 1, 4 !$acc update host(stat_reynolds_stress(i)%sf) !$acc update host(stat_eff_visc(i)%sf) !$acc update host(stat_int_mom_exch(i)%sf) @@ -1607,7 +1616,7 @@ contains if (mhd .and. 
powell) call s_initialize_mhd_powell_module - call s_initialize_particle_forces_module() + if (compute_particle_drag) call s_initialize_particle_forces_module() if (periodic_forcing) call s_initialize_additional_forcing_module() if (volume_filtering_momentum_eqn) then call s_initialize_fftw_explicit_filter_module() @@ -1726,7 +1735,7 @@ contains !$acc update device(ib_markers%sf) end if - !$acc update device(u_inf_ref, rho_inf_ref, T_inf_ref) + !$acc update device(u_inf_ref, rho_inf_ref, T_inf_ref, filter_width) end subroutine s_initialize_gpu_vars @@ -1756,8 +1765,8 @@ contains if (bodyForces) call s_finalize_body_forces_module() if (mhd .and. powell) call s_finalize_mhd_powell_module - call s_finalize_particle_forces_module() - call s_finalize_additional_forcing_module() + if (compute_particle_drag) call s_finalize_particle_forces_module() + if (periodic_forcing) call s_finalize_additional_forcing_module() if (volume_filtering_momentum_eqn) call s_finalize_fftw_explicit_filter_module ! 
Terminating MPI execution environment diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index fa44071328..c954f6eac6 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -30,7 +30,7 @@ module m_volume_filtering s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & s_initialize_filtered_fluid_indicator_function, s_finalize_fftw_explicit_filter_module, & s_apply_fftw_filter_cons, s_volume_filter_momentum_eqn, s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield, & - s_compute_viscous_stress_tensor, s_compute_stress_tensor, s_compute_divergence_stress_tensor, & + s_compute_viscous_stress_tensor, s_compute_stress_tensor, s_compute_divergence_stress_tensor, s_compute_particle_forces, & s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity, s_compute_interphase_momentum_exchange @@ -55,21 +55,25 @@ module m_volume_filtering type(scalar_field), allocatable, dimension(:) :: div_pres_visc_stress ! unclosed terms in volume filtered momentum equation - type(vector_field), allocatable, dimension(:) :: reynolds_stress - type(vector_field), allocatable, dimension(:) :: eff_visc - type(scalar_field), allocatable, dimension(:) :: int_mom_exch + type(vector_field), allocatable, dimension(:), public :: reynolds_stress + type(vector_field), allocatable, dimension(:), public :: eff_visc + type(scalar_field), allocatable, dimension(:), public :: int_mom_exch ! magnitude of unclosed terms in momentum equation type(scalar_field), public :: mag_reynolds_stress type(scalar_field), public :: mag_eff_visc type(scalar_field), public :: mag_int_mom_exch + ! 1/mu real(wp), allocatable, dimension(:, :) :: Res + ! 
x-,y-,z-direction forces on particles + real(wp), allocatable, dimension(:, :) :: particle_forces + !$acc declare create(fluid_indicator_function, filtered_fluid_indicator_function, q_cons_filtered) !$acc declare create(visc_stress, pres_visc_stress, div_pres_visc_stress) !$acc declare create(reynolds_stress, eff_visc, int_mom_exch, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) - !$acc declare create(Res) + !$acc declare create(Res, particle_forces) #if defined(MFC_OpenACC) ! GPU plans @@ -213,9 +217,11 @@ contains Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) end do end do - !$acc update device(Res, Re_idx, Re_size) + !$acc update device(Res) end if + @:ALLOCATE(particle_forces(0:num_ibs, 3)) + !< global sizes Nx = m_glb + 1 Ny = n_glb + 1 @@ -339,7 +345,7 @@ contains integer :: i, j, k, idx ! gaussian filter - sigma_stddev = 3.0_dp * 0.05_dp + sigma_stddev = filter_width Lx = x_domain_end_glb - x_domain_beg_glb Ly = y_domain_end_glb - y_domain_beg_glb @@ -522,17 +528,13 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: i, j, k - call nvtxStartRange("FILTER-CONSERVATIVE-VARIABLES") call s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) - call nvtxEndRange - call nvtxStartRange("COMPUTE-MOMENTUM-UNCLOSED-TERMS") call s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) call s_apply_fftw_filter_tensor(reynolds_stress, visc_stress, eff_visc, div_pres_visc_stress, int_mom_exch) call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, mag_eff_visc) call s_compute_interphase_momentum_exchange(int_mom_exch, mag_int_mom_exch) - call nvtxEndRange end subroutine s_volume_filter_momentum_eqn @@ -993,7 +995,7 @@ contains type(scalar_field), dimension(1:num_dims), intent(in) :: int_mom_exch type(scalar_field), intent(inout) :: mag_int_mom_exch - integer :: i, j, 
k, l, q, ii + integer :: i, j, k !$acc parallel loop collapse(3) gang vector default(present) do i = 0, m @@ -1008,6 +1010,28 @@ contains end subroutine s_compute_interphase_momentum_exchange + ! computes x-,y-,z-direction forces on particles + subroutine s_compute_particle_forces + real(wp) :: dvol + integer :: i, j, k, l + + !$acc parallel loop collapse(3) gang vector default(present) private(dvol) + do i = 0, m + do j = 0, n + do k = 0, p + dvol = dx(i) * dy(j) * dz(k) + !$acc atomic + particle_forces(ib_markers%sf(i, j, k), 1) = particle_forces(ib_markers%sf(i, j, k), 1) + div_pres_visc_stress(1)%sf(i, j, k) * dvol + !$acc atomic + particle_forces(ib_markers%sf(i, j, k), 2) = particle_forces(ib_markers%sf(i, j, k), 2) + div_pres_visc_stress(2)%sf(i, j, k) * dvol + !$acc atomic + particle_forces(ib_markers%sf(i, j, k), 3) = particle_forces(ib_markers%sf(i, j, k), 3) + div_pres_visc_stress(3)%sf(i, j, k) * dvol + end do + end do + end do + + end subroutine s_compute_particle_forces + !< transpose domain from z-slabs to y-slabs on each processor subroutine s_mpi_transpose_slabZ2Y @@ -1281,6 +1305,9 @@ contains @:DEALLOCATE(mag_eff_visc%sf) @:DEALLOCATE(mag_int_mom_exch%sf) + @:DEALLOCATE(Res) + @:DEALLOCATE(particle_forces) + @:DEALLOCATE(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) @:DEALLOCATE(cmplx_kernelG1d, real_kernelG_in) @:DEALLOCATE(data_real_3D_slabz, data_cmplx_slabz, data_cmplx_slaby) diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 2b46a4cb05..f9bedd37af 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -300,7 +300,7 @@ def analytic(self): 'tau_star': ParamType.REAL, 'cont_damage_s': ParamType.REAL, 'alpha_bar': ParamType.REAL, - 'compute_CD': ParamType.LOG, + 'compute_particle_drag': ParamType.LOG, 'u_inf_ref': ParamType.REAL, 'rho_inf_ref': ParamType.REAL, 'T_inf_ref': ParamType.REAL, @@ -308,6 +308,7 @@ def analytic(self): 'volume_filtering_momentum_eqn': ParamType.LOG, 
'compute_autocorrelation': ParamType.LOG, 't_step_stat_start': ParamType.INT, + 'filter_width': ParamType.REAL, }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', diff --git a/toolchain/templates/delta.mako b/toolchain/templates/delta.mako index 694f22c457..52246fd334 100644 --- a/toolchain/templates/delta.mako +++ b/toolchain/templates/delta.mako @@ -16,7 +16,7 @@ % endif % if gpu: #SBATCH --gpus-per-node=${tasks_per_node} -#SBATCH --mem=208G +#SBATCH --mem=240G #SBATCH --gpu-bind=closest % endif #SBATCH --output="${name}.out" From 40efc90ccc2cf33e95600844784863782792e38a Mon Sep 17 00:00:00 2001 From: conradd3 Date: Wed, 27 Aug 2025 12:56:36 -0500 Subject: [PATCH 08/30] mpi data output bug for filtered q --- src/post_process/m_global_parameters.fpp | 8 ++++---- src/simulation/m_global_parameters.fpp | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index ec6a3ca3f9..37b396a583 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -781,9 +781,9 @@ contains MPI_IO_DATA%var(i)%sf => null() end do else if (q_filtered_wrt) then - allocate (MPI_IO_DATA%view(1:sys_size+9)) - allocate (MPI_IO_DATA%var(1:sys_size+9)) - do i = 1, sys_size+9 + allocate (MPI_IO_DATA%view(1:sys_size+12)) + allocate (MPI_IO_DATA%var(1:sys_size+12)) + do i = 1, sys_size+12 allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do @@ -974,7 +974,7 @@ contains if (bubbles_lagrange) MPI_IO_DATA%var(sys_size + 1)%sf => null() if (q_filtered_wrt) then - do i = sys_size+1, sys_size+9 + do i = sys_size+1, sys_size+12 MPI_IO_DATA%var(i)%sf => null() end do end if diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index db2eb1d298..6efe39d5d9 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ 
-1158,8 +1158,8 @@ contains allocate (MPI_IO_DATA%view(1:sys_size + 1)) allocate (MPI_IO_DATA%var(1:sys_size + 1)) else if (volume_filtering_momentum_eqn) then - allocate (MPI_IO_DATA%view(1:sys_size+9)) - allocate (MPI_IO_DATA%var(1:sys_size+9)) + allocate (MPI_IO_DATA%view(1:sys_size+12)) + allocate (MPI_IO_DATA%var(1:sys_size+12)) else allocate (MPI_IO_DATA%view(1:sys_size)) allocate (MPI_IO_DATA%var(1:sys_size)) @@ -1180,7 +1180,7 @@ contains MPI_IO_DATA%var(i)%sf => null() end do else if (volume_filtering_momentum_eqn) then - do i = sys_size+1, sys_size+9 + do i = sys_size+1, sys_size+12 allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do @@ -1357,7 +1357,7 @@ contains MPI_IO_DATA%var(i)%sf => null() end do else if (volume_filtering_momentum_eqn) then - do i = 1, sys_size+9 + do i = 1, sys_size+12 MPI_IO_DATA%var(i)%sf => null() end do else From 30fb0e7406ffdb8e12035738c193fc75e8d9716b Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Tue, 2 Sep 2025 14:28:36 -0500 Subject: [PATCH 09/30] gpu profiling blocks --- src/simulation/m_additional_forcing.fpp | 6 +-- src/simulation/m_start_up.fpp | 72 ++++++++++++------------- src/simulation/m_volume_filtering.fpp | 29 ++++++++++ 3 files changed, 68 insertions(+), 39 deletions(-) diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp index c69ab97db1..ae1d028330 100644 --- a/src/simulation/m_additional_forcing.fpp +++ b/src/simulation/m_additional_forcing.fpp @@ -95,9 +95,9 @@ contains phase_u = phase_u + (spatial_u_glb / real(N_x_total_glb, wp) - phase_u) / real(t_step, wp) !$acc update device(phase_rho, phase_u) - if (proc_rank == 0) then - print *, t_step, 'rho', phase_rho, 'rho*u', phase_u - end if + ! if (proc_rank == 0) then + ! print *, t_step, 'rho', phase_rho, 'rho*u', phase_u + ! 
end if end subroutine s_compute_phase_average diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index c08a2ce7eb..453632807a 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1321,42 +1321,42 @@ contains call s_compute_derived_variables(t_step) - ! ! Volume filter flow variables, compute unclosed terms and their statistics - ! if (volume_filtering_momentum_eqn) then - ! if (t_step > t_step_stat_start) then - ! call nvtxStartRange('VOLUME-FILTER-MOMENTUM-EQUATION') - ! call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf) - ! call nvtxEndRange - - ! call nvtxStartRange('COMPUTE-STATISTICS') - ! call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) - ! call nvtxEndRange - - ! ! write(100, *) mag_reynolds_stress%sf(10, 10, 10) - ! ! write(101, *) stat_reynolds_stress(2)%sf(10, 10, 10), stat_reynolds_stress(3)%sf(10, 10, 10), stat_reynolds_stress(4)%sf(10, 10, 10) - ! end if - - ! ! TEMPORARY, for v+v - ! ! if (t_step == 1) then - ! ! open(unit=100, file='dat_reynolds_stress.txt', status='replace', action='write') - ! ! open(unit=101, file='stat_reynolds_stress.txt', status='replace', action='write') - ! ! end if - ! ! if (t_step == 999) then - ! ! close(100) - ! ! close(101) - ! ! end if - - ! call nvtxStartRange("COMPUTE-PARTICLE-FORCES") - ! call s_compute_particle_forces() - ! call nvtxEndRange - ! end if - - ! if (periodic_forcing) then - ! call nvtxStartRange("COMPUTE-PERIODIC-FORCING") - ! call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) - ! call s_compute_periodic_forcing(q_cons_ts(1)%vf) - ! call nvtxEndRange - ! end if + ! 
Volume filter flow variables, compute unclosed terms and their statistics + if (volume_filtering_momentum_eqn) then + if (t_step > t_step_stat_start) then + call nvtxStartRange("VOLUME-FILTER-MOMENTUM-EQUATION") + call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf) + call nvtxEndRange + + call nvtxStartRange("COMPUTE-STATISTICS") + call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) + call nvtxEndRange + + ! write(100, *) mag_reynolds_stress%sf(10, 10, 10) + ! write(101, *) stat_reynolds_stress(2)%sf(10, 10, 10), stat_reynolds_stress(3)%sf(10, 10, 10), stat_reynolds_stress(4)%sf(10, 10, 10) + end if + + ! TEMPORARY, for v+v + ! if (t_step == 1) then + ! open(unit=100, file='dat_reynolds_stress.txt', status='replace', action='write') + ! open(unit=101, file='stat_reynolds_stress.txt', status='replace', action='write') + ! end if + ! if (t_step == 999) then + ! close(100) + ! close(101) + ! end if + + call nvtxStartRange("COMPUTE-PARTICLE-FORCES") + call s_compute_particle_forces() + call nvtxEndRange + end if + + if (periodic_forcing) then + call nvtxStartRange("COMPUTE-PERIODIC-FORCING") + call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) + call s_compute_periodic_forcing(q_cons_ts(1)%vf) + call nvtxEndRange + end if #ifdef DEBUG print *, 'Computed derived vars' diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index c954f6eac6..a503412162 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -333,6 +333,10 @@ contains cmplx_kernelG1d, onembed, 1, Nz, & FFTW_FORWARD, FFTW_MEASURE) #endif + + ! 
file for particle forces + open(unit=100, file='particle_force.bin', status='replace', form='unformatted', access='stream') + end subroutine s_initialize_fftw_explicit_filter_module !< initialize the gaussian filtering kernel in real space and then compute its DFT @@ -528,13 +532,23 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf integer :: i, j, k + call nvtxStartRange("FILTER-CONS-VARS") call s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) + call nvtxEndRange + call nvtxStartRange("UNCLOSED-TERM-SETUP") call s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) + call nvtxEndRange + + call nvtxStartRange("FILTER-UNCLOSED-TERM-VARS") call s_apply_fftw_filter_tensor(reynolds_stress, visc_stress, eff_visc, div_pres_visc_stress, int_mom_exch) + call nvtxEndRange + + call nvtxStartRange("COMPUTE-UNCLOSED-TERMS") call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, mag_eff_visc) call s_compute_interphase_momentum_exchange(int_mom_exch, mag_int_mom_exch) + call nvtxEndRange end subroutine s_volume_filter_momentum_eqn @@ -1012,6 +1026,7 @@ contains ! computes x-,y-,z-direction forces on particles subroutine s_compute_particle_forces + real(wp), dimension(num_ibs, 3) :: force_glb real(wp) :: dvol integer :: i, j, k, l @@ -1030,6 +1045,18 @@ contains end do end do + ! reduce particle forces across processors + do i = 1, num_ibs + call s_mpi_allreduce_sum(particle_forces(i, 1), force_glb(i, 1)) + call s_mpi_allreduce_sum(particle_forces(i, 2), force_glb(i, 2)) + call s_mpi_allreduce_sum(particle_forces(i, 3), force_glb(i, 3)) + end do + + ! 
write particle forces to file + if (proc_rank == 0) then + write(100) force_glb + end if + end subroutine s_compute_particle_forces @@ -1329,6 +1356,8 @@ contains call fftw_destroy_plan(plan_z_c2c_kernelG) #endif + close(100) + end subroutine s_finalize_fftw_explicit_filter_module end module m_volume_filtering \ No newline at end of file From 0a35fda9720cb83ec031f97c7fa9a0cc0b33a6cd Mon Sep 17 00:00:00 2001 From: conradd3 Date: Tue, 2 Sep 2025 17:56:00 -0500 Subject: [PATCH 10/30] full tensor stats and data output --- runs/phi01/case.py | 7 +- src/common/m_mpi_common.fpp | 39 ++- src/post_process/m_data_input.f90 | 331 ++++++----------------- src/post_process/m_global_parameters.fpp | 8 +- src/post_process/m_start_up.f90 | 57 ++-- src/simulation/m_compute_statistics.fpp | 214 +++++++++++---- src/simulation/m_data_output.fpp | 33 ++- src/simulation/m_global_parameters.fpp | 10 +- src/simulation/m_mpi_proxy.fpp | 3 +- src/simulation/m_start_up.fpp | 34 ++- src/simulation/m_volume_filtering.fpp | 154 +---------- toolchain/mfc/run/case_dicts.py | 1 + 12 files changed, 373 insertions(+), 518 deletions(-) diff --git a/runs/phi01/case.py b/runs/phi01/case.py index 3034d52b6d..e47086a47e 100644 --- a/runs/phi01/case.py +++ b/runs/phi01/case.py @@ -34,8 +34,9 @@ #print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow dt = 4.0E-06 -Nt = 200 -t_save = 10 +Nt = 20 +t_save = 1 +t_step_start_stats = 10 Nx = 99 Ny = 99 @@ -79,7 +80,7 @@ "t_step_start": 0, "t_step_stop": Nt, # 3000 "t_step_save": t_save, # 10 - "t_step_stat_start": 50, + "t_step_stat_start": t_step_start_stats, # Simulation Algorithm Parameters # Only one patches are necessary, the air tube "num_patches": 1, diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index c352412c75..4eca64e8ad 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -153,7 +153,8 @@ contains !! @param levelset closest distance from every cell to the IB !! 
@param levelset_norm normalized vector from every cell to the closest point to the IB !! @param beta Eulerian void fraction from lagrangian bubbles - subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta, filtered_fluid_indicator_function, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) type(scalar_field), & dimension(sys_size), & @@ -174,9 +175,11 @@ contains type(scalar_field), & intent(in), optional :: beta - type(scalar_field), dimension(1:4), intent(in), optional :: stat_reynolds_stress - type(scalar_field), dimension(1:4), intent(in), optional :: stat_eff_visc - type(scalar_field), dimension(1:4), intent(in), optional :: stat_int_mom_exch + type(scalar_field), intent(in), optional :: filtered_fluid_indicator_function + type(vector_field), dimension(1:9), intent(in), optional :: stat_reynolds_stress + type(vector_field), dimension(1:9), intent(in), optional :: stat_eff_visc + type(vector_field), dimension(1:3), intent(in), optional :: stat_int_mom_exch + type(vector_field), dimension(1:sys_size), intent(in), optional :: stat_q_cons_filtered integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start @@ -192,7 +195,7 @@ contains if (present(beta)) then alt_sys = sys_size + 1 else if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. present(stat_int_mom_exch)) then - alt_sys = sys_size + 12 + alt_sys = sys_size + 1 + 9*4 + 9*4 + 3*4 + 6*4 ! 109 else alt_sys = sys_size end if @@ -202,15 +205,27 @@ contains end do if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. 
present(stat_int_mom_exch)) then - do i = sys_size+1, sys_size+4 - MPI_IO_DATA%var(i)%sf => stat_reynolds_stress(i-sys_size)%sf(0:m, 0:n, 0:p) + MPI_IO_DATA%var(sys_size+1)%sf => filtered_fluid_indicator_function%sf(0:m, 0:n, 0:p) + do i = 1, 9 + do j = 1, 4 + MPI_IO_DATA%var(sys_size+1+(i-1)*4+j)%sf => stat_reynolds_stress(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do + end do + do i = 1, 9 + do j = 1, 4 + MPI_IO_DATA%var(sys_size+37+(i-1)*4+j)%sf => stat_eff_visc(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do end do - do i = sys_size+5, sys_size+8 - MPI_IO_DATA%var(i)%sf => stat_eff_visc(i-sys_size-4)%sf(0:m, 0:n, 0:p) + do i = 1, 3 + do j = 1, 4 + MPI_IO_DATA%var(sys_size+73+(i-1)*4+j)%sf => stat_int_mom_exch(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do + end do + do i = 1, sys_size + do j = 1, 4 + MPI_IO_DATA%var(sys_size+85+(i-1)*4+j)%sf => stat_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do end do - do i = sys_size+9, sys_size+12 - MPI_IO_DATA%var(i)%sf => stat_int_mom_exch(i-sys_size-8)%sf(0:m, 0:n, 0:p) - end do end if if (present(beta)) then diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index d778eeb7fa..7ac7a502dc 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -29,7 +29,6 @@ module m_data_input s_read_parallel_data_files, & s_populate_grid_variables_buffer_regions, & s_populate_conservative_variables_buffer_regions, & - s_populate_filtered_variables_buffer_regions, & s_finalize_data_input_module abstract interface @@ -61,9 +60,11 @@ end subroutine s_read_abstract_data_files ! 
type(scalar_field), public :: ib_markers !< type(integer_field), public :: ib_markers - type(scalar_field), allocatable, dimension(:), public :: stat_reynolds_stress - type(scalar_field), allocatable, dimension(:), public :: stat_eff_visc - type(scalar_field), allocatable, dimension(:), public :: stat_int_mom_exch + type(scalar_field), public :: filtered_fluid_indicator_function + type(vector_field), allocatable, dimension(:), public :: stat_reynolds_stress + type(vector_field), allocatable, dimension(:), public :: stat_eff_visc + type(vector_field), allocatable, dimension(:), public :: stat_int_mom_exch + type(vector_field), allocatable, dimension(:), public :: stat_q_cons_filtered procedure(s_read_abstract_data_files), pointer :: s_read_data_files => null() @@ -301,8 +302,8 @@ subroutine s_read_parallel_data_files(t_step) if (bubbles_lagrange) then alt_sys = sys_size + 1 - else if (q_filtered_wrt) then - alt_sys = sys_size + 9 + else if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then + alt_sys = sys_size + 1 + 9*4 + 9*4 + 3*4 + 6*4 ! 109, filtered indicator, stats of: R_u, R_mu, F_imet, q_cons_filtered else alt_sys = sys_size end if @@ -461,11 +462,13 @@ subroutine s_read_parallel_data_files(t_step) ! Initialize MPI data I/O if (ib) then - if (q_filtered_wrt) then + if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then call s_initialize_mpi_data(q_cons_vf, ib_markers, & + filtered_fluid_indicator_function=filtered_fluid_indicator_function, & stat_reynolds_stress=stat_reynolds_stress, & stat_eff_visc=stat_eff_visc, & - stat_int_mom_exch=stat_int_mom_exch) + stat_int_mom_exch=stat_int_mom_exch, & + stat_q_cons_filtered=stat_q_cons_filtered) else call s_initialize_mpi_data(q_cons_vf, ib_markers) end if @@ -500,7 +503,7 @@ subroutine s_read_parallel_data_files(t_step) call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & mpi_p, status, ierr) end do - else if (q_filtered_wrt) then + else if (q_filtered_wrt .and. 
(t_step == 0 .or. t_step == t_step_stop)) then do i = 1, alt_sys var_MOK = int(i, MPI_OFFSET_KIND) @@ -1328,229 +1331,11 @@ subroutine s_populate_conservative_variables_buffer_regions(q_particle) end subroutine s_populate_conservative_variables_buffer_regions - subroutine s_populate_filtered_variables_buffer_regions(q_particle) - - type(scalar_field), intent(inout), optional :: q_particle - - integer :: i, j, k !< Generic loop iterators - - ! Populating Buffer Regions in the x-direction - - ! Periodic BC at the beginning - if (bc_x%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(-j, 0:n, 0:p) = & - q_particle%sf((m + 1) - j, 0:n, 0:p) - else - do i = 1, 4 - stat_reynolds_stress(i)%sf(-j, 0:n, 0:p) = & - stat_reynolds_stress(i)%sf((m + 1) - j, 0:n, 0:p) - stat_eff_visc(i)%sf(-j, 0:n, 0:p) = & - stat_eff_visc(i)%sf((m + 1) - j, 0:n, 0:p) - stat_int_mom_exch(i)%sf(-j, 0:n, 0:p) = & - stat_int_mom_exch(i)%sf((m + 1) - j, 0:n, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'x') - end if - - end if - - ! Perodic BC at the end - if (bc_x%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(m + j, 0:n, 0:p) = & - q_particle%sf(j - 1, 0:n, 0:p) - else - do i = 1, 4 - stat_reynolds_stress(i)%sf(m + j, 0:n, 0:p) = & - stat_reynolds_stress(i)%sf(j - 1, 0:n, 0:p) - stat_eff_visc(i)%sf(m + j, 0:n, 0:p) = & - stat_eff_visc(i)%sf(j - 1, 0:n, 0:p) - stat_int_mom_exch(i)%sf(m + j, 0:n, 0:p) = & - stat_int_mom_exch(i)%sf(j - 1, 0:n, 0:p) - end do - end if - end do - - ! 
Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'x') - end if - - end if - - ! END: Populating Buffer Regions in the x-direction - - ! Populating Buffer Regions in the y-direction - - if (n > 0) then - - ! Periodic BC at the beginning - if (bc_y%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, -j, 0:p) = & - q_particle%sf(:, (n + 1) - j, 0:p) - else - do i = 1, 4 - stat_reynolds_stress(i)%sf(:, -j, 0:p) = & - stat_reynolds_stress(i)%sf(:, (n + 1) - j, 0:p) - stat_eff_visc(i)%sf(:, -j, 0:p) = & - stat_eff_visc(i)%sf(:, (n + 1) - j, 0:p) - stat_int_mom_exch(i)%sf(:, -j, 0:p) = & - stat_int_mom_exch(i)%sf(:, (n + 1) - j, 0:p) - end do - end if - end do - - ! Processor BC at the beginning - else - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'y') - end if - - end if - - ! Perodic BC at the end - if (bc_y%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, n + j, 0:p) = & - q_particle%sf(:, j - 1, 0:p) - else - do i = 1, 4 - stat_reynolds_stress(i)%sf(:, n + j, 0:p) = & - stat_reynolds_stress(i)%sf(:, j - 1, 0:p) - stat_eff_visc(i)%sf(:, n + j, 0:p) = & - stat_eff_visc(i)%sf(:, j - 1, 0:p) - stat_int_mom_exch(i)%sf(:, n + j, 0:p) = & - stat_int_mom_exch(i)%sf(:, j - 1, 0:p) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'y') - end if - - end if - - ! END: Populating Buffer Regions in the y-direction - - ! 
Populating Buffer Regions in the z-direction - - if (p > 0) then - - ! Periodic BC at the beginning - if (bc_z%beg == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, -j) = & - q_particle%sf(:, :, (p + 1) - j) - else - do i = 1, 4 - stat_reynolds_stress(i)%sf(:, :, -j) = & - stat_reynolds_stress(i)%sf(:, :, (p + 1) - j) - stat_eff_visc(i)%sf(:, :, -j) = & - stat_eff_visc(i)%sf(:, :, (p + 1) - j) - stat_int_mom_exch(i)%sf(:, :, -j) = & - stat_int_mom_exch(i)%sf(:, :, (p + 1) - j) - end do - end if - end do - - ! Processor BC at the beginning - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'beg', 'z') - end if - - end if - - ! Perodic BC at the end - if (bc_z%end == BC_PERIODIC) then - - do j = 1, buff_size - if (present(q_particle)) then - q_particle%sf(:, :, p + j) = & - q_particle%sf(:, :, j - 1) - else - do i = 1, 4 - stat_reynolds_stress(i)%sf(:, :, p + j) = & - stat_reynolds_stress(i)%sf(:, :, j - 1) - stat_eff_visc(i)%sf(:, :, p + j) = & - stat_eff_visc(i)%sf(:, :, j - 1) - stat_int_mom_exch(i)%sf(:, :, p + j) = & - stat_int_mom_exch(i)%sf(:, :, j - 1) - end do - end if - end do - - ! Processor BC at the end - else - - if (present(q_particle)) then - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z', q_particle) - else - call s_mpi_sendrecv_cons_vars_buffer_regions(q_cons_vf, & - 'end', 'z') - end if - - end if - - end if - - end if - - ! END: Populating Buffer Regions in the z-direction - - end subroutine s_populate_filtered_variables_buffer_regions - !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module subroutine s_initialize_data_input_module - integer :: i !< Generic loop iterator + integer :: i, j !< Generic loop iterator ! 
Allocating the parts of the conservative and primitive variables ! that do not require the direct knowledge of the dimensionality of @@ -1559,9 +1344,10 @@ subroutine s_initialize_data_input_module allocate (q_prim_vf(1:sys_size)) if (bubbles_lagrange) allocate (q_particle(1)) - if (q_filtered_wrt) allocate (stat_reynolds_stress(1:4)) - if (q_filtered_wrt) allocate (stat_eff_visc(1:4)) - if (q_filtered_wrt) allocate (stat_int_mom_exch(1:4)) + if (q_filtered_wrt) allocate (stat_reynolds_stress(1:9)) + if (q_filtered_wrt) allocate (stat_eff_visc(1:9)) + if (q_filtered_wrt) allocate (stat_int_mom_exch(1:3)) + if (q_filtered_wrt) allocate (stat_q_cons_filtered(1:sys_size)) ! Allocating the parts of the conservative and primitive variables ! that do require the direct knowledge of the dimensionality of the @@ -1601,16 +1387,42 @@ subroutine s_initialize_data_input_module end if if (q_filtered_wrt) then - do i = 1, 4 - allocate (stat_reynolds_stress(i)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - -buff_size:p + buff_size)) - allocate (stat_eff_visc(i)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - -buff_size:p + buff_size)) - allocate (stat_int_mom_exch(i)%sf(-buff_size:m + buff_size, & - -buff_size:n + buff_size, & - -buff_size:p + buff_size)) + allocate (filtered_fluid_indicator_function%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + do i = 1, 9 + allocate (stat_reynolds_stress(i)%vf(1:4)) + allocate (stat_eff_visc(i)%vf(1:4)) + end do + do i = 1, 9 + do j = 1, 4 + allocate (stat_reynolds_stress(i)%vf(j)%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + allocate (stat_eff_visc(i)%vf(j)%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + end do + end do + do i = 1, 3 + allocate (stat_int_mom_exch(i)%vf(1:4)) + end do + do i = 1, 3 + do j = 1, 4 + allocate (stat_int_mom_exch(i)%vf(j)%sf(-buff_size:m + buff_size, 
& + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + end do + end do + do i = 1, sys_size + allocate (stat_q_cons_filtered(i)%vf(1:4)) + end do + do i = 1, sys_size + do j = 1, 4 + allocate (stat_q_cons_filtered(i)%vf(j)%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + end do end do end if @@ -1682,7 +1494,7 @@ end subroutine s_initialize_data_input_module !> Deallocation procedures for the module subroutine s_finalize_data_input_module - integer :: i !< Generic loop iterator + integer :: i, j !< Generic loop iterator ! Deallocating the conservative and primitive variables do i = 1, sys_size @@ -1707,18 +1519,35 @@ subroutine s_finalize_data_input_module end if if (q_filtered_wrt) then - do i = 1, 4 - deallocate (stat_reynolds_stress(i)%sf) - end do + deallocate (filtered_fluid_indicator_function%sf) + do i = 1, 9 + do j = 1, 4 + deallocate (stat_reynolds_stress(i)%vf(j)%sf) + end do + deallocate(stat_reynolds_stress(i)%vf) + end do deallocate(stat_reynolds_stress) - do i = 1, 4 - deallocate (stat_eff_visc(i)%sf) - end do + do i = 1, 9 + do j = 1, 4 + deallocate (stat_eff_visc(i)%vf(j)%sf) + end do + deallocate(stat_eff_visc(i)%vf) + end do deallocate(stat_eff_visc) - do i = 1, 4 - deallocate (stat_int_mom_exch(i)%sf) - end do + do i = 1, 3 + do j = 1, 4 + deallocate (stat_int_mom_exch(i)%vf(j)%sf) + end do + deallocate(stat_int_mom_exch(i)%vf) + end do deallocate(stat_int_mom_exch) + do i = 1, sys_size + do j = 1, 4 + deallocate (stat_q_cons_filtered(i)%vf(j)%sf) + end do + deallocate(stat_q_cons_filtered(i)%vf) + end do + deallocate(stat_q_cons_filtered) end if s_read_data_files => null() diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 37b396a583..1b7e452e9f 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -781,9 +781,9 @@ contains MPI_IO_DATA%var(i)%sf => null() end do else if (q_filtered_wrt) then - 
allocate (MPI_IO_DATA%view(1:sys_size+12)) - allocate (MPI_IO_DATA%var(1:sys_size+12)) - do i = 1, sys_size+12 + allocate (MPI_IO_DATA%view(1:sys_size+1+4*9+4*9+3*4+6*4)) + allocate (MPI_IO_DATA%var (1:sys_size+1+4*9+4*9+3*4+6*4)) + do i = 1, sys_size+1+4*9+4*9+3*4+6*4 allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do @@ -974,7 +974,7 @@ contains if (bubbles_lagrange) MPI_IO_DATA%var(sys_size + 1)%sf => null() if (q_filtered_wrt) then - do i = sys_size+1, sys_size+12 + do i = sys_size+1, sys_size+1+4*9+4*9+3*4+6*4 MPI_IO_DATA%var(i)%sf => null() end do end if diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 481181064c..59c3e9499d 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -180,7 +180,6 @@ subroutine s_perform_time_step(t_step) ! Populating the buffer regions of the conservative variables if (buff_size > 0) then call s_populate_conservative_variables_buffer_regions() - if (q_filtered_wrt) call s_populate_filtered_variables_buffer_regions() if (bubbles_lagrange) call s_populate_conservative_variables_buffer_regions(q_particle(1)) end if @@ -326,28 +325,50 @@ subroutine s_save_data(t_step, varname, pres, c, H) end do ! Adding filtered quantities - if (q_filtered_wrt) then - ! filtered cons vars - do i = 1, 4 - q_sf = stat_reynolds_stress(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) - write (varname, '(A,I0)') 'stat_reynolds_stresss', i - call s_write_variable_to_formatted_database_file(varname, t_step) + if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then + ! filtered fluid indicator + q_sf = filtered_fluid_indicator_function%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A)') 'filtered_fluid_indicator_function' + call s_write_variable_to_formatted_database_file(varname, t_step) - varname(:) = ' ' + varname(:) = ' ' + + ! 
filtered vars stats + do i = 1, 9 + do j = 1, 4 + q_sf = stat_reynolds_stress(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_reynolds_stress', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do end do - do i = 1, 4 - q_sf = stat_eff_visc(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) - write (varname, '(A,I0)') 'stat_eff_viscs', i - call s_write_variable_to_formatted_database_file(varname, t_step) + do i = 1, 9 + do j = 1, 4 + q_sf = stat_eff_visc(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_eff_visc', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) - varname(:) = ' ' + varname(:) = ' ' + end do end do - do i = 1, 4 - q_sf = stat_int_mom_exch(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) - write (varname, '(A,I0)') 'stat_int_mom_exchs', i - call s_write_variable_to_formatted_database_file(varname, t_step) + do i = 1, 3 + do j = 1, 4 + q_sf = stat_int_mom_exch(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_int_mom_exch', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) - varname(:) = ' ' + varname(:) = ' ' + end do + end do + do i = 1, sys_size + do j = 1, 4 + q_sf = stat_q_cons_filtered(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_q_cons_filtered', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do end do end if diff --git a/src/simulation/m_compute_statistics.fpp b/src/simulation/m_compute_statistics.fpp index 9d574c72f7..1e1e4de29d 100644 --- a/src/simulation/m_compute_statistics.fpp +++ b/src/simulation/m_compute_statistics.fpp @@ -15,85 +15,148 @@ module m_compute_statistics private; public :: s_initialize_statistics_module, s_finalize_statistics_module, & s_compute_statistics_momentum_unclosed_terms, 
s_update_statistics, & - s_compute_234_order_statistics + s_compute_statistical_moments ! terms for computing 1st, 2nd, 3rd, and 4th order statistical moments - type(scalar_field), allocatable, dimension(:) :: Msn_reynolds_stress - type(scalar_field), allocatable, dimension(:) :: Msn_eff_visc - type(scalar_field), allocatable, dimension(:) :: Msn_int_mom_exch + type(vector_field), allocatable, dimension(:) :: Msn_reynolds_stress + type(vector_field), allocatable, dimension(:) :: Msn_eff_visc + type(vector_field), allocatable, dimension(:) :: Msn_int_mom_exch + type(vector_field), allocatable, dimension(:) :: Msn_q_cons_filtered ! 2nd, 3rd, and 4th statistical moments for unclosed terms in volume filtered momentum equation - type(scalar_field), allocatable, dimension(:), public :: stat_reynolds_stress - type(scalar_field), allocatable, dimension(:), public :: stat_eff_visc - type(scalar_field), allocatable, dimension(:), public :: stat_int_mom_exch + type(vector_field), allocatable, dimension(:), public :: stat_reynolds_stress + type(vector_field), allocatable, dimension(:), public :: stat_eff_visc + type(vector_field), allocatable, dimension(:), public :: stat_int_mom_exch + type(vector_field), allocatable, dimension(:), public :: stat_q_cons_filtered - !$acc declare create(Msn_reynolds_stress, Msn_eff_visc, Msn_int_mom_exch) + !$acc declare create(Msn_reynolds_stress, Msn_eff_visc, Msn_int_mom_exch, Msn_q_cons_filtered) - !$acc declare create(stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + !$acc declare create(stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) contains subroutine s_initialize_statistics_module - integer :: i + integer :: i, j - @:ALLOCATE(Msn_reynolds_stress(1:4)) - do i = 1, 4 - @:ALLOCATE(Msn_reynolds_stress(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(Msn_reynolds_stress(i)) + @:ALLOCATE(Msn_reynolds_stress(1:9)) + do i = 1, 9 + @:ALLOCATE(Msn_reynolds_stress(i)%vf(1:4)) + end do + do i = 1, 9 + do j = 1, 4 + 
@:ALLOCATE(Msn_reynolds_stress(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(Msn_reynolds_stress(i)) + end do + + @:ALLOCATE(Msn_eff_visc(1:9)) + do i = 1, 9 + @:ALLOCATE(Msn_eff_visc(i)%vf(1:4)) + end do + do i = 1, 9 + do j = 1, 4 + @:ALLOCATE(Msn_eff_visc(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(Msn_eff_visc(i)) end do - @:ALLOCATE(Msn_eff_visc(1:4)) - do i = 1, 4 - @:ALLOCATE(Msn_eff_visc(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(Msn_eff_visc(i)) + @:ALLOCATE(Msn_int_mom_exch(1:3)) + do i = 1, 3 + @:ALLOCATE(Msn_int_mom_exch(i)%vf(1:4)) + end do + do i = 1, 3 + do j = 1, 4 + @:ALLOCATE(Msn_int_mom_exch(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(Msn_int_mom_exch(i)) end do - @:ALLOCATE(Msn_int_mom_exch(1:4)) - do i = 1, 4 - @:ALLOCATE(Msn_int_mom_exch(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(Msn_int_mom_exch(i)) + @:ALLOCATE(Msn_q_cons_filtered(1:sys_size)) + do i = 1, sys_size + @:ALLOCATE(Msn_q_cons_filtered(i)%vf(1:4)) + end do + do i = 1, sys_size + do j = 1, 4 + @:ALLOCATE(Msn_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(Msn_q_cons_filtered) end do - @:ALLOCATE(stat_reynolds_stress(1:4)) - do i = 1, 4 - @:ALLOCATE(stat_reynolds_stress(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(stat_reynolds_stress(i)) + @:ALLOCATE(stat_reynolds_stress(1:9)) + do i = 1, 9 + @:ALLOCATE(stat_reynolds_stress(i)%vf(1:4)) + end do + do i = 1, 9 + do j = 1, 4 + @:ALLOCATE(stat_reynolds_stress(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(stat_reynolds_stress(i)) end do - @:ALLOCATE(stat_eff_visc(1:4)) - do i = 1, 4 - @:ALLOCATE(stat_eff_visc(i)%sf(0:m, 0:n, 0:p)) - @:ACC_SETUP_SFs(stat_eff_visc(i)) + @:ALLOCATE(stat_eff_visc(1:9)) + do i = 1, 9 + @:ALLOCATE(stat_eff_visc(i)%vf(1:4)) + end do + do i = 1, 9 + do j = 1, 4 + @:ALLOCATE(stat_eff_visc(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(stat_eff_visc(i)) end do - @:ALLOCATE(stat_int_mom_exch(1:4)) - do i = 1, 4 - @:ALLOCATE(stat_int_mom_exch(i)%sf(0:m, 
0:n, 0:p)) - @:ACC_SETUP_SFs(stat_int_mom_exch(i)) + @:ALLOCATE(stat_int_mom_exch(1:3)) + do i = 1, 3 + @:ALLOCATE(stat_int_mom_exch(i)%vf(1:4)) + end do + do i = 1, 3 + do j = 1, 4 + @:ALLOCATE(stat_int_mom_exch(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(stat_int_mom_exch(i)) + end do + + @:ALLOCATE(stat_q_cons_filtered(1:sys_size)) + do i = 1, sys_size + @:ALLOCATE(stat_q_cons_filtered(i)%vf(1:4)) + end do + do i = 1, sys_size + do j = 1, 4 + @:ALLOCATE(stat_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p)) + end do + @:ACC_SETUP_VFs(stat_q_cons_filtered) end do end subroutine s_initialize_statistics_module subroutine s_compute_statistics_momentum_unclosed_terms(n_step, reynolds_stress, eff_visc, int_mom_exch) - type(scalar_field), intent(in) :: reynolds_stress - type(scalar_field), intent(in) :: eff_visc - type(scalar_field), intent(in) :: int_mom_exch + type(vector_field), dimension(3), intent(in) :: reynolds_stress + type(vector_field), dimension(3), intent(in) :: eff_visc + type(scalar_field), dimension(3), intent(in) :: int_mom_exch integer, intent(in) :: n_step real(wp) :: ns + integer :: i, j ns = real(n_step, wp) ! update M1, M2, M3, M4 - call s_update_statistics(ns, reynolds_stress, Msn_reynolds_stress) - call s_update_statistics(ns, eff_visc, Msn_eff_visc) - call s_update_statistics(ns, int_mom_exch, Msn_int_mom_exch) + do i = 1, 3 + do j = 1, 3 + call s_update_statistics(ns, reynolds_stress(i)%vf(j), Msn_reynolds_stress((i-1)*3 + j)%vf) + call s_update_statistics(ns, eff_visc(i)%vf(j), Msn_eff_visc((i-1)*3 + j)%vf) + end do + call s_update_statistics(ns, int_mom_exch(i), Msn_int_mom_exch(i)%vf) + end do - ! compute 2nd, 3rd, 4th order statistical moments + ! 
compute 1st, 2nd, 3rd, 4th order statistical moments if (n_step > 3) then - call s_compute_234_order_statistics(ns, Msn_reynolds_stress, stat_reynolds_stress) - call s_compute_234_order_statistics(ns, Msn_eff_visc, stat_eff_visc) - call s_compute_234_order_statistics(ns, Msn_int_mom_exch, stat_int_mom_exch) + do i = 1, 3 + do j = 1, 3 + call s_compute_statistical_moments(ns, Msn_reynolds_stress((i-1)*3 + j)%vf, stat_reynolds_stress((i-1)*3 + j)%vf) + call s_compute_statistical_moments(ns, Msn_eff_visc((i-1)*3 + j)%vf, stat_eff_visc((i-1)*3 + j)%vf) + end do + call s_compute_statistical_moments(ns, Msn_int_mom_exch(i)%vf, stat_int_mom_exch(i)%vf) + end do end if end subroutine s_compute_statistics_momentum_unclosed_terms @@ -125,7 +188,7 @@ contains end subroutine s_update_statistics - subroutine s_compute_234_order_statistics(ns, Msn, q_stat) + subroutine s_compute_statistical_moments(ns, Msn, q_stat) type(scalar_field), dimension(1:4), intent(in) :: Msn type(scalar_field), dimension(1:4), intent(inout) :: q_stat @@ -144,40 +207,75 @@ contains end do end do - end subroutine s_compute_234_order_statistics + end subroutine s_compute_statistical_moments subroutine s_finalize_statistics_module integer :: i, j - do i = 1, 4 - @:DEALLOCATE(Msn_reynolds_stress(i)%sf) + + do i = 1, 9 + do j = 1, 4 + @:DEALLOCATE(Msn_reynolds_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(Msn_reynolds_stress(i)%vf) end do @:DEALLOCATE(Msn_reynolds_stress) - do i = 1, 4 - @:DEALLOCATE(Msn_eff_visc(i)%sf) + do i = 1, 9 + do j = 1, 4 + @:DEALLOCATE(Msn_eff_visc(i)%vf(j)%sf) + end do + @:DEALLOCATE(Msn_eff_visc(i)%vf) end do @:DEALLOCATE(Msn_eff_visc) - do i = 1, 4 - @:DEALLOCATE(Msn_int_mom_exch(i)%sf) + do i = 1, 3 + do j = 1, 4 + @:DEALLOCATE(Msn_int_mom_exch(i)%vf(j)%sf) + end do + @:DEALLOCATE(Msn_int_mom_exch(i)%vf) end do @:DEALLOCATE(Msn_int_mom_exch) - do i = 1, 4 - @:DEALLOCATE(stat_reynolds_stress(i)%sf) + do i = 1, sys_size + do j = 1, 4 + @:DEALLOCATE(Msn_q_cons_filtered(i)%vf(j)%sf) 
+ end do + @:DEALLOCATE(Msn_q_cons_filtered(i)%vf) + end do + @:DEALLOCATE(Msn_q_cons_filtered) + + do i = 1, 9 + do j = 1, 4 + @:DEALLOCATE(stat_reynolds_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(stat_reynolds_stress(i)%vf) end do @:DEALLOCATE(stat_reynolds_stress) - do i = 1, 4 - @:DEALLOCATE(stat_eff_visc(i)%sf) + do i = 1, 9 + do j = 1, 4 + @:DEALLOCATE(stat_eff_visc(i)%vf(j)%sf) + end do + @:DEALLOCATE(stat_eff_visc(i)%vf) end do @:DEALLOCATE(stat_eff_visc) - do i = 1, 4 - @:DEALLOCATE(stat_int_mom_exch(i)%sf) + do i = 1, 3 + do j = 1, 4 + @:DEALLOCATE(stat_int_mom_exch(i)%vf(j)%sf) + end do + @:DEALLOCATE(stat_int_mom_exch(i)%vf) end do @:DEALLOCATE(stat_int_mom_exch) + do i = 1, sys_size + do j = 1, 4 + @:DEALLOCATE(stat_q_cons_filtered(i)%vf(j)%sf) + end do + @:DEALLOCATE(stat_q_cons_filtered(i)%vf) + end do + @:DEALLOCATE(stat_q_cons_filtered) + end subroutine s_finalize_statistics_module end module m_compute_statistics diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index fd783bef1f..840278d58f 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -76,7 +76,8 @@ contains !! @param q_cons_vf Conservative variables !! @param q_prim_vf Primitive variables !! 
@param t_step Current time step - subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta, filtered_fluid_indicator_function, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) type(scalar_field), & dimension(sys_size), & @@ -94,14 +95,17 @@ contains type(scalar_field), & intent(inout), optional :: beta - type(scalar_field), dimension(1:4), intent(inout), optional :: stat_reynolds_stress - type(scalar_field), dimension(1:4), intent(inout), optional :: stat_eff_visc - type(scalar_field), dimension(1:4), intent(inout), optional :: stat_int_mom_exch + type(scalar_field), intent(inout), optional :: filtered_fluid_indicator_function + type(vector_field), dimension(1:9), intent(inout), optional :: stat_reynolds_stress + type(vector_field), dimension(1:9), intent(inout), optional :: stat_eff_visc + type(vector_field), dimension(1:3), intent(inout), optional :: stat_int_mom_exch + type(vector_field), dimension(1:sys_size), intent(inout), optional :: stat_q_cons_filtered if (.not. parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta) else - call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, & + filtered_fluid_indicator_function, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) end if end subroutine s_write_data_files @@ -790,15 +794,18 @@ contains !! @param q_prim_vf Cell-average primitive variables !! @param t_step Current time-step !! 
@param beta Eulerian void fraction from lagrangian bubbles - subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, filtered_fluid_indicator_function, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer, intent(in) :: t_step type(scalar_field), intent(inout), optional :: beta - type(scalar_field), dimension(1:4), intent(inout), optional :: stat_reynolds_stress - type(scalar_field), dimension(1:4), intent(inout), optional :: stat_eff_visc - type(scalar_field), dimension(1:4), intent(inout), optional :: stat_int_mom_exch + type(scalar_field), intent(inout), optional :: filtered_fluid_indicator_function + type(vector_field), dimension(1:9), intent(inout), optional :: stat_reynolds_stress + type(vector_field), dimension(1:9), intent(inout), optional :: stat_eff_visc + type(vector_field), dimension(1:3), intent(inout), optional :: stat_int_mom_exch + type(vector_field), dimension(1:sys_size), intent(inout), optional :: stat_q_cons_filtered #ifdef MFC_MPI @@ -821,7 +828,7 @@ contains if (present(beta)) then alt_sys = sys_size + 1 else if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. present(stat_int_mom_exch)) then - alt_sys = sys_size + 12 + alt_sys = sys_size + 1 + 9*4 + 9*4 + 3*4 + 6*4 else alt_sys = sys_size end if @@ -906,8 +913,10 @@ contains if (ib) then if (present(stat_reynolds_stress) .and. present(stat_eff_visc) .and. 
present(stat_int_mom_exch)) then - call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, & - stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, stat_int_mom_exch=stat_int_mom_exch) + call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, & + filtered_fluid_indicator_function=filtered_fluid_indicator_function, & + stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, & + stat_int_mom_exch=stat_int_mom_exch, stat_q_cons_filtered=stat_q_cons_filtered) else call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm) end if diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 6efe39d5d9..50590a26fe 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -513,6 +513,7 @@ module m_global_parameters logical :: compute_autocorrelation integer :: t_step_stat_start real(wp) :: filter_width + logical :: q_filtered_wrt !$acc declare create(u_inf_ref, rho_inf_ref, T_inf_ref, filter_width) @@ -803,6 +804,7 @@ contains compute_autocorrelation = .false. t_step_stat_start = dflt_int filter_width = dflt_real + q_filtered_wrt = .false. 
end subroutine s_assign_default_values_to_user_inputs @@ -1158,8 +1160,8 @@ contains allocate (MPI_IO_DATA%view(1:sys_size + 1)) allocate (MPI_IO_DATA%var(1:sys_size + 1)) else if (volume_filtering_momentum_eqn) then - allocate (MPI_IO_DATA%view(1:sys_size+12)) - allocate (MPI_IO_DATA%var(1:sys_size+12)) + allocate (MPI_IO_DATA%view(1:sys_size+109)) + allocate (MPI_IO_DATA%var(1:sys_size+109)) else allocate (MPI_IO_DATA%view(1:sys_size)) allocate (MPI_IO_DATA%var(1:sys_size)) @@ -1180,7 +1182,7 @@ contains MPI_IO_DATA%var(i)%sf => null() end do else if (volume_filtering_momentum_eqn) then - do i = sys_size+1, sys_size+12 + do i = sys_size+1, sys_size+109 allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do @@ -1357,7 +1359,7 @@ contains MPI_IO_DATA%var(i)%sf => null() end do else if (volume_filtering_momentum_eqn) then - do i = 1, sys_size+12 + do i = 1, sys_size+109 MPI_IO_DATA%var(i)%sf => null() end do else diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index bb359a4bed..bac8259b81 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -93,7 +93,8 @@ contains & 'viscous', 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & & 'hyperelasticity', 'rkck_adap_dt', 'bc_io', 'powell', 'cont_damage', & & 'periodic_ibs', 'compute_particle_drag', 'periodic_forcing', 'volume_filtering_momentum_eqn', & - & 'store_levelset', 'slab_domain_decomposition', 'compute_autocorrelation' ] + & 'store_levelset', 'slab_domain_decomposition', 'compute_autocorrelation', & + & 'q_filtered_wrt' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 453632807a..1ddf59c881 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -192,7 +192,7 @@ contains periodic_ibs, compute_particle_drag, u_inf_ref, rho_inf_ref, T_inf_ref, & periodic_forcing, volume_filtering_momentum_eqn, 
store_levelset, & slab_domain_decomposition, compute_autocorrelation, t_step_stat_start, & - filter_width + filter_width, q_filtered_wrt ! Checking that an input file has been provided by the user. If it ! has, then the input file is read in, otherwise, simulation exits. @@ -1329,7 +1329,7 @@ contains call nvtxEndRange call nvtxStartRange("COMPUTE-STATISTICS") - call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) + call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, reynolds_stress, eff_visc, int_mom_exch) call nvtxEndRange ! write(100, *) mag_reynolds_stress%sf(10, 10, 10) @@ -1459,11 +1459,25 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") - do i = 1, 4 - !$acc update host(stat_reynolds_stress(i)%sf) - !$acc update host(stat_eff_visc(i)%sf) - !$acc update host(stat_int_mom_exch(i)%sf) - end do + if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then + !$acc update host(filtered_fluid_indicator_function%sf) + do i = 1, 9 + do j = 1, 4 + !$acc update host(stat_reynolds_stress(i)%vf(j)%sf) + !$acc update host(stat_eff_visc(i)%vf(j)%sf) + end do + end do + do i = 1, 3 + do j = 1, 4 + !$acc update host(stat_int_mom_exch(i)%vf(j)%sf) + end do + end do + do i = 1, sys_size + do j = 1, 4 + !$acc update host(stat_q_cons_filtered(i)%vf(j)%sf) + end do + end do + end if do i = 1, sys_size !$acc update host(q_cons_ts(1)%vf(i)%sf) do l = 0, p @@ -1496,9 +1510,11 @@ contains call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() else - if (volume_filtering_momentum_eqn) then + if (volume_filtering_momentum_eqn .and. (t_step == 0 .or. 
t_step == t_step_stop)) then call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, & - stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, stat_int_mom_exch=stat_int_mom_exch) + filtered_fluid_indicator_function=filtered_fluid_indicator_function, & + stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, & + stat_int_mom_exch=stat_int_mom_exch, stat_q_cons_filtered=stat_q_cons_filtered) else call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count) end if diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index a503412162..7baf741244 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -545,9 +545,9 @@ contains call nvtxEndRange call nvtxStartRange("COMPUTE-UNCLOSED-TERMS") - call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) - call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, mag_eff_visc) - call s_compute_interphase_momentum_exchange(int_mom_exch, mag_int_mom_exch) + call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) + call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress) + call s_compute_interphase_momentum_exchange(int_mom_exch) call nvtxEndRange end subroutine s_volume_filter_momentum_eqn @@ -624,7 +624,7 @@ contains integer :: i - do i = contxb, momxe + do i = 1, sys_size call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) end do @@ -810,11 +810,9 @@ contains end subroutine s_setup_terms_filtering - subroutine s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress, mag_reynolds_stress) + subroutine s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered type(vector_field), dimension(1:num_dims), intent(inout) 
:: reynolds_stress - type(scalar_field), intent(inout) :: mag_reynolds_stress - real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: div_Ru integer :: i, j, k, l, q !$acc parallel loop collapse(3) gang vector default(present) @@ -833,81 +831,18 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - !$acc loop seq - do l = 1, num_dims - !$acc loop seq - do q = 1, num_dims - reynolds_stress(l)%vf(q)%sf(i, j, k) = reynolds_stress(l)%vf(q)%sf(i, j, k) * filtered_fluid_indicator_function%sf(i, j, k) - end do - end do - end do - end do - end do - - ! set boundary buffer zone values -#ifdef MFC_MPI - do l = 1, num_dims - do q = 1, num_dims - call s_populate_scalarfield_buffers(reynolds_stress(l)%vf(q)) - end do - end do -#else - do l = 1, num_dims - do q = 1, num_dims - reynolds_stress(l)%vf(q)%sf(-buff_size:-1, :, :) = reynolds_stress(l)%vf(q)%sf(m-buff_size+1:m, :, :) - reynolds_stress(l)%vf(q)%sf(m+1:m+buff_size, :, :) = reynolds_stress(l)%vf(q)%sf(0:buff_size-1, :, :) - - reynolds_stress(l)%vf(q)%sf(:, -buff_size:-1, :) = reynolds_stress(l)%vf(q)%sf(:, n-buff_size+1:n, :) - reynolds_stress(l)%vf(q)%sf(:, n+1:n+buff_size, :) = reynolds_stress(l)%vf(q)%sf(:, 0:buff_size-1, :) - - reynolds_stress(l)%vf(q)%sf(:, :, -buff_size:-1) = reynolds_stress(l)%vf(q)%sf(:, :, p-buff_size+1:p) - reynolds_stress(l)%vf(q)%sf(:, :, p+1:p+buff_size) = reynolds_stress(l)%vf(q)%sf(:, :, 0:buff_size-1) - end do - end do -#endif - - ! 
div(Ru), using CD2 FD scheme - !$acc parallel loop collapse(3) gang vector default(present) copy(div_Ru) - do i = 0, m - do j = 0, n - do k = 0, p - !$acc loop seq - do l = 1, num_dims - div_Ru(l, i, j, k) = (reynolds_stress(l)%vf(1)%sf(i+1, j, k) - reynolds_stress(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (reynolds_stress(l)%vf(2)%sf(i, j+1, k) - reynolds_stress(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (reynolds_stress(l)%vf(3)%sf(i, j, k+1) - reynolds_stress(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) - end do - end do - end do - end do - - !$acc parallel loop collapse(3) gang vector default(present) copyin(div_Ru) - do i = 0, m - do j = 0, n - do k = 0, p - mag_reynolds_stress%sf(i, j, k) = sqrt(div_Ru(1, i, j, k)**2 + div_Ru(2, i, j, k)**2 + div_Ru(3, i, j, k)**2) - end do - end do - end do - end subroutine s_compute_pseudo_turbulent_reynolds_stress - subroutine s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, mag_eff_visc) + subroutine s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress - type(scalar_field), intent(inout) :: mag_eff_visc - real(wp), dimension(1:num_dims, 0:m, 0:n, 0:p) :: div_eff_visc integer :: i, j, k, l, q ! set buffers for filtered momentum quantities and density #ifdef MFC_MPI - do i = 1, momxe + do i = contxb, momxe call s_populate_scalarfield_buffers(q_cons_filtered(i)) end do #else @@ -942,86 +877,13 @@ contains end do end do - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - !$acc loop seq - do l = 1, num_dims - !$acc loop seq - do q = 1, num_dims - eff_visc(l)%vf(q)%sf(i, j, k) = eff_visc(l)%vf(q)%sf(i, j, k) * filtered_fluid_indicator_function%sf(i, j, k) - end do - end do - end do - end do - end do - - ! 
set boundary buffer zone values -#ifdef MFC_MPI - do l = 1, num_dims - do q = 1, num_dims - call s_populate_scalarfield_buffers(eff_visc(l)%vf(q)) - end do - end do -#else - do l = 1, num_dims - do q = 1, num_dims - eff_visc(l)%vf(q)%sf(-buff_size:-1, :, :) = eff_visc(l)%vf(q)%sf(m-buff_size+1:m, :, :) - eff_visc(l)%vf(q)%sf(m+1:m+buff_size, :, :) = eff_visc(l)%vf(q)%sf(0:buff_size-1, :, :) - - eff_visc(l)%vf(q)%sf(:, -buff_size:-1, :) = eff_visc(l)%vf(q)%sf(:, n-buff_size+1:n, :) - eff_visc(l)%vf(q)%sf(:, n+1:n+buff_size, :) = eff_visc(l)%vf(q)%sf(:, 0:buff_size-1, :) - - eff_visc(l)%vf(q)%sf(:, :, -buff_size:-1) = eff_visc(l)%vf(q)%sf(:, :, p-buff_size+1:p) - eff_visc(l)%vf(q)%sf(:, :, p+1:p+buff_size) = eff_visc(l)%vf(q)%sf(:, :, 0:buff_size-1) - end do - end do -#endif - - ! div(eff_visc), using CD2 FD scheme - !$acc parallel loop collapse(3) gang vector default(present) copy(div_eff_visc) - do i = 0, m - do j = 0, n - do k = 0, p - !$acc loop seq - do l = 1, num_dims - div_eff_visc(l, i, j, k) = (eff_visc(l)%vf(1)%sf(i+1, j, k) - eff_visc(l)%vf(1)%sf(i-1, j, k))/(2._wp*dx(i)) & - + (eff_visc(l)%vf(2)%sf(i, j+1, k) - eff_visc(l)%vf(2)%sf(i, j-1, k))/(2._wp*dy(j)) & - + (eff_visc(l)%vf(3)%sf(i, j, k+1) - eff_visc(l)%vf(3)%sf(i, j, k-1))/(2._wp*dz(k)) - end do - end do - end do - end do - - !$acc parallel loop collapse(3) gang vector default(present) copyin(div_eff_visc) - do i = 0, m - do j = 0, n - do k = 0, p - mag_eff_visc%sf(i, j, k) = sqrt(div_eff_visc(1, i, j, k)**2 + div_eff_visc(2, i, j, k)**2 + div_eff_visc(3, i, j, k)**2) - end do - end do - end do - end subroutine s_compute_effective_viscosity - subroutine s_compute_interphase_momentum_exchange(int_mom_exch, mag_int_mom_exch) + subroutine s_compute_interphase_momentum_exchange(int_mom_exch) type(scalar_field), dimension(1:num_dims), intent(in) :: int_mom_exch - type(scalar_field), intent(inout) :: mag_int_mom_exch integer :: i, j, k - !$acc parallel loop collapse(3) gang vector default(present) - do i 
= 0, m - do j = 0, n - do k = 0, p - mag_int_mom_exch%sf(i, j, k) = sqrt(int_mom_exch(1)%sf(i, j, k)**2 & - + int_mom_exch(2)%sf(i, j, k)**2 & - + int_mom_exch(3)%sf(i, j, k)**2) - end do - end do - end do - end subroutine s_compute_interphase_momentum_exchange ! computes x-,y-,z-direction forces on particles diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index f9bedd37af..b8ac4ba7c7 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -309,6 +309,7 @@ def analytic(self): 'compute_autocorrelation': ParamType.LOG, 't_step_stat_start': ParamType.INT, 'filter_width': ParamType.REAL, + 'q_filtered_wrt': ParamType.LOG, }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', From 8ab21bdb0a1f7f66f0928f37f921908e95cfe4b1 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Wed, 3 Sep 2025 12:43:42 -0500 Subject: [PATCH 11/30] added filtered pres stats --- runs/phi01/case.py | 6 +- src/common/m_mpi_common.fpp | 10 +++- src/post_process/m_data_input.f90 | 22 +++++-- src/post_process/m_start_up.f90 | 9 ++- src/simulation/m_compute_statistics.fpp | 77 ++++++++++++++++++------- src/simulation/m_data_output.fpp | 18 ++++-- src/simulation/m_start_up.fpp | 10 +++- src/simulation/m_volume_filtering.fpp | 65 +++++++++------------ 8 files changed, 137 insertions(+), 80 deletions(-) diff --git a/runs/phi01/case.py b/runs/phi01/case.py index e47086a47e..9751518117 100644 --- a/runs/phi01/case.py +++ b/runs/phi01/case.py @@ -34,9 +34,9 @@ #print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow dt = 4.0E-06 -Nt = 20 -t_save = 1 -t_step_start_stats = 10 +Nt = 100 +t_save = 10 +t_step_start_stats = 50 Nx = 99 Ny = 99 diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 4eca64e8ad..2469f11348 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -154,7 +154,7 @@ contains !! 
@param levelset_norm normalized vector from every cell to the closest point to the IB !! @param beta Eulerian void fraction from lagrangian bubbles subroutine s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, beta, filtered_fluid_indicator_function, & - stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered, stat_filtered_pressure) type(scalar_field), & dimension(sys_size), & @@ -179,7 +179,8 @@ contains type(vector_field), dimension(1:9), intent(in), optional :: stat_reynolds_stress type(vector_field), dimension(1:9), intent(in), optional :: stat_eff_visc type(vector_field), dimension(1:3), intent(in), optional :: stat_int_mom_exch - type(vector_field), dimension(1:sys_size), intent(in), optional :: stat_q_cons_filtered + type(vector_field), dimension(1:sys_size-1), intent(in), optional :: stat_q_cons_filtered + type(scalar_field), dimension(1:4), intent(in), optional :: stat_filtered_pressure integer, dimension(num_dims) :: sizes_glb, sizes_loc integer, dimension(1) :: airfoil_glb, airfoil_loc, airfoil_start @@ -221,11 +222,14 @@ contains MPI_IO_DATA%var(sys_size+73+(i-1)*4+j)%sf => stat_int_mom_exch(i)%vf(j)%sf(0:m, 0:n, 0:p) end do end do - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 MPI_IO_DATA%var(sys_size+85+(i-1)*4+j)%sf => stat_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p) end do end do + do i = 1, 4 + MPI_IO_DATA%var(sys_size+105+i)%sf => stat_filtered_pressure(i)%sf(0:m, 0:n, 0:p) + end do end if if (present(beta)) then diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 7ac7a502dc..d2b203d675 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -65,6 +65,7 @@ end subroutine s_read_abstract_data_files type(vector_field), allocatable, dimension(:), public :: stat_eff_visc type(vector_field), allocatable, dimension(:), public :: stat_int_mom_exch 
type(vector_field), allocatable, dimension(:), public :: stat_q_cons_filtered + type(scalar_field), allocatable, dimension(:), public :: stat_filtered_pressure procedure(s_read_abstract_data_files), pointer :: s_read_data_files => null() @@ -468,7 +469,8 @@ subroutine s_read_parallel_data_files(t_step) stat_reynolds_stress=stat_reynolds_stress, & stat_eff_visc=stat_eff_visc, & stat_int_mom_exch=stat_int_mom_exch, & - stat_q_cons_filtered=stat_q_cons_filtered) + stat_q_cons_filtered=stat_q_cons_filtered, & + stat_filtered_pressure=stat_filtered_pressure) else call s_initialize_mpi_data(q_cons_vf, ib_markers) end if @@ -1347,7 +1349,8 @@ subroutine s_initialize_data_input_module if (q_filtered_wrt) allocate (stat_reynolds_stress(1:9)) if (q_filtered_wrt) allocate (stat_eff_visc(1:9)) if (q_filtered_wrt) allocate (stat_int_mom_exch(1:3)) - if (q_filtered_wrt) allocate (stat_q_cons_filtered(1:sys_size)) + if (q_filtered_wrt) allocate (stat_q_cons_filtered(1:sys_size-1)) + if (q_filtered_wrt) allocate (stat_filtered_pressure(1:4)) ! Allocating the parts of the conservative and primitive variables ! that do require the direct knowledge of the dimensionality of the @@ -1414,16 +1417,21 @@ subroutine s_initialize_data_input_module -buff_size:p + buff_size)) end do end do - do i = 1, sys_size + do i = 1, sys_size-1 allocate (stat_q_cons_filtered(i)%vf(1:4)) end do - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 allocate (stat_q_cons_filtered(i)%vf(j)%sf(-buff_size:m + buff_size, & -buff_size:n + buff_size, & -buff_size:p + buff_size)) end do end do + do i = 1, 4 + allocate (stat_filtered_pressure(i)%sf(-buff_size:m + buff_size, & + -buff_size:n + buff_size, & + -buff_size:p + buff_size)) + end do end if ! 
Simulation is 2D @@ -1541,13 +1549,17 @@ subroutine s_finalize_data_input_module deallocate(stat_int_mom_exch(i)%vf) end do deallocate(stat_int_mom_exch) - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 deallocate (stat_q_cons_filtered(i)%vf(j)%sf) end do deallocate(stat_q_cons_filtered(i)%vf) end do deallocate(stat_q_cons_filtered) + do i = 1, 4 + deallocate(stat_filtered_pressure(i)%sf) + end do + deallocate(stat_filtered_pressure) end if s_read_data_files => null() diff --git a/src/post_process/m_start_up.f90 b/src/post_process/m_start_up.f90 index 59c3e9499d..11d9c8fbf1 100644 --- a/src/post_process/m_start_up.f90 +++ b/src/post_process/m_start_up.f90 @@ -361,7 +361,7 @@ subroutine s_save_data(t_step, varname, pres, c, H) varname(:) = ' ' end do end do - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 q_sf = stat_q_cons_filtered(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) write (varname, '(A,I0,A,I0)') 'stat_q_cons_filtered', i, '_m', j @@ -370,6 +370,13 @@ subroutine s_save_data(t_step, varname, pres, c, H) varname(:) = ' ' end do end do + do i = 1, 4 + q_sf = stat_filtered_pressure(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'stat_filtered_pressure_m', i + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do end if ! Adding the species' concentrations to the formatted database file diff --git a/src/simulation/m_compute_statistics.fpp b/src/simulation/m_compute_statistics.fpp index 1e1e4de29d..6f73d6d967 100644 --- a/src/simulation/m_compute_statistics.fpp +++ b/src/simulation/m_compute_statistics.fpp @@ -22,16 +22,18 @@ module m_compute_statistics type(vector_field), allocatable, dimension(:) :: Msn_eff_visc type(vector_field), allocatable, dimension(:) :: Msn_int_mom_exch type(vector_field), allocatable, dimension(:) :: Msn_q_cons_filtered + type(scalar_field), allocatable, dimension(:) :: Msn_filtered_pressure - ! 
2nd, 3rd, and 4th statistical moments for unclosed terms in volume filtered momentum equation + ! 1st, 2nd, 3rd, and 4th statistical moments for unclosed terms in volume filtered momentum equation type(vector_field), allocatable, dimension(:), public :: stat_reynolds_stress type(vector_field), allocatable, dimension(:), public :: stat_eff_visc type(vector_field), allocatable, dimension(:), public :: stat_int_mom_exch type(vector_field), allocatable, dimension(:), public :: stat_q_cons_filtered + type(scalar_field), allocatable, dimension(:), public :: stat_filtered_pressure - !$acc declare create(Msn_reynolds_stress, Msn_eff_visc, Msn_int_mom_exch, Msn_q_cons_filtered) + !$acc declare create(Msn_reynolds_stress, Msn_eff_visc, Msn_int_mom_exch, Msn_q_cons_filtered, Msn_filtered_pressure) - !$acc declare create(stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) + !$acc declare create(stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered, stat_filtered_pressure) contains @@ -71,17 +73,23 @@ contains @:ACC_SETUP_VFs(Msn_int_mom_exch(i)) end do - @:ALLOCATE(Msn_q_cons_filtered(1:sys_size)) - do i = 1, sys_size + @:ALLOCATE(Msn_q_cons_filtered(1:sys_size-1)) + do i = 1, sys_size-1 @:ALLOCATE(Msn_q_cons_filtered(i)%vf(1:4)) end do - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 @:ALLOCATE(Msn_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p)) end do @:ACC_SETUP_VFs(Msn_q_cons_filtered) end do + @:ALLOCATE(Msn_filtered_pressure(1:4)) + do i = 1, 4 + @:ALLOCATE(Msn_filtered_pressure(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(Msn_filtered_pressure(i)) + end do + @:ALLOCATE(stat_reynolds_stress(1:9)) do i = 1, 9 @:ALLOCATE(stat_reynolds_stress(i)%vf(1:4)) @@ -115,29 +123,38 @@ contains @:ACC_SETUP_VFs(stat_int_mom_exch(i)) end do - @:ALLOCATE(stat_q_cons_filtered(1:sys_size)) - do i = 1, sys_size + @:ALLOCATE(stat_q_cons_filtered(1:sys_size-1)) + do i = 1, sys_size-1 @:ALLOCATE(stat_q_cons_filtered(i)%vf(1:4)) end do - do i = 1, 
sys_size + do i = 1, sys_size-1 do j = 1, 4 @:ALLOCATE(stat_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p)) end do @:ACC_SETUP_VFs(stat_q_cons_filtered) end do + @:ALLOCATE(stat_filtered_pressure(1:4)) + do i = 1, 4 + @:ALLOCATE(stat_filtered_pressure(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(stat_filtered_pressure(i)) + end do + end subroutine s_initialize_statistics_module - subroutine s_compute_statistics_momentum_unclosed_terms(n_step, reynolds_stress, eff_visc, int_mom_exch) - type(vector_field), dimension(3), intent(in) :: reynolds_stress - type(vector_field), dimension(3), intent(in) :: eff_visc - type(scalar_field), dimension(3), intent(in) :: int_mom_exch - - integer, intent(in) :: n_step + subroutine s_compute_statistics_momentum_unclosed_terms(t_step, t_step_stat_start, reynolds_stress, eff_visc, int_mom_exch, q_cons_filtered, filtered_pressure) + type(vector_field), dimension(1:3), intent(in) :: reynolds_stress + type(vector_field), dimension(1:3), intent(in) :: eff_visc + type(scalar_field), dimension(1:3), intent(in) :: int_mom_exch + type(scalar_field), dimension(sys_size-1), intent(in) :: q_cons_filtered + type(scalar_field), intent(in) :: filtered_pressure + integer, intent(in) :: t_step + integer, intent(in) :: t_step_stat_start + real(wp) :: ns integer :: i, j - ns = real(n_step, wp) + ns = real(t_step - t_step_stat_start, wp) ! update M1, M2, M3, M4 do i = 1, 3 @@ -147,16 +164,24 @@ contains end do call s_update_statistics(ns, int_mom_exch(i), Msn_int_mom_exch(i)%vf) end do + do i = 1, sys_size-1 + call s_update_statistics(ns, q_cons_filtered(i), Msn_q_cons_filtered(i)%vf) + end do + call s_update_statistics(ns, filtered_pressure, Msn_filtered_pressure) ! compute 1st, 2nd, 3rd, 4th order statistical moments - if (n_step > 3) then + if (t_step == t_step_stop-1) then ! 
only compute at final time do i = 1, 3 do j = 1, 3 - call s_compute_statistical_moments(ns, Msn_reynolds_stress((i-1)*3 + j)%vf, stat_reynolds_stress((i-1)*3 + j)%vf) - call s_compute_statistical_moments(ns, Msn_eff_visc((i-1)*3 + j)%vf, stat_eff_visc((i-1)*3 + j)%vf) + call s_compute_statistical_moments(ns, Msn_reynolds_stress((i-1)*3 + j)%vf, stat_reynolds_stress((i-1)*3 + j)%vf) + call s_compute_statistical_moments(ns, Msn_eff_visc((i-1)*3 + j)%vf, stat_eff_visc((i-1)*3 + j)%vf) end do call s_compute_statistical_moments(ns, Msn_int_mom_exch(i)%vf, stat_int_mom_exch(i)%vf) end do + do i = 1, sys_size-1 + call s_compute_statistical_moments(ns, Msn_q_cons_filtered(i)%vf, stat_q_cons_filtered(i)%vf) + end do + call s_compute_statistical_moments(ns, Msn_filtered_pressure, stat_filtered_pressure) end if end subroutine s_compute_statistics_momentum_unclosed_terms @@ -236,7 +261,7 @@ contains end do @:DEALLOCATE(Msn_int_mom_exch) - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 @:DEALLOCATE(Msn_q_cons_filtered(i)%vf(j)%sf) end do @@ -244,6 +269,11 @@ contains end do @:DEALLOCATE(Msn_q_cons_filtered) + do i = 1, 4 + @:DEALLOCATE(Msn_filtered_pressure(i)%sf) + end do + @:DEALLOCATE(Msn_filtered_pressure) + do i = 1, 9 do j = 1, 4 @:DEALLOCATE(stat_reynolds_stress(i)%vf(j)%sf) @@ -268,7 +298,7 @@ contains end do @:DEALLOCATE(stat_int_mom_exch) - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 @:DEALLOCATE(stat_q_cons_filtered(i)%vf(j)%sf) end do @@ -276,6 +306,11 @@ contains end do @:DEALLOCATE(stat_q_cons_filtered) + do i = 1, 4 + @:DEALLOCATE(stat_filtered_pressure(i)%sf) + end do + @:DEALLOCATE(stat_filtered_pressure) + end subroutine s_finalize_statistics_module end module m_compute_statistics diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 840278d58f..94a04b94d3 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -77,7 +77,8 @@ contains !! @param q_prim_vf Primitive variables !! 
@param t_step Current time step subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta, filtered_fluid_indicator_function, & - stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, & + stat_q_cons_filtered, stat_filtered_pressure) type(scalar_field), & dimension(sys_size), & @@ -99,13 +100,15 @@ contains type(vector_field), dimension(1:9), intent(inout), optional :: stat_reynolds_stress type(vector_field), dimension(1:9), intent(inout), optional :: stat_eff_visc type(vector_field), dimension(1:3), intent(inout), optional :: stat_int_mom_exch - type(vector_field), dimension(1:sys_size), intent(inout), optional :: stat_q_cons_filtered + type(vector_field), dimension(1:sys_size-1), intent(inout), optional :: stat_q_cons_filtered + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_filtered_pressure if (.not. parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, beta) else call s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, & - filtered_fluid_indicator_function, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) + filtered_fluid_indicator_function, stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, & + stat_q_cons_filtered, stat_filtered_pressure) end if end subroutine s_write_data_files @@ -795,7 +798,8 @@ contains !! @param t_step Current time-step !! 
@param beta Eulerian void fraction from lagrangian bubbles subroutine s_write_parallel_data_files(q_cons_vf, q_prim_vf, t_step, beta, filtered_fluid_indicator_function, & - stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered) + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, & + stat_q_cons_filtered, stat_filtered_pressure) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf @@ -805,7 +809,8 @@ contains type(vector_field), dimension(1:9), intent(inout), optional :: stat_reynolds_stress type(vector_field), dimension(1:9), intent(inout), optional :: stat_eff_visc type(vector_field), dimension(1:3), intent(inout), optional :: stat_int_mom_exch - type(vector_field), dimension(1:sys_size), intent(inout), optional :: stat_q_cons_filtered + type(vector_field), dimension(1:sys_size-1), intent(inout), optional :: stat_q_cons_filtered + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_filtered_pressure #ifdef MFC_MPI @@ -916,7 +921,8 @@ contains call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm, & filtered_fluid_indicator_function=filtered_fluid_indicator_function, & stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, & - stat_int_mom_exch=stat_int_mom_exch, stat_q_cons_filtered=stat_q_cons_filtered) + stat_int_mom_exch=stat_int_mom_exch, stat_q_cons_filtered=stat_q_cons_filtered, & + stat_filtered_pressure=stat_filtered_pressure) else call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm) end if diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 1ddf59c881..531f4145da 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1329,7 +1329,7 @@ contains call nvtxEndRange call nvtxStartRange("COMPUTE-STATISTICS") - call s_compute_statistics_momentum_unclosed_terms(t_step - t_step_stat_start, reynolds_stress, eff_visc, 
int_mom_exch) + call s_compute_statistics_momentum_unclosed_terms(t_step, t_step_stat_start, reynolds_stress, eff_visc, int_mom_exch, q_cons_filtered, filtered_pressure) call nvtxEndRange ! write(100, *) mag_reynolds_stress%sf(10, 10, 10) @@ -1472,11 +1472,14 @@ contains !$acc update host(stat_int_mom_exch(i)%vf(j)%sf) end do end do - do i = 1, sys_size + do i = 1, sys_size-1 do j = 1, 4 !$acc update host(stat_q_cons_filtered(i)%vf(j)%sf) end do end do + do i = 1, 4 + !$acc update host(stat_filtered_pressure(i)%sf) + end do end if do i = 1, sys_size !$acc update host(q_cons_ts(1)%vf(i)%sf) @@ -1514,7 +1517,8 @@ contains call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, & filtered_fluid_indicator_function=filtered_fluid_indicator_function, & stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, & - stat_int_mom_exch=stat_int_mom_exch, stat_q_cons_filtered=stat_q_cons_filtered) + stat_int_mom_exch=stat_int_mom_exch, stat_q_cons_filtered=stat_q_cons_filtered, & + stat_filtered_pressure=stat_filtered_pressure) else call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count) end if diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 7baf741244..ec659a7905 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -45,7 +45,8 @@ module m_volume_filtering type(scalar_field), public :: filtered_fluid_indicator_function ! volume filtered conservative variables - type(scalar_field), allocatable, dimension(:) :: q_cons_filtered + type(scalar_field), allocatable, dimension(:), public :: q_cons_filtered + type(scalar_field), public :: filtered_pressure ! viscous and pressure+viscous stress tensors type(vector_field), allocatable, dimension(:) :: visc_stress @@ -59,20 +60,15 @@ module m_volume_filtering type(vector_field), allocatable, dimension(:), public :: eff_visc type(scalar_field), allocatable, dimension(:), public :: int_mom_exch - ! 
magnitude of unclosed terms in momentum equation - type(scalar_field), public :: mag_reynolds_stress - type(scalar_field), public :: mag_eff_visc - type(scalar_field), public :: mag_int_mom_exch - ! 1/mu real(wp), allocatable, dimension(:, :) :: Res ! x-,y-,z-direction forces on particles real(wp), allocatable, dimension(:, :) :: particle_forces - !$acc declare create(fluid_indicator_function, filtered_fluid_indicator_function, q_cons_filtered) + !$acc declare create(fluid_indicator_function, filtered_fluid_indicator_function, q_cons_filtered, filtered_pressure) !$acc declare create(visc_stress, pres_visc_stress, div_pres_visc_stress) - !$acc declare create(reynolds_stress, eff_visc, int_mom_exch, mag_reynolds_stress, mag_eff_visc, mag_int_mom_exch) + !$acc declare create(reynolds_stress, eff_visc, int_mom_exch) !$acc declare create(Res, particle_forces) #if defined(MFC_OpenACC) @@ -116,14 +112,19 @@ contains integer :: i, j, k integer :: size_n(1), inembed(1), onembed(1) - @:ALLOCATE(q_cons_filtered(1:sys_size)) - do i = 1, sys_size + @:ALLOCATE(q_cons_filtered(1:sys_size-1)) + do i = 1, sys_size-1 @:ALLOCATE(q_cons_filtered(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & idwbuff(2)%beg:idwbuff(2)%end, & idwbuff(3)%beg:idwbuff(3)%end)) @:ACC_SETUP_SFs(q_cons_filtered(i)) end do + @:ALLOCATE(filtered_pressure%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(filtered_pressure) + @:ALLOCATE(visc_stress(1:num_dims)) do i = 1, num_dims @:ALLOCATE(visc_stress(i)%vf(1:num_dims)) @@ -192,21 +193,6 @@ contains @:ACC_SETUP_SFs(int_mom_exch(i)) end do - @:ALLOCATE(mag_reynolds_stress%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(mag_reynolds_stress) - - @:ALLOCATE(mag_eff_visc%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(mag_eff_visc) - - 
@:ALLOCATE(mag_int_mom_exch%sf(idwbuff(1)%beg:idwbuff(1)%end, & - idwbuff(2)%beg:idwbuff(2)%end, & - idwbuff(3)%beg:idwbuff(3)%end)) - @:ACC_SETUP_SFs(mag_int_mom_exch) - if (viscous) then @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) end if @@ -537,11 +523,12 @@ contains call nvtxEndRange call nvtxStartRange("UNCLOSED-TERM-SETUP") - call s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) + call s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress, filtered_pressure) call nvtxEndRange call nvtxStartRange("FILTER-UNCLOSED-TERM-VARS") call s_apply_fftw_filter_tensor(reynolds_stress, visc_stress, eff_visc, div_pres_visc_stress, int_mom_exch) + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., filtered_pressure) call nvtxEndRange call nvtxStartRange("COMPUTE-UNCLOSED-TERMS") @@ -620,11 +607,11 @@ contains !< apply the gaussian filter to the conservative variables and compute their filtered components subroutine s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered + type(scalar_field), dimension(sys_size-1), intent(inout) :: q_cons_filtered integer :: i - do i = 1, sys_size + do i = 1, sys_size-1 call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) end do @@ -701,10 +688,11 @@ contains end subroutine s_compute_viscous_stress_tensor - subroutine s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf) + subroutine s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf, filtered_pressure) type(vector_field), dimension(num_dims), intent(inout) :: pres_visc_stress type(vector_field), dimension(num_dims), intent(in) :: visc_stress type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(scalar_field), 
intent(inout) :: filtered_pressure real(wp) :: pressure integer :: i, j, k @@ -724,6 +712,8 @@ contains pres_visc_stress(3)%vf(1)%sf(i, j, k) = - visc_stress(3)%vf(1)%sf(i, j, k) pres_visc_stress(3)%vf(2)%sf(i, j, k) = - visc_stress(3)%vf(2)%sf(i, j, k) pres_visc_stress(3)%vf(3)%sf(i, j, k) = pressure - visc_stress(3)%vf(3)%sf(i, j, k) + + filtered_pressure%sf(i, j, k) = pressure end do end do end do @@ -758,12 +748,13 @@ contains end subroutine s_compute_divergence_stress_tensor !< setup for calculation of unclosed terms in volume filtered momentum eqn - subroutine s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) + subroutine s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress, filtered_pressure) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress type(vector_field), dimension(1:num_dims), intent(inout) :: pres_visc_stress type(scalar_field), dimension(1:num_dims), intent(inout) :: div_pres_visc_stress + type(scalar_field), intent(inout) :: filtered_pressure integer :: i, j, k, l, q @@ -804,14 +795,14 @@ contains ! 
effective viscosity setup, return viscous stress tensor call s_compute_viscous_stress_tensor(visc_stress, q_cons_vf) - call s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf) + call s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf, filtered_pressure) call s_compute_divergence_stress_tensor(div_pres_visc_stress, pres_visc_stress) end subroutine s_setup_terms_filtering subroutine s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_filtered + type(scalar_field), dimension(sys_size-1), intent(in) :: q_cons_filtered type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress integer :: i, j, k, l, q @@ -834,7 +825,7 @@ contains end subroutine s_compute_pseudo_turbulent_reynolds_stress subroutine s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_filtered + type(scalar_field), dimension(1:sys_size-1), intent(inout) :: q_cons_filtered type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress @@ -1143,11 +1134,13 @@ contains @:DEALLOCATE(fluid_indicator_function%sf) @:DEALLOCATE(filtered_fluid_indicator_function%sf) - do i = 1, sys_size + do i = 1, sys_size-1 @:DEALLOCATE(q_cons_filtered(i)%sf) end do @:DEALLOCATE(q_cons_filtered) + @:DEALLOCATE(filtered_pressure%sf) + do i = 1, num_dims do j = 1, num_dims @:DEALLOCATE(visc_stress(i)%vf(j)%sf) @@ -1190,10 +1183,6 @@ contains end do @:DEALLOCATE(int_mom_exch) - @:DEALLOCATE(mag_reynolds_stress%sf) - @:DEALLOCATE(mag_eff_visc%sf) - @:DEALLOCATE(mag_int_mom_exch%sf) - @:DEALLOCATE(Res) @:DEALLOCATE(particle_forces) From f1b883968096de53685beaec15d2fd83c314def2 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Wed, 3 Sep 2025 17:07:27 -0500 Subject: [PATCH 12/30] updated pressure calc in vol filter and cleanup --- 
src/simulation/m_additional_forcing.fpp | 22 ++-- src/simulation/m_start_up.fpp | 9 +- src/simulation/m_volume_filtering.fpp | 128 +++++++++--------------- 3 files changed, 58 insertions(+), 101 deletions(-) diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp index ae1d028330..b3b6807b55 100644 --- a/src/simulation/m_additional_forcing.fpp +++ b/src/simulation/m_additional_forcing.fpp @@ -15,7 +15,7 @@ module m_additional_forcing private; public :: s_initialize_additional_forcing_module, & s_add_periodic_forcing, s_finalize_additional_forcing_module, & - s_compute_phase_average, s_compute_periodic_forcing; + s_compute_periodic_forcing type(scalar_field), allocatable, dimension(:) :: q_periodic_force real(wp) :: volfrac_phi @@ -61,8 +61,8 @@ contains end do end subroutine s_add_periodic_forcing - !< compute the space and time average of quantities - subroutine s_compute_phase_average(q_cons_vf, t_step) + !< compute the space and time average of quantities, compute the periodic forcing terms described in Khalloufi and Capecelatro + subroutine s_compute_periodic_forcing(q_cons_vf, t_step) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: t_step real(wp) :: spatial_rho_glb, spatial_u_glb @@ -95,18 +95,7 @@ contains phase_u = phase_u + (spatial_u_glb / real(N_x_total_glb, wp) - phase_u) / real(t_step, wp) !$acc update device(phase_rho, phase_u) - ! if (proc_rank == 0) then - ! print *, t_step, 'rho', phase_rho, 'rho*u', phase_u - ! end if - - end subroutine s_compute_phase_average - - !< computes the periodic forcing terms described in Khalloufi and Capecelatro - subroutine s_compute_periodic_forcing(q_cons_vf) - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - - integer :: i, j, k - + ! 
compute periodic forcing terms for mass, momentum, energy !$acc parallel loop collapse(3) gang vector default(present) do i = 0, m do j = 0, n @@ -122,6 +111,7 @@ contains end do end do end do + end subroutine s_compute_periodic_forcing subroutine s_finalize_additional_forcing_module @@ -132,4 +122,4 @@ contains @:DEALLOCATE(q_periodic_force) end subroutine s_finalize_additional_forcing_module -end module m_additional_forcing \ No newline at end of file +end module m_additional_forcing diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 531f4145da..0d65666498 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1324,8 +1324,8 @@ contains ! Volume filter flow variables, compute unclosed terms and their statistics if (volume_filtering_momentum_eqn) then if (t_step > t_step_stat_start) then - call nvtxStartRange("VOLUME-FILTER-MOMENTUM-EQUATION") - call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf) + call nvtxStartRange("VOLUME-FILTERED-MOMENTUM-EQUATION") + call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf, q_prim_vf) call nvtxEndRange call nvtxStartRange("COMPUTE-STATISTICS") @@ -1346,15 +1346,16 @@ contains ! close(101) ! end if + ! Compute explicit x-, y-, z- forces on each particle call nvtxStartRange("COMPUTE-PARTICLE-FORCES") call s_compute_particle_forces() call nvtxEndRange end if + ! 
Compute terms to force a constant mass flow rate in fully periodic domain if (periodic_forcing) then call nvtxStartRange("COMPUTE-PERIODIC-FORCING") - call s_compute_phase_average(q_cons_ts(1)%vf, t_step+1) - call s_compute_periodic_forcing(q_cons_ts(1)%vf) + call s_compute_periodic_forcing(q_cons_ts(1)%vf, t_step+1) call nvtxEndRange end if diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index ec659a7905..c1317d624e 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -29,10 +29,10 @@ module m_volume_filtering private; public :: s_initialize_fftw_explicit_filter_module, & s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & s_initialize_filtered_fluid_indicator_function, s_finalize_fftw_explicit_filter_module, & - s_apply_fftw_filter_cons, s_volume_filter_momentum_eqn, s_apply_fftw_filter_tensor, s_apply_fftw_filter_scalarfield, & + s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, & s_compute_viscous_stress_tensor, s_compute_stress_tensor, s_compute_divergence_stress_tensor, s_compute_particle_forces, & s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & - s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity, s_compute_interphase_momentum_exchange + s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity #if !defined(MFC_OpenACC) include 'fftw3.f03' @@ -514,27 +514,40 @@ contains end subroutine s_initialize_filtered_fluid_indicator_function !< calculate the unclosed terms present in the volume filtered momentum equation - subroutine s_volume_filter_momentum_eqn(q_cons_vf) + subroutine s_volume_filter_momentum_eqn(q_cons_vf, q_prim_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer :: i, j, k call nvtxStartRange("FILTER-CONS-VARS") 
- call s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) + do i = 1, sys_size-1 + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) + end do + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_prim_vf(E_idx), filtered_pressure) call nvtxEndRange - call nvtxStartRange("UNCLOSED-TERM-SETUP") - call s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress, filtered_pressure) - call nvtxEndRange + call nvtxStartRange("COMPUTE-UNCLOSED-TERMS") + call s_setup_terms_filtering(q_cons_vf, q_prim_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) - call nvtxStartRange("FILTER-UNCLOSED-TERM-VARS") - call s_apply_fftw_filter_tensor(reynolds_stress, visc_stress, eff_visc, div_pres_visc_stress, int_mom_exch) - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., filtered_pressure) - call nvtxEndRange + ! pseudo turbulent reynolds stress + do i = 1, num_dims + do j = 1, num_dims + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(i)%vf(j)) + end do + end do + ! effective viscosity + do i = 1, num_dims + do j = 1, num_dims + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., visc_stress(i)%vf(j), eff_visc(i)%vf(j)) + end do + end do + ! 
interphase momentum exchange + do i = 1, num_dims + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .false., div_pres_visc_stress(i), int_mom_exch(i)) + end do - call nvtxStartRange("COMPUTE-UNCLOSED-TERMS") call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress) - call s_compute_interphase_momentum_exchange(int_mom_exch) call nvtxEndRange end subroutine s_volume_filter_momentum_eqn @@ -569,7 +582,9 @@ contains end do end if + call nvtxStartRange("FORWARD-3D-FFT") call s_mpi_FFT_fwd + call nvtxEndRange !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC @@ -580,7 +595,9 @@ contains end do end do + call nvtxStartRange("BACKWARD-3D-FFT") call s_mpi_FFT_bwd + call nvtxEndRange if (present(q_temp_out)) then !$acc parallel loop collapse(3) gang vector default(present) @@ -604,50 +621,6 @@ contains end subroutine s_apply_fftw_filter_scalarfield - !< apply the gaussian filter to the conservative variables and compute their filtered components - subroutine s_apply_fftw_filter_cons(q_cons_vf, q_cons_filtered) - type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf - type(scalar_field), dimension(sys_size-1), intent(inout) :: q_cons_filtered - - integer :: i - - do i = 1, sys_size-1 - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) - end do - - end subroutine s_apply_fftw_filter_cons - - !< apply the gaussian filter to the requisite tensors to compute unclosed terms of interest - subroutine s_apply_fftw_filter_tensor(reynolds_stress, visc_stress, eff_visc, div_pres_visc_stress, int_mom_exch) - type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress - type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress - type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc - type(scalar_field), 
dimension(1:num_dims), intent(inout) :: div_pres_visc_stress - type(scalar_field), dimension(1:num_dims), intent(inout) :: int_mom_exch - - integer :: i, j - - ! pseudo turbulent reynolds stress - do i = 1, num_dims - do j = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(i)%vf(j)) - end do - end do - - ! effective viscosity - do i = 1, num_dims - do j = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., visc_stress(i)%vf(j), eff_visc(i)%vf(j)) - end do - end do - - ! interphase momentum exchange - do i = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .false., div_pres_visc_stress(i), int_mom_exch(i)) - end do - - end subroutine s_apply_fftw_filter_tensor - ! compute viscous stress tensor subroutine s_compute_viscous_stress_tensor(visc_stress, q_cons_vf) type(vector_field), dimension(num_dims), intent(inout) :: visc_stress @@ -688,11 +661,11 @@ contains end subroutine s_compute_viscous_stress_tensor - subroutine s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf, filtered_pressure) + subroutine s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf, q_prim_vf) type(vector_field), dimension(num_dims), intent(inout) :: pres_visc_stress type(vector_field), dimension(num_dims), intent(in) :: visc_stress type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf - type(scalar_field), intent(inout) :: filtered_pressure + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf real(wp) :: pressure integer :: i, j, k @@ -700,20 +673,15 @@ contains do i = 0, m do j = 0, n do k = 0, p - pressure = (q_cons_vf(E_idx)%sf(i, j, k) - 0.5_wp * (q_cons_vf(momxb)%sf(i, j, k)**2 + q_cons_vf(momxb+1)%sf(i, j, k)**2 + q_cons_vf(momxb+2)%sf(i, j, k)**2) & - / q_cons_vf(contxb)%sf(i, j, k) - pi_infs(1) - qvs(1)) / (gammas(1)) - - pres_visc_stress(1)%vf(1)%sf(i, j, k) = pressure - visc_stress(1)%vf(1)%sf(i, 
j, k) + pres_visc_stress(1)%vf(1)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k) - visc_stress(1)%vf(1)%sf(i, j, k) pres_visc_stress(1)%vf(2)%sf(i, j, k) = - visc_stress(1)%vf(2)%sf(i, j, k) pres_visc_stress(1)%vf(3)%sf(i, j, k) = - visc_stress(1)%vf(3)%sf(i, j, k) pres_visc_stress(2)%vf(1)%sf(i, j, k) = - visc_stress(2)%vf(1)%sf(i, j, k) - pres_visc_stress(2)%vf(2)%sf(i, j, k) = pressure - visc_stress(2)%vf(2)%sf(i, j, k) + pres_visc_stress(2)%vf(2)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k) - visc_stress(2)%vf(2)%sf(i, j, k) pres_visc_stress(2)%vf(3)%sf(i, j, k) = - visc_stress(2)%vf(3)%sf(i, j, k) pres_visc_stress(3)%vf(1)%sf(i, j, k) = - visc_stress(3)%vf(1)%sf(i, j, k) pres_visc_stress(3)%vf(2)%sf(i, j, k) = - visc_stress(3)%vf(2)%sf(i, j, k) - pres_visc_stress(3)%vf(3)%sf(i, j, k) = pressure - visc_stress(3)%vf(3)%sf(i, j, k) - - filtered_pressure%sf(i, j, k) = pressure + pres_visc_stress(3)%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k) - visc_stress(3)%vf(3)%sf(i, j, k) end do end do end do @@ -748,13 +716,13 @@ contains end subroutine s_compute_divergence_stress_tensor !< setup for calculation of unclosed terms in volume filtered momentum eqn - subroutine s_setup_terms_filtering(q_cons_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress, filtered_pressure) + subroutine s_setup_terms_filtering(q_cons_vf, q_prim_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress type(vector_field), dimension(1:num_dims), intent(inout) :: pres_visc_stress type(scalar_field), dimension(1:num_dims), intent(inout) :: div_pres_visc_stress - type(scalar_field), intent(inout) :: filtered_pressure integer :: i, j, k, l, q @@ -767,7 +735,7 @@ contains do l = 1, 
num_dims !$acc loop seq do q = 1, num_dims - reynolds_stress(l)%vf(q)%sf(i, j, k) = (q_cons_vf(momxb-1+l)%sf(i, j, k) * q_cons_vf(momxb-1+q)%sf(i, j, k)) / q_cons_vf(1)%sf(i, j, k) ! (rho*u x rho*u)/rho = rho*(u x u) + reynolds_stress(l)%vf(q)%sf(i, j, k) = q_cons_vf(1)%sf(i, j, k) * (q_prim_vf(momxb-1+l)%sf(i, j, k) * q_prim_vf(momxb-1+q)%sf(i, j, k)) ! rho*(u x u) end do end do end do @@ -776,11 +744,11 @@ contains ! set density and momentum buffers #ifdef MFC_MPI - do i = 1, momxe + do i = contxb, momxe call s_populate_scalarfield_buffers(q_cons_vf(i)) end do #else - do i = 1, momxe + do i = contxb, momxe q_cons_vf(i)%sf(-buff_size:-1, :, :) = q_cons_vf(i)%sf(m-buff_size+1:m, :, :) q_cons_vf(i)%sf(m+1:m+buff_size, :, :) = q_cons_vf(i)%sf(0:buff_size-1, :, :) @@ -795,8 +763,9 @@ contains ! effective viscosity setup, return viscous stress tensor call s_compute_viscous_stress_tensor(visc_stress, q_cons_vf) - call s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf, filtered_pressure) + call s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf, q_prim_vf) + ! interphase momentum exchange term setup call s_compute_divergence_stress_tensor(div_pres_visc_stress, pres_visc_stress) end subroutine s_setup_terms_filtering @@ -837,7 +806,7 @@ contains call s_populate_scalarfield_buffers(q_cons_filtered(i)) end do #else - do i = 1, momxe + do i = contxb, momxe q_cons_filtered(i)%sf(-buff_size:-1, :, :) = q_cons_filtered(i)%sf(m-buff_size+1:m, :, :) q_cons_filtered(i)%sf(m+1:m+buff_size, :, :) = q_cons_filtered(i)%sf(0:buff_size-1, :, :) @@ -870,13 +839,6 @@ contains end subroutine s_compute_effective_viscosity - subroutine s_compute_interphase_momentum_exchange(int_mom_exch) - type(scalar_field), dimension(1:num_dims), intent(in) :: int_mom_exch - - integer :: i, j, k - - end subroutine s_compute_interphase_momentum_exchange - ! 
computes x-,y-,z-direction forces on particles subroutine s_compute_particle_forces real(wp), dimension(num_ibs, 3) :: force_glb @@ -1036,7 +998,9 @@ contains end do ! transpose z-slab to y-slab + call nvtxStartRange("SLAB-MPI-TRANSPOSE-Z2Y") call s_mpi_transpose_slabZ2Y + call nvtxEndRange ! 3D y-slab -> 1D z, x, y !$acc parallel loop collapse(3) gang vector default(present) @@ -1080,7 +1044,9 @@ contains end do ! transpose y-slab to z-slab + call nvtxStartRange("SLAB-MPI-TRANSPOSE-Y2Z") call s_mpi_transpose_slabY2Z + call nvtxEndRange ! 3D z-slab -> 1D y, x, z !$acc parallel loop collapse(3) gang vector default(present) From b85bbf065a25487b888c155ef4cf5bcb48f60716 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Thu, 4 Sep 2025 11:04:58 -0500 Subject: [PATCH 13/30] pointer reference gpu bug in viscous stress tensor calc --- src/simulation/m_volume_filtering.fpp | 100 +++++++++++++++++--------- 1 file changed, 67 insertions(+), 33 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index c1317d624e..fea8a22811 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -622,42 +622,76 @@ contains end subroutine s_apply_fftw_filter_scalarfield ! compute viscous stress tensor - subroutine s_compute_viscous_stress_tensor(visc_stress, q_cons_vf) + subroutine s_compute_viscous_stress_tensor(visc_stress, q_prim_vf, q_cons_filtered) type(vector_field), dimension(num_dims), intent(inout) :: visc_stress - type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(in), optional :: q_prim_vf + type(scalar_field), dimension(sys_size-1), intent(in), optional :: q_cons_filtered real(wp) :: dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz ! 
spatial velocity derivatives integer :: i, j, k - !$acc parallel loop collapse(3) gang vector default(present) private(dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz) - do i = 0, m - do j = 0, n - do k = 0, p - ! velocity gradients, local to each process - dudx = ( q_cons_vf(2)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(2)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) - dudy = ( q_cons_vf(2)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(2)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) - dudz = ( q_cons_vf(2)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(2)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) - - dvdx = ( q_cons_vf(3)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(3)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) - dvdy = ( q_cons_vf(3)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(3)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) - dvdz = ( q_cons_vf(3)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(3)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) - - dwdx = ( q_cons_vf(4)%sf(i+1, j, k)/q_cons_vf(1)%sf(i+1, j, k) - q_cons_vf(4)%sf(i-1, j, k)/q_cons_vf(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) - dwdy = ( q_cons_vf(4)%sf(i, j+1, k)/q_cons_vf(1)%sf(i, j+1, k) - q_cons_vf(4)%sf(i, j-1, k)/q_cons_vf(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) - dwdz = ( q_cons_vf(4)%sf(i, j, k+1)/q_cons_vf(1)%sf(i, j, k+1) - q_cons_vf(4)%sf(i, j, k-1)/q_cons_vf(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) - - ! 
viscous stress tensor, visc_stress(row, column) - visc_stress(1)%vf(1)%sf(i, j, k) = (4._wp/3._wp * dudx - 2._wp/3._wp * (dvdy + dwdz)) / Res(1, 1) - visc_stress(1)%vf(2)%sf(i, j, k) = (dudy + dvdx) / Res(1, 1) - visc_stress(1)%vf(3)%sf(i, j, k) = (dudz + dwdx) / Res(1, 1) - visc_stress(2)%vf(1)%sf(i, j, k) = (dvdx + dudy) / Res(1, 1) - visc_stress(2)%vf(2)%sf(i, j, k) = (4._wp/3._wp * dvdy - 2._wp/3._wp * (dudx + dwdz)) / Res(1, 1) - visc_stress(2)%vf(3)%sf(i, j, k) = (dvdz + dwdy) / Res(1, 1) - visc_stress(3)%vf(1)%sf(i, j, k) = (dwdx + dudz) / Res(1, 1) - visc_stress(3)%vf(2)%sf(i, j, k) = (dwdy + dvdz) / Res(1, 1) - visc_stress(3)%vf(3)%sf(i, j, k) = (4._wp/3._wp * dwdz - 2._wp/3._wp * (dudx + dvdy)) / Res(1, 1) + if (present(q_prim_vf)) then + !$acc parallel loop collapse(3) gang vector default(present) private(dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz) + do i = 0, m + do j = 0, n + do k = 0, p + ! velocity gradients, local to each process + dudx = ( q_prim_vf(2)%sf(i+1, j, k) - q_prim_vf(2)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dudy = ( q_prim_vf(2)%sf(i, j+1, k) - q_prim_vf(2)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dudz = ( q_prim_vf(2)%sf(i, j, k+1) - q_prim_vf(2)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + dvdx = ( q_prim_vf(3)%sf(i+1, j, k) - q_prim_vf(3)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dvdy = ( q_prim_vf(3)%sf(i, j+1, k) - q_prim_vf(3)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dvdz = ( q_prim_vf(3)%sf(i, j, k+1) - q_prim_vf(3)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + dwdx = ( q_prim_vf(4)%sf(i+1, j, k) - q_prim_vf(4)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dwdy = ( q_prim_vf(4)%sf(i, j+1, k) - q_prim_vf(4)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dwdz = ( q_prim_vf(4)%sf(i, j, k+1) - q_prim_vf(4)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + ! 
viscous stress tensor, visc_stress(row, column) + visc_stress(1)%vf(1)%sf(i, j, k) = (4._wp/3._wp * dudx - 2._wp/3._wp * (dvdy + dwdz)) / Res(1, 1) + visc_stress(1)%vf(2)%sf(i, j, k) = (dudy + dvdx) / Res(1, 1) + visc_stress(1)%vf(3)%sf(i, j, k) = (dudz + dwdx) / Res(1, 1) + visc_stress(2)%vf(1)%sf(i, j, k) = (dvdx + dudy) / Res(1, 1) + visc_stress(2)%vf(2)%sf(i, j, k) = (4._wp/3._wp * dvdy - 2._wp/3._wp * (dudx + dwdz)) / Res(1, 1) + visc_stress(2)%vf(3)%sf(i, j, k) = (dvdz + dwdy) / Res(1, 1) + visc_stress(3)%vf(1)%sf(i, j, k) = (dwdx + dudz) / Res(1, 1) + visc_stress(3)%vf(2)%sf(i, j, k) = (dwdy + dvdz) / Res(1, 1) + visc_stress(3)%vf(3)%sf(i, j, k) = (4._wp/3._wp * dwdz - 2._wp/3._wp * (dudx + dvdy)) / Res(1, 1) + end do end do - end do - end do + end do + else if (present(q_cons_filtered)) then + !$acc parallel loop collapse(3) gang vector default(present) private(dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz) + do i = 0, m + do j = 0, n + do k = 0, p + ! velocity gradients, local to each process + dudx = ( q_cons_filtered(2)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(2)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dudy = ( q_cons_filtered(2)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(2)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dudz = ( q_cons_filtered(2)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(2)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + dvdx = ( q_cons_filtered(3)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(3)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dvdy = ( q_cons_filtered(3)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(3)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dvdz = ( q_cons_filtered(3)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(3)%sf(i, j, 
k-1)/q_cons_filtered(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + dwdx = ( q_cons_filtered(4)%sf(i+1, j, k)/q_cons_filtered(1)%sf(i+1, j, k) - q_cons_filtered(4)%sf(i-1, j, k)/q_cons_filtered(1)%sf(i-1, j, k) ) / (dx(i-1) + dx(i+1)) + dwdy = ( q_cons_filtered(4)%sf(i, j+1, k)/q_cons_filtered(1)%sf(i, j+1, k) - q_cons_filtered(4)%sf(i, j-1, k)/q_cons_filtered(1)%sf(i, j-1, k) ) / (dy(j-1) + dy(j+1)) + dwdz = ( q_cons_filtered(4)%sf(i, j, k+1)/q_cons_filtered(1)%sf(i, j, k+1) - q_cons_filtered(4)%sf(i, j, k-1)/q_cons_filtered(1)%sf(i, j, k-1) ) / (dz(k-1) + dz(k+1)) + + ! viscous stress tensor, visc_stress(row, column) + visc_stress(1)%vf(1)%sf(i, j, k) = (4._wp/3._wp * dudx - 2._wp/3._wp * (dvdy + dwdz)) / Res(1, 1) + visc_stress(1)%vf(2)%sf(i, j, k) = (dudy + dvdx) / Res(1, 1) + visc_stress(1)%vf(3)%sf(i, j, k) = (dudz + dwdx) / Res(1, 1) + visc_stress(2)%vf(1)%sf(i, j, k) = (dvdx + dudy) / Res(1, 1) + visc_stress(2)%vf(2)%sf(i, j, k) = (4._wp/3._wp * dvdy - 2._wp/3._wp * (dudx + dwdz)) / Res(1, 1) + visc_stress(2)%vf(3)%sf(i, j, k) = (dvdz + dwdy) / Res(1, 1) + visc_stress(3)%vf(1)%sf(i, j, k) = (dwdx + dudz) / Res(1, 1) + visc_stress(3)%vf(2)%sf(i, j, k) = (dwdy + dvdz) / Res(1, 1) + visc_stress(3)%vf(3)%sf(i, j, k) = (4._wp/3._wp * dwdz - 2._wp/3._wp * (dudx + dvdy)) / Res(1, 1) + end do + end do + end do + end if end subroutine s_compute_viscous_stress_tensor @@ -761,7 +795,7 @@ contains #endif ! effective viscosity setup, return viscous stress tensor - call s_compute_viscous_stress_tensor(visc_stress, q_cons_vf) + call s_compute_viscous_stress_tensor(visc_stress, q_prim_vf=q_prim_vf) call s_compute_stress_tensor(pres_visc_stress, visc_stress, q_cons_vf, q_prim_vf) @@ -819,7 +853,7 @@ contains #endif ! calculate stress tensor with filtered quantities - call s_compute_viscous_stress_tensor(visc_stress, q_cons_filtered) + call s_compute_viscous_stress_tensor(visc_stress, q_cons_filtered=q_cons_filtered) ! 
calculate eff_visc !$acc parallel loop collapse(3) gang vector default(present) From aff2ca46f05bf209b0ae7f3c2cbe356b37f938ad Mon Sep 17 00:00:00 2001 From: conradd3 Date: Fri, 5 Sep 2025 13:52:01 -0500 Subject: [PATCH 14/30] bug fix causes 1d tests to fail --- src/simulation/m_start_up.fpp | 12 +++++------- src/simulation/m_time_steppers.fpp | 2 -- src/simulation/p_main.fpp | 2 +- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 0d65666498..c593da603d 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -89,8 +89,6 @@ module m_start_up use m_mhd - use m_compute_particle_forces - use m_additional_forcing use m_volume_filtering @@ -1347,9 +1345,11 @@ contains ! end if ! Compute explicit x-, y-, z- forces on each particle - call nvtxStartRange("COMPUTE-PARTICLE-FORCES") - call s_compute_particle_forces() - call nvtxEndRange + if (compute_particle_drag) then + call nvtxStartRange("COMPUTE-PARTICLE-FORCES") + call s_compute_particle_forces() + call nvtxEndRange + end if end if ! Compute terms to force a constant mass flow rate in fully periodic domain @@ -1637,7 +1637,6 @@ contains if (mhd .and. powell) call s_initialize_mhd_powell_module - if (compute_particle_drag) call s_initialize_particle_forces_module() if (periodic_forcing) call s_initialize_additional_forcing_module() if (volume_filtering_momentum_eqn) then call s_initialize_fftw_explicit_filter_module() @@ -1786,7 +1785,6 @@ contains if (bodyForces) call s_finalize_body_forces_module() if (mhd .and. 
powell) call s_finalize_mhd_powell_module - if (compute_particle_drag) call s_finalize_particle_forces_module() if (periodic_forcing) call s_finalize_additional_forcing_module() if (volume_filtering_momentum_eqn) call s_finalize_fftw_explicit_filter_module diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 5132efbb23..93d2c91724 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -671,8 +671,6 @@ contains real(wp) :: start, finish - integer :: n_step - ! Stage 1 of 3 if (.not. adap_dt) then diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 80b3e4ecf0..9cd571b2ac 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -56,7 +56,7 @@ program p_main call s_initialize_gpu_vars() call nvtxEndRange - call s_initialize_fluid_indicator_function() + if (volume_filtering_momentum_eqn .or. periodic_forcing) call s_initialize_fluid_indicator_function() if (volume_filtering_momentum_eqn) then call s_initialize_filtering_kernel() call s_initialize_filtered_fluid_indicator_function() From 622a0a60db5dabe7445c3047f934768a8d007f30 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Sun, 7 Sep 2025 22:21:33 -0500 Subject: [PATCH 15/30] particle drag force bug fix, accumulation --- runs/phi01/sphere_array_locations.txt | 380 +++++++++++++------------- src/simulation/m_volume_filtering.fpp | 25 +- voronoi/gen_voronoi_3D.py | 2 +- 3 files changed, 211 insertions(+), 196 deletions(-) diff --git a/runs/phi01/sphere_array_locations.txt b/runs/phi01/sphere_array_locations.txt index cb062253cc..047707ef90 100644 --- a/runs/phi01/sphere_array_locations.txt +++ b/runs/phi01/sphere_array_locations.txt @@ -1,190 +1,190 @@ --2.269415855407714844e-01 -1.414051055908203125e-01 3.922535181045532227e-01 -4.000198841094970703e-01 2.981948852539062500e-02 -2.832174301147460938e-01 --3.220155239105224609e-01 -3.898024559020996094e-01 -3.041059970855712891e-01 
-2.814270257949829102e-01 -7.608795166015625000e-02 -1.437755823135375977e-01 --2.728327512741088867e-01 4.227894544601440430e-01 3.520679473876953125e-02 --4.947633743286132812e-01 -4.232151508331298828e-01 -2.972397804260253906e-01 --1.808261871337890625e-02 2.877434492111206055e-01 -2.310247421264648438e-01 -3.818988800048828125e-01 3.529353141784667969e-01 1.727198362350463867e-01 --2.346787452697753906e-01 2.829644680023193359e-01 1.594238281250000000e-01 --4.887726306915283203e-01 4.662406444549560547e-02 7.227540016174316406e-02 --2.048213481903076172e-01 4.885343313217163086e-01 -2.821706533432006836e-01 --4.693455696105957031e-01 4.566423892974853516e-01 1.360166072845458984e-02 --2.810692787170410156e-02 3.964089155197143555e-01 2.224528789520263672e-01 --4.457854032516479492e-01 2.029451131820678711e-01 -2.691650390625000000e-01 --4.315460920333862305e-01 3.888773918151855469e-02 -4.190684556961059570e-01 -4.569005966186523438e-01 4.780390262603759766e-01 -1.672872304916381836e-01 -4.523042440414428711e-01 2.975084781646728516e-01 -5.123972892761230469e-03 -1.155309677124023438e-01 3.826811313629150391e-01 3.157733678817749023e-01 --3.529649972915649414e-01 3.223993778228759766e-01 3.534083366394042969e-01 --1.699209213256835938e-03 -3.757699728012084961e-01 4.251234531402587891e-01 -3.104512691497802734e-01 3.631212711334228516e-01 3.740961551666259766e-01 -3.886995315551757812e-01 -4.476237297058105469e-01 3.331944942474365234e-01 --8.131015300750732422e-02 3.511540889739990234e-01 6.623625755310058594e-02 -5.544662475585937500e-03 2.087895870208740234e-01 -4.609942436218261719e-03 --2.697887420654296875e-01 7.647264003753662109e-02 1.385573148727416992e-01 --4.056740999221801758e-01 -2.304553985595703125e-03 -2.276074886322021484e-01 --3.986057043075561523e-01 -8.398652076721191406e-02 8.779549598693847656e-02 -1.455659866333007812e-01 -5.315554141998291016e-02 3.587335348129272461e-01 --3.624105453491210938e-02 -1.932673454284667969e-01 
3.783030509948730469e-01 -2.404289245605468750e-01 2.313592433929443359e-01 -9.129595756530761719e-02 -4.290236234664916992e-01 -2.806437015533447266e-01 -3.928461074829101562e-01 -3.948264122009277344e-01 1.061335802078247070e-01 -1.345469951629638672e-01 -4.199941158294677734e-01 -5.409121513366699219e-02 -4.431722164154052734e-01 --1.276044845581054688e-01 5.453205108642578125e-02 4.209027290344238281e-01 -2.240920066833496094e-01 5.745470523834228516e-02 -2.274198532104492188e-01 -3.475044965744018555e-01 -1.186680793762207031e-02 3.881464004516601562e-01 --1.399791240692138672e-02 -5.303645133972167969e-02 3.192350864410400391e-01 -3.149266242980957031e-01 -4.960085153579711914e-01 4.852926731109619141e-01 -1.159789562225341797e-01 7.240676879882812500e-02 -2.081871032714843750e-03 -3.457980155944824219e-01 -4.685097932815551758e-01 1.311070919036865234e-01 --3.134734630584716797e-01 -1.447633504867553711e-01 2.294397354125976562e-01 --2.322396039962768555e-01 4.453787803649902344e-01 2.214672565460205078e-01 -7.549452781677246094e-02 2.149226665496826172e-01 1.942512989044189453e-01 -4.877026081085205078e-01 9.565687179565429688e-02 4.446644783020019531e-01 -2.452219724655151367e-01 -1.041567325592041016e-02 -4.420824050903320312e-01 -3.802477121353149414e-01 -2.260215282440185547e-01 -6.829130649566650391e-02 -4.026585817337036133e-01 -9.730875492095947266e-02 5.328035354614257812e-02 --1.340943574905395508e-01 -2.988189458847045898e-01 4.915304183959960938e-01 -1.499507427215576172e-01 -1.232669353485107422e-01 -3.215692043304443359e-01 -7.229900360107421875e-02 1.496689319610595703e-01 -1.584017276763916016e-01 -4.887890815734863281e-02 -2.996931076049804688e-01 -6.179094314575195312e-02 -3.264107704162597656e-01 1.829891204833984375e-01 4.166131019592285156e-01 -3.418397903442382812e-01 -3.681684732437133789e-01 -1.888689994812011719e-01 --1.746954917907714844e-01 3.889560699462890625e-03 2.538719177246093750e-01 --1.082150936126708984e-01 
-1.183983087539672852e-01 -4.667383432388305664e-01 -4.464948177337646484e-02 7.829546928405761719e-02 -4.987317323684692383e-01 -2.724659442901611328e-01 3.989661931991577148e-01 -2.271368503570556641e-01 -2.325954437255859375e-01 2.180564403533935547e-01 7.740092277526855469e-02 -4.475378990173339844e-01 8.053278923034667969e-02 2.720277309417724609e-01 -2.500159740447998047e-01 1.361670494079589844e-01 -4.378540515899658203e-01 --8.050751686096191406e-02 2.042385339736938477e-01 4.733436107635498047e-01 -6.334328651428222656e-02 3.953868150711059570e-01 -1.099604368209838867e-01 --6.584823131561279297e-02 4.609835147857666016e-01 -2.351213693618774414e-01 --3.965889215469360352e-01 2.626715898513793945e-01 -4.403696060180664062e-01 --4.123499393463134766e-01 4.679954051971435547e-01 -4.630439281463623047e-01 --3.268948793411254883e-01 -2.706754207611083984e-01 4.083846807479858398e-01 -4.519817829132080078e-01 -4.413130283355712891e-01 -4.950367212295532227e-01 -1.736700534820556641e-01 -4.334635734558105469e-01 3.858578205108642578e-01 --2.476015090942382812e-01 -8.808064460754394531e-02 -3.171390295028686523e-01 -1.416424512863159180e-01 6.130337715148925781e-03 1.613216400146484375e-01 -1.161313056945800781e-01 -8.472347259521484375e-02 -6.638598442077636719e-02 -6.862294673919677734e-02 7.571196556091308594e-02 -3.263452053070068359e-01 --2.883186340332031250e-01 1.637139320373535156e-01 -1.617478132247924805e-01 -4.712302684783935547e-01 -1.252410411834716797e-01 2.302359342575073242e-01 --3.321516513824462891e-02 -3.931099176406860352e-01 -1.693089008331298828e-01 -4.347057342529296875e-01 3.060367107391357422e-01 -1.781182289123535156e-01 -4.378421306610107422e-01 -2.324944734573364258e-01 4.174745082855224609e-01 -1.022851467132568359e-02 -1.360912322998046875e-01 6.093466281890869141e-02 -1.258683204650878906e-01 -2.447234392166137695e-01 3.956108093261718750e-01 --1.879813671112060547e-01 3.079674243927001953e-01 3.408046960830688477e-01 
--3.804820775985717773e-01 3.240450620651245117e-01 -1.224457025527954102e-01 -4.557719230651855469e-01 -3.179004192352294922e-01 2.294783592224121094e-01 --1.324630975723266602e-01 -2.825807332992553711e-01 2.794981002807617188e-02 --9.088420867919921875e-02 -4.938784837722778320e-01 -4.559993743896484375e-01 -4.321286678314208984e-01 1.908559799194335938e-01 -4.160747528076171875e-01 -4.761004447937011719e-01 -3.449964523315429688e-02 -9.512662887573242188e-02 --3.295238018035888672e-01 -4.874784946441650391e-01 3.628075122833251953e-01 --3.269430398941040039e-01 4.961208105087280273e-01 -1.530282497406005859e-01 -1.903204917907714844e-01 4.334928989410400391e-01 1.328067779541015625e-01 --1.938850879669189453e-01 -3.347861766815185547e-01 3.228425979614257812e-01 --7.716512680053710938e-02 -1.792883872985839844e-01 -1.214803457260131836e-01 -2.945523262023925781e-01 4.375331401824951172e-01 -4.941940307617187500e-02 -2.805604934692382812e-01 3.923368453979492188e-02 3.594279289245605469e-03 --3.963446617126464844e-02 4.087066650390625000e-02 1.291446685791015625e-01 -3.017591238021850586e-01 -4.672487974166870117e-01 -3.370153903961181641e-01 --5.923175811767578125e-02 -1.029053926467895508e-01 -2.954306602478027344e-01 --4.299471378326416016e-01 1.944204568862915039e-01 1.885912418365478516e-01 -1.226736307144165039e-01 -4.231331348419189453e-01 -4.431772232055664062e-01 --1.630305051803588867e-01 1.654865741729736328e-01 1.177084445953369141e-02 --2.820068597793579102e-01 -1.914020776748657227e-01 4.649567604064941406e-02 --1.803944110870361328e-01 -5.573785305023193359e-02 3.654372692108154297e-02 -3.560798168182373047e-01 -2.656357288360595703e-01 1.175208091735839844e-01 -4.641888141632080078e-01 3.300178050994873047e-01 4.690952301025390625e-01 --3.651157617568969727e-01 4.143847227096557617e-01 -3.058776855468750000e-01 -4.892169237136840820e-01 4.351882934570312500e-01 3.436787128448486328e-01 -1.252651214599609375e-02 1.140588521957397461e-01 
3.147521018981933594e-01 -2.564185857772827148e-01 4.870939254760742188e-01 2.839587926864624023e-01 -2.440360784530639648e-01 2.740068435668945312e-01 2.384872436523437500e-01 --1.093761920928955078e-01 2.005448341369628906e-01 2.263984680175781250e-01 -2.751414775848388672e-01 3.257715702056884766e-01 -4.216753244400024414e-01 --2.344570159912109375e-01 3.708822727203369141e-01 -4.901626110076904297e-01 -3.321342468261718750e-01 2.205178737640380859e-01 -2.770333290100097656e-01 --3.562602996826171875e-01 2.268432378768920898e-01 3.148293495178222656e-02 --2.453712224960327148e-01 -3.159594535827636719e-01 -1.403638124465942383e-01 --4.225530624389648438e-01 1.738572120666503906e-01 4.009822607040405273e-01 --2.291325330734252930e-01 3.076609373092651367e-01 -2.942405939102172852e-01 --4.163160324096679688e-01 -1.362502574920654297e-01 -4.328134059906005859e-01 -1.602690219879150391e-01 4.211304187774658203e-01 4.947811365127563477e-01 -1.699512004852294922e-01 -3.455421924591064453e-01 1.857841014862060547e-01 -1.917399168014526367e-01 -2.274444103240966797e-01 -1.499438285827636719e-01 -5.063652992248535156e-02 -7.577204704284667969e-02 -4.671556949615478516e-01 -1.856522560119628906e-01 1.085456609725952148e-01 3.598620891571044922e-01 -2.133283615112304688e-01 -1.748585700988769531e-01 5.385351181030273438e-02 -1.607365608215332031e-01 2.551939487457275391e-01 -2.725876569747924805e-01 -4.380518198013305664e-01 2.549636363983154297e-01 2.876336574554443359e-01 --2.457389831542968750e-01 -4.205622673034667969e-01 -4.621033668518066406e-01 --4.958317279815673828e-01 -4.657427072525024414e-01 1.988265514373779297e-01 -6.845688819885253906e-02 2.681604623794555664e-01 -4.308686256408691406e-01 --4.200789928436279297e-01 3.732924461364746094e-01 1.710724830627441406e-01 -3.544092178344726562e-02 -3.218197822570800781e-01 8.597135543823242188e-02 --5.194902420043945312e-02 1.222956180572509766e-02 -5.136704444885253906e-02 -1.391673088073730469e-01 
2.476061582565307617e-01 4.182490110397338867e-01 --1.033620834350585938e-01 3.683781623840332031e-02 -3.891081809997558594e-01 --4.138703346252441406e-01 -3.311948776245117188e-01 -4.624009132385253906e-01 --9.261775016784667969e-02 1.478457450866699219e-01 -1.957361698150634766e-01 -2.608032226562500000e-01 -1.573407649993896484e-01 4.948087930679321289e-01 --1.243667602539062500e-01 -4.962480068206787109e-01 3.667256832122802734e-01 --4.454655647277832031e-01 -2.705636024475097656e-01 1.070654392242431641e-01 --4.106376171112060547e-01 -1.618578433990478516e-01 -6.648111343383789062e-02 -3.302078247070312500e-01 -2.219557762145996094e-02 1.648344993591308594e-01 --1.774271726608276367e-01 3.244402408599853516e-01 -9.372758865356445312e-02 -2.811634540557861328e-01 1.279127597808837891e-01 2.315803766250610352e-01 -2.449696063995361328e-01 -3.595451116561889648e-01 -6.689429283142089844e-03 -5.237126350402832031e-02 -2.531653642654418945e-01 -4.304802417755126953e-01 --2.635989189147949219e-01 -2.267163991928100586e-01 -4.170490503311157227e-01 --2.721209526062011719e-01 1.574560403823852539e-01 2.993257045745849609e-01 --3.956274986267089844e-01 2.191853523254394531e-02 2.550070285797119141e-01 --1.563029289245605469e-01 -2.704749107360839844e-01 -2.991802692413330078e-01 -7.597208023071289062e-02 -1.699868440628051758e-01 2.227045297622680664e-01 --3.653595447540283203e-01 -4.391734600067138672e-01 1.462922096252441406e-01 -1.705410480499267578e-01 -4.559497833251953125e-01 -1.512272357940673828e-01 --1.343528032302856445e-01 -1.545200347900390625e-01 2.051105499267578125e-01 -5.652284622192382812e-02 -3.860473632812500000e-02 -1.806387901306152344e-01 -7.492136955261230469e-02 -4.894123077392578125e-01 3.830230236053466797e-02 -2.993867397308349609e-01 -3.184000253677368164e-01 2.854095697402954102e-01 -9.030818939208984375e-02 4.506881237030029297e-01 -3.190367221832275391e-01 -1.546680927276611328e-01 -3.337359428405761719e-01 -2.724964618682861328e-01 
-1.143584251403808594e-01 3.319869041442871094e-01 3.964900970458984375e-02 -3.128879070281982422e-01 -1.711206436157226562e-01 -2.891231775283813477e-01 -3.134812116622924805e-01 -3.195825815200805664e-01 4.452092647552490234e-01 --4.257751703262329102e-01 -3.556568622589111328e-01 3.161740303039550781e-01 --4.604424238204956055e-01 1.566462516784667969e-01 -7.651758193969726562e-02 --4.535093307495117188e-01 -1.047830581665039062e-01 3.779829740524291992e-01 --7.651937007904052734e-02 3.510303497314453125e-01 -4.015958309173583984e-01 --5.069994926452636719e-02 -3.019337654113769531e-01 2.270703315734863281e-01 -4.413585662841796875e-01 3.929922580718994141e-01 -3.560695648193359375e-01 -2.530579566955566406e-01 -3.169052600860595703e-01 -4.228000640869140625e-01 --6.997537612915039062e-02 1.933835744857788086e-01 -3.526034355163574219e-01 --1.785504817962646484e-01 1.803159713745117188e-03 -1.765856742858886719e-01 --1.506757736206054688e-02 3.296717405319213867e-01 4.055316448211669922e-01 --4.429192543029785156e-01 -3.453509807586669922e-01 -1.209781169891357422e-01 --2.643674612045288086e-01 1.182488203048706055e-01 -3.157637119293212891e-01 -4.684782028198242188e-02 -4.617999792098999023e-01 2.509958744049072266e-01 --3.250834941864013672e-01 2.819657325744628906e-03 4.174815416336059570e-01 --3.355050086975097656e-02 -4.035353660583496094e-01 -3.605549335479736328e-01 --3.662085533142089844e-01 -2.316244840621948242e-01 -2.756531238555908203e-01 --2.576720714569091797e-01 -1.255595684051513672e-02 -4.626390933990478516e-01 --3.275632858276367188e-01 2.991151809692382812e-02 -4.782438278198242188e-02 -4.056546688079833984e-01 1.594020128250122070e-01 7.798624038696289062e-02 --2.715262174606323242e-01 -3.173813819885253906e-01 1.938788890838623047e-01 -3.270006179809570312e-02 -2.296169996261596680e-01 -2.338488101959228516e-01 --1.381781101226806641e-01 -4.450683593750000000e-01 1.390277147293090820e-01 -4.581812620162963867e-01 -4.004166126251220703e-01 
5.525112152099609375e-03 --2.281215190887451172e-01 -1.310509443283081055e-01 -1.401650905609130859e-01 --2.425242662429809570e-01 1.733251810073852539e-01 -4.973032474517822266e-01 --1.258821487426757812e-01 -4.724828004837036133e-01 -5.991733074188232422e-02 -4.821944236755371094e-01 -1.722755432128906250e-01 -2.475099563598632812e-01 -2.750682830810546875e-02 4.665797948837280273e-01 4.664119482040405273e-01 --3.053290843963623047e-01 -3.777220249176025391e-01 2.397775650024414062e-03 -2.908480167388916016e-01 -1.594734191894531250e-01 2.671622037887573242e-01 +-8.877599239349365234e-02 1.935560703277587891e-01 -6.486654281616210938e-02 +-3.341052532196044922e-01 4.142935276031494141e-01 -4.567451477050781250e-01 +2.565863132476806641e-01 -4.949223995208740234e-02 -4.442641735076904297e-01 +3.103950023651123047e-01 -2.099078893661499023e-01 -4.642441272735595703e-01 +-3.521966934204101562e-02 -1.745276451110839844e-01 -3.202521800994873047e-01 +-1.949143409729003906e-02 -1.775810718536376953e-01 -3.603804111480712891e-02 +-1.835894584655761719e-01 3.262339830398559570e-01 -3.085057735443115234e-01 +-1.445159912109375000e-01 1.513528823852539062e-01 -2.023205757141113281e-01 +-4.898538589477539062e-01 -4.509705305099487305e-01 -1.682095527648925781e-01 +3.143328428268432617e-01 4.728571176528930664e-01 1.526627540588378906e-01 +1.280879974365234375e-01 1.239399909973144531e-01 -3.574787378311157227e-01 +-1.123933792114257812e-01 -3.207942247390747070e-01 9.310150146484375000e-02 +-1.386029720306396484e-01 -1.205575466156005859e-02 2.014696598052978516e-01 +-2.808933258056640625e-01 3.925647735595703125e-01 2.450205087661743164e-01 +4.294252395629882812e-02 2.894115447998046875e-01 -2.536165714263916016e-02 +1.801455020904541016e-01 5.933284759521484375e-02 4.247887134552001953e-01 +1.872421503067016602e-01 3.063344955444335938e-02 8.561480045318603516e-02 +2.484493255615234375e-01 -4.173127412796020508e-01 3.008729219436645508e-01 +8.203792572021484375e-02 
1.318891048431396484e-01 -1.190292835235595703e-02 +-4.555282592773437500e-01 -3.696656227111816406e-01 2.237200736999511719e-02 +-1.931151151657104492e-01 5.374908447265625000e-02 5.545830726623535156e-02 +-6.292748451232910156e-02 1.790912151336669922e-01 1.174246072769165039e-01 +-2.316267490386962891e-01 -4.075572490692138672e-01 4.597637653350830078e-01 +-3.437596559524536133e-01 4.005973339080810547e-01 -2.290433645248413086e-01 +-1.910818815231323242e-01 -4.736427068710327148e-01 -2.076803445816040039e-01 +-4.528397321701049805e-01 7.907927036285400391e-02 3.940449953079223633e-01 +1.893968582153320312e-01 4.864903688430786133e-01 -3.449935913085937500e-01 +7.300472259521484375e-02 -3.667246103286743164e-01 3.762015104293823242e-01 +-1.821663379669189453e-01 -4.775607585906982422e-02 3.386561870574951172e-01 +5.136466026306152344e-02 4.852104187011718750e-01 -4.752502441406250000e-01 +-3.295025825500488281e-01 -5.519819259643554688e-02 5.781412124633789062e-02 +4.343043565750122070e-01 2.689909934997558594e-01 3.341940641403198242e-01 +-3.969779014587402344e-01 -2.916865348815917969e-01 -1.138211488723754883e-01 +-4.619355201721191406e-01 2.032375335693359375e-02 -1.161878108978271484e-01 +7.124900817871093750e-03 1.223111152648925781e-02 4.087531566619873047e-01 +-3.908715248107910156e-01 1.400717496871948242e-01 2.354013919830322266e-02 +-1.070375442504882812e-01 3.122891187667846680e-01 2.600712776184082031e-01 +4.667922258377075195e-01 -2.228868007659912109e-01 2.890402078628540039e-01 +9.751558303833007812e-03 3.652515411376953125e-01 1.688425540924072266e-01 +-7.598793506622314453e-02 1.410543918609619141e-02 -6.586468219757080078e-02 +-3.012117147445678711e-01 -1.333975791931152344e-02 -2.475223541259765625e-01 +1.425679922103881836e-01 -1.594284772872924805e-01 4.271366596221923828e-01 +-3.488619327545166016e-01 3.043293952941894531e-01 1.312527656555175781e-01 +1.347296237945556641e-01 -2.548012733459472656e-01 2.497346401214599609e-01 
+1.558208465576171875e-01 -1.695448160171508789e-01 8.221673965454101562e-02 +2.994102239608764648e-01 -2.616212368011474609e-01 3.708097934722900391e-01 +4.749594926834106445e-01 4.012154340744018555e-01 -1.113747358322143555e-01 +4.658288955688476562e-01 -2.405116558074951172e-01 -4.019365310668945312e-01 +-4.477721452713012695e-01 1.802740097045898438e-01 2.297303676605224609e-01 +2.828998565673828125e-01 3.781812191009521484e-01 -4.897345304489135742e-01 +-1.556029319763183594e-01 -1.499896049499511719e-01 -1.702260971069335938e-01 +-2.203900814056396484e-01 4.228965044021606445e-01 3.943344354629516602e-01 +-7.529938220977783203e-02 -4.034370183944702148e-01 -4.895013570785522461e-01 +-2.633322477340698242e-01 2.260003089904785156e-01 3.617374897003173828e-01 +-2.043257951736450195e-01 -2.201197147369384766e-01 4.399769306182861328e-01 +2.097340822219848633e-01 -3.915596008300781250e-02 -2.276867628097534180e-01 +-1.167770624160766602e-01 4.129269123077392578e-01 -4.588322639465332031e-01 +3.195565938949584961e-01 2.821329832077026367e-01 2.030262947082519531e-01 +4.332208633422851562e-02 2.999825477600097656e-01 -2.426314353942871094e-01 +-2.900393009185791016e-01 7.278752326965332031e-02 3.351804018020629883e-01 +-3.045821189880371094e-02 -1.478650569915771484e-01 3.491390943527221680e-01 +-2.793753147125244141e-02 -1.773738861083984375e-01 1.675630807876586914e-01 +-3.188729286193847656e-01 -4.904426336288452148e-01 -6.549203395843505859e-02 +-4.071967601776123047e-01 -1.066761016845703125e-01 -4.441113471984863281e-01 +4.105618000030517578e-01 -3.848595619201660156e-01 1.863635778427124023e-01 +-1.051111221313476562e-01 -7.725274562835693359e-02 -4.898943901062011719e-01 +3.737279176712036133e-01 1.056033372879028320e-01 4.786680936813354492e-01 +2.511825561523437500e-01 -3.347592353820800781e-01 1.227176189422607422e-01 +-3.208853006362915039e-01 -1.442481279373168945e-01 -9.813189506530761719e-02 +3.365310430526733398e-01 -4.063715934753417969e-01 
-4.750763177871704102e-01 +-3.066674470901489258e-01 -2.005393505096435547e-01 -2.603935003280639648e-01 +4.633438587188720703e-02 -3.628603219985961914e-01 -3.448045253753662109e-01 +-1.228909492492675781e-01 4.968223571777343750e-01 1.755017042160034180e-01 +4.529950618743896484e-01 -4.122850894927978516e-01 3.542938232421875000e-01 +3.015396595001220703e-01 6.062459945678710938e-02 -5.255222320556640625e-02 +7.875204086303710938e-02 -3.220939636230468750e-01 2.097034454345703125e-02 +-3.075191974639892578e-01 -4.913786649703979492e-01 1.174443960189819336e-01 +-2.157187461853027344e-01 -1.293109655380249023e-01 -3.813669681549072266e-01 +-2.569644451141357422e-01 -4.775856733322143555e-01 -3.842570781707763672e-01 +3.374536037445068359e-01 2.595454454421997070e-01 -1.862519979476928711e-01 +-2.484831809997558594e-01 1.898849010467529297e-02 -1.008712053298950195e-01 +-3.550199270248413086e-01 -3.802776336669921875e-03 2.112603187561035156e-01 +-4.047393798828125000e-02 -3.331716060638427734e-01 -1.580150127410888672e-01 +2.301404476165771484e-01 1.020783185958862305e-01 2.300353050231933594e-01 +-4.886188507080078125e-01 -4.335124492645263672e-01 -3.716624975204467773e-01 +3.109852075576782227e-01 -3.871500492095947266e-02 1.583197116851806641e-01 +4.864922761917114258e-01 -2.506246566772460938e-01 4.611170291900634766e-01 +4.114500284194946289e-01 -2.497513294219970703e-01 8.945560455322265625e-02 +-2.041511535644531250e-01 -3.061387538909912109e-01 -1.002895832061767578e-01 +-3.356888294219970703e-01 -2.898548841476440430e-01 -4.294934272766113281e-01 +6.349623203277587891e-02 -4.237914085388183594e-01 1.809575557708740234e-01 +1.638014316558837891e-01 -3.412141799926757812e-01 -4.808696508407592773e-01 +4.292991161346435547e-01 -7.350444793701171875e-02 4.452385902404785156e-01 +-2.837867736816406250e-01 2.394533157348632812e-02 -4.843814373016357422e-01 +-2.125334739685058594e-01 1.921176910400390625e-01 -2.379369735717773438e-02 +1.759276390075683594e-01 
4.892826080322265625e-01 4.419517517089843750e-01 +-4.233963489532470703e-01 7.077014446258544922e-02 -3.061563968658447266e-01 +-3.712041378021240234e-01 4.946417212486267090e-01 3.635656833648681641e-01 +-4.665093421936035156e-01 4.070787429809570312e-01 -3.274630308151245117e-01 +3.692833185195922852e-01 -8.178091049194335938e-02 -1.193681955337524414e-01 +6.124496459960937500e-03 -2.011668682098388672e-02 8.408391475677490234e-02 +-1.337385177612304688e-02 -2.435498237609863281e-01 -4.735767841339111328e-01 +2.590975761413574219e-01 -3.270063400268554688e-01 -5.099523067474365234e-02 +3.800438642501831055e-01 4.123662710189819336e-01 -3.175902366638183594e-01 +2.355668544769287109e-01 2.839933633804321289e-01 -3.255009651184082031e-01 +-4.340230226516723633e-01 -4.109045267105102539e-01 4.977314472198486328e-01 +2.350783348083496094e-02 -7.954597473144531250e-02 -2.089430093765258789e-01 +2.528522014617919922e-01 2.231028079986572266e-01 -4.818900823593139648e-01 +3.285017013549804688e-01 -1.968045234680175781e-01 2.016012668609619141e-01 +3.276336193084716797e-01 3.824212551116943359e-01 -2.195405960083007812e-02 +4.347554445266723633e-01 -1.944565773010253906e-02 -3.952792882919311523e-01 +-2.355787754058837891e-01 2.512185573577880859e-01 -4.705796241760253906e-01 +2.304534912109375000e-01 2.335491180419921875e-01 3.436188697814941406e-01 +4.291563034057617188e-01 2.084137201309204102e-01 -3.515939712524414062e-01 +4.610210657119750977e-01 2.877938747406005859e-01 9.413146972656250000e-02 +3.239741325378417969e-01 4.200505018234252930e-01 3.377312421798706055e-01 +-4.339945316314697266e-01 -1.799043416976928711e-01 1.667797565460205078e-01 +4.162905216217041016e-01 -2.838604450225830078e-01 -1.204760074615478516e-01 +4.708716869354248047e-01 4.452165365219116211e-01 4.702655076980590820e-01 +3.935134410858154297e-01 -4.494274854660034180e-01 -1.000511646270751953e-02 +-3.325940370559692383e-01 -3.989632129669189453e-01 -2.595729827880859375e-01 
+-4.726890325546264648e-01 -1.577985286712646484e-01 -2.004265785217285156e-02 +-2.578830718994140625e-01 1.816778182983398438e-01 1.800514459609985352e-01 +2.873079776763916016e-01 -1.582661867141723633e-01 1.000881195068359375e-03 +1.284685134887695312e-01 -2.347108125686645508e-01 -1.527856588363647461e-01 +-4.975929260253906250e-01 4.154947996139526367e-01 2.424190044403076172e-01 +1.319632530212402344e-01 2.181564569473266602e-01 1.456822156906127930e-01 +4.251360893249511719e-02 5.486690998077392578e-02 2.446963787078857422e-01 +5.265474319458007812e-03 -4.930623769760131836e-01 1.795315742492675781e-02 +3.435378074645996094e-01 -1.437039375305175781e-01 -2.955729961395263672e-01 +-1.589361429214477539e-01 3.439151048660278320e-01 -1.269352436065673828e-01 +-2.996790409088134766e-01 -2.977983951568603516e-01 5.047678947448730469e-02 +1.387677192687988281e-01 -4.051816463470458984e-02 -6.590497493743896484e-02 +-4.859859943389892578e-01 4.686148166656494141e-01 6.054759025573730469e-02 +3.058031797409057617e-01 -4.722125530242919922e-01 -1.649188995361328125e-01 +3.712953329086303711e-01 -3.612419366836547852e-01 -2.953444719314575195e-01 +-2.350592613220214844e-01 1.253683567047119141e-01 -3.582476377487182617e-01 +-4.282865524291992188e-01 -3.783413171768188477e-01 1.956710815429687500e-01 +-1.545268297195434570e-01 -3.127627372741699219e-01 -3.272031545639038086e-01 +2.250815629959106445e-01 -3.367059230804443359e-01 -2.811298370361328125e-01 +-5.611097812652587891e-02 2.276177406311035156e-01 -3.761705160140991211e-01 +1.843569278717041016e-01 3.698165416717529297e-01 -1.461877822875976562e-01 +-3.651070594787597656e-01 3.224494457244873047e-01 -4.502046108245849609e-02 +-1.052534580230712891e-01 1.394950151443481445e-01 3.169180154800415039e-01 +-7.266819477081298828e-02 -3.203969001770019531e-01 3.076763153076171875e-01 +-1.534210443496704102e-01 -1.421678066253662109e-02 -2.598439455032348633e-01 +4.644811153411865234e-01 -2.855896949768066406e-02 
6.111550331115722656e-02 +1.615400314331054688e-01 4.353706836700439453e-01 2.680056095123291016e-01 +-4.789991378784179688e-01 -2.737338542938232422e-01 -2.684531211853027344e-01 +-4.801630973815917969e-01 -1.131765842437744141e-01 -2.253174781799316406e-01 +4.725518226623535156e-01 2.924776077270507812e-01 -4.712775945663452148e-01 +3.934500217437744141e-01 6.538939476013183594e-02 -2.147150039672851562e-01 +5.674338340759277344e-02 1.684566736221313477e-01 4.750093221664428711e-01 +-3.127444982528686523e-01 1.864537000656127930e-01 -1.828011274337768555e-01 +-6.377077102661132812e-02 3.063268661499023438e-01 4.461523294448852539e-01 +-2.393376827239990234e-01 -2.101924419403076172e-01 2.160568237304687500e-01 +-4.714767932891845703e-01 2.386778593063354492e-01 -1.962506771087646484e-01 +-4.175131320953369141e-01 1.262202262878417969e-01 -4.906876087188720703e-01 +1.526114940643310547e-01 -1.855427026748657227e-01 -3.443827629089355469e-01 +6.579875946044921875e-02 -4.886317253112792969e-02 -4.445745944976806641e-01 +1.098661422729492188e-01 3.471816778182983398e-01 4.010045528411865234e-01 +2.641906738281250000e-01 -2.310740947723388672e-01 -1.801049709320068359e-01 +2.215981483459472656e-02 1.125121116638183594e-01 -2.007805109024047852e-01 +4.692313671112060547e-01 -3.348422050476074219e-02 2.421901226043701172e-01 +3.015110492706298828e-01 -7.356131076812744141e-02 3.514482975006103516e-01 +-3.965770006179809570e-01 2.962644100189208984e-01 3.929857015609741211e-01 +1.106926202774047852e-01 -4.377689361572265625e-01 -1.675007343292236328e-01 +1.297621726989746094e-01 -8.046376705169677734e-02 2.488052845001220703e-01 +1.898322105407714844e-01 1.719188690185546875e-01 -1.696370840072631836e-01 +4.060682058334350586e-01 1.258714199066162109e-01 1.274476051330566406e-01 +1.603732109069824219e-01 3.966591358184814453e-01 6.766164302825927734e-02 +5.054616928100585938e-02 2.127890586853027344e-01 3.031399250030517578e-01 +-1.690447330474853516e-01 
-1.416635513305664062e-01 3.728961944580078125e-02 +-1.341120004653930664e-01 1.080242395401000977e-01 4.635136127471923828e-01 +-2.457776069641113281e-01 -3.851659297943115234e-01 2.513883113861083984e-01 +-1.634557247161865234e-01 -4.583904743194580078e-01 -2.824854850769042969e-02 +-1.784324645996093750e-03 4.497978687286376953e-01 -1.161942481994628906e-01 +4.503953456878662109e-01 1.885429620742797852e-01 -4.877877235412597656e-02 +2.600491046905517578e-01 2.236571311950683594e-01 2.091717720031738281e-02 +-3.822712898254394531e-01 2.547247409820556641e-01 -3.687927722930908203e-01 +-3.667194843292236328e-01 -1.171383857727050781e-01 3.846424818038940430e-01 +-3.668913841247558594e-01 -2.955377101898193359e-01 3.536789417266845703e-01 +2.957736253738403320e-01 8.799576759338378906e-02 -3.451507091522216797e-01 +-1.604117155075073242e-01 3.587515354156494141e-01 5.187714099884033203e-02 +1.919094324111938477e-01 -4.781463146209716797e-01 4.655241966247558594e-03 +-3.640174865722656250e-02 4.754726886749267578e-01 -2.942006587982177734e-01 +-6.335353851318359375e-02 4.125511646270751953e-02 -3.732511997222900391e-01 +9.152126312255859375e-02 3.327772617340087891e-01 -4.209873676300048828e-01 +-5.436992645263671875e-02 4.926524162292480469e-01 3.434299230575561523e-01 +3.771104812622070312e-01 9.526658058166503906e-02 2.973334789276123047e-01 diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index fea8a22811..02a2819a25 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -321,7 +321,9 @@ contains #endif ! 
file for particle forces - open(unit=100, file='particle_force.bin', status='replace', form='unformatted', access='stream') + if (proc_rank == 0) then + open(unit=100, file='particle_force.bin', status='replace', form='unformatted', access='stream', action='write') + end if end subroutine s_initialize_fftw_explicit_filter_module @@ -879,21 +881,27 @@ contains real(wp) :: dvol integer :: i, j, k, l + ! zero particle forces + particle_forces = 0.0_wp + !$acc update device(particle_forces) + !$acc parallel loop collapse(3) gang vector default(present) private(dvol) do i = 0, m do j = 0, n do k = 0, p dvol = dx(i) * dy(j) * dz(k) !$acc atomic - particle_forces(ib_markers%sf(i, j, k), 1) = particle_forces(ib_markers%sf(i, j, k), 1) + div_pres_visc_stress(1)%sf(i, j, k) * dvol + particle_forces(ib_markers%sf(i, j, k), 1) = particle_forces(ib_markers%sf(i, j, k), 1) - div_pres_visc_stress(1)%sf(i, j, k) * dvol !$acc atomic - particle_forces(ib_markers%sf(i, j, k), 2) = particle_forces(ib_markers%sf(i, j, k), 2) + div_pres_visc_stress(2)%sf(i, j, k) * dvol + particle_forces(ib_markers%sf(i, j, k), 2) = particle_forces(ib_markers%sf(i, j, k), 2) - div_pres_visc_stress(2)%sf(i, j, k) * dvol !$acc atomic - particle_forces(ib_markers%sf(i, j, k), 3) = particle_forces(ib_markers%sf(i, j, k), 3) + div_pres_visc_stress(3)%sf(i, j, k) * dvol + particle_forces(ib_markers%sf(i, j, k), 3) = particle_forces(ib_markers%sf(i, j, k), 3) - div_pres_visc_stress(3)%sf(i, j, k) * dvol end do end do end do + !$acc update host(particle_forces) + ! reduce particle forces across processors do i = 1, num_ibs call s_mpi_allreduce_sum(particle_forces(i, 1), force_glb(i, 1)) @@ -901,6 +909,11 @@ contains call s_mpi_allreduce_sum(particle_forces(i, 3), force_glb(i, 3)) end do + if (proc_rank == 0) then + print *, 'force', force_glb(1, 1) + print *, 'C_D', 2._wp * force_glb(1, 1) / (rho_inf_ref * u_inf_ref**2 * pi * patch_ib(1)%radius**2) + end if + ! 
write particle forces to file if (proc_rank == 0) then write(100) force_glb @@ -1207,7 +1220,9 @@ contains call fftw_destroy_plan(plan_z_c2c_kernelG) #endif - close(100) + if (proc_rank == 0) then + close(100) + end if end subroutine s_finalize_fftw_explicit_filter_module diff --git a/voronoi/gen_voronoi_3D.py b/voronoi/gen_voronoi_3D.py index c56a02fb8e..ecb08eb36c 100644 --- a/voronoi/gen_voronoi_3D.py +++ b/voronoi/gen_voronoi_3D.py @@ -73,7 +73,7 @@ def lloyd_relaxation_3d(initial_points, box, w=1, iterations=10): initial_points = np.stack((x_i, y_i, z_i), axis=1) box = freud.box.Box.cube(L) - relaxed_points = lloyd_relaxation_3d(initial_points, box, iterations=30) + relaxed_points = lloyd_relaxation_3d(initial_points, box, iterations=40) print(np.shape(relaxed_points)) np.savetxt(output_dir+'/sphere_array_locations.txt', relaxed_points) From 39f84d1878b57406b2c58d55337b63e49a4ddf36 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Sat, 13 Sep 2025 17:07:18 -0500 Subject: [PATCH 16/30] updated interphase momentum exch calc --- src/simulation/m_volume_filtering.fpp | 106 +++++++++++++++++++++++--- src/simulation/p_main.fpp | 1 + 2 files changed, 97 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 02a2819a25..419b07db81 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -28,8 +28,8 @@ module m_volume_filtering private; public :: s_initialize_fftw_explicit_filter_module, & s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & - s_initialize_filtered_fluid_indicator_function, s_finalize_fftw_explicit_filter_module, & - s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, & + s_initialize_filtered_fluid_indicator_function, s_initialize_fluid_indicator_gradient, & + s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, & s_compute_viscous_stress_tensor, 
s_compute_stress_tensor, s_compute_divergence_stress_tensor, s_compute_particle_forces, & s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity @@ -43,6 +43,7 @@ module m_volume_filtering ! fluid indicator function (1 = fluid, 0 = otherwise) type(scalar_field), public :: fluid_indicator_function type(scalar_field), public :: filtered_fluid_indicator_function + type(scalar_field), allocatable, dimension(:) :: grad_fluid_indicator ! volume filtered conservative variables type(scalar_field), allocatable, dimension(:), public :: q_cons_filtered @@ -66,7 +67,8 @@ module m_volume_filtering ! x-,y-,z-direction forces on particles real(wp), allocatable, dimension(:, :) :: particle_forces - !$acc declare create(fluid_indicator_function, filtered_fluid_indicator_function, q_cons_filtered, filtered_pressure) + !$acc declare create(fluid_indicator_function, filtered_fluid_indicator_function, grad_fluid_indicator) + !$acc declare create(q_cons_filtered, filtered_pressure) !$acc declare create(visc_stress, pres_visc_stress, div_pres_visc_stress) !$acc declare create(reynolds_stress, eff_visc, int_mom_exch) !$acc declare create(Res, particle_forces) @@ -456,14 +458,14 @@ contains subroutine s_initialize_fluid_indicator_function integer :: i, j, k - @:ALLOCATE(fluid_indicator_function%sf(0:m, 0:n, 0:p)) + @:ALLOCATE(fluid_indicator_function%sf(-1:m+1, -1:n+1, -1:p+1)) @:ACC_SETUP_SFs(fluid_indicator_function) ! 
define fluid indicator function !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p + do i = -1, m+1 + do j = -1, n+1 + do k = -1, p+1 if (ib_markers%sf(i, j, k) == 0) then fluid_indicator_function%sf(i, j, k) = 1.0_dp else @@ -515,6 +517,36 @@ contains end subroutine s_initialize_filtered_fluid_indicator_function + + subroutine s_initialize_fluid_indicator_gradient + integer :: i, j, k + + @:ALLOCATE(grad_fluid_indicator(1:3)) + do i = 1, 3 + @:ALLOCATE(grad_fluid_indicator(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(grad_fluid_indicator(i)) + end do + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + grad_fluid_indicator(1)%sf(i, j, k) = (fluid_indicator_function%sf(i+1, j, k) - & + fluid_indicator_function%sf(i-1, j, k)) / & + (x_cc(i+1) - x_cc(i-1)) + grad_fluid_indicator(2)%sf(i, j, k) = (fluid_indicator_function%sf(i, j+1, k) - & + fluid_indicator_function%sf(i, j-1, k)) / & + (y_cc(j+1) - y_cc(j-1)) + grad_fluid_indicator(3)%sf(i, j, k) = (fluid_indicator_function%sf(i, j, k+1) - & + fluid_indicator_function%sf(i, j, k-1)) / & + (z_cc(k+1) - z_cc(k-1)) + end do + end do + end do + + end subroutine s_initialize_fluid_indicator_gradient + + !< calculate the unclosed terms present in the volume filtered momentum equation subroutine s_volume_filter_momentum_eqn(q_cons_vf, q_prim_vf) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -544,9 +576,7 @@ contains end do end do ! 
interphase momentum exchange - do i = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .false., div_pres_visc_stress(i), int_mom_exch(i)) - end do + call s_compute_interphase_momentum_exchange(filtered_fluid_indicator_function, grad_fluid_indicator, pres_visc_stress, int_mom_exch) call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress) @@ -875,6 +905,58 @@ contains end subroutine s_compute_effective_viscosity + subroutine s_compute_interphase_momentum_exchange(filtered_fluid_indicator_function, grad_fluid_indicator, pres_visc_stress, int_mom_exch) + type(scalar_field), intent(in) :: filtered_fluid_indicator_function + type(scalar_field), dimension(1:3), intent(in) :: grad_fluid_indicator + type(vector_field), dimension(1:3), intent(in) :: pres_visc_stress + type(scalar_field), dimension(1:3), intent(inout) :: int_mom_exch + + integer :: i, j, k, l + + ! x-, y-, z- component loop + do l = 1, 3 + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i+1, j+1, k+1) = pres_visc_stress(1)%vf(l)%sf(i, j, k) * grad_fluid_indicator(1)%sf(i, j, k) & + + pres_visc_stress(2)%vf(l)%sf(i, j, k) * grad_fluid_indicator(2)%sf(i, j, k) & + + pres_visc_stress(3)%vf(l)%sf(i, j, k) * grad_fluid_indicator(3)%sf(i, j, k) + end do + end do + end do + + call nvtxStartRange("FORWARD-3D-FFT") + call s_mpi_FFT_fwd + call nvtxEndRange + + ! 
convolution with filtering kernel + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + + call nvtxStartRange("BACKWARD-3D-FFT") + call s_mpi_FFT_bwd + call nvtxEndRange + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + int_mom_exch(l)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp)) + end do + end do + end do + end do ! end component loop + + end subroutine s_compute_interphase_momentum_exchange + ! computes x-,y-,z-direction forces on particles subroutine s_compute_particle_forces real(wp), dimension(num_ibs, 3) :: force_glb @@ -1146,6 +1228,10 @@ contains @:DEALLOCATE(fluid_indicator_function%sf) @:DEALLOCATE(filtered_fluid_indicator_function%sf) + do i = 1, 3 + @:DEALLOCATE(grad_fluid_indicator(i)%sf) + end do + @:DEALLOCATE(grad_fluid_indicator) do i = 1, sys_size-1 @:DEALLOCATE(q_cons_filtered(i)%sf) diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 9cd571b2ac..ccfa9cca4f 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -60,6 +60,7 @@ program p_main if (volume_filtering_momentum_eqn) then call s_initialize_filtering_kernel() call s_initialize_filtered_fluid_indicator_function() + call s_initialize_fluid_indicator_gradient() end if ! 
Setting the time-step iterator to the first time-step From def36a980e7d2efb6a7a095427cf03fe93d9baba Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Mon, 15 Sep 2025 13:28:22 -0500 Subject: [PATCH 17/30] removed print statements for runs --- src/simulation/m_volume_filtering.fpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 419b07db81..24fc6f6445 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -991,10 +991,10 @@ contains call s_mpi_allreduce_sum(particle_forces(i, 3), force_glb(i, 3)) end do - if (proc_rank == 0) then - print *, 'force', force_glb(1, 1) - print *, 'C_D', 2._wp * force_glb(1, 1) / (rho_inf_ref * u_inf_ref**2 * pi * patch_ib(1)%radius**2) - end if + ! if (proc_rank == 0) then + ! print *, 'force', force_glb(1, 1) + ! print *, 'C_D', 2._wp * force_glb(1, 1) / (rho_inf_ref * u_inf_ref**2 * pi * patch_ib(1)%radius**2) + ! end if ! 
write particle forces to file if (proc_rank == 0) then From ee8d2596a46829f916bc83b378d0c26191c5b221 Mon Sep 17 00:00:00 2001 From: Conrad Delgado <168050190+conraddelgado@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:36:48 -0500 Subject: [PATCH 18/30] Delete examples/3D_ibm_sphere_periodic directory --- examples/3D_ibm_sphere_periodic/case.py | 107 ------------------------ 1 file changed, 107 deletions(-) delete mode 100644 examples/3D_ibm_sphere_periodic/case.py diff --git a/examples/3D_ibm_sphere_periodic/case.py b/examples/3D_ibm_sphere_periodic/case.py deleted file mode 100644 index 41938f69fd..0000000000 --- a/examples/3D_ibm_sphere_periodic/case.py +++ /dev/null @@ -1,107 +0,0 @@ -import json -import math - -Mu = 1.84e-05 -gam_a = 1.4 - -D = 0.1 - -# Configuring case dictionary -print( - json.dumps( - { - # Logistics - "run_time_info": "T", - # Computational Domain Parameters - # x direction - "x_domain%beg": -5 * D, - "x_domain%end": 5.0 * D, - # y direction - "y_domain%beg": -2.5 * D, - "y_domain%end": 2.5 * D, - # z direction - "z_domain%beg": -2.5 * D, - "z_domain%end": 2.5 * D, - "cyl_coord": "F", - "m": 99, - "n": 99, - "p": 99, - "dt": 1.0e-6, - "t_step_start": 0, - "t_step_stop": 200, # 3000 - "t_step_save": 10, # 10 - # Simulation Algorithm Parameters - # Only one patches are necessary, the air tube - "num_patches": 1, - # Use the 5 equation model - "model_eqns": 2, - # 6 equations model does not need the K \div(u) term - "alt_soundspeed": "F", - # One fluids: air - "num_fluids": 1, - # time step - "mpp_lim": "F", - # Correct errors when computing speed of sound - "mixture_err": "T", - # Use TVD RK3 for time marching - "time_stepper": 3, - # Reconstruct the primitive variables to minimize spurious - # Use WENO5 - "weno_order": 5, - "weno_eps": 1.0e-16, - "weno_Re_flux": "T", - "weno_avg": "T", - "avg_state": 2, - "mapped_weno": "T", - "null_weights": "F", - "mp_weno": "T", - "riemann_solver": 2, - "wave_speeds": 1, - # Periodic BCs - 
"bc_x%beg": -1, - "bc_x%end": -1, - "bc_y%beg": -1, - "bc_y%end": -1, - "bc_z%beg": -1, - "bc_z%end": -1, - # Set IB to True and add 1 patch - "ib": "T", - "num_ibs": 1, - "viscous": "T", - # Formatted Database Files Structure Parameters - "format": 1, - "precision": 2, - "prim_vars_wrt": "T", - "E_wrt": "T", - "parallel_io": "T", - # Patch: Constant Tube filled with air - # Specify the cylindrical air tube grid geometry - "patch_icpp(1)%geometry": 9, - "patch_icpp(1)%x_centroid": 0.0, - # Uniform medium density, centroid is at the center of the domain - "patch_icpp(1)%y_centroid": 0.0, - "patch_icpp(1)%z_centroid": 0.0, - "patch_icpp(1)%length_x": 10 * D, - "patch_icpp(1)%length_y": 5 * D, - "patch_icpp(1)%length_z": 5 * D, - # Specify the patch primitive variables - "patch_icpp(1)%vel(1)": 527.2e00, - "patch_icpp(1)%vel(2)": 0.0e00, - "patch_icpp(1)%vel(3)": 0.0e00, - "patch_icpp(1)%pres": 10918.2549, - "patch_icpp(1)%alpha_rho(1)": 0.2199, - "patch_icpp(1)%alpha(1)": 1.0e00, - # Patch: Sphere Immersed Boundary - "patch_ib(1)%geometry": 8, - "patch_ib(1)%x_centroid": -3.0e-3, - "patch_ib(1)%y_centroid": 0.0, - "patch_ib(1)%z_centroid": 0.0, - "patch_ib(1)%radius": D / 2, - "patch_ib(1)%slip": "T", - # Fluids Physical Parameters - "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) - "fluid_pp(1)%pi_inf": 0, - "fluid_pp(1)%Re(1)": 7535533.2, - } - ) -) From a0d20155c41466aa8b10869538b95672abb0679e Mon Sep 17 00:00:00 2001 From: conradd3 Date: Mon, 22 Sep 2025 13:29:25 -0500 Subject: [PATCH 19/30] batched mpiAlltoAll for tensors --- src/simulation/m_volume_filtering.fpp | 352 +++++++++++++++++++++++--- 1 file changed, 317 insertions(+), 35 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 24fc6f6445..d3981dd55a 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -29,9 +29,9 @@ module m_volume_filtering private; public :: 
s_initialize_fftw_explicit_filter_module, & s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & s_initialize_filtered_fluid_indicator_function, s_initialize_fluid_indicator_gradient, & - s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, & + s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, s_filter_tensor_field, & s_compute_viscous_stress_tensor, s_compute_stress_tensor, s_compute_divergence_stress_tensor, s_compute_particle_forces, & - s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & + s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_transpose_slabZ2Y_tensor, s_mpi_transpose_slabY2Z_tensor, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity #if !defined(MFC_OpenACC) @@ -94,6 +94,8 @@ module m_volume_filtering ! 3D arrays for slab transposes complex(c_double_complex), allocatable :: data_cmplx_slabz(:, :, :), data_cmplx_slaby(:, :, :) + ! 3D arrays for slab transposes of tensor quantities + complex(c_double_complex), allocatable :: data_cmplx_slabz_tensor(:, :, :, :), data_cmplx_slaby_tensor(:, :, :, :) ! input/output array for FFT routine real(c_double), allocatable :: data_real_3D_slabz(:, :, :) @@ -105,7 +107,12 @@ module m_volume_filtering complex(c_double_complex), allocatable :: cmplx_kernelG1d(:) !$acc declare create(Nx, Ny, Nz, NxC, Nyloc, Nzloc) - !$acc declare create(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy, data_cmplx_slabz, data_cmplx_slaby, data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d) + !$acc declare create(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) + !$acc declare create(data_cmplx_slabz, data_cmplx_slaby, data_cmplx_slabz_tensor, data_cmplx_slaby_tensor, data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d) + + ! 
buffers for data transpose + complex(c_double_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) + complex(c_double_complex), allocatable :: sendbuf_tensor(:), recvbuf_tensor(:) contains @@ -232,6 +239,13 @@ contains @:ALLOCATE(data_real_3D_slabz(Nx, Ny, Nzloc)) @:ALLOCATE(data_cmplx_slabz(NxC, Ny, Nzloc)) @:ALLOCATE(data_cmplx_slaby(NxC, Nyloc, Nz)) + @:ALLOCATE(data_cmplx_slabz_tensor(9, NxC, Ny, Nzloc)) + @:ALLOCATE(data_cmplx_slaby_tensor(9, NxC, Nyloc, Nz)) + + allocate(sendbuf_sf(NxC*Nyloc*Nzloc*num_procs)) + allocate(recvbuf_sf(NxC*Nyloc*Nzloc*num_procs)) + allocate(sendbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) + allocate(recvbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) #if defined(MFC_OpenACC) !< GPU FFT plans @@ -564,17 +578,19 @@ contains call s_setup_terms_filtering(q_cons_vf, q_prim_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) ! pseudo turbulent reynolds stress - do i = 1, num_dims - do j = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(i)%vf(j)) - end do - end do + ! do i = 1, num_dims + ! do j = 1, num_dims + ! call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(i)%vf(j)) + ! end do + ! end do + call s_filter_tensor_field(reynolds_stress) ! effective viscosity - do i = 1, num_dims - do j = 1, num_dims - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., visc_stress(i)%vf(j), eff_visc(i)%vf(j)) - end do - end do + ! do i = 1, num_dims + ! do j = 1, num_dims + ! call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., visc_stress(i)%vf(j), eff_visc(i)%vf(j)) + ! end do + ! end do + call s_filter_tensor_field(visc_stress, eff_visc) ! 
interphase momentum exchange call s_compute_interphase_momentum_exchange(filtered_fluid_indicator_function, grad_fluid_indicator, pres_visc_stress, int_mom_exch) @@ -1006,78 +1022,340 @@ contains !< transpose domain from z-slabs to y-slabs on each processor subroutine s_mpi_transpose_slabZ2Y - complex(c_double_complex), allocatable :: sendbuf(:), recvbuf(:) integer :: dest_rank, src_rank integer :: i, j, k - allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs)) - allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs)) - - !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf) + !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf_sf) do dest_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slabz(i, j+dest_rank*Nyloc, k) + sendbuf_sf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slabz(i, j+dest_rank*Nyloc, k) end do end do end do end do - call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf) + !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf_sf) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - data_cmplx_slaby(i, j, k+src_rank*Nzloc) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) + data_cmplx_slaby(i, j, k+src_rank*Nzloc) = recvbuf_sf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) end do end do end do end do - deallocate(sendbuf, recvbuf) end subroutine s_mpi_transpose_slabZ2Y !< transpose domain from y-slabs to z-slabs on each processor subroutine s_mpi_transpose_slabY2Z - complex(c_double_complex), allocatable :: sendbuf(:), 
recvbuf(:) integer :: dest_rank, src_rank integer :: i, j, k - allocate(sendbuf(NxC*Nyloc*Nzloc*num_procs)) - allocate(recvbuf(NxC*Nyloc*Nzloc*num_procs)) - - !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf) + !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf_sf) do dest_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - sendbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slaby(i, j, k+dest_rank*Nzloc) + sendbuf_sf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slaby(i, j, k+dest_rank*Nzloc) end do end do end do end do - call MPI_Alltoall(sendbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf) + !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf_sf) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - data_cmplx_slabz(i, j+src_rank*Nyloc, k) = recvbuf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) + data_cmplx_slabz(i, j+src_rank*Nyloc, k) = recvbuf_sf(i + (j-1)*NxC + (k-1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) end do end do end do end do - deallocate(sendbuf, recvbuf) end subroutine s_mpi_transpose_slabY2Z + !< transpose domain from z-slabs to y-slabs on each processor for batched 9 element tensors + subroutine s_mpi_transpose_slabZ2Y_tensor + integer :: dest_rank, src_rank + integer :: i, j, k, l + + !$acc parallel loop collapse(5) gang vector default(present) copy(sendbuf_tensor) + do dest_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 9 + sendbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + dest_rank*9*NxC*Nyloc*Nzloc) = data_cmplx_slabz_tensor(l, i, 
j+dest_rank*Nyloc, k) + end do + end do + end do + end do + end do + + call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + + !$acc parallel loop collapse(5) gang vector default(present) copy(recvbuf_tensor) + do src_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 9 + data_cmplx_slaby_tensor(l, i, j, k+src_rank*Nzloc) = recvbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + src_rank*9*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabZ2Y_tensor + + !< transpose domain from y-slabs to z-slabs on each processor for batched 9 element tensors + subroutine s_mpi_transpose_slabY2Z_tensor + integer :: dest_rank, src_rank + integer :: i, j, k, l + + !$acc parallel loop collapse(5) gang vector default(present) copy(sendbuf_tensor) + do dest_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 9 + sendbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + dest_rank*9*NxC*Nyloc*Nzloc) = data_cmplx_slaby_tensor(l, i, j, k+dest_rank*Nzloc) + end do + end do + end do + end do + end do + + call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + + !$acc parallel loop collapse(5) gang vector default(present) copy(recvbuf_tensor) + do src_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 9 + data_cmplx_slabz_tensor(l, i, j+src_rank*Nyloc, k) = recvbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + src_rank*9*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabY2Z_tensor + + + + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d + subroutine s_filter_tensor_field(q_tensor_in, q_tensor_out) + type(vector_field), dimension(3), 
intent(inout) :: q_tensor_in + type(vector_field), dimension(3), intent(inout), optional :: q_tensor_out + integer :: i, j, k, l, q + + ! ===== forward FFT ===== + ! outer tensor element loop + do l = 1, 3 + do q = 1, 3 + + !$acc parallel loop collapse(3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i+1, j+1, k+1) = q_tensor_in(l)%vf(q)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) + end do + end do + end do + + ! 3D z-slab -> 1D x, y, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do + + ! X FFT +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + + ! 1D x, y, z -> 1D y, x, z (CMPLX) + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) + end do + end do + end do + + ! Y FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_tensor((l-1)*3 + q, i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + ! pack data_cmplx_slabz_tensor for MPI tranpose + end do + end do + + ! tensor MPI data transpose + call s_mpi_transpose_slabZ2Y_tensor + + ! outer tensor element loop + do l = 1, 3 + do q = 1, 3 + ! 
3D y-slab -> 1D z, x, y + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_tensor((l-1)*3 + q, i, j, k) + end do + end do + end do + + ! Z FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! convolution with filtering kernel in Fourier space + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + + ! ===== begin backward FFT ===== + ! Z inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! 1D z, x, y -> 3D y-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_tensor((l-1)*3 + q, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + ! pack data_cmplx_slaby_tensor for MPI tranpose + end do + end do + + call s_mpi_transpose_slabY2Z_tensor + + ! outer tensor element loop + do l = 1, 3 + do q = 1, 3 + + ! 3D z-slab -> 1D y, x, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_tensor((l-1)*3 + q, i, j, k) + end do + end do + end do + + ! Y inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 
1D y, x, z -> 1D x, y, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + + ! X inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + + ! 1D x, y, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + end do + end do + end do + + if (present(q_tensor_out)) then + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + q_tensor_out(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + else + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + q_tensor_in(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + end if + + end do + end do + + end subroutine s_filter_tensor_field + + + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d subroutine s_mpi_FFT_fwd integer :: i, j, k @@ -1288,6 +1566,10 @@ contains @:DEALLOCATE(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) @:DEALLOCATE(cmplx_kernelG1d, real_kernelG_in) @:DEALLOCATE(data_real_3D_slabz, data_cmplx_slabz, data_cmplx_slaby) + @:DEALLOCATE(data_cmplx_slabz_tensor, data_cmplx_slaby_tensor) + + deallocate(sendbuf_sf, recvbuf_sf) + deallocate(sendbuf_tensor, recvbuf_tensor) #if defined(MFC_OpenACC) ierr = cufftDestroy(plan_x_fwd_gpu) From 4dfe3ccf5a15aad226bf73a4e5d16e3f5638121f Mon Sep 17 00:00:00 2001 
From: conradd3 Date: Thu, 25 Sep 2025 12:20:42 -0500 Subject: [PATCH 20/30] gpu data allocation --- src/simulation/m_volume_filtering.fpp | 46 +++++++++++++++++++-------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index d3981dd55a..a2c0c0efac 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -114,6 +114,8 @@ module m_volume_filtering complex(c_double_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) complex(c_double_complex), allocatable :: sendbuf_tensor(:), recvbuf_tensor(:) + !$acc declare create(sendbuf_sf, recvbuf_sf, sendbuf_tensor, recvbuf_tensor) + contains !< create fft plans to be used for explicit filtering of data @@ -242,10 +244,10 @@ contains @:ALLOCATE(data_cmplx_slabz_tensor(9, NxC, Ny, Nzloc)) @:ALLOCATE(data_cmplx_slaby_tensor(9, NxC, Nyloc, Nz)) - allocate(sendbuf_sf(NxC*Nyloc*Nzloc*num_procs)) - allocate(recvbuf_sf(NxC*Nyloc*Nzloc*num_procs)) - allocate(sendbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) - allocate(recvbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(sendbuf_sf(NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(recvbuf_sf(NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(sendbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(recvbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) #if defined(MFC_OpenACC) !< GPU FFT plans @@ -1025,7 +1027,7 @@ contains integer :: dest_rank, src_rank integer :: i, j, k - !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf_sf) + !$acc parallel loop collapse(4) gang vector default(present) do dest_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1036,10 +1038,14 @@ contains end do end do + !$acc update host(sendbuf_sf) + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & recvbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf_sf) + !$acc 
update device(recvbuf_sf) + + !$acc parallel loop collapse(4) gang vector default(present) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1057,7 +1063,7 @@ contains integer :: dest_rank, src_rank integer :: i, j, k - !$acc parallel loop collapse(4) gang vector default(present) copy(sendbuf_sf) + !$acc parallel loop collapse(4) gang vector default(present) do dest_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1068,10 +1074,14 @@ contains end do end do + !$acc update host(sendbuf_sf) + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & recvbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc parallel loop collapse(4) gang vector default(present) copy(recvbuf_sf) + !$acc update device(recvbuf_sf) + + !$acc parallel loop collapse(4) gang vector default(present) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1089,7 +1099,7 @@ contains integer :: dest_rank, src_rank integer :: i, j, k, l - !$acc parallel loop collapse(5) gang vector default(present) copy(sendbuf_tensor) + !$acc parallel loop collapse(5) gang vector default(present) do dest_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1102,10 +1112,14 @@ contains end do end do + !$acc update host(sendbuf_tensor) + call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc parallel loop collapse(5) gang vector default(present) copy(recvbuf_tensor) + !$acc update device(recvbuf_tensor) + + !$acc parallel loop collapse(5) gang vector default(present) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1125,7 +1139,7 @@ contains integer :: dest_rank, src_rank integer :: i, j, k, l - !$acc parallel loop collapse(5) gang vector default(present) copy(sendbuf_tensor) + !$acc parallel loop collapse(5) gang vector default(present) do dest_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1138,10 +1152,14 @@ 
contains end do end do + !$acc update host(sendbuf_tensor) + call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc parallel loop collapse(5) gang vector default(present) copy(recvbuf_tensor) + !$acc update device(recvbuf_tensor) + + !$acc parallel loop collapse(5) gang vector default(present) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc @@ -1568,8 +1586,8 @@ contains @:DEALLOCATE(data_real_3D_slabz, data_cmplx_slabz, data_cmplx_slaby) @:DEALLOCATE(data_cmplx_slabz_tensor, data_cmplx_slaby_tensor) - deallocate(sendbuf_sf, recvbuf_sf) - deallocate(sendbuf_tensor, recvbuf_tensor) + @:DEALLOCATE(sendbuf_sf, recvbuf_sf) + @:DEALLOCATE(sendbuf_tensor, recvbuf_tensor) #if defined(MFC_OpenACC) ierr = cufftDestroy(plan_x_fwd_gpu) From 3e6aed61f128ebffce0c1de82a32bdf0d78bd3c3 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Thu, 25 Sep 2025 15:20:48 -0500 Subject: [PATCH 21/30] batched mpiAlltoall for cons vars and tensors --- src/simulation/m_volume_filtering.fpp | 281 +++++++++++++++++++++++++- 1 file changed, 274 insertions(+), 7 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index a2c0c0efac..5d487c9122 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -29,9 +29,10 @@ module m_volume_filtering private; public :: s_initialize_fftw_explicit_filter_module, & s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & s_initialize_filtered_fluid_indicator_function, s_initialize_fluid_indicator_gradient, & - s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, s_filter_tensor_field, & + s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, s_filter_tensor_field, s_filter_cons_vars, & s_compute_viscous_stress_tensor, s_compute_stress_tensor, 
s_compute_divergence_stress_tensor, s_compute_particle_forces, & - s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_transpose_slabZ2Y_tensor, s_mpi_transpose_slabY2Z_tensor, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & + s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_transpose_slabZ2Y_tensor, s_mpi_transpose_slabY2Z_tensor, & + s_mpi_transpose_slabZ2Y_cons, s_mpi_transpose_slabY2Z_cons, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity #if !defined(MFC_OpenACC) @@ -96,6 +97,8 @@ module m_volume_filtering complex(c_double_complex), allocatable :: data_cmplx_slabz(:, :, :), data_cmplx_slaby(:, :, :) ! 3D arrays for slab transposes of tensor quantities complex(c_double_complex), allocatable :: data_cmplx_slabz_tensor(:, :, :, :), data_cmplx_slaby_tensor(:, :, :, :) + ! 3D arrays for slab transpose of conserved variables + complex(c_double_complex), allocatable :: data_cmplx_slabz_cons(:, :, :, :), data_cmplx_slaby_cons(:, :, :, :) ! input/output array for FFT routine real(c_double), allocatable :: data_real_3D_slabz(:, :, :) @@ -113,8 +116,9 @@ module m_volume_filtering ! 
buffers for data transpose complex(c_double_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) complex(c_double_complex), allocatable :: sendbuf_tensor(:), recvbuf_tensor(:) + complex(c_double_complex), allocatable :: sendbuf_cons(:), recvbuf_cons(:) - !$acc declare create(sendbuf_sf, recvbuf_sf, sendbuf_tensor, recvbuf_tensor) + !$acc declare create(sendbuf_sf, recvbuf_sf, sendbuf_tensor, recvbuf_tensor, sendbuf_cons, recvbuf_cons) contains @@ -243,11 +247,15 @@ contains @:ALLOCATE(data_cmplx_slaby(NxC, Nyloc, Nz)) @:ALLOCATE(data_cmplx_slabz_tensor(9, NxC, Ny, Nzloc)) @:ALLOCATE(data_cmplx_slaby_tensor(9, NxC, Nyloc, Nz)) + @:ALLOCATE(data_cmplx_slabz_cons(4, NxC, Ny, Nzloc)) + @:ALLOCATE(data_cmplx_slaby_cons(4, NxC, Nyloc, Nz)) @:ALLOCATE(sendbuf_sf(NxC*Nyloc*Nzloc*num_procs)) @:ALLOCATE(recvbuf_sf(NxC*Nyloc*Nzloc*num_procs)) @:ALLOCATE(sendbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) @:ALLOCATE(recvbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(sendbuf_cons(5*NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(recvbuf_cons(5*NxC*Nyloc*Nzloc*num_procs)) #if defined(MFC_OpenACC) !< GPU FFT plans @@ -570,9 +578,10 @@ contains integer :: i, j, k call nvtxStartRange("FILTER-CONS-VARS") - do i = 1, sys_size-1 - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) - end do + call s_filter_cons_vars(q_cons_vf, q_cons_filtered) + ! do i = 1, sys_size-1 + ! call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) + ! 
end do call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_prim_vf(E_idx), filtered_pressure) call nvtxEndRange @@ -1174,7 +1183,85 @@ contains end subroutine s_mpi_transpose_slabY2Z_tensor + !< transpose domain from z-slabs to y-slabs on each processor for batched 5 element conserved variables + subroutine s_mpi_transpose_slabZ2Y_cons + integer :: dest_rank, src_rank + integer :: i, j, k, l + + !$acc parallel loop collapse(5) gang vector default(present) + do dest_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 5 + sendbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + dest_rank*5*NxC*Nyloc*Nzloc) = data_cmplx_slabz_cons(l, i, j+dest_rank*Nyloc, k) + end do + end do + end do + end do + end do + + !$acc update host(sendbuf_cons) + + call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + + !$acc update device(recvbuf_cons) + + !$acc parallel loop collapse(5) gang vector default(present) + do src_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 5 + data_cmplx_slaby_cons(l, i, j, k+src_rank*Nzloc) = recvbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + src_rank*5*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabZ2Y_cons + + !< transpose domain from y-slabs to z-slabs on each processor for batched 4 element conserved variables + subroutine s_mpi_transpose_slabY2Z_cons + integer :: dest_rank, src_rank + integer :: i, j, k, l + + !$acc parallel loop collapse(5) gang vector default(present) + do dest_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 5 + sendbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + dest_rank*5*NxC*Nyloc*Nzloc) = data_cmplx_slaby_cons(l, i, j, k+dest_rank*Nzloc) + end do + end do + end do + end do + end do + + !$acc update host(sendbuf_cons) + 
+ call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & + recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + !$acc update device(recvbuf_cons) + + !$acc parallel loop collapse(5) gang vector default(present) + do src_rank = 0, num_procs-1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 5 + data_cmplx_slabz_cons(l, i, j+src_rank*Nyloc, k) = recvbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + src_rank*5*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabY2Z_cons !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d subroutine s_filter_tensor_field(q_tensor_in, q_tensor_out) @@ -1372,7 +1459,185 @@ contains end subroutine s_filter_tensor_field + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d + subroutine s_filter_cons_vars(q_cons_vf, q_cons_filtered) + type(scalar_field), dimension(5), intent(inout) :: q_cons_vf + type(scalar_field), dimension(5), intent(inout) :: q_cons_filtered + integer :: i, j, k, l + + ! ===== forward FFT ===== + ! outer element loop + do l = 1, 5 + + !$acc parallel loop collapse(3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i+1, j+1, k+1) = q_cons_vf(l)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) + end do + end do + end do + + ! 3D z-slab -> 1D x, y, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do + + ! X FFT +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + + ! 
1D x, y, z -> 1D y, x, z (CMPLX) + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) + end do + end do + end do + + ! Y FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_cons(l, i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + ! pack data_cmplx_slabz_cons for MPI tranpose + end do + + ! cons vars MPI data transpose + call s_mpi_transpose_slabZ2Y_cons + + ! outer element loop + do l = 1, 5 + + ! 3D y-slab -> 1D z, x, y + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_cons(l, i, j, k) + end do + end do + end do + + ! Z FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! convolution with filtering kernel in Fourier space + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + + ! ===== begin backward FFT ===== + ! Z inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! 
1D z, x, y -> 3D y-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_cons(l, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + ! pack data_cmplx_slaby_cons for MPI tranpose + end do + + call s_mpi_transpose_slabY2Z_cons + + ! outer element loop + do l = 1, 5 + + ! 3D z-slab -> 1D y, x, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_cons(l, i, j, k) + end do + end do + end do + + ! Y inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 1D x, y, z + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do + + ! X inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + + ! 
1D x, y, z -> 3D z-slab + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + end do + end do + end do + + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + q_cons_filtered(l)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + + end do + end subroutine s_filter_cons_vars !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d subroutine s_mpi_FFT_fwd @@ -1585,9 +1850,11 @@ contains @:DEALLOCATE(cmplx_kernelG1d, real_kernelG_in) @:DEALLOCATE(data_real_3D_slabz, data_cmplx_slabz, data_cmplx_slaby) @:DEALLOCATE(data_cmplx_slabz_tensor, data_cmplx_slaby_tensor) + @:DEALLOCATE(data_cmplx_slabz_cons, data_cmplx_slaby_cons) @:DEALLOCATE(sendbuf_sf, recvbuf_sf) @:DEALLOCATE(sendbuf_tensor, recvbuf_tensor) + @:DEALLOCATE(sendbuf_cons, recvbuf_cons) #if defined(MFC_OpenACC) ierr = cufftDestroy(plan_x_fwd_gpu) @@ -1612,4 +1879,4 @@ contains end subroutine s_finalize_fftw_explicit_filter_module -end module m_volume_filtering \ No newline at end of file +end module m_volume_filtering From 3e81245ed27d41dcc27e5fb4c41b8b407ee18018 Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Sat, 27 Sep 2025 23:30:49 -0500 Subject: [PATCH 22/30] starting runs --- src/simulation/m_start_up.fpp | 2 +- src/simulation/m_volume_filtering.fpp | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index c593da603d..d83d73e9d3 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -1322,7 +1322,7 @@ contains ! 
Volume filter flow variables, compute unclosed terms and their statistics if (volume_filtering_momentum_eqn) then if (t_step > t_step_stat_start) then - call nvtxStartRange("VOLUME-FILTERED-MOMENTUM-EQUATION") + call nvtxStartRange("VOLUME-FILTER-MOMENTUM-EQUATION") call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf, q_prim_vf) call nvtxEndRange diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 5d487c9122..66844526ba 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -111,7 +111,8 @@ module m_volume_filtering !$acc declare create(Nx, Ny, Nz, NxC, Nyloc, Nzloc) !$acc declare create(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) - !$acc declare create(data_cmplx_slabz, data_cmplx_slaby, data_cmplx_slabz_tensor, data_cmplx_slaby_tensor, data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d) + !$acc declare create(data_cmplx_slabz, data_cmplx_slaby, data_cmplx_slabz_tensor, data_cmplx_slaby_tensor, data_cmplx_slabz_cons, data_cmplx_slaby_cons) + !$acc declare create(data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d) ! buffers for data transpose complex(c_double_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) @@ -247,8 +248,8 @@ contains @:ALLOCATE(data_cmplx_slaby(NxC, Nyloc, Nz)) @:ALLOCATE(data_cmplx_slabz_tensor(9, NxC, Ny, Nzloc)) @:ALLOCATE(data_cmplx_slaby_tensor(9, NxC, Nyloc, Nz)) - @:ALLOCATE(data_cmplx_slabz_cons(4, NxC, Ny, Nzloc)) - @:ALLOCATE(data_cmplx_slaby_cons(4, NxC, Nyloc, Nz)) + @:ALLOCATE(data_cmplx_slabz_cons(5, NxC, Ny, Nzloc)) + @:ALLOCATE(data_cmplx_slaby_cons(5, NxC, Nyloc, Nz)) @:ALLOCATE(sendbuf_sf(NxC*Nyloc*Nzloc*num_procs)) @:ALLOCATE(recvbuf_sf(NxC*Nyloc*Nzloc*num_procs)) @@ -578,10 +579,10 @@ contains integer :: i, j, k call nvtxStartRange("FILTER-CONS-VARS") - call s_filter_cons_vars(q_cons_vf, q_cons_filtered) - ! do i = 1, sys_size-1 - ! 
call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) - ! end do + !call s_filter_cons_vars(q_cons_vf, q_cons_filtered) + do i = 1, sys_size-1 + call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) + end do call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_prim_vf(E_idx), filtered_pressure) call nvtxEndRange @@ -1223,7 +1224,7 @@ contains end subroutine s_mpi_transpose_slabZ2Y_cons - !< transpose domain from y-slabs to z-slabs on each processor for batched 4 element conserved variables + !< transpose domain from y-slabs to z-slabs on each processor for batched 5 element conserved variables subroutine s_mpi_transpose_slabY2Z_cons integer :: dest_rank, src_rank integer :: i, j, k, l From 303ffab5ca64a8609eb8fc6b62790fff2ec2c6a2 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Wed, 1 Oct 2025 14:32:33 -0500 Subject: [PATCH 23/30] single precision alltoall --- src/simulation/m_volume_filtering.fpp | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 66844526ba..9c1661844e 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -115,9 +115,9 @@ module m_volume_filtering !$acc declare create(data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d) ! 
buffers for data transpose - complex(c_double_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) - complex(c_double_complex), allocatable :: sendbuf_tensor(:), recvbuf_tensor(:) - complex(c_double_complex), allocatable :: sendbuf_cons(:), recvbuf_cons(:) + complex(c_float_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) + complex(c_float_complex), allocatable :: sendbuf_tensor(:), recvbuf_tensor(:) + complex(c_float_complex), allocatable :: sendbuf_cons(:), recvbuf_cons(:) !$acc declare create(sendbuf_sf, recvbuf_sf, sendbuf_tensor, recvbuf_tensor, sendbuf_cons, recvbuf_cons) @@ -1050,8 +1050,8 @@ contains !$acc update host(sendbuf_sf) - call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) !$acc update device(recvbuf_sf) @@ -1086,8 +1086,8 @@ contains !$acc update host(sendbuf_sf) - call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf_sf, NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) !$acc update device(recvbuf_sf) @@ -1124,8 +1124,8 @@ contains !$acc update host(sendbuf_tensor) - call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) !$acc update device(recvbuf_tensor) @@ -1164,8 +1164,8 @@ contains !$acc update host(sendbuf_tensor) - call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, 
MPI_COMPLEX, & + recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) !$acc update device(recvbuf_tensor) @@ -1204,8 +1204,8 @@ contains !$acc update host(sendbuf_cons) - call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) !$acc update device(recvbuf_cons) @@ -1244,8 +1244,8 @@ contains !$acc update host(sendbuf_cons) - call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, & - recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_DOUBLE_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) !$acc update device(recvbuf_cons) From 8ac983847c0297a84493d33ca63cb430c448b28f Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Wed, 8 Oct 2025 18:00:10 -0500 Subject: [PATCH 24/30] 24 batch alltoall --- src/simulation/m_volume_filtering.fpp | 756 ++++++++++++++------------ 1 file changed, 409 insertions(+), 347 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 9c1661844e..3cf904d84d 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -29,10 +29,10 @@ module m_volume_filtering private; public :: s_initialize_fftw_explicit_filter_module, & s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & s_initialize_filtered_fluid_indicator_function, s_initialize_fluid_indicator_gradient, & - s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, s_filter_tensor_field, s_filter_cons_vars, & + s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, s_filter_batch, & s_compute_viscous_stress_tensor, 
s_compute_stress_tensor, s_compute_divergence_stress_tensor, s_compute_particle_forces, & - s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_transpose_slabZ2Y_tensor, s_mpi_transpose_slabY2Z_tensor, & - s_mpi_transpose_slabZ2Y_cons, s_mpi_transpose_slabY2Z_cons, s_mpi_FFT_fwd, s_mpi_FFT_bwd, & + s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_transpose_slabZ2Y_batch, s_mpi_transpose_slabY2Z_batch, & + s_mpi_FFT_fwd, s_mpi_FFT_bwd, & s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity #if !defined(MFC_OpenACC) @@ -94,11 +94,9 @@ module m_volume_filtering complex(c_double_complex), allocatable :: data_cmplx_out1dy(:) ! 3D arrays for slab transposes - complex(c_double_complex), allocatable :: data_cmplx_slabz(:, :, :), data_cmplx_slaby(:, :, :) + complex(c_float_complex), allocatable :: data_cmplx_slabz(:, :, :), data_cmplx_slaby(:, :, :) ! 3D arrays for slab transposes of tensor quantities - complex(c_double_complex), allocatable :: data_cmplx_slabz_tensor(:, :, :, :), data_cmplx_slaby_tensor(:, :, :, :) - ! 3D arrays for slab transpose of conserved variables - complex(c_double_complex), allocatable :: data_cmplx_slabz_cons(:, :, :, :), data_cmplx_slaby_cons(:, :, :, :) + complex(c_float_complex), allocatable :: data_cmplx_slabz_batch(:, :, :, :), data_cmplx_slaby_batch(:, :, :, :) ! input/output array for FFT routine real(c_double), allocatable :: data_real_3D_slabz(:, :, :) @@ -111,15 +109,14 @@ module m_volume_filtering !$acc declare create(Nx, Ny, Nz, NxC, Nyloc, Nzloc) !$acc declare create(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) - !$acc declare create(data_cmplx_slabz, data_cmplx_slaby, data_cmplx_slabz_tensor, data_cmplx_slaby_tensor, data_cmplx_slabz_cons, data_cmplx_slaby_cons) + !$acc declare create(data_cmplx_slabz, data_cmplx_slaby, data_cmplx_slabz_batch, data_cmplx_slaby_batch) !$acc declare create(data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d) ! 
buffers for data transpose complex(c_float_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) - complex(c_float_complex), allocatable :: sendbuf_tensor(:), recvbuf_tensor(:) - complex(c_float_complex), allocatable :: sendbuf_cons(:), recvbuf_cons(:) + complex(c_float_complex), allocatable :: sendbuf_batch(:), recvbuf_batch(:) - !$acc declare create(sendbuf_sf, recvbuf_sf, sendbuf_tensor, recvbuf_tensor, sendbuf_cons, recvbuf_cons) + !$acc declare create(sendbuf_sf, recvbuf_sf, sendbuf_batch, recvbuf_batch) contains @@ -246,17 +243,13 @@ contains @:ALLOCATE(data_real_3D_slabz(Nx, Ny, Nzloc)) @:ALLOCATE(data_cmplx_slabz(NxC, Ny, Nzloc)) @:ALLOCATE(data_cmplx_slaby(NxC, Nyloc, Nz)) - @:ALLOCATE(data_cmplx_slabz_tensor(9, NxC, Ny, Nzloc)) - @:ALLOCATE(data_cmplx_slaby_tensor(9, NxC, Nyloc, Nz)) - @:ALLOCATE(data_cmplx_slabz_cons(5, NxC, Ny, Nzloc)) - @:ALLOCATE(data_cmplx_slaby_cons(5, NxC, Nyloc, Nz)) + @:ALLOCATE(data_cmplx_slabz_batch(24, NxC, Ny, Nzloc)) + @:ALLOCATE(data_cmplx_slaby_batch(24, NxC, Nyloc, Nz)) @:ALLOCATE(sendbuf_sf(NxC*Nyloc*Nzloc*num_procs)) @:ALLOCATE(recvbuf_sf(NxC*Nyloc*Nzloc*num_procs)) - @:ALLOCATE(sendbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) - @:ALLOCATE(recvbuf_tensor(9*NxC*Nyloc*Nzloc*num_procs)) - @:ALLOCATE(sendbuf_cons(5*NxC*Nyloc*Nzloc*num_procs)) - @:ALLOCATE(recvbuf_cons(5*NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(sendbuf_batch(24*NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(recvbuf_batch(24*NxC*Nyloc*Nzloc*num_procs)) #if defined(MFC_OpenACC) !< GPU FFT plans @@ -578,37 +571,15 @@ contains type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf integer :: i, j, k - call nvtxStartRange("FILTER-CONS-VARS") - !call s_filter_cons_vars(q_cons_vf, q_cons_filtered) - do i = 1, sys_size-1 - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_cons_vf(i), q_cons_filtered(i)) - end do - call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., q_prim_vf(E_idx), filtered_pressure) - 
call nvtxEndRange - - call nvtxStartRange("COMPUTE-UNCLOSED-TERMS") call s_setup_terms_filtering(q_cons_vf, q_prim_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress) - ! pseudo turbulent reynolds stress - ! do i = 1, num_dims - ! do j = 1, num_dims - ! call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., reynolds_stress(i)%vf(j)) - ! end do - ! end do - call s_filter_tensor_field(reynolds_stress) - ! effective viscosity - ! do i = 1, num_dims - ! do j = 1, num_dims - ! call s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, .true., visc_stress(i)%vf(j), eff_visc(i)%vf(j)) - ! end do - ! end do - call s_filter_tensor_field(visc_stress, eff_visc) + call s_filter_batch(q_cons_vf, q_cons_filtered, q_prim_vf(E_idx), filtered_pressure, reynolds_stress, visc_stress, eff_visc) + ! interphase momentum exchange call s_compute_interphase_momentum_exchange(filtered_fluid_indicator_function, grad_fluid_indicator, pres_visc_stress, int_mom_exch) call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress) - call nvtxEndRange end subroutine s_volume_filter_momentum_eqn @@ -1104,8 +1075,8 @@ contains end subroutine s_mpi_transpose_slabY2Z - !< transpose domain from z-slabs to y-slabs on each processor for batched 9 element tensors - subroutine s_mpi_transpose_slabZ2Y_tensor + !< transpose domain from z-slabs to y-slabs on each processor for batched 24 element tensors + subroutine s_mpi_transpose_slabZ2Y_batch integer :: dest_rank, src_rank integer :: i, j, k, l @@ -1114,38 +1085,38 @@ contains do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - do l = 1, 9 - sendbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + dest_rank*9*NxC*Nyloc*Nzloc) = data_cmplx_slabz_tensor(l, i, j+dest_rank*Nyloc, k) + do l = 1, 24 + sendbuf_batch(l + (i-1)*24 + (j-1)*24*NxC + (k-1)*24*NxC*Nyloc + dest_rank*24*NxC*Nyloc*Nzloc) = 
data_cmplx_slabz_batch(l, i, j+dest_rank*Nyloc, k) end do end do end do end do end do - !$acc update host(sendbuf_tensor) + !$acc update host(sendbuf_batch) - call MPI_Alltoall(sendbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_COMPLEX, & - recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc update device(recvbuf_tensor) + !$acc update device(recvbuf_batch) !$acc parallel loop collapse(5) gang vector default(present) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - do l = 1, 9 - data_cmplx_slaby_tensor(l, i, j, k+src_rank*Nzloc) = recvbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + src_rank*9*NxC*Nyloc*Nzloc) + do l = 1, 24 + data_cmplx_slaby_batch(l, i, j, k+src_rank*Nzloc) = recvbuf_batch(l + (i-1)*24 + (j-1)*24*NxC + (k-1)*24*NxC*Nyloc + src_rank*24*NxC*Nyloc*Nzloc) end do end do end do end do end do - end subroutine s_mpi_transpose_slabZ2Y_tensor + end subroutine s_mpi_transpose_slabZ2Y_batch - !< transpose domain from y-slabs to z-slabs on each processor for batched 9 element tensors - subroutine s_mpi_transpose_slabY2Z_tensor + !< transpose domain from y-slabs to z-slabs on each processor for batched 24 element tensors + subroutine s_mpi_transpose_slabY2Z_batch integer :: dest_rank, src_rank integer :: i, j, k, l @@ -1154,137 +1125,197 @@ contains do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - do l = 1, 9 - sendbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + dest_rank*9*NxC*Nyloc*Nzloc) = data_cmplx_slaby_tensor(l, i, j, k+dest_rank*Nzloc) + do l = 1, 24 + sendbuf_batch(l + (i-1)*24 + (j-1)*24*NxC + (k-1)*24*NxC*Nyloc + dest_rank*24*NxC*Nyloc*Nzloc) = data_cmplx_slaby_batch(l, i, j, k+dest_rank*Nzloc) end do end do end do end do end do - !$acc update host(sendbuf_tensor) + !$acc update host(sendbuf_batch) - call MPI_Alltoall(sendbuf_tensor, 
9*NxC*Nyloc*Nzloc, MPI_COMPLEX, & - recvbuf_tensor, 9*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) + call MPI_Alltoall(sendbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc update device(recvbuf_tensor) + !$acc update device(recvbuf_batch) !$acc parallel loop collapse(5) gang vector default(present) do src_rank = 0, num_procs-1 do k = 1, Nzloc do j = 1, Nyloc do i = 1, NxC - do l = 1, 9 - data_cmplx_slabz_tensor(l, i, j+src_rank*Nyloc, k) = recvbuf_tensor(l + (i-1)*9 + (j-1)*9*NxC + (k-1)*9*NxC*Nyloc + src_rank*9*NxC*Nyloc*Nzloc) + do l = 1, 24 + data_cmplx_slabz_batch(l, i, j+src_rank*Nyloc, k) = recvbuf_batch(l + (i-1)*24 + (j-1)*24*NxC + (k-1)*24*NxC*Nyloc + src_rank*24*NxC*Nyloc*Nzloc) end do end do end do end do end do - end subroutine s_mpi_transpose_slabY2Z_tensor - - !< transpose domain from z-slabs to y-slabs on each processor for batched 5 element conserved variables - subroutine s_mpi_transpose_slabZ2Y_cons - integer :: dest_rank, src_rank - integer :: i, j, k, l - - !$acc parallel loop collapse(5) gang vector default(present) - do dest_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - do l = 1, 5 - sendbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + dest_rank*5*NxC*Nyloc*Nzloc) = data_cmplx_slabz_cons(l, i, j+dest_rank*Nyloc, k) - end do - end do - end do - end do - end do - - !$acc update host(sendbuf_cons) + end subroutine s_mpi_transpose_slabY2Z_batch - call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, & - recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) - !$acc update device(recvbuf_cons) + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d + subroutine s_filter_batch(q_cons_vf, q_cons_filtered, pressure, filtered_pressure, reynolds_stress, visc_stress, eff_visc) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(5), 
intent(inout) :: q_cons_filtered + type(scalar_field), intent(inout) :: pressure + type(scalar_field), intent(inout) :: filtered_pressure + type(vector_field), dimension(3), intent(inout) :: reynolds_stress + type(vector_field), dimension(3), intent(inout) :: visc_stress + type(vector_field), dimension(3), intent(inout) :: eff_visc + integer :: i, j, k, l, q - !$acc parallel loop collapse(5) gang vector default(present) - do src_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - do l = 1, 5 - data_cmplx_slaby_cons(l, i, j, k+src_rank*Nzloc) = recvbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + src_rank*5*NxC*Nyloc*Nzloc) - end do - end do + ! cons vars + do l = 1, 5 + !$acc parallel loop collapse(3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i+1, j+1, k+1) = q_cons_vf(l)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) + end do end do end do - end do - - end subroutine s_mpi_transpose_slabZ2Y_cons - - !< transpose domain from y-slabs to z-slabs on each processor for batched 5 element conserved variables - subroutine s_mpi_transpose_slabY2Z_cons - integer :: dest_rank, src_rank - integer :: i, j, k, l - - !$acc parallel loop collapse(5) gang vector default(present) - do dest_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - do l = 1, 5 - sendbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + dest_rank*5*NxC*Nyloc*Nzloc) = data_cmplx_slaby_cons(l, i, j, k+dest_rank*Nzloc) - end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, 
Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) end do end do end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_batch(l, i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do end do - !$acc update host(sendbuf_cons) - - call MPI_Alltoall(sendbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, & - recvbuf_cons, 5*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) - - !$acc update device(recvbuf_cons) + ! pressure + !$acc parallel loop collapse(3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i+1, j+1, k+1) = pressure%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) + end do + end do + end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_batch(6, i, j, k) = 
data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do - !$acc parallel loop collapse(5) gang vector default(present) - do src_rank = 0, num_procs-1 - do k = 1, Nzloc - do j = 1, Nyloc - do i = 1, NxC - do l = 1, 5 - data_cmplx_slabz_cons(l, i, j+src_rank*Nyloc, k) = recvbuf_cons(l + (i-1)*5 + (j-1)*5*NxC + (k-1)*5*NxC*Nyloc + src_rank*5*NxC*Nyloc*Nzloc) + ! reynolds stress + do l = 1, 3 + do q = 1, 3 + !$acc parallel loop collapse(3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i+1, j+1, k+1) = reynolds_stress(l)%vf(q)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) end do - end do + end do + end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_batch(6 + 3*(l-1) + q, i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do end do end do end do - - end subroutine s_mpi_transpose_slabY2Z_cons - !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d - subroutine s_filter_tensor_field(q_tensor_in, q_tensor_out) - type(vector_field), dimension(3), intent(inout) :: q_tensor_in 
- type(vector_field), dimension(3), intent(inout), optional :: q_tensor_out - integer :: i, j, k, l, q - - ! ===== forward FFT ===== - ! outer tensor element loop + ! effective viscosity do l = 1, 3 do q = 1, 3 - !$acc parallel loop collapse(3) do i = 0, m do j = 0, n do k = 0, p - data_real_3D_slabz(i+1, j+1, k+1) = q_tensor_in(l)%vf(q)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) + data_real_3D_slabz(i+1, j+1, k+1) = visc_stress(l)%vf(q)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) end do end do end do - - ! 3D z-slab -> 1D x, y, z !$acc parallel loop collapse(3) gang vector default(present) do i = 1, Nx do j = 1, Ny @@ -1293,15 +1324,11 @@ contains end do end do end do - - ! X FFT #if defined(MFC_OpenACC) ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) #else call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) #endif - - ! 1D x, y, z -> 1D y, x, z (CMPLX) !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC do j = 1, Ny @@ -1310,51 +1337,116 @@ contains end do end do end do - - ! Y FFT #if defined(MFC_OpenACC) ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) #else call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) #endif - - ! 1D y, x, z -> 3D z-slab !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC do j = 1, Ny do k = 1, Nzloc - data_cmplx_slabz_tensor((l-1)*3 + q, i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + data_cmplx_slabz_batch(15 + 3*(l-1) + q, i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) end do end do end do - ! pack data_cmplx_slabz_tensor for MPI tranpose end do - end do + end do + + + call s_mpi_transpose_slabZ2Y_batch + + + ! 
cons vars + do l = 1, 5 + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_batch(l, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_batch(l, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do + end do - ! tensor MPI data transpose - call s_mpi_transpose_slabZ2Y_tensor + ! 
pressure + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_batch(6, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_batch(6, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + end do + end do + end do - ! outer tensor element loop + ! reynolds stress do l = 1, 3 do q = 1, 3 - ! 3D y-slab -> 1D z, x, y !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC do j = 1, Nyloc do k = 1, Nz - data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_tensor((l-1)*3 + q, i, j, k) + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_batch(6 + 3*(l-1) + q, i, j, k) end do end do end do - - ! Z FFT #if defined(MFC_OpenACC) ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) #else call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) #endif - - ! convolution with filtering kernel in Fourier space !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC do j = 1, Nyloc @@ -1363,282 +1455,254 @@ contains end do end do end do - - ! 
===== begin backward FFT ===== - ! Z inv FFT #if defined(MFC_OpenACC) ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) #else call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) #endif - - ! 1D z, x, y -> 3D y-slab !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC do j = 1, Nyloc do k = 1, Nz - data_cmplx_slaby_tensor((l-1)*3 + q, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) + data_cmplx_slaby_batch(6 + 3*(l-1) + q, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) end do end do end do - ! pack data_cmplx_slaby_tensor for MPI tranpose end do end do - call s_mpi_transpose_slabY2Z_tensor - - ! outer tensor element loop + ! effective viscosity do l = 1, 3 do q = 1, 3 - - ! 3D z-slab -> 1D y, x, z !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_tensor((l-1)*3 + q, i, j, k) + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_batch(15 + 3*(l-1) + q, i, j, k) end do end do end do - - ! Y inv FFT #if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) #else - call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) #endif - - ! 1D y, x, z -> 1D x, y, z !$acc parallel loop collapse(3) gang vector default(present) do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) end do end do end do - - ! 
X inv FFT #if defined(MFC_OpenACC) - ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) #else - call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) #endif - - ! 1D x, y, z -> 3D z-slab !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, Nx - do j = 1, Ny - do k = 1, Nzloc - data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_batch(15 + 3*(l-1) + q, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) end do end do end do - - if (present(q_tensor_out)) then - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - q_tensor_out(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) - end do - end do - end do - else - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - q_tensor_in(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) - end do - end do - end do - end if - end do end do - end subroutine s_filter_tensor_field - !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d - subroutine s_filter_cons_vars(q_cons_vf, q_cons_filtered) - type(scalar_field), dimension(5), intent(inout) :: q_cons_vf - type(scalar_field), dimension(5), intent(inout) :: q_cons_filtered - integer :: i, j, k, l + call s_mpi_transpose_slabY2Z_batch - ! ===== forward FFT ===== - ! 
outer element loop - do l = 1, 5 - !$acc parallel loop collapse(3) - do i = 0, m - do j = 0, n - do k = 0, p - data_real_3D_slabz(i+1, j+1, k+1) = q_cons_vf(l)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) - end do - end do - end do - - ! 3D z-slab -> 1D x, y, z + ! cons vars + do l = 1, 5 !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, Nx + do i = 1, NxC do j = 1, Ny do k = 1, Nzloc - data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_batch(l, i, j, k) end do end do end do - - ! X FFT #if defined(MFC_OpenACC) - ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) #else - call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) #endif - - ! 1D x, y, z -> 1D y, x, z (CMPLX) !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC + do i = 1, NxC do j = 1, Ny do k = 1, Nzloc - data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) end do end do end do - - ! Y FFT #if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) #else - call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) -#endif - - ! 
1D y, x, z -> 3D z-slab + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC + do i = 1, Nx do j = 1, Ny do k = 1, Nzloc - data_cmplx_slabz_cons(l, i, j, k) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) end do end do - end do - ! pack data_cmplx_slabz_cons for MPI tranpose - end do - - ! cons vars MPI data transpose - call s_mpi_transpose_slabZ2Y_cons - - ! outer element loop - do l = 1, 5 - - ! 3D y-slab -> 1D z, x, y + end do !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Nyloc - do k = 1, Nz - data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_slaby_cons(l, i, j, k) + do i = 0, m + do j = 0, n + do k = 0, p + q_cons_filtered(l)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) end do end do end do + end do - ! Z FFT + ! pressure + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_batch(6, i, j, k) + end do + end do + end do #if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) #else - call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) #endif - - ! 
convolution with filtering kernel in Fourier space - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Nyloc - do k = 1, Nz - data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) * cmplx_kernelG1d(k + (i-1)*Nz + (j-1)*Nz*NxC) - end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) end do - end do - - ! ===== begin backward FFT ===== - ! Z inv FFT + end do + end do #if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) #else - call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) #endif - - ! 1D z, x, y -> 3D y-slab - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Nyloc - do k = 1, Nz - data_cmplx_slaby_cons(l, i, j, k) = data_cmplx_out1d(k + (i-1)*Nz + (j-1)*Nz*NxC) - end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) end do - end do - ! pack data_cmplx_slaby_cons for MPI tranpose + end do end do - - call s_mpi_transpose_slabY2Z_cons - - ! outer element loop - do l = 1, 5 - - ! 
3D z-slab -> 1D y, x, z - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_cons(l, i, j, k) - end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + filtered_pressure%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) end do - end do + end do + end do - ! Y inv FFT + ! reynolds stress + do l = 1, 3 + do q = 1, 3 + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_batch(6 + 3*(l-1) + q, i, j, k) + end do + end do + end do #if defined(MFC_OpenACC) - ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) #else - call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) #endif - - ! 1D y, x, z -> 1D x, y, z - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, NxC - do j = 1, Ny - do k = 1, Nzloc - data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do end do - end do - end do - - ! 
X inv FFT + end do #if defined(MFC_OpenACC) - ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) #else - call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) #endif - - ! 1D x, y, z -> 3D z-slab - !$acc parallel loop collapse(3) gang vector default(present) - do i = 1, Nx - do j = 1, Ny - do k = 1, Nzloc - data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + end do end do - end do + end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + reynolds_stress(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do end do + end do - !$acc parallel loop collapse(3) gang vector default(present) - do i = 0, m - do j = 0, n - do k = 0, p - q_cons_filtered(l)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) + ! 
effective viscosity + do l = 1, 3 + do q = 1, 3 + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) = data_cmplx_slabz_batch(15 + 3*(l-1) + q, i, j, k) + end do end do - end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j-1)*NxC + (k-1)*NxC*Ny) = data_cmplx_out1dy(j + (i-1)*Ny + (k-1)*Ny*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + !$acc parallel loop collapse(3) gang vector default(present) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j-1)*Nx + (k-1)*Nx*Ny) + end do + end do + end do + !$acc parallel loop collapse(3) gang vector default(present) + do i = 0, m + do j = 0, n + do k = 0, p + eff_visc(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i+1, j+1, k+1) / (real(Nx*Ny*Nz, dp) * filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do end do - end do - end subroutine s_filter_cons_vars + end subroutine s_filter_batch + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d subroutine s_mpi_FFT_fwd @@ -1850,12 +1914,10 @@ contains @:DEALLOCATE(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) @:DEALLOCATE(cmplx_kernelG1d, real_kernelG_in) @:DEALLOCATE(data_real_3D_slabz, data_cmplx_slabz, data_cmplx_slaby) - @:DEALLOCATE(data_cmplx_slabz_tensor, data_cmplx_slaby_tensor) - @:DEALLOCATE(data_cmplx_slabz_cons, data_cmplx_slaby_cons) + @:DEALLOCATE(data_cmplx_slabz_batch, 
data_cmplx_slaby_batch) @:DEALLOCATE(sendbuf_sf, recvbuf_sf) - @:DEALLOCATE(sendbuf_tensor, recvbuf_tensor) - @:DEALLOCATE(sendbuf_cons, recvbuf_cons) + @:DEALLOCATE(sendbuf_batch, recvbuf_batch) #if defined(MFC_OpenACC) ierr = cufftDestroy(plan_x_fwd_gpu) From f5731937f622169b40daa7e210c7616bcb5ff1ee Mon Sep 17 00:00:00 2001 From: Conrad Delgado Date: Wed, 8 Oct 2025 18:17:26 -0500 Subject: [PATCH 25/30] energy forcing --- src/simulation/m_additional_forcing.fpp | 26 ++++++++++++++----------- src/simulation/m_global_parameters.fpp | 6 +++--- src/simulation/m_mpi_proxy.fpp | 2 +- src/simulation/m_start_up.fpp | 4 ++-- toolchain/mfc/run/case_dicts.py | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp index b3b6807b55..a99dc186a6 100644 --- a/src/simulation/m_additional_forcing.fpp +++ b/src/simulation/m_additional_forcing.fpp @@ -20,11 +20,11 @@ module m_additional_forcing type(scalar_field), allocatable, dimension(:) :: q_periodic_force real(wp) :: volfrac_phi integer :: N_x_total_glb - real(wp) :: spatial_rho, spatial_u - real(wp) :: phase_rho, phase_u + real(wp) :: spatial_rho, spatial_u, spatial_E + real(wp) :: phase_rho, phase_u, phase_E !$acc declare create(q_periodic_force, volfrac_phi, N_x_total_glb) - !$acc declare create(spatial_rho, spatial_u, phase_rho, phase_u) + !$acc declare create(spatial_rho, spatial_u, spatial_E, phase_rho, phase_u, phase_E) contains @@ -65,35 +65,39 @@ contains subroutine s_compute_periodic_forcing(q_cons_vf, t_step) type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf integer, intent(in) :: t_step - real(wp) :: spatial_rho_glb, spatial_u_glb + real(wp) :: spatial_rho_glb, spatial_u_glb, spatial_E_glb integer :: i, j, k ! zero spatial averages spatial_rho = 0._wp spatial_u = 0._wp - !$acc update device(spatial_rho, spatial_u) + spatial_E = 0._wp + !$acc update device(spatial_rho, spatial_u, spatial_E) ! 
compute spatial averages - !$acc parallel loop collapse(3) gang vector default(present) reduction(+:spatial_rho, spatial_u) + !$acc parallel loop collapse(3) gang vector default(present) reduction(+:spatial_rho, spatial_u, spatial_E) do i = 0, m do j = 0, n do k = 0, p spatial_rho = spatial_rho + q_cons_vf(1)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! rho - spatial_u = spatial_u + q_cons_vf(2)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! u + spatial_u = spatial_u + q_cons_vf(2)%sf(i, j, k) * fluid_indicator_function%sf(i, j, k) ! rho*u + spatial_E = spatial_E + q_cons_vf(5)%sf(i, j, k)* fluid_indicator_function%sf(i, j, k) ! E end do end do end do - !$acc update host(spatial_rho, spatial_u) + !$acc update host(spatial_rho, spatial_u, spatial_E) ! reduction sum across entire domain call s_mpi_allreduce_sum(spatial_rho, spatial_rho_glb) call s_mpi_allreduce_sum(spatial_u, spatial_u_glb) - + call s_mpi_allreduce_sum(spatial_E, spatial_E_glb) + ! compute phase averages phase_rho = phase_rho + (spatial_rho_glb / real(N_x_total_glb, wp) - phase_rho) / real(t_step, wp) phase_u = phase_u + (spatial_u_glb / real(N_x_total_glb, wp) - phase_u) / real(t_step, wp) - !$acc update device(phase_rho, phase_u) + phase_E = phase_E + (spatial_E_glb / real(N_x_total_glb, wp) - phase_E) / real(t_step, wp) + !$acc update device(phase_rho, phase_u, phase_E) ! compute periodic forcing terms for mass, momentum, energy !$acc parallel loop collapse(3) gang vector default(present) @@ -107,7 +111,7 @@ contains q_periodic_force(2)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - phase_u/(1._wp - volfrac_phi)) / dt ! 
u*f_u - q_periodic_force(3)%sf(i, j, k) = q_cons_vf(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) * q_periodic_force(2)%sf(i, j, k) + q_periodic_force(3)%sf(i, j, k) = (P_inf_ref*gammas(1) + 0.5_wp*rho_inf_ref*u_inf_ref**2 - phase_E/(1._wp - volfrac_phi)) / dt end do end do end do diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 50590a26fe..276c3287fd 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -505,7 +505,7 @@ module m_global_parameters logical :: compute_particle_drag real(wp) :: u_inf_ref !< reference freestream velocity real(wp) :: rho_inf_ref !< reference freestream density - real(wp) :: T_inf_ref !< reference freestream temperature + real(wp) :: P_inf_ref !< reference freestream temperature logical :: periodic_forcing logical :: volume_filtering_momentum_eqn logical :: store_levelset @@ -515,7 +515,7 @@ module m_global_parameters real(wp) :: filter_width logical :: q_filtered_wrt - !$acc declare create(u_inf_ref, rho_inf_ref, T_inf_ref, filter_width) + !$acc declare create(u_inf_ref, rho_inf_ref, P_inf_ref, filter_width) contains @@ -796,7 +796,7 @@ contains compute_particle_drag = .false. u_inf_ref = dflt_real rho_inf_ref = dflt_real - T_inf_ref = dflt_real + P_inf_ref = dflt_real periodic_forcing = .false. volume_filtering_momentum_eqn = .false. store_levelset = .true. 
diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index bac8259b81..6af2a0363e 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -134,7 +134,7 @@ contains & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & & 'z_b', 't_stop', 't_save', 'cfl_target', 'rkck_tolerance', 'Bx0', & & 'tau_star', 'cont_damage_s', 'alpha_bar', 'u_inf_ref', & - & 'rho_inf_ref', 'T_inf_ref', 'filter_width' ] + & 'rho_inf_ref', 'P_inf_ref', 'filter_width' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index d83d73e9d3..ae76d86d40 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -187,7 +187,7 @@ contains rkck_adap_dt, rkck_tolerance, & hyperelasticity, R0ref, num_bc_patches, Bx0, powell, & cont_damage, tau_star, cont_damage_s, alpha_bar, & - periodic_ibs, compute_particle_drag, u_inf_ref, rho_inf_ref, T_inf_ref, & + periodic_ibs, compute_particle_drag, u_inf_ref, rho_inf_ref, P_inf_ref, & periodic_forcing, volume_filtering_momentum_eqn, store_levelset, & slab_domain_decomposition, compute_autocorrelation, t_step_stat_start, & filter_width, q_filtered_wrt @@ -1755,7 +1755,7 @@ contains !$acc update device(ib_markers%sf) end if - !$acc update device(u_inf_ref, rho_inf_ref, T_inf_ref, filter_width) + !$acc update device(u_inf_ref, rho_inf_ref, P_inf_ref, filter_width) end subroutine s_initialize_gpu_vars diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index b8ac4ba7c7..47357896ec 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -303,7 +303,7 @@ def analytic(self): 'compute_particle_drag': ParamType.LOG, 'u_inf_ref': ParamType.REAL, 'rho_inf_ref': ParamType.REAL, - 'T_inf_ref': ParamType.REAL, + 'P_inf_ref': ParamType.REAL, 'periodic_forcing': ParamType.LOG, 'volume_filtering_momentum_eqn': ParamType.LOG, 
'compute_autocorrelation': ParamType.LOG, From d3142618269ca9bbf996cd95b4326df3a326ff95 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Tue, 21 Oct 2025 11:52:05 -0500 Subject: [PATCH 26/30] div stress tensor fix --- src/simulation/m_volume_filtering.fpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 3cf904d84d..f2d31374fb 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -764,16 +764,16 @@ contains do j = 0, n do k = 0, p div_stress_tensor(1)%sf(i, j, k) = (stress_tensor(1)%vf(1)%sf(i+1, j, k) - stress_tensor(1)%vf(1)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & - + (stress_tensor(2)%vf(1)%sf(i, j+1, k) - stress_tensor(2)%vf(1)%sf(i, j-1, k)) / (dy(j-1) + dy(j+1)) & - + (stress_tensor(3)%vf(1)%sf(i, j, k+1) - stress_tensor(3)%vf(1)%sf(i, j, k-1)) / (dz(k-1) + dz(k+1)) + + (stress_tensor(1)%vf(2)%sf(i, j+1, k) - stress_tensor(1)%vf(2)%sf(i, j-1, k)) / (dy(j-1) + dy(j+1)) & + + (stress_tensor(1)%vf(3)%sf(i, j, k+1) - stress_tensor(1)%vf(3)%sf(i, j, k-1)) / (dz(k-1) + dz(k+1)) - div_stress_tensor(2)%sf(i, j, k) = (stress_tensor(1)%vf(2)%sf(i+1, j, k) - stress_tensor(1)%vf(2)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & + div_stress_tensor(2)%sf(i, j, k) = (stress_tensor(2)%vf(1)%sf(i+1, j, k) - stress_tensor(2)%vf(1)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & + (stress_tensor(2)%vf(2)%sf(i, j+1, k) - stress_tensor(2)%vf(2)%sf(i, j-1, k)) / (dy(j-1) + dy(j+1)) & - + (stress_tensor(3)%vf(2)%sf(i, j, k+1) - stress_tensor(3)%vf(2)%sf(i, j, k-1)) / (dz(k-1) + dz(k+1)) + + (stress_tensor(2)%vf(3)%sf(i, j, k+1) - stress_tensor(2)%vf(3)%sf(i, j, k-1)) / (dz(k-1) + dz(k+1)) - div_stress_tensor(3)%sf(i, j, k) = (stress_tensor(1)%vf(3)%sf(i+1, j, k) - stress_tensor(1)%vf(3)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & - + (stress_tensor(2)%vf(3)%sf(i, j+1, k) - stress_tensor(2)%vf(3)%sf(i, j-1, k)) / (dy(j-1) + dy(j+1)) & - + 
(stress_tensor(3)%vf(3)%sf(i, j, k+1) - stress_tensor(3)%vf(3)%sf(i, j ,k-1)) / (dz(k-1) + dz(k+1)) + div_stress_tensor(3)%sf(i, j, k) = (stress_tensor(3)%vf(1)%sf(i+1, j, k) - stress_tensor(3)%vf(1)%sf(i-1, j, k)) / (dx(i-1) + dx(i+1)) & + + (stress_tensor(3)%vf(2)%sf(i, j+1, k) - stress_tensor(3)%vf(2)%sf(i, j-1, k)) / (dy(j-1) + dy(j+1)) & + + (stress_tensor(3)%vf(3)%sf(i, j, k+1) - stress_tensor(3)%vf(3)%sf(i, j, k-1)) / (dz(k-1) + dz(k+1)) end do end do end do @@ -972,11 +972,11 @@ contains do k = 0, p dvol = dx(i) * dy(j) * dz(k) !$acc atomic - particle_forces(ib_markers%sf(i, j, k), 1) = particle_forces(ib_markers%sf(i, j, k), 1) - div_pres_visc_stress(1)%sf(i, j, k) * dvol + particle_forces(ib_markers%sf(i, j, k), 1) = particle_forces(ib_markers%sf(i, j, k), 1) - (div_pres_visc_stress(1)%sf(i, j, k) * dvol) !$acc atomic - particle_forces(ib_markers%sf(i, j, k), 2) = particle_forces(ib_markers%sf(i, j, k), 2) - div_pres_visc_stress(2)%sf(i, j, k) * dvol + particle_forces(ib_markers%sf(i, j, k), 2) = particle_forces(ib_markers%sf(i, j, k), 2) - (div_pres_visc_stress(2)%sf(i, j, k) * dvol) !$acc atomic - particle_forces(ib_markers%sf(i, j, k), 3) = particle_forces(ib_markers%sf(i, j, k), 3) - div_pres_visc_stress(3)%sf(i, j, k) * dvol + particle_forces(ib_markers%sf(i, j, k), 3) = particle_forces(ib_markers%sf(i, j, k), 3) - (div_pres_visc_stress(3)%sf(i, j, k) * dvol) end do end do end do @@ -998,6 +998,7 @@ contains ! 
write particle forces to file if (proc_rank == 0) then write(100) force_glb + flush(100) end if end subroutine s_compute_particle_forces From 646831b8795838eb28720e8dba0dd7ff605b7a04 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Tue, 4 Nov 2025 19:49:00 -0600 Subject: [PATCH 27/30] src/common --- src/post_process/m_data_input.f90 | 8 ++--- src/post_process/m_global_parameters.fpp | 10 +++---- src/simulation/m_data_output.fpp | 18 +++++------ src/simulation/m_ibm.fpp | 38 ++++++++++++------------ src/simulation/m_volume_filtering.fpp | 14 ++++----- 5 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90 index 6b90daec08..72c200a15c 100644 --- a/src/post_process/m_data_input.f90 +++ b/src/post_process/m_data_input.f90 @@ -674,7 +674,7 @@ impure subroutine s_read_parallel_filtered_data(t_step, m_MOK, n_MOK, p_MOK, WP_ ! Initialize MPI data I/O - call s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, & + call s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, & stat_q_cons_filtered, stat_filtered_pressure, & stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) @@ -691,7 +691,7 @@ impure subroutine s_read_parallel_filtered_data(t_step, m_MOK, n_MOK, p_MOK, WP_ WP_MOK = int(8._wp, MPI_OFFSET_KIND) MOK = int(1._wp, MPI_OFFSET_KIND) str_MOK = int(name_len, MPI_OFFSET_KIND) - NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) + NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) call s_setup_mpi_io_params(data_size, m_MOK, n_MOK, p_MOK, WP_MOK, MOK, str_MOK, NVARS_MOK) @@ -703,9 +703,9 @@ impure subroutine s_read_parallel_filtered_data(t_step, m_MOK, n_MOK, p_MOK, WP_ disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & - 'native', mpi_info_int, ierr) + 'native', mpi_info_int, ierr) call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & - mpi_p, status, ierr) + mpi_p, status, ierr) end do call 
s_mpi_barrier() diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 0108979c0a..930123a6d9 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -842,9 +842,9 @@ contains #ifdef MFC_MPI if (q_filtered_wrt) then - allocate (MPI_IO_DATA%view(1:sys_size+1+4*9+4*9+3*4+6*4)) - allocate (MPI_IO_DATA%var (1:sys_size+1+4*9+4*9+3*4+6*4)) - do i = 1, sys_size+1+4*9+4*9+3*4+6*4 + allocate (MPI_IO_DATA%view(1:sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4)) + allocate (MPI_IO_DATA%var(1:sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4)) + do i = 1, sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4 allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do @@ -1034,8 +1034,8 @@ contains MPI_IO_DATA%var(i)%sf => null() end do - if (q_filtered_wrt) then - do i = sys_size+1, sys_size+1+4*9+4*9+3*4+6*4 + if (q_filtered_wrt) then + do i = sys_size + 1, sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4 MPI_IO_DATA%var(i)%sf => null() end do end if diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 8e2eeff299..c2568ab415 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -82,9 +82,9 @@ contains !! @param q_cons_vf Conservative variables !! @param q_prim_vf Primitive variables !! @param t_step Current time step - impure subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta, & - filtered_fluid_indicator_function, & - stat_q_cons_filtered, stat_filtered_pressure, & + impure subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta, & + filtered_fluid_indicator_function, & + stat_q_cons_filtered, stat_filtered_pressure, & stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) type(scalar_field), & @@ -117,9 +117,9 @@ contains if (.not. 
parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta) else - call s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta, & - filtered_fluid_indicator_function, & - stat_q_cons_filtered, stat_filtered_pressure, & + call s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta, & + filtered_fluid_indicator_function, & + stat_q_cons_filtered, stat_filtered_pressure, & stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) end if @@ -798,7 +798,7 @@ contains !! @param beta Eulerian void fraction from lagrangian bubbles impure subroutine s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta, & filtered_fluid_indicator_function, & - stat_q_cons_filtered, stat_filtered_pressure, & + stat_q_cons_filtered, stat_filtered_pressure, & stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf @@ -957,8 +957,8 @@ contains call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm) if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then call s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, & - stat_q_cons_filtered, stat_filtered_pressure, & - stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + stat_q_cons_filtered, stat_filtered_pressure, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) end if elseif (present(beta)) then call s_initialize_mpi_data(q_cons_vf, beta=beta) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 559c2088e4..7413fa48e9 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -430,7 +430,7 @@ contains ! Calculate and store the precise location of the image point patch_id = gp%ib_patch_id - if (store_levelset) then + if (store_levelset) then dist = abs(levelset%sf(i, j, k, patch_id)) norm(:) = levelset_norm%sf(i, j, k, patch_id, :) else ! 
compute levelset and levelset_norm on the fly @@ -446,54 +446,54 @@ contains if (periodic_ibs) then if ((x_centroid - x_domain_beg_glb) <= radius) then x_pcen = x_domain_end_glb + (x_centroid - x_domain_beg_glb) - else if ((x_domain_end_glb - x_centroid) <= radius) then + else if ((x_domain_end_glb - x_centroid) <= radius) then x_pcen = x_domain_beg_glb - (x_domain_end_glb - x_centroid) - else + else x_pcen = x_centroid end if if ((y_centroid - y_domain_beg_glb) <= radius) then y_pcen = y_domain_end_glb + (y_centroid - y_domain_beg_glb) - else if ((y_domain_end_glb - y_centroid) <= radius) then + else if ((y_domain_end_glb - y_centroid) <= radius) then y_pcen = y_domain_beg_glb - (y_domain_end_glb - y_centroid) - else + else y_pcen = y_centroid end if if ((z_centroid - z_domain_beg_glb) <= radius) then z_pcen = z_domain_end_glb + (z_centroid - z_domain_beg_glb) - else if ((z_domain_end_glb - z_centroid) <= radius) then + else if ((z_domain_end_glb - z_centroid) <= radius) then z_pcen = z_domain_beg_glb - (z_domain_end_glb - z_centroid) - else + else z_pcen = z_centroid end if - dist_vec_per(1, 1) = x_cc(i) - x_pcen + dist_vec_per(1, 1) = x_cc(i) - x_pcen dist_vec_per(1, 2) = y_cc(j) - y_pcen dist_vec_per(1, 3) = z_cc(k) - z_pcen dist_per(1) = sqrt(sum(dist_vec_per(1, :)**2)) - if (dist_per(1) < dist_calc) then + if (dist_per(1) < dist_calc) then dist_calc = dist_per(1) dist_vec = dist_vec_per(1, :) - end if - dist_vec_per(2, 1) = x_cc(i) - x_pcen + end if + dist_vec_per(2, 1) = x_cc(i) - x_pcen dist_vec_per(2, 2) = y_cc(j) - y_centroid dist_vec_per(2, 3) = z_cc(k) - z_pcen dist_per(2) = sqrt(sum(dist_vec_per(2, :)**2)) - if (dist_per(2) < dist_calc) then + if (dist_per(2) < dist_calc) then dist_calc = dist_per(2) dist_vec = dist_vec_per(2, :) end if - dist_vec_per(3, 1) = x_cc(i) - x_pcen + dist_vec_per(3, 1) = x_cc(i) - x_pcen dist_vec_per(3, 2) = y_cc(j) - y_pcen dist_vec_per(3, 3) = z_cc(k) - z_centroid dist_per(3) = sqrt(sum(dist_vec_per(3, :)**2)) - if 
(dist_per(3) < dist_calc) then + if (dist_per(3) < dist_calc) then dist_calc = dist_per(3) dist_vec = dist_vec_per(3, :) end if - dist_vec_per(4, 1) = x_cc(i) - x_pcen + dist_vec_per(4, 1) = x_cc(i) - x_pcen dist_vec_per(4, 2) = y_cc(j) - y_centroid dist_vec_per(4, 3) = z_cc(k) - z_centroid dist_per(4) = sqrt(sum(dist_vec_per(4, :)**2)) - if (dist_per(4) < dist_calc) then + if (dist_per(4) < dist_calc) then dist_calc = dist_per(4) dist_vec = dist_vec_per(4, :) end if @@ -501,7 +501,7 @@ contains dist_vec_per(5, 2) = y_cc(j) - y_pcen dist_vec_per(5, 3) = z_cc(k) - z_pcen dist_per(5) = sqrt(sum(dist_vec_per(5, :)**2)) - if (dist_per(5) < dist_calc) then + if (dist_per(5) < dist_calc) then dist_calc = dist_per(5) dist_vec = dist_vec_per(5, :) end if @@ -509,7 +509,7 @@ contains dist_vec_per(6, 2) = y_cc(j) - y_pcen dist_vec_per(6, 3) = z_cc(k) - z_centroid dist_per(6) = sqrt(sum(dist_vec_per(6, :)**2)) - if (dist_per(6) < dist_calc) then + if (dist_per(6) < dist_calc) then dist_calc = dist_per(6) dist_vec = dist_vec_per(6, :) end if @@ -517,7 +517,7 @@ contains dist_vec_per(7, 2) = y_cc(j) - y_centroid dist_vec_per(7, 3) = z_cc(k) - z_pcen dist_per(7) = sqrt(sum(dist_vec_per(7, :)**2)) - if (dist_per(7) < dist_calc) then + if (dist_per(7) < dist_calc) then dist_calc = dist_per(7) dist_vec = dist_vec_per(7, :) end if diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 63909895e9..2db1eb8517 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -338,9 +338,9 @@ contains ! 
file for particle forces if (compute_particle_drag) then - if (proc_rank == 0) then - open (unit=100, file='particle_force.bin', status='replace', form='unformatted', access='stream', action='write') - end if + if (proc_rank == 0) then + open (unit=100, file='particle_force.bin', status='replace', form='unformatted', access='stream', action='write') + end if end if end subroutine s_initialize_fftw_explicit_filter_module @@ -1915,10 +1915,10 @@ contains call fftw_destroy_plan(plan_z_c2c_kernelG) #endif - if (compute_particle_drag) then - if (proc_rank == 0) then - close (100) - end if + if (compute_particle_drag) then + if (proc_rank == 0) then + close (100) + end if end if end subroutine s_finalize_fftw_explicit_filter_module From 0a4d4e5c17aba91890ed44157b1ef4627dac3358 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Tue, 4 Nov 2025 19:49:37 -0600 Subject: [PATCH 28/30] formatting --- src/common/m_boundary_common.fpp | 2 +- src/common/m_mpi_common.fpp | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index 73dd1bafa5..4c01b10f22 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -35,7 +35,7 @@ module m_boundary_common private; public :: s_initialize_boundary_common_module, & s_populate_variables_buffers, & - s_populate_scalarfield_buffers, & + s_populate_scalarfield_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & s_populate_F_igr_buffers, & diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 79f1e41556..fd7f002884 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -98,14 +98,14 @@ contains #ifdef MFC_SIMULATION if (volume_filtering_momentum_eqn) then - halo_size_sf = nint(-1._wp + 1._wp*buff_size* & - & (m + 2*buff_size + 1)* & - & (n + 2*buff_size + 1)* & - & (p + 2*buff_size + 1)/ & - & (cells_bounds%mnp_min + 2*buff_size + 1)) - allocate 
(buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) - $:GPU_ENTER_DATA(create='[capture:buff_send_scalarfield]') - $:GPU_ENTER_DATA(create='[capture:buff_recv_scalarfield]') + halo_size_sf = nint(-1._wp + 1._wp*buff_size* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (cells_bounds%mnp_min + 2*buff_size + 1)) + allocate (buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) + $:GPU_ENTER_DATA(create='[capture:buff_send_scalarfield]') + $:GPU_ENTER_DATA(create='[capture:buff_recv_scalarfield]') end if #endif #endif @@ -308,7 +308,7 @@ contains !! @param stat_reynolds_stress 1-4 order statistics of reynolds stress tensor !! @param stat_eff_visc 1-4 order statistics of unclosed effective viscosity tensor !! @param stat_int_mom_exch 1-4 order statistics of interphase momentum exchange vector - impure subroutine s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, & + impure subroutine s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, & stat_q_cons_filtered, stat_filtered_pressure, & stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) @@ -1536,13 +1536,13 @@ contains end do - ! Decompose domain into z-slabs - else if (slab_domain_decomposition) then + ! 
Decompose domain into z-slabs + else if (slab_domain_decomposition) then num_procs_x = 1 num_procs_y = 1 num_procs_z = num_procs ierr = -1 - if (mod((p+1), num_procs_z) == 0) then + if (mod((p + 1), num_procs_z) == 0) then ierr = 0 end if else From 2d90828a0f5dd05eeba40ab56e46fc9a419ef287 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Thu, 13 Nov 2025 14:36:06 -0600 Subject: [PATCH 29/30] periodic ib bug, gp selection alg --- src/common/m_mpi_common.fpp | 6 ++ src/simulation/m_ibm.fpp | 142 +++++++++++++++++++++++--- src/simulation/m_volume_filtering.fpp | 38 +++---- 3 files changed, 151 insertions(+), 35 deletions(-) diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index fd7f002884..62429f300d 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -51,7 +51,9 @@ module m_mpi_common real(wp), private, allocatable, dimension(:), target :: buff_recv_scalarfield !! This variable is utilized to receive and unpack the buffer of any scalar field from neighboring processors +#ifndef __NVCOMPILER_GPU_UNIFIED_MEM $:GPU_DECLARE(create='[buff_send_scalarfield, buff_recv_scalarfield]') +#endif contains @@ -103,9 +105,13 @@ contains & (n + 2*buff_size + 1)* & & (p + 2*buff_size + 1)/ & & (cells_bounds%mnp_min + 2*buff_size + 1)) +#ifndef __NVCOMPILER_GPU_UNIFIED_MEM + @:ALLOCATE(buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) +#else allocate (buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) $:GPU_ENTER_DATA(create='[capture:buff_send_scalarfield]') $:GPU_ENTER_DATA(create='[capture:buff_recv_scalarfield]') +#endif end if #endif #endif diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 7413fa48e9..33812d17d7 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -147,6 +147,7 @@ contains end subroutine s_ibm_setup subroutine s_populate_ib_buffers() + integer :: j, k, l #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] #:for LOCC, LOCI in 
[('beg', -1), ('end', 1)] @@ -156,6 +157,77 @@ contains #:endfor #:endfor + if (periodic_ibs) then + ! Population of Buffers in x-direction + do l = 0, p + do k = 0, n + if (bc_x%beg == BC_PERIODIC) then + do j = 1, buff_size + ib_markers%sf(-j, k, l) = & + ib_markers%sf(m - (j - 1), k, l) + end do + end if + end do + end do + + do l = 0, p + do k = 0, n + if (bc_x%end == BC_PERIODIC) then + do j = 1, buff_size + ib_markers%sf(m + j, k, l) = & + ib_markers%sf(j - 1, k, l) + end do + end if + end do + end do + + ! Population of Buffers in y-direction + do l = 0, p + do k = -buff_size, m + buff_size + if (bc_y%beg == BC_PERIODIC) then + do j = 1, buff_size + ib_markers%sf(k, -j, l) = & + ib_markers%sf(k, n - (j - 1), l) + end do + end if + end do + end do + + do l = 0, p + do k = -buff_size, m + buff_size + if (bc_y%end == BC_PERIODIC) then + do j = 1, buff_size + ib_markers%sf(k, n + j, l) = & + ib_markers%sf(k, j - 1, l) + end do + end if + end do + end do + + ! Population of Buffers in z-direction + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + if (bc_z%beg == BC_PERIODIC) then + do j = 1, buff_size + ib_markers%sf(k, l, -j) = & + ib_markers%sf(k, l, p - (j - 1)) + end do + end if + end do + end do + + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + if (bc_z%end == BC_PERIODIC) then + do j = 1, buff_size + ib_markers%sf(k, l, p + j) = & + ib_markers%sf(k, l, j - 1) + end do + end if + end do + end do + end if + end subroutine s_populate_ib_buffers !> Subroutine that updates the conservative variables at the ghost points @@ -529,6 +601,7 @@ contains norm(:) = dist_vec(:)/dist_calc end if end if ! end store_levelset if statement + ghost_points_in(q)%ip_loc(:) = physical_loc(:) + 2*dist*norm(:) ! Find the closest grid point to the image point @@ -537,13 +610,13 @@ contains ! 
s_cc points to the dim array we need if (dim == 1) then s_cc => x_cc - bound = m + buff_size - 1 + bound = m + buff_size elseif (dim == 2) then s_cc => y_cc - bound = n + buff_size - 1 + bound = n + buff_size else s_cc => z_cc - bound = p + buff_size - 1 + bound = p + buff_size end if if (f_approx_equal(norm(dim), 0._wp)) then @@ -562,7 +635,10 @@ contains .or. temp_loc > s_cc(index + 1))) index = index + dir if (index < -buff_size .or. index > bound) then - print *, "temp_loc=", temp_loc, " s_cc(index)=", s_cc(index), " s_cc(index+1)=", s_cc(index + 1) + print *, "proc_rank=", proc_rank, "temp_loc=", temp_loc, " index=", index, "ib=", patch_id, "dim", dim, "dir", dir + print *, i, j, k, physical_loc, ghost_points_in(q)%ip_loc(:) + print *, x_centroid, y_centroid, z_centroid + print *, norm, dist print *, "Increase buff_size further in m_helper_basic (currently set to a minimum of 10)" error stop "Increase buff_size" end if @@ -590,6 +666,9 @@ contains :: subsection_2D integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & :: subsection_3D + integer, dimension(2*gp_layers + 1) :: subsection_x + integer, dimension(2*gp_layers + 1) :: subsection_y + integer, dimension(2*gp_layers + 1) :: subsection_z integer :: i, j, k!< Iterator variables num_gps_out = 0 @@ -611,14 +690,26 @@ contains else do k = 0, p if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then - num_gps_out = num_gps_out + 1 + ! subsection_3D = ib_markers%sf( & + ! i - gp_layers:i + gp_layers, & + ! j - gp_layers:j + gp_layers, & + ! k - gp_layers:k + gp_layers) + ! if (any(subsection_3D == 0)) then + ! num_gps_out = num_gps_out + 1 + ! else + ! num_inner_gps_out = num_inner_gps_out + 1 + ! 
end if + + subsection_x = ib_markers%sf(i - gp_layers:i + gp_layers, j, k) + subsection_y = ib_markers%sf(i, j - gp_layers:j + gp_layers, k) + subsection_z = ib_markers%sf(i, j, k - gp_layers:k + gp_layers) + + if (any(subsection_x == 0) .or. & + any(subsection_y == 0) .or. & + any(subsection_z == 0)) then + num_gps_out = num_gps_out + 1 else - num_inner_gps_out = num_inner_gps_out + 1 + num_inner_gps_out = num_inner_gps_out + 1 end if end if end do @@ -637,6 +728,9 @@ contains :: subsection_2D integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & :: subsection_3D + integer, dimension(2*gp_layers + 1) :: subsection_x + integer, dimension(2*gp_layers + 1) :: subsection_y + integer, dimension(2*gp_layers + 1) :: subsection_z integer :: i, j, k !< Iterator variables integer :: count, count_i integer :: patch_id @@ -693,11 +787,27 @@ contains ! 3D do k = 0, p if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then + ! subsection_3D = ib_markers%sf( & + ! i - gp_layers:i + gp_layers, & + ! j - gp_layers:j + gp_layers, & + ! k - gp_layers:k + gp_layers) + + subsection_x = ib_markers%sf(i - gp_layers:i + gp_layers, j, k) + subsection_y = ib_markers%sf(i, j - gp_layers:j + gp_layers, k) + subsection_z = ib_markers%sf(i, j, k - gp_layers:k + gp_layers) + + if (any(subsection_x == 0) .or. & + any(subsection_y == 0) .or. & + any(subsection_z == 0)) then + + if (i== 7 .and. j== 26 .and. k== 0) then + print *, 'HERE' + print *, 'x', subsection_x, 'y', subsection_y, 'z', subsection_z + print *, proc_rank, ib_markers%sf(7, 26, -1) + end if + + + ! 
if (any(subsection_3D == 0)) then ghost_points_in(count)%loc = [i, j, k] patch_id = ib_markers%sf(i, j, k) ghost_points_in(count)%ib_patch_id = & diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp index 2db1eb8517..94f9804955 100644 --- a/src/simulation/m_volume_filtering.fpp +++ b/src/simulation/m_volume_filtering.fpp @@ -347,11 +347,11 @@ contains !< initialize the gaussian filtering kernel in real space and then compute its DFT subroutine s_initialize_filtering_kernel - real(dp) :: sigma_stddev - real(dp) :: Lx, Ly, Lz - real(dp) :: x_r, y_r, z_r - real(dp) :: r2 - real(dp) :: G_norm_int, G_norm_int_glb + real(wp) :: sigma_stddev + real(wp) :: Lx, Ly, Lz + real(wp) :: x_r, y_r, z_r + real(wp) :: r2 + real(wp) :: G_norm_int, G_norm_int_glb integer :: i, j, k ! gaussian filter @@ -361,7 +361,7 @@ contains Ly = y_domain_end_glb - y_domain_beg_glb Lz = z_domain_end_glb - z_domain_beg_glb - G_norm_int = 0.0_dp + G_norm_int = 0.0_wp $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[G_norm_int]]', reductionOp='[+]', copyin='[Lx, Ly, Lz, sigma_stddev]', private='[x_r, y_r, z_r, r2]') do i = 0, m @@ -373,7 +373,7 @@ contains r2 = x_r**2 + y_r**2 + z_r**2 - real_kernelG_in(i + 1, j + 1, k + 1) = exp(-r2/(2.0_dp*sigma_stddev**2)) + real_kernelG_in(i + 1, j + 1, k + 1) = exp(-r2/(2.0_wp*sigma_stddev**2)) G_norm_int = G_norm_int + real_kernelG_in(i + 1, j + 1, k + 1)*dx(i)*dy(j)*dz(k) end do @@ -462,7 +462,7 @@ contains do i = 1, NxC do j = 1, Nyloc do k = 1, Nz - cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)/(real(Nx*Ny*Nz, dp)) + cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)/(real(Nx*Ny*Nz, wp)) end do end do end do @@ -486,9 +486,9 @@ contains do j = 0, n do k = 0, p if (ib_markers%sf(i, j, k) == 0) then - fluid_indicator_function%sf(i, j, k) = 1.0_dp + fluid_indicator_function%sf(i, j, k) = 1.0_wp else - 
fluid_indicator_function%sf(i, j, k) = 0.0_dp + fluid_indicator_function%sf(i, j, k) = 0.0_wp end if end do end do @@ -531,7 +531,7 @@ contains do i = 1, Nx do j = 1, Ny do k = 1, Nzloc - filtered_fluid_indicator_function%sf(i - 1, j - 1, k - 1) = data_real_3D_slabz(i, j, k)/(real(Nx*Ny*Nz, dp)) + filtered_fluid_indicator_function%sf(i - 1, j - 1, k - 1) = data_real_3D_slabz(i, j, k)/(real(Nx*Ny*Nz, wp)) end do end do end do @@ -616,7 +616,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - data_real_3D_slabz(i + 1, j + 1, k + 1) = q_temp_in%sf(i, j, k)*(1.0_dp - fluid_indicator_function%sf(i, j, k)) + data_real_3D_slabz(i + 1, j + 1, k + 1) = q_temp_in%sf(i, j, k)*(1.0_wp - fluid_indicator_function%sf(i, j, k)) end do end do end do @@ -644,7 +644,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - q_temp_out%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, dp)*filtered_fluid_indicator_function%sf(i, j, k)) + q_temp_out%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) end do end do end do @@ -653,7 +653,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - q_temp_in%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, dp)*filtered_fluid_indicator_function%sf(i, j, k)) + q_temp_in%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) end do end do end do @@ -938,7 +938,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - int_mom_exch(l)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, dp)) + int_mom_exch(l)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)) end do end do end do @@ -1535,7 +1535,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - q_cons_filtered(l)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, dp)*filtered_fluid_indicator_function%sf(i, j, k)) + q_cons_filtered(l)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k 
+ 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) end do end do end do @@ -1580,7 +1580,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - filtered_pressure%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, dp)*filtered_fluid_indicator_function%sf(i, j, k)) + filtered_pressure%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) end do end do end do @@ -1626,7 +1626,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - reynolds_stress(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, dp)*filtered_fluid_indicator_function%sf(i, j, k)) + reynolds_stress(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) end do end do end do @@ -1674,7 +1674,7 @@ contains do i = 0, m do j = 0, n do k = 0, p - eff_visc(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, dp)*filtered_fluid_indicator_function%sf(i, j, k)) + eff_visc(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) end do end do end do From 9c6d3fa99165e2a7745fca817ee8ec5b43fde509 Mon Sep 17 00:00:00 2001 From: conradd3 Date: Thu, 13 Nov 2025 16:03:36 -0600 Subject: [PATCH 30/30] gpu ib buff populate bug fix --- runs/phi01/case.py | 59 +++++++++-------- src/common/m_mpi_common.fpp | 2 +- src/simulation/m_ibm.fpp | 126 +++++++++++++++++------------------- 3 files changed, 90 insertions(+), 97 deletions(-) diff --git a/runs/phi01/case.py b/runs/phi01/case.py index 9751518117..c67369d9c7 100644 --- a/runs/phi01/case.py +++ b/runs/phi01/case.py @@ -2,45 +2,34 @@ import math import numpy as np -''' -need to store -full stats of unclosed term tensors (1, 2, 3, 4) - only at end time -stats of flow quantities - only at end time -flow quantities -filtered fluid indicator function -drag force on each particle 
-''' - -Mu = 1.84e-05 + gam_a = 1.4 -R = 287.0 D = 0.1 +L = 10 * D -P = 101325 # Pa -rho = 1.225 # kg/m^3 - -T = P/(rho*R) - -M = 1.2 +M = 0.8 Re = 1500.0 -v1 = M*(gam_a*P/rho)**(1.0/2.0) -mu = rho*v1*D/Re # dynamic viscosity for current case +P = 101325 +rho = 1.225 + +v1 = M * np.sqrt(gam_a * P / rho) +mu = rho * v1 * D / Re #print('mu: ', mu) #print('v1: ', v1) #print('rho: ', rho) #print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow -dt = 4.0E-06 -Nt = 100 -t_save = 10 -t_step_start_stats = 50 +dt = 5.0E-06 +Nt = 200 #int(1 * L / v1 / dt) +t_save = Nt//5 +t_step_start_stats = Nt//2 -Nx = 99 -Ny = 99 -Nz = 99 +Nx = 199 +Ny = Nx +Nz = Ny # load initial sphere locations sphere_loc = np.loadtxt('sphere_array_locations.txt') @@ -58,6 +47,15 @@ f"patch_ib({i+1})%slip": "F", }) +# ib_dict.update({ +# f"patch_ib({1})%geometry": 8, +# f"patch_ib({1})%x_centroid": sphere_loc[20, 0], +# f"patch_ib({1})%y_centroid": sphere_loc[20, 1], +# f"patch_ib({1})%z_centroid": sphere_loc[20, 2], +# f"patch_ib({1})%radius": D / 2, +# f"patch_ib({1})%slip": "F", +# }) + # Configuring case dictionary case_dict = { # Logistics @@ -78,8 +76,8 @@ "p": Nz, "dt": dt, "t_step_start": 0, - "t_step_stop": Nt, # 3000 - "t_step_save": t_save, # 10 + "t_step_stop": Nt, + "t_step_save": t_save, "t_step_stat_start": t_step_start_stats, # Simulation Algorithm Parameters # Only one patches are necessary, the air tube @@ -154,11 +152,12 @@ "periodic_forcing": "T", "periodic_ibs": "T", "volume_filtering_momentum_eqn": "T", - "filter_width": 3.0*D/2, + "filter_width": 3.0*D/2 * np.sqrt(2/(9*np.pi)), + "compute_particle_drag": "T", "u_inf_ref": v1, "rho_inf_ref": rho, - "T_inf_ref": T, + "P_inf_ref": P, "store_levelset": "F", "slab_domain_decomposition": "T", diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 62429f300d..c5f926c51c 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -105,7 +105,7 @@ contains & (n + 
2*buff_size + 1)* & & (p + 2*buff_size + 1)/ & & (cells_bounds%mnp_min + 2*buff_size + 1)) -#ifndef __NVCOMPILER_GPU_UNIFIED_MEM +#ifndef __NVCOMPILER_GPU_UNIFIED_MEM @:ALLOCATE(buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) #else allocate (buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 33812d17d7..444b4ab8d5 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -147,7 +147,7 @@ contains end subroutine s_ibm_setup subroutine s_populate_ib_buffers() - integer :: j, k, l + integer :: j, k, l #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] #:for LOCC, LOCI in [('beg', -1), ('end', 1)] @@ -157,75 +157,81 @@ contains #:endfor #:endfor - if (periodic_ibs) then + if (periodic_ibs) then ! Population of Buffers in x-direction - do l = 0, p - do k = 0, n - if (bc_x%beg == BC_PERIODIC) then + if (bc_x%beg == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = 0, n do j = 1, buff_size ib_markers%sf(-j, k, l) = & - ib_markers%sf(m - (j - 1), k, l) + ib_markers%sf(m - (j - 1), k, l) end do - end if + end do end do - end do + end if - do l = 0, p - do k = 0, n - if (bc_x%end == BC_PERIODIC) then + if (bc_x%end == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = 0, n do j = 1, buff_size - ib_markers%sf(m + j, k, l) = & - ib_markers%sf(j - 1, k, l) - end do - end if + ib_markers%sf(m + j, k, l) = & + ib_markers%sf(j - 1, k, l) + end do + end do end do - end do - + end if + ! 
Population of Buffers in y-direction - do l = 0, p - do k = -buff_size, m + buff_size - if (bc_y%beg == BC_PERIODIC) then + if (bc_y%beg == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = -buff_size, m + buff_size do j = 1, buff_size ib_markers%sf(k, -j, l) = & - ib_markers%sf(k, n - (j - 1), l) + ib_markers%sf(k, n - (j - 1), l) end do - end if + end do end do - end do + end if - do l = 0, p - do k = -buff_size, m + buff_size - if (bc_y%end == BC_PERIODIC) then + if (bc_y%end == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = -buff_size, m + buff_size do j = 1, buff_size ib_markers%sf(k, n + j, l) = & - ib_markers%sf(k, j - 1, l) + ib_markers%sf(k, j - 1, l) end do - end if + end do end do - end do + end if ! Population of Buffers in z-direction - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (bc_z%beg == BC_PERIODIC) then + if (bc_z%beg == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size do j = 1, buff_size ib_markers%sf(k, l, -j) = & - ib_markers%sf(k, l, p - (j - 1)) + ib_markers%sf(k, l, p - (j - 1)) end do - end if + end do end do - end do + end if - do l = -buff_size, n + buff_size - do k = -buff_size, m + buff_size - if (bc_z%end == BC_PERIODIC) then + if (bc_z%end == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size do j = 1, buff_size ib_markers%sf(k, l, p + j) = & - ib_markers%sf(k, l, j - 1) + ib_markers%sf(k, l, j - 1) end do - end if + end do end do - end do + end if end if end subroutine s_populate_ib_buffers @@ -610,13 +616,13 @@ contains ! 
s_cc points to the dim array we need if (dim == 1) then s_cc => x_cc - bound = m + buff_size + bound = m + buff_size elseif (dim == 2) then s_cc => y_cc - bound = n + buff_size + bound = n + buff_size else s_cc => z_cc - bound = p + buff_size + bound = p + buff_size end if if (f_approx_equal(norm(dim), 0._wp)) then @@ -635,10 +641,7 @@ contains .or. temp_loc > s_cc(index + 1))) index = index + dir if (index < -buff_size .or. index > bound) then - print *, "proc_rank=", proc_rank, "temp_loc=", temp_loc, " index=", index, "ib=", patch_id, "dim", dim, "dir", dir - print *, i, j, k, physical_loc, ghost_points_in(q)%ip_loc(:) - print *, x_centroid, y_centroid, z_centroid - print *, norm, dist + print *, "proc_rank=", proc_rank, "temp_loc=", temp_loc, " index=", index, "ib=", patch_id, "dim", dim, "dir", dir, "i, j, k", i, j, k print *, "Increase buff_size further in m_helper_basic (currently set to a minimum of 10)" error stop "Increase buff_size" end if @@ -703,13 +706,12 @@ contains subsection_x = ib_markers%sf(i - gp_layers:i + gp_layers, j, k) subsection_y = ib_markers%sf(i, j - gp_layers:j + gp_layers, k) subsection_z = ib_markers%sf(i, j, k - gp_layers:k + gp_layers) - - if (any(subsection_x == 0) .or. & - any(subsection_y == 0) .or. & - any(subsection_z == 0)) then - num_gps_out = num_gps_out + 1 + if (any(subsection_x == 0) .or. & + any(subsection_y == 0) .or. & + any(subsection_z == 0)) then + num_gps_out = num_gps_out + 1 else - num_inner_gps_out = num_inner_gps_out + 1 + num_inner_gps_out = num_inner_gps_out + 1 end if end if end do @@ -791,23 +793,15 @@ contains ! i - gp_layers:i + gp_layers, & ! j - gp_layers:j + gp_layers, & ! k - gp_layers:k + gp_layers) + ! if (any(subsection_3D == 0)) then subsection_x = ib_markers%sf(i - gp_layers:i + gp_layers, j, k) subsection_y = ib_markers%sf(i, j - gp_layers:j + gp_layers, k) subsection_z = ib_markers%sf(i, j, k - gp_layers:k + gp_layers) + if (any(subsection_x == 0) .or. & + any(subsection_y == 0) .or. 
& + any(subsection_z == 0)) then - if (any(subsection_x == 0) .or. & - any(subsection_y == 0) .or. & - any(subsection_z == 0)) then - - if (i== 7 .and. j== 26 .and. k== 0) then - print *, 'HERE' - print *, 'x', subsection_x, 'y', subsection_y, 'z', subsection_z - print *, proc_rank, ib_markers%sf(7, 26, -1) - end if - - - ! if (any(subsection_3D == 0)) then ghost_points_in(count)%loc = [i, j, k] patch_id = ib_markers%sf(i, j, k) ghost_points_in(count)%ib_patch_id = &