diff --git a/runs/3d_1sphere_filtering/case.py b/runs/3d_1sphere_filtering/case.py
new file mode 100644
index 0000000000..c6d138c110
--- /dev/null
+++ b/runs/3d_1sphere_filtering/case.py
@@ -0,0 +1,154 @@
+import json
+import math
+import numpy as np
+
+Mu = 1.84e-05
+gam_a = 1.4
+R = 287.0
+
+D = 0.1
+
+P = 101325  # Pa
+rho = 1.225  # kg/m^3
+
+T = P/(rho*R)
+
+M = 1.2
+Re = 1500.0
+v1 = M*(gam_a*P/rho)**(1.0/2.0)
+
+mu = rho*v1*D/Re  # dynamic viscosity for current case
+
+#print('mu: ', mu)
+#print('v1: ', v1)
+#print('rho: ', rho)
+#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) ))  # Kn < 0.01 = continuum flow
+
+dt = 4.0E-06
+Nt = 1000
+t_save = 10
+
+Nx = 63
+Ny = 63
+Nz = 63
+
+# immersed boundary dictionary
+ib_dict = {}
+ib_dict.update({
+    "patch_ib(1)%geometry": 8,
+    "patch_ib(1)%x_centroid": 0.0,
+    "patch_ib(1)%y_centroid": 0.0,
+    "patch_ib(1)%z_centroid": 0.0,
+    "patch_ib(1)%radius": D / 2,
+    "patch_ib(1)%slip": "F",
+})
+
+# Configuring case dictionary
+case_dict = {
+    # Logistics
+    "run_time_info": "T",
+    # Computational Domain Parameters
+    # x direction
+    "x_domain%beg": -5.0 * D,
+    "x_domain%end": 5.0 * D,
+    # y direction
+    "y_domain%beg": -5.0 * D,
+    "y_domain%end": 5.0 * D,
+    # z direction
+    "z_domain%beg": -5.0 * D,
+    "z_domain%end": 5.0 * D,
+    "cyl_coord": "F",
+    "m": Nx,
+    "n": Ny,
+    "p": Nz,
+    "dt": dt,
+    "t_step_start": 0,
+    "t_step_stop": Nt,  # 3000
+    "t_step_save": t_save,  # 10
+    "t_step_stat_start": 100,
+    # Simulation Algorithm Parameters
+    # Only one patch is necessary, the background air
+    "num_patches": 1,
+    # Use the 5-equation model
+    "model_eqns": 2,
+    # The 6-equation model does not need the K \div(u) term
+    "alt_soundspeed": "F",
+    # One fluid: air
+    "num_fluids": 1,
+    # Mixture physical parameter limits
+    "mpp_lim": "F",
+    # Correct errors when computing speed of sound
+    "mixture_err": "T",
+    # Use TVD RK3 for time marching
+    "time_stepper": 3,
+    # Reconstruct the primitive variables to minimize spurious oscillations
+    # Use WENO5
+    "weno_order": 5,
+    "weno_eps": 1.0e-14,
+    "weno_Re_flux": "T",
+    "weno_avg": "T",
+    "avg_state": 2,
+    "mapped_weno": "T",
+    "null_weights": "F",
+    "mp_weno": "T",
+    "riemann_solver": 2,
+    "low_Mach": 1,
+    "wave_speeds": 1,
+    # periodic bc
+    "bc_x%beg": -1,
+    "bc_x%end": -1,
+    "bc_y%beg": -1,
+    "bc_y%end": -1,
+    "bc_z%beg": -1,
+    "bc_z%end": -1,
+    # Set IB to True and add 1 patch
+    "ib": "T",
+    "num_ibs": 1,
+    "viscous": "T",
+    # Formatted Database Files Structure Parameters
+    "format": 1,
+    "precision": 2,
+    "prim_vars_wrt": "T",
+    "E_wrt": "T",
+    "q_filtered_wrt": "T",
+    "parallel_io": "T",
+    # Patch: background cuboid filled with air
+    # Specify the cuboidal background patch geometry
+    "patch_icpp(1)%geometry": 9,
+    "patch_icpp(1)%x_centroid": 0.0,
+    # Uniform medium density, centroid is at the center of the domain
+    "patch_icpp(1)%y_centroid": 0.0,
+    "patch_icpp(1)%z_centroid": 0.0,
+    "patch_icpp(1)%length_x": 10 * D,
+    "patch_icpp(1)%length_y": 10 * D,
+    "patch_icpp(1)%length_z": 10 * D,
+    # Specify the patch primitive variables
+    "patch_icpp(1)%vel(1)": v1,
+    "patch_icpp(1)%vel(2)": 0.0e00,
+    "patch_icpp(1)%vel(3)": 0.0e00,
+    "patch_icpp(1)%pres": P,
+    "patch_icpp(1)%alpha_rho(1)": rho,
+    "patch_icpp(1)%alpha(1)": 1.0e00,
+    # Patch: Sphere Immersed Boundary
+    # Fluids Physical Parameters
+    "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00),  # 2.5 = 1/(gamma - 1), not gamma = 1.4
+    "fluid_pp(1)%pi_inf": 0,
+    "fluid_pp(1)%Re(1)": 1.0 / mu,
+
+    # new case additions
+    "periodic_forcing": "T",
+    "periodic_ibs": "T",
+    "volume_filtering_momentum_eqn": "T",
+
+    "u_inf_ref": v1,
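+    # Free-stream reference state: these mirror v1, rho, and T computed above.
+    # (Assumption: the new *_inf_ref inputs set the scales used by the periodic
+    # forcing and the filtered statistics; they are not documented in this diff.)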
+ "rho_inf_ref": rho, + "T_inf_ref": T, + + "store_levelset": "F", + "slab_domain_decomposition": "T", + "compute_autocorrelation": "T", + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/3d_1sphere_periodic/case.py b/runs/3d_1sphere_periodic/case.py new file mode 100644 index 0000000000..05f7efc429 --- /dev/null +++ b/runs/3d_1sphere_periodic/case.py @@ -0,0 +1,153 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 100 +t_save = 10 + +Nx = 63 +Ny = 63 +Nz = 63 + +# immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 0.0, + f"patch_ib({1})%y_centroid": 0.0, + f"patch_ib({1})%z_centroid": 0.0, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + #"q_filtered_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": Re, + + # new case additions + "periodic_forcing": "T", + 
"periodic_ibs": "T", + #"compute_particle_drag_vi": "F", + #"compute_particle_drag_si": "F", + #"volume_filtering_momentum_eqn": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + + "store_levelset": "F", + "slab_domain_decomposition": "T", + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/3d_drag_test/case.py b/runs/3d_drag_test/case.py new file mode 100644 index 0000000000..00eb6a3c30 --- /dev/null +++ b/runs/3d_drag_test/case.py @@ -0,0 +1,144 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 100 +t_save = 1 + +Nx = 99 +Ny = 99 +Nz = 99 + +# immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 0.0, + f"patch_ib({1})%y_centroid": 0.0, + f"patch_ib({1})%z_centroid": 0.0, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # ghost cell extrapolation + "bc_x%beg": -3, + "bc_x%end": -3, + "bc_y%beg": -3, + "bc_y%end": -3, + "bc_z%beg": -3, + "bc_z%end": -3, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + 
"fluid_pp(1)%Re(1)": Re, + + # new case additions + "compute_particle_drag": "T", + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/3d_periodic_ibs_test/centered/case.py b/runs/3d_periodic_ibs_test/centered/case.py new file mode 100644 index 0000000000..a3199b3933 --- /dev/null +++ b/runs/3d_periodic_ibs_test/centered/case.py @@ -0,0 +1,146 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*np.sqrt((gam_a*P/rho)) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 100 +t_save = 10 + +Nx = 63 +Ny = 63 +Nz = 63 + +# immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 0.0, + f"patch_ib({1})%y_centroid": 0.0, + f"patch_ib({1})%z_centroid": 0.0, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": 1.0 / mu, + + # new case additions + "periodic_forcing": 
"T", + "periodic_ibs": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/3d_periodic_ibs_test/off-center/case.py b/runs/3d_periodic_ibs_test/off-center/case.py new file mode 100644 index 0000000000..ecd3d7c9f5 --- /dev/null +++ b/runs/3d_periodic_ibs_test/off-center/case.py @@ -0,0 +1,146 @@ +import json +import math +import numpy as np + +Mu = 1.84e-05 +gam_a = 1.4 +R = 287.0 + +D = 0.1 + +P = 101325 # Pa +rho = 1.225 # kg/m^3 + +T = P/(rho*R) + +M = 1.2 +Re = 1500.0 +v1 = M*(gam_a*P/rho)**(1.0/2.0) + +mu = rho*v1*D/Re # dynamic viscosity for current case + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 4.0E-06 +Nt = 100 +t_save = 10 + +Nx = 63 +Ny = 63 +Nz = 63 + +# immersed boundary dictionary +ib_dict = {} +ib_dict.update({ + f"patch_ib({1})%geometry": 8, + f"patch_ib({1})%x_centroid": 15.0 * D, + f"patch_ib({1})%y_centroid": 15.0 * D, + f"patch_ib({1})%z_centroid": 15.0 * D, + f"patch_ib({1})%radius": D / 2, + f"patch_ib({1})%slip": "F", + }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": 5.0 * D, + "x_domain%end": 15.0 * D, + # y direction + "y_domain%beg": 5.0 * D, + "y_domain%end": 15.0 * D, + # z direction + "z_domain%beg": 5.0 * D, + "z_domain%end": 15.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, # 3000 + "t_step_save": t_save, # 10 + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": 1, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "parallel_io": "T", + + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 10.0*D, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 10.0*D, + "patch_icpp(1)%z_centroid": 10.0*D, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, + # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": 1.0 / mu, + + # new case additions + "periodic_forcing": "T", + 
"periodic_ibs": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "T_inf_ref": T, + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/phi01/case.py b/runs/phi01/case.py new file mode 100644 index 0000000000..c67369d9c7 --- /dev/null +++ b/runs/phi01/case.py @@ -0,0 +1,168 @@ +import json +import math +import numpy as np + + +gam_a = 1.4 + +D = 0.1 +L = 10 * D + +M = 0.8 +Re = 1500.0 + +P = 101325 +rho = 1.225 + +v1 = M * np.sqrt(gam_a * P / rho) +mu = rho * v1 * D / Re + +#print('mu: ', mu) +#print('v1: ', v1) +#print('rho: ', rho) +#print('Kn = ' + str( np.sqrt(np.pi*gam_a/2)*(M/Re) )) # Kn < 0.01 = continuum flow + +dt = 5.0E-06 +Nt = 200 #int(1 * L / v1 / dt) +t_save = Nt//5 +t_step_start_stats = Nt//2 + +Nx = 199 +Ny = Nx +Nz = Ny + +# load initial sphere locations +sphere_loc = np.loadtxt('sphere_array_locations.txt') +N_sphere = len(sphere_loc) + +# immersed boundary dictionary +ib_dict = {} +for i in range(N_sphere): + ib_dict.update({ + f"patch_ib({i+1})%geometry": 8, + f"patch_ib({i+1})%x_centroid": sphere_loc[i, 0], + f"patch_ib({i+1})%y_centroid": sphere_loc[i, 1], + f"patch_ib({i+1})%z_centroid": sphere_loc[i, 2], + f"patch_ib({i+1})%radius": D / 2, + f"patch_ib({i+1})%slip": "F", + }) + +# ib_dict.update({ +# f"patch_ib({1})%geometry": 8, +# f"patch_ib({1})%x_centroid": sphere_loc[20, 0], +# f"patch_ib({1})%y_centroid": sphere_loc[20, 1], +# f"patch_ib({1})%z_centroid": sphere_loc[20, 2], +# f"patch_ib({1})%radius": D / 2, +# f"patch_ib({1})%slip": "F", +# }) + +# Configuring case dictionary +case_dict = { + # Logistics + "run_time_info": "T", + # Computational Domain Parameters + # x direction + "x_domain%beg": -5.0 * D, + "x_domain%end": 5.0 * D, + # y direction + "y_domain%beg": -5.0 * D, + "y_domain%end": 5.0 * D, + # z direction + "z_domain%beg": -5.0 * D, + "z_domain%end": 5.0 * D, + "cyl_coord": "F", + "m": Nx, + "n": Ny, + "p": Nz, + "dt": dt, + "t_step_start": 0, + "t_step_stop": Nt, + "t_step_save": t_save, + "t_step_stat_start": t_step_start_stats, + # Simulation Algorithm Parameters + # Only one patches are necessary, the air tube + "num_patches": 1, + # Use the 5 equation model + "model_eqns": 2, + # 6 equations model does not need the K \div(u) term + "alt_soundspeed": "F", + # One fluids: air + "num_fluids": 1, + # time step + "mpp_lim": "F", + # Correct errors when computing speed of sound + "mixture_err": "T", + # Use TVD RK3 for time marching + "time_stepper": 3, + # Reconstruct the primitive variables to minimize spurious + # Use WENO5 + "weno_order": 5, + "weno_eps": 1.0e-14, + "weno_Re_flux": "T", + "weno_avg": "T", + "avg_state": 2, + "mapped_weno": "T", + "null_weights": "F", + "mp_weno": "T", + "riemann_solver": 2, + "low_Mach": 1, + "wave_speeds": 1, + # periodic bc + "bc_x%beg": -1, + "bc_x%end": -1, + "bc_y%beg": -1, + "bc_y%end": -1, + "bc_z%beg": -1, + "bc_z%end": -1, + # Set IB to True and add 1 patch + "ib": "T", + "num_ibs": N_sphere, + "viscous": "T", + # Formatted Database Files Structure Parameters + "format": 1, + "precision": 2, + "prim_vars_wrt": "T", + "E_wrt": "T", + "q_filtered_wrt": "T", + "parallel_io": "T", + # Patch: Constant Tube filled with air + # Specify the cylindrical air tube grid geometry + "patch_icpp(1)%geometry": 9, + "patch_icpp(1)%x_centroid": 0.0, + # Uniform medium density, centroid is at the center of the domain + "patch_icpp(1)%y_centroid": 0.0, + "patch_icpp(1)%z_centroid": 0.0, + "patch_icpp(1)%length_x": 10 * D, + "patch_icpp(1)%length_y": 10 * D, + "patch_icpp(1)%length_z": 10 * D, 
+ # Specify the patch primitive variables + "patch_icpp(1)%vel(1)": v1, + "patch_icpp(1)%vel(2)": 0.0e00, + "patch_icpp(1)%vel(3)": 0.0e00, + "patch_icpp(1)%pres": P, + "patch_icpp(1)%alpha_rho(1)": rho, + "patch_icpp(1)%alpha(1)": 1.0e00, + # Patch: Sphere Immersed Boundary + # Fluids Physical Parameters + "fluid_pp(1)%gamma": 1.0e00 / (gam_a - 1.0e00), # 2.50(Not 1.40) + "fluid_pp(1)%pi_inf": 0, + "fluid_pp(1)%Re(1)": 1.0 / mu, + + # new case additions + "periodic_forcing": "T", + "periodic_ibs": "T", + "volume_filtering_momentum_eqn": "T", + "filter_width": 3.0*D/2 * np.sqrt(2/(9*np.pi)), + "compute_particle_drag": "T", + + "u_inf_ref": v1, + "rho_inf_ref": rho, + "P_inf_ref": P, + + "store_levelset": "F", + "slab_domain_decomposition": "T", + } + +case_dict.update(ib_dict) + +print(json.dumps(case_dict)) diff --git a/runs/phi01/sphere_array_locations.txt b/runs/phi01/sphere_array_locations.txt new file mode 100644 index 0000000000..047707ef90 --- /dev/null +++ b/runs/phi01/sphere_array_locations.txt @@ -0,0 +1,190 @@ +-8.877599239349365234e-02 1.935560703277587891e-01 -6.486654281616210938e-02 +-3.341052532196044922e-01 4.142935276031494141e-01 -4.567451477050781250e-01 +2.565863132476806641e-01 -4.949223995208740234e-02 -4.442641735076904297e-01 +3.103950023651123047e-01 -2.099078893661499023e-01 -4.642441272735595703e-01 +-3.521966934204101562e-02 -1.745276451110839844e-01 -3.202521800994873047e-01 +-1.949143409729003906e-02 -1.775810718536376953e-01 -3.603804111480712891e-02 +-1.835894584655761719e-01 3.262339830398559570e-01 -3.085057735443115234e-01 +-1.445159912109375000e-01 1.513528823852539062e-01 -2.023205757141113281e-01 +-4.898538589477539062e-01 -4.509705305099487305e-01 -1.682095527648925781e-01 +3.143328428268432617e-01 4.728571176528930664e-01 1.526627540588378906e-01 +1.280879974365234375e-01 1.239399909973144531e-01 -3.574787378311157227e-01 +-1.123933792114257812e-01 -3.207942247390747070e-01 9.310150146484375000e-02 +-1.386029720306396484e-01 -1.205575466156005859e-02 2.014696598052978516e-01 +-2.808933258056640625e-01 3.925647735595703125e-01 2.450205087661743164e-01 +4.294252395629882812e-02 2.894115447998046875e-01 -2.536165714263916016e-02 +1.801455020904541016e-01 5.933284759521484375e-02 4.247887134552001953e-01 +1.872421503067016602e-01 3.063344955444335938e-02 8.561480045318603516e-02 +2.484493255615234375e-01 -4.173127412796020508e-01 3.008729219436645508e-01 +8.203792572021484375e-02 1.318891048431396484e-01 -1.190292835235595703e-02 +-4.555282592773437500e-01 -3.696656227111816406e-01 2.237200736999511719e-02 +-1.931151151657104492e-01 5.374908447265625000e-02 5.545830726623535156e-02 +-6.292748451232910156e-02 1.790912151336669922e-01 1.174246072769165039e-01 +-2.316267490386962891e-01 -4.075572490692138672e-01 4.597637653350830078e-01 +-3.437596559524536133e-01 4.005973339080810547e-01 -2.290433645248413086e-01 +-1.910818815231323242e-01 -4.736427068710327148e-01 -2.076803445816040039e-01 +-4.528397321701049805e-01 7.907927036285400391e-02 3.940449953079223633e-01 +1.893968582153320312e-01 4.864903688430786133e-01 -3.449935913085937500e-01 +7.300472259521484375e-02 -3.667246103286743164e-01 3.762015104293823242e-01 +-1.821663379669189453e-01 -4.775607585906982422e-02 3.386561870574951172e-01 +5.136466026306152344e-02 4.852104187011718750e-01 -4.752502441406250000e-01 +-3.295025825500488281e-01 -5.519819259643554688e-02 5.781412124633789062e-02 +4.343043565750122070e-01 2.689909934997558594e-01 3.341940641403198242e-01 +-3.969779014587402344e-01 
-2.916865348815917969e-01 -1.138211488723754883e-01 +-4.619355201721191406e-01 2.032375335693359375e-02 -1.161878108978271484e-01 +7.124900817871093750e-03 1.223111152648925781e-02 4.087531566619873047e-01 +-3.908715248107910156e-01 1.400717496871948242e-01 2.354013919830322266e-02 +-1.070375442504882812e-01 3.122891187667846680e-01 2.600712776184082031e-01 +4.667922258377075195e-01 -2.228868007659912109e-01 2.890402078628540039e-01 +9.751558303833007812e-03 3.652515411376953125e-01 1.688425540924072266e-01 +-7.598793506622314453e-02 1.410543918609619141e-02 -6.586468219757080078e-02 +-3.012117147445678711e-01 -1.333975791931152344e-02 -2.475223541259765625e-01 +1.425679922103881836e-01 -1.594284772872924805e-01 4.271366596221923828e-01 +-3.488619327545166016e-01 3.043293952941894531e-01 1.312527656555175781e-01 +1.347296237945556641e-01 -2.548012733459472656e-01 2.497346401214599609e-01 +1.558208465576171875e-01 -1.695448160171508789e-01 8.221673965454101562e-02 +2.994102239608764648e-01 -2.616212368011474609e-01 3.708097934722900391e-01 +4.749594926834106445e-01 4.012154340744018555e-01 -1.113747358322143555e-01 +4.658288955688476562e-01 -2.405116558074951172e-01 -4.019365310668945312e-01 +-4.477721452713012695e-01 1.802740097045898438e-01 2.297303676605224609e-01 +2.828998565673828125e-01 3.781812191009521484e-01 -4.897345304489135742e-01 +-1.556029319763183594e-01 -1.499896049499511719e-01 -1.702260971069335938e-01 +-2.203900814056396484e-01 4.228965044021606445e-01 3.943344354629516602e-01 +-7.529938220977783203e-02 -4.034370183944702148e-01 -4.895013570785522461e-01 +-2.633322477340698242e-01 2.260003089904785156e-01 3.617374897003173828e-01 +-2.043257951736450195e-01 -2.201197147369384766e-01 4.399769306182861328e-01 +2.097340822219848633e-01 -3.915596008300781250e-02 -2.276867628097534180e-01 +-1.167770624160766602e-01 4.129269123077392578e-01 -4.588322639465332031e-01 +3.195565938949584961e-01 2.821329832077026367e-01 2.030262947082519531e-01 +4.332208633422851562e-02 2.999825477600097656e-01 -2.426314353942871094e-01 +-2.900393009185791016e-01 7.278752326965332031e-02 3.351804018020629883e-01 +-3.045821189880371094e-02 -1.478650569915771484e-01 3.491390943527221680e-01 +-2.793753147125244141e-02 -1.773738861083984375e-01 1.675630807876586914e-01 +-3.188729286193847656e-01 -4.904426336288452148e-01 -6.549203395843505859e-02 +-4.071967601776123047e-01 -1.066761016845703125e-01 -4.441113471984863281e-01 +4.105618000030517578e-01 -3.848595619201660156e-01 1.863635778427124023e-01 +-1.051111221313476562e-01 -7.725274562835693359e-02 -4.898943901062011719e-01 +3.737279176712036133e-01 1.056033372879028320e-01 4.786680936813354492e-01 +2.511825561523437500e-01 -3.347592353820800781e-01 1.227176189422607422e-01 +-3.208853006362915039e-01 -1.442481279373168945e-01 -9.813189506530761719e-02 +3.365310430526733398e-01 -4.063715934753417969e-01 -4.750763177871704102e-01 +-3.066674470901489258e-01 -2.005393505096435547e-01 -2.603935003280639648e-01 +4.633438587188720703e-02 -3.628603219985961914e-01 -3.448045253753662109e-01 +-1.228909492492675781e-01 4.968223571777343750e-01 1.755017042160034180e-01 +4.529950618743896484e-01 -4.122850894927978516e-01 3.542938232421875000e-01 +3.015396595001220703e-01 6.062459945678710938e-02 -5.255222320556640625e-02 +7.875204086303710938e-02 -3.220939636230468750e-01 2.097034454345703125e-02 +-3.075191974639892578e-01 -4.913786649703979492e-01 1.174443960189819336e-01 +-2.157187461853027344e-01 -1.293109655380249023e-01 -3.813669681549072266e-01 
+-2.569644451141357422e-01 -4.775856733322143555e-01 -3.842570781707763672e-01 +3.374536037445068359e-01 2.595454454421997070e-01 -1.862519979476928711e-01 +-2.484831809997558594e-01 1.898849010467529297e-02 -1.008712053298950195e-01 +-3.550199270248413086e-01 -3.802776336669921875e-03 2.112603187561035156e-01 +-4.047393798828125000e-02 -3.331716060638427734e-01 -1.580150127410888672e-01 +2.301404476165771484e-01 1.020783185958862305e-01 2.300353050231933594e-01 +-4.886188507080078125e-01 -4.335124492645263672e-01 -3.716624975204467773e-01 +3.109852075576782227e-01 -3.871500492095947266e-02 1.583197116851806641e-01 +4.864922761917114258e-01 -2.506246566772460938e-01 4.611170291900634766e-01 +4.114500284194946289e-01 -2.497513294219970703e-01 8.945560455322265625e-02 +-2.041511535644531250e-01 -3.061387538909912109e-01 -1.002895832061767578e-01 +-3.356888294219970703e-01 -2.898548841476440430e-01 -4.294934272766113281e-01 +6.349623203277587891e-02 -4.237914085388183594e-01 1.809575557708740234e-01 +1.638014316558837891e-01 -3.412141799926757812e-01 -4.808696508407592773e-01 +4.292991161346435547e-01 -7.350444793701171875e-02 4.452385902404785156e-01 +-2.837867736816406250e-01 2.394533157348632812e-02 -4.843814373016357422e-01 +-2.125334739685058594e-01 1.921176910400390625e-01 -2.379369735717773438e-02 +1.759276390075683594e-01 4.892826080322265625e-01 4.419517517089843750e-01 +-4.233963489532470703e-01 7.077014446258544922e-02 -3.061563968658447266e-01 +-3.712041378021240234e-01 4.946417212486267090e-01 3.635656833648681641e-01 +-4.665093421936035156e-01 4.070787429809570312e-01 -3.274630308151245117e-01 +3.692833185195922852e-01 -8.178091049194335938e-02 -1.193681955337524414e-01 +6.124496459960937500e-03 -2.011668682098388672e-02 8.408391475677490234e-02 +-1.337385177612304688e-02 -2.435498237609863281e-01 -4.735767841339111328e-01 +2.590975761413574219e-01 -3.270063400268554688e-01 -5.099523067474365234e-02 +3.800438642501831055e-01 4.123662710189819336e-01 -3.175902366638183594e-01 +2.355668544769287109e-01 2.839933633804321289e-01 -3.255009651184082031e-01 +-4.340230226516723633e-01 -4.109045267105102539e-01 4.977314472198486328e-01 +2.350783348083496094e-02 -7.954597473144531250e-02 -2.089430093765258789e-01 +2.528522014617919922e-01 2.231028079986572266e-01 -4.818900823593139648e-01 +3.285017013549804688e-01 -1.968045234680175781e-01 2.016012668609619141e-01 +3.276336193084716797e-01 3.824212551116943359e-01 -2.195405960083007812e-02 +4.347554445266723633e-01 -1.944565773010253906e-02 -3.952792882919311523e-01 +-2.355787754058837891e-01 2.512185573577880859e-01 -4.705796241760253906e-01 +2.304534912109375000e-01 2.335491180419921875e-01 3.436188697814941406e-01 +4.291563034057617188e-01 2.084137201309204102e-01 -3.515939712524414062e-01 +4.610210657119750977e-01 2.877938747406005859e-01 9.413146972656250000e-02 +3.239741325378417969e-01 4.200505018234252930e-01 3.377312421798706055e-01 +-4.339945316314697266e-01 -1.799043416976928711e-01 1.667797565460205078e-01 +4.162905216217041016e-01 -2.838604450225830078e-01 -1.204760074615478516e-01 +4.708716869354248047e-01 4.452165365219116211e-01 4.702655076980590820e-01 +3.935134410858154297e-01 -4.494274854660034180e-01 -1.000511646270751953e-02 +-3.325940370559692383e-01 -3.989632129669189453e-01 -2.595729827880859375e-01 +-4.726890325546264648e-01 -1.577985286712646484e-01 -2.004265785217285156e-02 +-2.578830718994140625e-01 1.816778182983398438e-01 1.800514459609985352e-01 +2.873079776763916016e-01 -1.582661867141723633e-01 
1.000881195068359375e-03 +1.284685134887695312e-01 -2.347108125686645508e-01 -1.527856588363647461e-01 +-4.975929260253906250e-01 4.154947996139526367e-01 2.424190044403076172e-01 +1.319632530212402344e-01 2.181564569473266602e-01 1.456822156906127930e-01 +4.251360893249511719e-02 5.486690998077392578e-02 2.446963787078857422e-01 +5.265474319458007812e-03 -4.930623769760131836e-01 1.795315742492675781e-02 +3.435378074645996094e-01 -1.437039375305175781e-01 -2.955729961395263672e-01 +-1.589361429214477539e-01 3.439151048660278320e-01 -1.269352436065673828e-01 +-2.996790409088134766e-01 -2.977983951568603516e-01 5.047678947448730469e-02 +1.387677192687988281e-01 -4.051816463470458984e-02 -6.590497493743896484e-02 +-4.859859943389892578e-01 4.686148166656494141e-01 6.054759025573730469e-02 +3.058031797409057617e-01 -4.722125530242919922e-01 -1.649188995361328125e-01 +3.712953329086303711e-01 -3.612419366836547852e-01 -2.953444719314575195e-01 +-2.350592613220214844e-01 1.253683567047119141e-01 -3.582476377487182617e-01 +-4.282865524291992188e-01 -3.783413171768188477e-01 1.956710815429687500e-01 +-1.545268297195434570e-01 -3.127627372741699219e-01 -3.272031545639038086e-01 +2.250815629959106445e-01 -3.367059230804443359e-01 -2.811298370361328125e-01 +-5.611097812652587891e-02 2.276177406311035156e-01 -3.761705160140991211e-01 +1.843569278717041016e-01 3.698165416717529297e-01 -1.461877822875976562e-01 +-3.651070594787597656e-01 3.224494457244873047e-01 -4.502046108245849609e-02 +-1.052534580230712891e-01 1.394950151443481445e-01 3.169180154800415039e-01 +-7.266819477081298828e-02 -3.203969001770019531e-01 3.076763153076171875e-01 +-1.534210443496704102e-01 -1.421678066253662109e-02 -2.598439455032348633e-01 +4.644811153411865234e-01 -2.855896949768066406e-02 6.111550331115722656e-02 +1.615400314331054688e-01 4.353706836700439453e-01 2.680056095123291016e-01 +-4.789991378784179688e-01 -2.737338542938232422e-01 -2.684531211853027344e-01 +-4.801630973815917969e-01 -1.131765842437744141e-01 -2.253174781799316406e-01 +4.725518226623535156e-01 2.924776077270507812e-01 -4.712775945663452148e-01 +3.934500217437744141e-01 6.538939476013183594e-02 -2.147150039672851562e-01 +5.674338340759277344e-02 1.684566736221313477e-01 4.750093221664428711e-01 +-3.127444982528686523e-01 1.864537000656127930e-01 -1.828011274337768555e-01 +-6.377077102661132812e-02 3.063268661499023438e-01 4.461523294448852539e-01 +-2.393376827239990234e-01 -2.101924419403076172e-01 2.160568237304687500e-01 +-4.714767932891845703e-01 2.386778593063354492e-01 -1.962506771087646484e-01 +-4.175131320953369141e-01 1.262202262878417969e-01 -4.906876087188720703e-01 +1.526114940643310547e-01 -1.855427026748657227e-01 -3.443827629089355469e-01 +6.579875946044921875e-02 -4.886317253112792969e-02 -4.445745944976806641e-01 +1.098661422729492188e-01 3.471816778182983398e-01 4.010045528411865234e-01 +2.641906738281250000e-01 -2.310740947723388672e-01 -1.801049709320068359e-01 +2.215981483459472656e-02 1.125121116638183594e-01 -2.007805109024047852e-01 +4.692313671112060547e-01 -3.348422050476074219e-02 2.421901226043701172e-01 +3.015110492706298828e-01 -7.356131076812744141e-02 3.514482975006103516e-01 +-3.965770006179809570e-01 2.962644100189208984e-01 3.929857015609741211e-01 +1.106926202774047852e-01 -4.377689361572265625e-01 -1.675007343292236328e-01 +1.297621726989746094e-01 -8.046376705169677734e-02 2.488052845001220703e-01 +1.898322105407714844e-01 1.719188690185546875e-01 -1.696370840072631836e-01 +4.060682058334350586e-01 
1.258714199066162109e-01 1.274476051330566406e-01 +1.603732109069824219e-01 3.966591358184814453e-01 6.766164302825927734e-02 +5.054616928100585938e-02 2.127890586853027344e-01 3.031399250030517578e-01 +-1.690447330474853516e-01 -1.416635513305664062e-01 3.728961944580078125e-02 +-1.341120004653930664e-01 1.080242395401000977e-01 4.635136127471923828e-01 +-2.457776069641113281e-01 -3.851659297943115234e-01 2.513883113861083984e-01 +-1.634557247161865234e-01 -4.583904743194580078e-01 -2.824854850769042969e-02 +-1.784324645996093750e-03 4.497978687286376953e-01 -1.161942481994628906e-01 +4.503953456878662109e-01 1.885429620742797852e-01 -4.877877235412597656e-02 +2.600491046905517578e-01 2.236571311950683594e-01 2.091717720031738281e-02 +-3.822712898254394531e-01 2.547247409820556641e-01 -3.687927722930908203e-01 +-3.667194843292236328e-01 -1.171383857727050781e-01 3.846424818038940430e-01 +-3.668913841247558594e-01 -2.955377101898193359e-01 3.536789417266845703e-01 +2.957736253738403320e-01 8.799576759338378906e-02 -3.451507091522216797e-01 +-1.604117155075073242e-01 3.587515354156494141e-01 5.187714099884033203e-02 +1.919094324111938477e-01 -4.781463146209716797e-01 4.655241966247558594e-03 +-3.640174865722656250e-02 4.754726886749267578e-01 -2.942006587982177734e-01 +-6.335353851318359375e-02 4.125511646270751953e-02 -3.732511997222900391e-01 +9.152126312255859375e-02 3.327772617340087891e-01 -4.209873676300048828e-01 +-5.436992645263671875e-02 4.926524162292480469e-01 3.434299230575561523e-01 +3.771104812622070312e-01 9.526658058166503906e-02 2.973334789276123047e-01 diff --git a/src/common/m_boundary_common.fpp b/src/common/m_boundary_common.fpp index f89278a86d..4c01b10f22 100644 --- a/src/common/m_boundary_common.fpp +++ b/src/common/m_boundary_common.fpp @@ -35,6 +35,7 @@ module m_boundary_common private; public :: s_initialize_boundary_common_module, & s_populate_variables_buffers, & + s_populate_scalarfield_buffers, & s_create_mpi_types, & s_populate_capillary_buffers, & s_populate_F_igr_buffers, & @@ -276,6 +277,109 @@ contains end subroutine s_populate_variables_buffers + !> The purpose of this procedure is to populate the buffers + !! of any scalarfield variable, currently only with periodic boundary conditions + impure subroutine s_populate_scalarfield_buffers(bc_type, q_temp) + + type(scalar_field), intent(inout) :: q_temp + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + + integer :: k, l + + ! Population of Buffers in x-direction + if (bc_x%beg >= 0) then + call s_mpi_sendrecv_variables_buffers_scalarfield(q_temp, 1, -1) + else + $:GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = 0, n + select case (int(bc_type(1, -1)%sf(0, k, l))) + case (BC_PERIODIC) + call s_periodic_scalarfield(q_temp, 1, -1, k, l) + end select + end do + end do + end if + + if (bc_x%end >= 0) then + call s_mpi_sendrecv_variables_buffers_scalarfield(q_temp, 1, 1) + else + $:GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = 0, n + select case (int(bc_type(1, 1)%sf(0, k, l))) + case (BC_PERIODIC) + call s_periodic_scalarfield(q_temp, 1, 1, k, l) + end select + end do + end do + end if + + ! 
Population of Buffers in y-direction + if (n == 0) return + + if (bc_y%beg >= 0) then + call s_mpi_sendrecv_variables_buffers_scalarfield(q_temp, 2, -1) + else + $:GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, -1)%sf(k, 0, l))) + case (BC_PERIODIC) + call s_periodic_scalarfield(q_temp, 2, -1, k, l) + end select + end do + end do + end if + + if (bc_y%end >= 0) then + call s_mpi_sendrecv_variables_buffers_scalarfield(q_temp, 2, 1) + else + $:GPU_PARALLEL_LOOP(collapse=2) + do l = 0, p + do k = -buff_size, m + buff_size + select case (int(bc_type(2, 1)%sf(k, 0, l))) + case (BC_PERIODIC) + call s_periodic_scalarfield(q_temp, 2, 1, k, l) + end select + end do + end do + end if + + ! Population of Buffers in z-direction + + if (p == 0) return + if (bc_z%beg >= 0) then + call s_mpi_sendrecv_variables_buffers_scalarfield(q_temp, 3, -1) + else + $:GPU_PARALLEL_LOOP(collapse=2) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, -1)%sf(k, l, 0))) + case (BC_PERIODIC) + call s_periodic_scalarfield(q_temp, 3, -1, k, l) + end select + end do + end do + end if + + if (bc_z%end >= 0) then + call s_mpi_sendrecv_variables_buffers_scalarfield(q_temp, 3, 1) + else + $:GPU_PARALLEL_LOOP(collapse=2) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + select case (int(bc_type(3, 1)%sf(k, l, 0))) + case (BC_PERIODIC) + call s_periodic_scalarfield(q_temp, 3, 1, k, l) + end select + end do + end do + end if + ! END: Population of Buffers in z-direction + + end subroutine s_populate_scalarfield_buffers + pure subroutine s_ghost_cell_extrapolation(q_prim_vf, bc_dir, bc_loc, k, l) $:GPU_ROUTINE(function_name='s_ghost_cell_extrapolation', & & parallelism='[seq]', cray_inline=True) @@ -736,6 +840,61 @@ contains end subroutine s_periodic + !< Populate periodic boundaries for arbitrary scalarfield quantity + pure subroutine s_periodic_scalarfield(q_temp, bc_dir, bc_loc, k, l) + $:GPU_ROUTINE(parallelism='[seq]') + type(scalar_field), intent(inout) :: q_temp + integer, intent(in) :: bc_dir, bc_loc + integer, intent(in) :: k, l + + integer :: j, q + + if (bc_dir == 1) then !< x-direction + if (bc_loc == -1) then !< bc_x%beg + do j = 1, buff_size + q_temp%sf(-j, k, l) = & + q_temp%sf(m - (j - 1), k, l) + end do + + else !< bc_x%end + do j = 1, buff_size + q_temp%sf(m + j, k, l) = & + q_temp%sf(j - 1, k, l) + end do + end if + + elseif (bc_dir == 2) then !< y-direction + if (bc_loc == -1) then !< bc_y%beg + do j = 1, buff_size + q_temp%sf(k, -j, l) = & + q_temp%sf(k, n - (j - 1), l) + end do + + else !< bc_y%end + do j = 1, buff_size + q_temp%sf(k, n + j, l) = & + q_temp%sf(k, j - 1, l) + end do + end if + + elseif (bc_dir == 3) then !< z-direction + if (bc_loc == -1) then !< bc_z%beg + do j = 1, buff_size + q_temp%sf(k, l, -j) = & + q_temp%sf(k, l, p - (j - 1)) + end do + + else !< bc_z%end + do j = 1, buff_size + q_temp%sf(k, l, p + j) = & + q_temp%sf(k, l, j - 1) + end do + + end if + end if + + end subroutine s_periodic_scalarfield + pure subroutine s_axis(q_prim_vf, pb_in, mv_in, k, l) $:GPU_ROUTINE(parallelism='[seq]') type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf diff --git a/src/common/m_checker_common.fpp b/src/common/m_checker_common.fpp index 1ec314536a..0a0d81e748 100644 --- a/src/common/m_checker_common.fpp +++ b/src/common/m_checker_common.fpp @@ -184,6 +184,11 @@ contains @:PROHIBIT(ib .and. n <= 0, "Immersed Boundaries do not work in 1D") @:PROHIBIT(ib .and. 
(num_ibs <= 0 .or. num_ibs > num_patches_max), "num_ibs must be between 1 and num_patches_max") @:PROHIBIT((.not. ib) .and. num_ibs > 0, "num_ibs is set, but ib is not enabled") + #:for X in ['x', 'y', 'z'] + #:for BOUND in ['beg', 'end'] + @:PROHIBIT(periodic_ibs .and. bc_${X}$%${BOUND}$ /= BC_PERIODIC, "periodic ibs requires periodic BCs, bc_${X}$%${BOUND}$ must = -1") + #:endfor + #:endfor end subroutine s_check_inputs_ibm #endif diff --git a/src/common/m_compute_levelset.fpp b/src/common/m_compute_levelset.fpp index cc473ea544..f6a7097533 100644 --- a/src/common/m_compute_levelset.fpp +++ b/src/common/m_compute_levelset.fpp @@ -442,7 +442,7 @@ contains end subroutine s_cuboid_levelset - pure subroutine s_sphere_levelset(ib_patch_id, levelset, levelset_norm) + impure subroutine s_sphere_levelset(ib_patch_id, levelset, levelset_norm) type(levelset_field), intent(INOUT), optional :: levelset type(levelset_norm_field), intent(INOUT), optional :: levelset_norm @@ -452,13 +452,50 @@ contains real(wp) :: x_centroid, y_centroid, z_centroid real(wp), dimension(3) :: dist_vec + real(wp) :: x_domain_beg, x_domain_end, y_domain_beg, y_domain_end, z_domain_beg, z_domain_end + real(wp) :: x_pcen, y_pcen, z_pcen !< periodically projected centroids of sphere + real(wp), dimension(7, 3) :: dist_vec_per + real(wp), dimension(7) :: dist_per + integer :: i, j, k !< Loop index variables + integer :: ierr radius = patch_ib(ib_patch_id)%radius x_centroid = patch_ib(ib_patch_id)%x_centroid y_centroid = patch_ib(ib_patch_id)%y_centroid z_centroid = patch_ib(ib_patch_id)%z_centroid + call s_mpi_allreduce_min(x_domain%beg, x_domain_beg) + call s_mpi_allreduce_max(x_domain%end, x_domain_end) + call s_mpi_allreduce_min(y_domain%beg, y_domain_beg) + call s_mpi_allreduce_max(y_domain%end, y_domain_end) + call s_mpi_allreduce_min(z_domain%beg, z_domain_beg) + call s_mpi_allreduce_max(z_domain%end, z_domain_end) + + if (periodic_ibs) then + if ((x_centroid - x_domain_beg) <= radius) then + x_pcen = x_domain_end + (x_centroid - x_domain_beg) + else if ((x_domain_end - x_centroid) <= radius) then + x_pcen = x_domain_beg - (x_domain_end - x_centroid) + else + x_pcen = x_centroid + end if + if ((y_centroid - y_domain_beg) <= radius) then + y_pcen = y_domain_end + (y_centroid - y_domain_beg) + else if ((y_domain_end - y_centroid) <= radius) then + y_pcen = y_domain_beg - (y_domain_end - y_centroid) + else + y_pcen = y_centroid + end if + if ((z_centroid - z_domain_beg) <= radius) then + z_pcen = z_domain_end + (z_centroid - z_domain_beg) + else if ((z_domain_end - z_centroid) <= radius) then + z_pcen = z_domain_beg - (z_domain_end - z_centroid) + else + z_pcen = z_centroid + end if + end if + do i = 0, m do j = 0, n do k = 0, p @@ -466,6 +503,73 @@ contains dist_vec(2) = y_cc(j) - y_centroid dist_vec(3) = z_cc(k) - z_centroid dist = sqrt(sum(dist_vec**2)) + + ! 
all permutations of periodically projected ib + if (periodic_ibs) then + dist_vec_per(1, 1) = x_cc(i) - x_pcen + dist_vec_per(1, 2) = y_cc(j) - y_pcen + dist_vec_per(1, 3) = z_cc(k) - z_pcen + dist_per(1) = sqrt(sum(dist_vec_per(1, :)**2)) + if (dist_per(1) < dist) then + dist = dist_per(1) + dist_vec = dist_vec_per(1, :) + end if + + dist_vec_per(2, 1) = x_cc(i) - x_pcen + dist_vec_per(2, 2) = y_cc(j) - y_centroid + dist_vec_per(2, 3) = z_cc(k) - z_pcen + dist_per(2) = sqrt(sum(dist_vec_per(2, :)**2)) + if (dist_per(2) < dist) then + dist = dist_per(2) + dist_vec = dist_vec_per(2, :) + end if + + dist_vec_per(3, 1) = x_cc(i) - x_pcen + dist_vec_per(3, 2) = y_cc(j) - y_pcen + dist_vec_per(3, 3) = z_cc(k) - z_centroid + dist_per(3) = sqrt(sum(dist_vec_per(3, :)**2)) + if (dist_per(3) < dist) then + dist = dist_per(3) + dist_vec = dist_vec_per(3, :) + end if + + dist_vec_per(4, 1) = x_cc(i) - x_pcen + dist_vec_per(4, 2) = y_cc(j) - y_centroid + dist_vec_per(4, 3) = z_cc(k) - z_centroid + dist_per(4) = sqrt(sum(dist_vec_per(4, :)**2)) + if (dist_per(4) < dist) then + dist = dist_per(4) + dist_vec = dist_vec_per(4, :) + end if + + dist_vec_per(5, 1) = x_cc(i) - x_centroid + dist_vec_per(5, 2) = y_cc(j) - y_pcen + dist_vec_per(5, 3) = z_cc(k) - z_pcen + dist_per(5) = sqrt(sum(dist_vec_per(5, :)**2)) + if (dist_per(5) < dist) then + dist = dist_per(5) + dist_vec = dist_vec_per(5, :) + end if + + dist_vec_per(6, 1) = x_cc(i) - x_centroid + dist_vec_per(6, 2) = y_cc(j) - y_pcen + dist_vec_per(6, 3) = z_cc(k) - z_centroid + dist_per(6) = sqrt(sum(dist_vec_per(6, :)**2)) + if (dist_per(6) < dist) then + dist = dist_per(6) + dist_vec = dist_vec_per(6, :) + end if + + dist_vec_per(7, 1) = x_cc(i) - x_centroid + dist_vec_per(7, 2) = y_cc(j) - y_centroid + dist_vec_per(7, 3) = z_cc(k) - z_pcen + dist_per(7) = sqrt(sum(dist_vec_per(7, :)**2)) + if (dist_per(7) < dist) then + dist = dist_per(7) + dist_vec = dist_vec_per(7, :) + end if + end if + levelset%sf(i, j, k, ib_patch_id) = dist - radius if (f_approx_equal(dist, 0._wp)) then levelset_norm%sf(i, j, k, ib_patch_id, :) = (/1, 0, 0/) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index 114286f53b..d4d59b728a 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -21,7 +21,7 @@ module m_constants integer, parameter :: fourier_rings = 5 !< Fourier filter ring limit integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation - integer, parameter :: num_patches_max = 10 + integer, parameter :: num_patches_max = 1000 integer, parameter :: num_bc_patches_max = 10 integer, parameter :: pathlen_max = 400 integer, parameter :: nnode = 4 !< Number of QBMM nodes diff --git a/src/common/m_ib_patches.fpp b/src/common/m_ib_patches.fpp index de1b1f1908..1070858111 100644 --- a/src/common/m_ib_patches.fpp +++ b/src/common/m_ib_patches.fpp @@ -557,6 +557,8 @@ contains ! Generic loop iterators integer :: i, j, k real(wp) :: radius + real(wp) :: x_domain_beg, x_domain_end, y_domain_beg, y_domain_end, z_domain_beg, z_domain_end + real(wp) :: x_pcen, y_pcen, z_pcen !! Variables to initialize the pressure field that corresponds to the !! bubble-collapse test case found in Tiwari et al. (2013) @@ -577,6 +579,38 @@ contains ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. 
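+        ! Each rank stores only its local sub-domain bounds, so min/max
+        ! allreduces recover the global domain extents needed below to
+        ! project sphere centroids across the periodic boundaries.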
+ call s_mpi_allreduce_min(x_domain%beg, x_domain_beg) + call s_mpi_allreduce_max(x_domain%end, x_domain_end) + call s_mpi_allreduce_min(y_domain%beg, y_domain_beg) + call s_mpi_allreduce_max(y_domain%end, y_domain_end) + call s_mpi_allreduce_min(z_domain%beg, z_domain_beg) + call s_mpi_allreduce_max(z_domain%end, z_domain_end) + + ! periodically projected sphere centroid + if (periodic_ibs) then + if ((x_centroid - x_domain_beg) <= radius) then + x_pcen = x_domain_end + (x_centroid - x_domain_beg) + else if ((x_domain_end - x_centroid) <= radius) then + x_pcen = x_domain_beg - (x_domain_end - x_centroid) + else + x_pcen = x_centroid + end if + if ((y_centroid - y_domain_beg) <= radius) then + y_pcen = y_domain_end + (y_centroid - y_domain_beg) + else if ((y_domain_end - y_centroid) <= radius) then + y_pcen = y_domain_beg - (y_domain_end - y_centroid) + else + y_pcen = y_centroid + end if + if ((z_centroid - z_domain_beg) <= radius) then + z_pcen = z_domain_end + (z_centroid - z_domain_beg) + else if ((z_domain_end - z_centroid) <= radius) then + z_pcen = z_domain_beg - (z_domain_end - z_centroid) + else + z_pcen = z_centroid + end if + end if + do k = 0, p do j = 0, n do i = 0, m @@ -592,6 +626,34 @@ contains + (cart_z - z_centroid)**2 <= radius**2)) then ib_markers_sf(i, j, k) = patch_id end if + + if (periodic_ibs) then ! check every permutation of the projected cell location + if (((x_cc(i) - x_pcen)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_pcen)**2 <= radius**2) & + .or. ((x_cc(i) - x_pcen)**2 & + + (cart_y - y_centroid)**2 & + + (cart_z - z_centroid)**2 <= radius**2) & + .or. ((x_cc(i) - x_pcen)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_centroid)**2 <= radius**2) & + .or. ((x_cc(i) - x_pcen)**2 & + + (cart_y - y_centroid)**2 & + + (cart_z - z_pcen)**2 <= radius**2) & + .or. ((x_cc(i) - x_centroid)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_centroid)**2 <= radius**2) & + .or. ((x_cc(i) - x_centroid)**2 & + + (cart_y - y_pcen)**2 & + + (cart_z - z_pcen)**2 <= radius**2) & + .or. ((x_cc(i) - x_centroid)**2 & + + (cart_y - y_centroid)**2 & + + (cart_z - z_pcen)**2 <= radius**2)) & + then + ! Updating the patch identities bookkeeping variable + ib_markers_sf(i, j, k) = patch_id + end if + end if end do end do end do diff --git a/src/common/m_mpi_common.fpp b/src/common/m_mpi_common.fpp index 0c705e908f..c5f926c51c 100644 --- a/src/common/m_mpi_common.fpp +++ b/src/common/m_mpi_common.fpp @@ -45,13 +45,23 @@ module m_mpi_common integer :: halo_size $:GPU_DECLARE(create='[halo_size]') + real(wp), private, allocatable, dimension(:), target :: buff_send_scalarfield + !! This variable is utilized to pack and send the buffer of any scalar field to neighboring processors + + real(wp), private, allocatable, dimension(:), target :: buff_recv_scalarfield + !! This variable is utilized to receive and unpack the buffer of any scalar field from neighboring processors + +#ifndef __NVCOMPILER_GPU_UNIFIED_MEM + $:GPU_DECLARE(create='[buff_send_scalarfield, buff_recv_scalarfield]') +#endif + contains !> The computation of parameters, the allocation of memory, !! the association of pointers and/or the execution of any !! other procedures that are necessary to setup the module. impure subroutine s_initialize_mpi_common_module - + integer :: halo_size_sf #ifdef MFC_MPI ! Allocating buff_send/recv and. Please note that for the sake of ! 
simplicity, both variables are provided sufficient storage to hold @@ -87,6 +97,23 @@ contains $:GPU_ENTER_DATA(create='[capture:buff_send]') $:GPU_ENTER_DATA(create='[capture:buff_recv]') #endif + +#ifdef MFC_SIMULATION + if (volume_filtering_momentum_eqn) then + halo_size_sf = nint(-1._wp + 1._wp*buff_size* & + & (m + 2*buff_size + 1)* & + & (n + 2*buff_size + 1)* & + & (p + 2*buff_size + 1)/ & + & (cells_bounds%mnp_min + 2*buff_size + 1)) +#ifndef __NVCOMPILER_GPU_UNIFIED_MEM + @:ALLOCATE(buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) +#else + allocate (buff_send_scalarfield(0:halo_size_sf), buff_recv_scalarfield(0:halo_size_sf)) + $:GPU_ENTER_DATA(create='[capture:buff_send_scalarfield]') + $:GPU_ENTER_DATA(create='[capture:buff_recv_scalarfield]') +#endif + end if +#endif #endif end subroutine s_initialize_mpi_common_module @@ -207,14 +234,18 @@ contains #ifdef MFC_PRE_PROCESS MPI_IO_IB_DATA%var%sf => ib_markers%sf - MPI_IO_levelset_DATA%var%sf => levelset%sf - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf + if (store_levelset) then + MPI_IO_levelset_DATA%var%sf => levelset%sf + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf + end if #else MPI_IO_IB_DATA%var%sf => ib_markers%sf(0:m, 0:n, 0:p) #ifndef MFC_POST_PROCESS - MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) - MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) + if (store_levelset) then + MPI_IO_levelset_DATA%var%sf => levelset%sf(0:m, 0:n, 0:p, 1:num_ibs) + MPI_IO_levelsetnorm_DATA%var%sf => levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3) + end if #endif #endif @@ -277,6 +308,81 @@ contains end subroutine s_initialize_mpi_data + !! @param filtered_fluid_indicator_function volume filtered fluid indicator function + !! @param stat_q_cons_filtered 1-4 order statistical moments of volume filtered conservative variables + !! @param stat_filtered_pressure 1-4 order statistical moments of volume filtered pressure + !! @param stat_reynolds_stress 1-4 order statistics of reynolds stress tensor + !! @param stat_eff_visc 1-4 order statistics of unclosed effective viscosity tensor + !! @param stat_int_mom_exch 1-4 order statistics of interphase momentum exchange vector + impure subroutine s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, & + stat_q_cons_filtered, stat_filtered_pressure, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + + type(scalar_field), intent(in) :: filtered_fluid_indicator_function + type(vector_field), dimension(5), intent(in) :: stat_q_cons_filtered + type(scalar_field), dimension(1:4), intent(in) :: stat_filtered_pressure + type(vector_field), dimension(1:9), intent(in) :: stat_reynolds_stress + type(vector_field), dimension(1:9), intent(in) :: stat_eff_visc + type(vector_field), dimension(1:3), intent(in) :: stat_int_mom_exch + + integer, dimension(num_dims) :: sizes_glb, sizes_loc + +#ifdef MFC_MPI + + ! Generic loop iterator + integer :: i, j + integer :: ierr !< Generic flag used to identify and report MPI errors + + !total system size + integer :: alt_sys + + alt_sys = sys_size + 1 + 9*4 + 9*4 + 3*4 + 6*4 ! 
109 + + MPI_IO_DATA%var(sys_size + 1)%sf => filtered_fluid_indicator_function%sf(0:m, 0:n, 0:p) + do i = 1, 9 + do j = 1, 4 + MPI_IO_DATA%var(sys_size + 1 + (i - 1)*4 + j)%sf => stat_reynolds_stress(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do + end do + do i = 1, 9 + do j = 1, 4 + MPI_IO_DATA%var(sys_size + 37 + (i - 1)*4 + j)%sf => stat_eff_visc(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do + end do + do i = 1, 3 + do j = 1, 4 + MPI_IO_DATA%var(sys_size + 73 + (i - 1)*4 + j)%sf => stat_int_mom_exch(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do + end do + do i = 1, sys_size - 1 + do j = 1, 4 + MPI_IO_DATA%var(sys_size + 85 + (i - 1)*4 + j)%sf => stat_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p) + end do + end do + do i = 1, 4 + MPI_IO_DATA%var(sys_size + 105 + i)%sf => stat_filtered_pressure(i)%sf(0:m, 0:n, 0:p) + end do + + ! Define global(g) and local(l) sizes for flow variables + sizes_glb(1) = m_glb + 1; sizes_loc(1) = m + 1 + if (n > 0) then + sizes_glb(2) = n_glb + 1; sizes_loc(2) = n + 1 + if (p > 0) then + sizes_glb(3) = p_glb + 1; sizes_loc(3) = p + 1 + end if + end if + + ! Define the view for each variable + do i = sys_size, alt_sys + call MPI_TYPE_CREATE_SUBARRAY(num_dims, sizes_glb, sizes_loc, start_idx, & + MPI_ORDER_FORTRAN, mpi_p, MPI_IO_DATA%view(i), ierr) + call MPI_TYPE_COMMIT(MPI_IO_DATA%view(i), ierr) + end do + +#endif + + end subroutine s_initialize_mpi_data_filtered + !! @param q_cons_vf Conservative variables subroutine s_initialize_mpi_data_ds(q_cons_vf) @@ -1119,6 +1225,225 @@ contains end subroutine s_mpi_sendrecv_variables_buffers + !> The goal of this procedure is to populate the buffers of + !! any scalarfield variable by communicating + !! with the neighboring processors. + !! @param q_comm scalarfield variable + !! @param mpi_dir MPI communication coordinate direction + !! @param pbc_loc Processor boundary condition (PBC) location + subroutine s_mpi_sendrecv_variables_buffers_scalarfield(q_comm, & + mpi_dir, & + pbc_loc) + + type(scalar_field), intent(inout) :: q_comm + integer, intent(in) :: mpi_dir, pbc_loc + + integer :: i, j, k, l, r, q !< Generic loop iterators + + integer :: buffer_counts(1:3), buffer_count + + type(int_bounds_info) :: boundary_conditions(1:3) + integer :: beg_end(1:2), grid_dims(1:3) + integer :: dst_proc, src_proc, recv_tag, send_tag + + logical :: beg_end_geq_0 + + integer :: pack_offset, unpack_offset + +#ifdef MFC_MPI + integer :: ierr !< Generic flag used to identify and report MPI errors + + call nvtxStartRange("RHS-COMM-PACKBUF") + + buffer_counts = (/ & + buff_size*(n + 1)*(p + 1), & + buff_size*(m + 2*buff_size + 1)*(p + 1), & + buff_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & + /) + + buffer_count = buffer_counts(mpi_dir) + boundary_conditions = (/bc_x, bc_y, bc_z/) + beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) + beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 + + ! Implements: + ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc + ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] + ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] + ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] + ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] + + send_tag = f_logical_to_int(.not. 
f_xor(beg_end_geq_0, pbc_loc == 1))
+        recv_tag = f_logical_to_int(pbc_loc == 1)
+
+        dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0)))
+        src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1))
+
+        grid_dims = (/m, n, p/)
+
+        pack_offset = 0
+        if (f_xor(pbc_loc == 1, beg_end_geq_0)) then
+            pack_offset = grid_dims(mpi_dir) - buff_size + 1
+        end if
+
+        unpack_offset = 0
+        if (pbc_loc == 1) then
+            unpack_offset = grid_dims(mpi_dir) + buff_size + 1
+        end if
+
+        ! Pack Buffer to Send
+        #:for mpi_dir in [1, 2, 3]
+            if (mpi_dir == ${mpi_dir}$) then
+                #:if mpi_dir == 1
+                    ! Packing buffer along x
+                    $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
+                    do l = 0, p
+                        do k = 0, n
+                            do j = 0, buff_size - 1
+                                r = (j + buff_size*(k + (n + 1)*l))
+                                buff_send_scalarfield(r) = q_comm%sf(j + pack_offset, k, l)
+                            end do
+                        end do
+                    end do
+
+                #:elif mpi_dir == 2
+                    ! Packing buffer along y
+                    $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
+                    do l = 0, p
+                        do k = 0, buff_size - 1
+                            do j = -buff_size, m + buff_size
+                                r = ((j + buff_size) + (m + 2*buff_size + 1)* &
+                                     (k + buff_size*l))
+                                buff_send_scalarfield(r) = q_comm%sf(j, k + pack_offset, l)
+                            end do
+                        end do
+                    end do
+
+                #:else
+                    ! Packing buffer along z
+                    $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
+                    do l = 0, buff_size - 1
+                        do k = -buff_size, n + buff_size
+                            do j = -buff_size, m + buff_size
+                                r = ((j + buff_size) + (m + 2*buff_size + 1)* &
+                                     ((k + buff_size) + (n + 2*buff_size + 1)*l))
+                                buff_send_scalarfield(r) = q_comm%sf(j, k, l + pack_offset)
+                            end do
+                        end do
+                    end do
+
+                #:endif
+            end if
+        #:endfor
+        call nvtxEndRange ! Packbuf
+
+        ! Send/Recv
+#ifdef MFC_SIMULATION
+        #:for rdma_mpi in [False, True]
+            if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then
+                #:if rdma_mpi
+                    #:call GPU_HOST_DATA(use_device='[buff_send_scalarfield, buff_recv_scalarfield]')
+                        call nvtxStartRange("RHS-COMM-SENDRECV-RDMA")
+
+                        call MPI_SENDRECV( &
+                            buff_send_scalarfield, buffer_count, mpi_p, dst_proc, send_tag, &
+                            buff_recv_scalarfield, buffer_count, mpi_p, src_proc, recv_tag, &
+                            MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+
+                        call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
+
+                    #:endcall GPU_HOST_DATA
+                    $:GPU_WAIT()
+                #:else
+                    call nvtxStartRange("RHS-COMM-DEV2HOST")
+                    $:GPU_UPDATE(host='[buff_send_scalarfield]')
+                    call nvtxEndRange
+                    call nvtxStartRange("RHS-COMM-SENDRECV-NO-RDMA")
+
+                    call MPI_SENDRECV( &
+                        buff_send_scalarfield, buffer_count, mpi_p, dst_proc, send_tag, &
+                        buff_recv_scalarfield, buffer_count, mpi_p, src_proc, recv_tag, &
+                        MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+
+                    call nvtxEndRange ! RHS-MPI-SENDRECV-(NO)-RDMA
+
+                    call nvtxStartRange("RHS-COMM-HOST2DEV")
+                    $:GPU_UPDATE(device='[buff_recv_scalarfield]')
+                    call nvtxEndRange
+                #:endif
+            end if
+        #:endfor
+#else
+        call MPI_SENDRECV( &
+            buff_send_scalarfield, buffer_count, mpi_p, dst_proc, send_tag, &
+            buff_recv_scalarfield, buffer_count, mpi_p, src_proc, recv_tag, &
+            MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)
+#endif
+
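+        ! Note (editorial): the scalar-field halo buffers reuse the flattened
+        ! packing order of buff_send/buff_recv in s_mpi_sendrecv_variables_buffers,
+        ! minus the variable index, so one exchange moves at most
+        ! halo_size_sf + 1 values for a single field.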
+        ! Unpack Received Buffer
+        call nvtxStartRange("RHS-COMM-UNPACKBUF")
+        #:for mpi_dir in [1, 2, 3]
+            if (mpi_dir == ${mpi_dir}$) then
+                #:if mpi_dir == 1
+                    ! Unpacking buffer along x
+                    $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
+                    do l = 0, p
+                        do k = 0, n
+                            do j = -buff_size, -1
+                                r = (j + buff_size*((k + 1) + (n + 1)*l))
+                                q_comm%sf(j + unpack_offset, k, l) = buff_recv_scalarfield(r)
+#if defined(__INTEL_COMPILER)
+                                if (ieee_is_nan(q_comm%sf(j + unpack_offset, k, l))) then
+                                    print *, "Error", j, k, l
+                                    error stop "NaN(s) in recv"
+                                end if
+#endif
+                            end do
+                        end do
+                    end do
+
+                #:elif mpi_dir == 2
+                    ! Unpacking buffer along y
+                    $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
+                    do l = 0, p
+                        do k = -buff_size, -1
+                            do j = -buff_size, m + buff_size
+                                r = ((j + buff_size) + (m + 2*buff_size + 1)* &
+                                     ((k + buff_size) + buff_size*l))
+                                q_comm%sf(j, k + unpack_offset, l) = buff_recv_scalarfield(r)
+#if defined(__INTEL_COMPILER)
+                                if (ieee_is_nan(q_comm%sf(j, k + unpack_offset, l))) then
+                                    print *, "Error", j, k, l
+                                    error stop "NaN(s) in recv"
+                                end if
+#endif
+                            end do
+                        end do
+                    end do
+
+                #:else
+                    ! Unpacking buffer along z
+                    $:GPU_PARALLEL_LOOP(collapse=3,private='[r]')
+                    do l = -buff_size, -1
+                        do k = -buff_size, n + buff_size
+                            do j = -buff_size, m + buff_size
+                                r = ((j + buff_size) + (m + 2*buff_size + 1)* &
+                                     ((k + buff_size) + (n + 2*buff_size + 1)* &
+                                      (l + buff_size)))
+                                q_comm%sf(j, k, l + unpack_offset) = buff_recv_scalarfield(r)
+#if defined(__INTEL_COMPILER)
+                                if (ieee_is_nan(q_comm%sf(j, k, l + unpack_offset))) then
+                                    print *, "Error", j, k, l
+                                    error stop "NaN(s) in recv"
+                                end if
+#endif
+                            end do
+                        end do
+                    end do
+
+                #:endif
+            end if
+        #:endfor
+        call nvtxEndRange
+#endif
+
+    end subroutine s_mpi_sendrecv_variables_buffers_scalarfield
+
     !> The purpose of this procedure is to optimally decompose
     !! the computational domain among the available processors.
     !! This is performed by attempting to award each processor,
@@ -1216,6 +1541,16 @@ contains
                 end if
             end do
+
+        ! Decompose domain into z-slabs; (p + 1) must divide evenly among the ranks
+        else if (slab_domain_decomposition) then
+            num_procs_x = 1
+            num_procs_y = 1
+            num_procs_z = num_procs
+            ierr = -1
+            if (mod((p + 1), num_procs_z) == 0) then
+                ierr = 0
+            end if
 else if (cyl_coord .and. p > 0) then
@@ -1849,6 +2184,12 @@ contains
 #ifdef MFC_MPI
         deallocate (buff_send, buff_recv)
+#ifdef MFC_SIMULATION
+        if (volume_filtering_momentum_eqn) then
+            @:DEALLOCATE(buff_send_scalarfield)
+            @:DEALLOCATE(buff_recv_scalarfield)
+        end if
+#endif
 #endif
 end subroutine s_finalize_mpi_common_module
diff --git a/src/post_process/m_data_input.f90 b/src/post_process/m_data_input.f90
index de1517e079..72c200a15c 100644
--- a/src/post_process/m_data_input.f90
+++ b/src/post_process/m_data_input.f90
@@ -64,6 +64,13 @@ end subroutine s_read_abstract_data_files
 ! 
type(scalar_field), public :: ib_markers !< type(integer_field), public :: ib_markers + type(scalar_field), public :: filtered_fluid_indicator_function + type(vector_field), allocatable, dimension(:), public :: stat_reynolds_stress + type(vector_field), allocatable, dimension(:), public :: stat_eff_visc + type(vector_field), allocatable, dimension(:), public :: stat_int_mom_exch + type(vector_field), allocatable, dimension(:), public :: stat_q_cons_filtered + type(scalar_field), allocatable, dimension(:), public :: stat_filtered_pressure + procedure(s_read_abstract_data_files), pointer :: s_read_data_files => null() contains @@ -218,6 +225,58 @@ impure subroutine s_allocate_field_arrays(local_start_idx, end_x, end_y, end_z) end subroutine s_allocate_field_arrays + !> Helper subroutine to allocate filtered variable arrays for given dimensionality + !! @param start_idx Starting index for allocation + !! @param end_x, end_y, end_z End indices for each dimension + impure subroutine s_allocate_filtered_arrays(local_start_idx, end_x, end_y, end_z) + + integer, intent(in) :: local_start_idx, end_x, end_y, end_z + integer :: i, j + + allocate (filtered_fluid_indicator_function%sf(local_start_idx:end_x, local_start_idx:end_y, local_start_idx:end_z)) + + do i = 1, sys_size - 1 + allocate (stat_q_cons_filtered(i)%vf(1:4)) + end do + do i = 1, sys_size - 1 + do j = 1, 4 + allocate (stat_q_cons_filtered(i)%vf(j)%sf(local_start_idx:end_x, local_start_idx:end_y, local_start_idx:end_z)) + end do + end do + + do i = 1, 4 + allocate (stat_filtered_pressure(i)%sf(local_start_idx:end_x, local_start_idx:end_y, local_start_idx:end_z)) + end do + + do i = 1, 9 + allocate (stat_reynolds_stress(i)%vf(1:4)) + end do + do i = 1, 9 + do j = 1, 4 + allocate (stat_reynolds_stress(i)%vf(j)%sf(local_start_idx:end_x, local_start_idx:end_y, local_start_idx:end_z)) + end do + end do + + do i = 1, 9 + allocate (stat_eff_visc(i)%vf(1:4)) + end do + do i = 1, 9 + do j = 1, 4 + allocate (stat_eff_visc(i)%vf(j)%sf(local_start_idx:end_x, local_start_idx:end_y, local_start_idx:end_z)) + end do + end do + + do i = 1, 3 + allocate (stat_int_mom_exch(i)%vf(1:4)) + end do + do i = 1, 3 + do j = 1, 4 + allocate (stat_int_mom_exch(i)%vf(j)%sf(local_start_idx:end_x, local_start_idx:end_y, local_start_idx:end_z)) + end do + end do + + end subroutine s_allocate_filtered_arrays + !> This subroutine is called at each time-step that has to !! be post-processed in order to read the raw data files !! present in the corresponding time-step directory and to @@ -449,6 +508,10 @@ impure subroutine s_read_parallel_data_files(t_step) call s_read_parallel_conservative_data(t_step, m_MOK, n_MOK, p_MOK, WP_MOK, MOK, str_MOK, NVARS_MOK) + if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then + call s_read_parallel_filtered_data(t_step, m_MOK, n_MOK, p_MOK, WP_MOK, MOK, str_MOK, NVARS_MOK) + end if + deallocate (x_cb_glb, y_cb_glb, z_cb_glb) if (bc_io) then @@ -581,6 +644,81 @@ impure subroutine s_read_parallel_conservative_data(t_step, m_MOK, n_MOK, p_MOK, end subroutine s_read_parallel_conservative_data #endif +#ifdef MFC_MPI + !> Helper subroutine to read parallel volume filtered data + !! @param t_step Current time-step + !! @param m_MOK, n_MOK, p_MOK MPI offset kinds for dimensions + !! 
@param WP_MOK, MOK, str_MOK, NVARS_MOK Other MPI offset kinds + impure subroutine s_read_parallel_filtered_data(t_step, m_MOK, n_MOK, p_MOK, WP_MOK, MOK, str_MOK, NVARS_MOK) + + integer, intent(in) :: t_step + integer(KIND=MPI_OFFSET_KIND), intent(inout) :: m_MOK, n_MOK, p_MOK + integer(KIND=MPI_OFFSET_KIND), intent(inout) :: WP_MOK, MOK, str_MOK, NVARS_MOK + + integer :: ifile, ierr, data_size + integer, dimension(MPI_STATUS_SIZE) :: status + integer(KIND=MPI_OFFSET_KIND) :: disp, var_MOK + character(LEN=path_len + 2*name_len) :: file_loc + logical :: file_exist + character(len=10) :: t_step_string + integer :: alt_sys + integer :: i + + ! Open the file to read volume filtered variables + write (file_loc, '(I0,A)') t_step, '.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + + if (file_exist) then + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + + ! Initialize MPI data I/O + + call s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, & + stat_q_cons_filtered, stat_filtered_pressure, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) + + ! Size of local arrays + data_size = (m + 1)*(n + 1)*(p + 1) + + ! Total size of system + alt_sys = sys_size + 1 + 9*4 + 9*4 + 3*4 + 6*4 ! 109, filtered indicator, stats of: R_u, R_mu, F_imet, q_cons_filtered + + ! Resize some integers so MPI can read even the biggest file + m_MOK = int(m_glb + 1, MPI_OFFSET_KIND) + n_MOK = int(n_glb + 1, MPI_OFFSET_KIND) + p_MOK = int(p_glb + 1, MPI_OFFSET_KIND) + WP_MOK = int(8._wp, MPI_OFFSET_KIND) + MOK = int(1._wp, MPI_OFFSET_KIND) + str_MOK = int(name_len, MPI_OFFSET_KIND) + NVARS_MOK = int(alt_sys, MPI_OFFSET_KIND) + + call s_setup_mpi_io_params(data_size, m_MOK, n_MOK, p_MOK, WP_MOK, MOK, str_MOK, NVARS_MOK) + + ! Read the data for each variable + do i = sys_size, alt_sys + var_MOK = int(i, MPI_OFFSET_KIND) + + ! Initial displacement to skip at beginning of file + disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1) + + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, & + mpi_p, status, ierr) + end do + + call s_mpi_barrier() + call MPI_FILE_CLOSE(ifile, ierr) + + call s_read_ib_data_files(trim(case_dir)//'/restart_data'//trim(mpiiofs)) + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if + + end subroutine s_read_parallel_filtered_data +#endif + !> Computation of parameters, allocation procedures, and/or !! any other tasks needed to properly setup the module impure subroutine s_initialize_data_input_module @@ -594,10 +732,20 @@ impure subroutine s_initialize_data_input_module allocate (q_prim_vf(1:sys_size)) allocate (q_cons_temp(1:sys_size)) + if (q_filtered_wrt) then + allocate (stat_q_cons_filtered(1:sys_size - 1)) + allocate (stat_filtered_pressure(1:4)) + allocate (stat_reynolds_stress(1:9)) + allocate (stat_eff_visc(1:9)) + allocate (stat_int_mom_exch(1:3)) + end if + ! Allocating the parts of the conservative and primitive variables ! that do require the direct knowledge of the dimensionality of ! the simulation using helper subroutine + if (q_filtered_wrt) call s_allocate_filtered_arrays(-buff_size, m + buff_size, n + buff_size, p + buff_size) + ! Simulation is at least 2D if (n > 0) then ! 
Simulation is 3D @@ -642,7 +790,7 @@ end subroutine s_initialize_data_input_module !> Deallocation procedures for the module impure subroutine s_finalize_data_input_module - integer :: i !< Generic loop iterator + integer :: i, j !< Generic loop iterator ! Deallocating the conservative and primitive variables do i = 1, sys_size @@ -677,6 +825,47 @@ impure subroutine s_finalize_data_input_module s_read_data_files => null() + if (q_filtered_wrt) then + deallocate (filtered_fluid_indicator_function%sf) + + do i = 1, sys_size - 1 + do j = 1, 4 + deallocate (stat_q_cons_filtered(i)%vf(j)%sf) + end do + deallocate (stat_q_cons_filtered(i)%vf) + end do + deallocate (stat_q_cons_filtered) + + do i = 1, 4 + deallocate (stat_filtered_pressure(i)%sf) + end do + deallocate (stat_filtered_pressure) + + do i = 1, 9 + do j = 1, 4 + deallocate (stat_reynolds_stress(i)%vf(j)%sf) + end do + deallocate (stat_reynolds_stress(i)%vf) + end do + deallocate (stat_reynolds_stress) + + do i = 1, 9 + do j = 1, 4 + deallocate (stat_eff_visc(i)%vf(j)%sf) + end do + deallocate (stat_eff_visc(i)%vf) + end do + deallocate (stat_eff_visc) + + do i = 1, 3 + do j = 1, 4 + deallocate (stat_int_mom_exch(i)%vf(j)%sf) + end do + deallocate (stat_int_mom_exch(i)%vf) + end do + deallocate (stat_int_mom_exch) + end if + end subroutine s_finalize_data_input_module end module m_data_input diff --git a/src/post_process/m_global_parameters.fpp b/src/post_process/m_global_parameters.fpp index 2274e1152f..930123a6d9 100644 --- a/src/post_process/m_global_parameters.fpp +++ b/src/post_process/m_global_parameters.fpp @@ -348,6 +348,10 @@ module m_global_parameters real(wp) :: Bx0 !< Constant magnetic field in the x-direction (1D) real(wp) :: wall_time, wall_time_avg !< Wall time measurements + logical :: periodic_ibs !< Periodic immersed boundaries + logical :: store_levelset !< Store immersed boundary levelset info + logical :: slab_domain_decomposition !< MPI domain decomposition into slabs + logical :: q_filtered_wrt !< write FFT filtered quantities contains @@ -517,6 +521,11 @@ contains ! MHD Bx0 = dflt_real + periodic_ibs = .false. + store_levelset = .true. + slab_domain_decomposition = .false. + q_filtered_wrt = .false. 
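+
+        ! Usage sketch (illustrative, not exhaustive): these logicals arrive via
+        ! post_process.inp, typically generated from the python case dictionary, e.g.
+        !     "q_filtered_wrt"            : "T",  # read/visualize filtered statistics
+        !     "store_levelset"            : "F",  # skip levelset I/O, recompute on the fly
+        !     "slab_domain_decomposition" : "T",  # decompose the MPI domain into z-slabs
+        !     "periodic_ibs"              : "T",  # immersed boundaries wrap periodically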
+ end subroutine s_assign_default_values_to_user_inputs !> Computation of parameters, allocation procedures, and/or @@ -832,16 +841,25 @@ contains chemxe = species_idx%end #ifdef MFC_MPI - allocate (MPI_IO_DATA%view(1:sys_size)) - allocate (MPI_IO_DATA%var(1:sys_size)) - do i = 1, sys_size - if (down_sample) then - allocate (MPI_IO_DATA%var(i)%sf(-1:m + 1, -1:n + 1, -1:p + 1)) - else + if (q_filtered_wrt) then + allocate (MPI_IO_DATA%view(1:sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4)) + allocate (MPI_IO_DATA%var(1:sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4)) + do i = 1, sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4 allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) - end if - MPI_IO_DATA%var(i)%sf => null() - end do + MPI_IO_DATA%var(i)%sf => null() + end do + else + allocate (MPI_IO_DATA%view(1:sys_size)) + allocate (MPI_IO_DATA%var(1:sys_size)) + do i = 1, sys_size + if (down_sample) then + allocate (MPI_IO_DATA%var(i)%sf(-1:m + 1, -1:n + 1, -1:p + 1)) + else + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + end if + MPI_IO_DATA%var(i)%sf => null() + end do + end if if (ib) allocate (MPI_IO_IB_DATA%var%sf(0:m, 0:n, 0:p)) #endif @@ -1016,6 +1034,12 @@ contains MPI_IO_DATA%var(i)%sf => null() end do + if (q_filtered_wrt) then + do i = sys_size + 1, sys_size + 1 + 4*9 + 4*9 + 3*4 + 6*4 + MPI_IO_DATA%var(i)%sf => null() + end do + end if + deallocate (MPI_IO_DATA%var) deallocate (MPI_IO_DATA%view) end if diff --git a/src/post_process/m_mpi_proxy.fpp b/src/post_process/m_mpi_proxy.fpp index 8d72568d3e..7d8f15ee5e 100644 --- a/src/post_process/m_mpi_proxy.fpp +++ b/src/post_process/m_mpi_proxy.fpp @@ -105,7 +105,8 @@ contains & 'adv_n', 'ib', 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', & & 'surface_tension', 'hyperelasticity', 'bubbles_lagrange', & & 'output_partial_domain', 'relativity', 'cont_damage', 'bc_io', & - & 'down_sample','fft_wrt' ] + & 'down_sample','fft_wrt', 'periodic_ibs', 'store_levelset', & + & 'slab_domain_decomposition', 'q_filtered_wrt' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor diff --git a/src/post_process/m_start_up.fpp b/src/post_process/m_start_up.fpp index 151f25ccee..94892f1ced 100644 --- a/src/post_process/m_start_up.fpp +++ b/src/post_process/m_start_up.fpp @@ -118,7 +118,9 @@ contains lag_id_wrt, lag_pos_wrt, lag_pos_prev_wrt, lag_vel_wrt, & lag_rad_wrt, lag_rvel_wrt, lag_r0_wrt, lag_rmax_wrt, & lag_rmin_wrt, lag_dphidt_wrt, lag_pres_wrt, lag_mv_wrt, & - lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt + lag_mg_wrt, lag_betaT_wrt, lag_betaC_wrt, & + periodic_ibs, store_levelset, slab_domain_decomposition, & + q_filtered_wrt ! Inquiring the status of the post_process.inp file file_loc = 'post_process.inp' @@ -374,6 +376,61 @@ contains end if end do + ! Adding filtered quantities + if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then + ! filtered fluid indicator + q_sf = filtered_fluid_indicator_function%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A)') 'filtered_fluid_indicator_function' + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + + ! 
filtered vars stats + do i = 1, 9 + do j = 1, 4 + q_sf = stat_reynolds_stress(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_reynolds_stress', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + end do + do i = 1, 9 + do j = 1, 4 + q_sf = stat_eff_visc(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_eff_visc', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + end do + do i = 1, 3 + do j = 1, 4 + q_sf = stat_int_mom_exch(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_int_mom_exch', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + end do + do i = 1, sys_size - 1 + do j = 1, 4 + q_sf = stat_q_cons_filtered(i)%vf(j)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0,A,I0)') 'stat_q_cons_filtered', i, '_m', j + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + end do + do i = 1, 4 + q_sf = stat_filtered_pressure(i)%sf(x_beg:x_end, y_beg:y_end, z_beg:z_end) + write (varname, '(A,I0)') 'stat_filtered_pressure_m', i + call s_write_variable_to_formatted_database_file(varname, t_step) + + varname(:) = ' ' + end do + end if + ! Adding the species' concentrations to the formatted database file if (chemistry) then do i = 1, num_species diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index fb26a14184..f856d85b21 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -825,45 +825,47 @@ contains call MPI_FILE_CLOSE(ifile, ierr) - ! Levelset - write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + if (store_levelset) then + ! Levelset + write (file_loc, '(A)') 'levelset.dat' + file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist .and. proc_rank == 0) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - ! Initial displacement to skip at beginning of file - disp = 0 + ! Initial displacement to skip at beginning of file + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelset_DATA%var%sf, data_size*num_ibs, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelset_DATA%var%sf, data_size*num_ibs, & + mpi_p, status, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_CLOSE(ifile, ierr) - ! Levelset Norm - write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist .and. 
proc_rank == 0) then - call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) - end if - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & - mpi_info_int, ifile, ierr) + ! Levelset Norm + write (file_loc, '(A)') 'levelset_norm.dat' + file_loc = trim(restart_dir)//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) + if (file_exist .and. proc_rank == 0) then + call MPI_FILE_DELETE(file_loc, mpi_info_int, ierr) + end if + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, ior(MPI_MODE_WRONLY, MPI_MODE_CREATE), & + mpi_info_int, ifile, ierr) - ! Initial displacement to skip at beginning of file - disp = 0 + ! Initial displacement to skip at beginning of file + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size*num_ibs*3, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_WRITE_ALL(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size*num_ibs*3, & + mpi_p, status, ierr) - call MPI_FILE_CLOSE(ifile, ierr) + call MPI_FILE_CLOSE(ifile, ierr) + end if end if if (ib) then diff --git a/src/pre_process/m_global_parameters.fpp b/src/pre_process/m_global_parameters.fpp index 00d979fd7b..972c57cfc5 100644 --- a/src/pre_process/m_global_parameters.fpp +++ b/src/pre_process/m_global_parameters.fpp @@ -289,6 +289,9 @@ module m_global_parameters !! to the next time-step. logical :: fft_wrt + logical :: periodic_ibs + logical :: store_levelset + logical :: slab_domain_decomposition contains @@ -576,6 +579,10 @@ contains Bx0 = dflt_real + periodic_ibs = .false. + store_levelset = .true. + slab_domain_decomposition = .false. + end subroutine s_assign_default_values_to_user_inputs !> Computation of parameters, allocation procedures, and/or diff --git a/src/pre_process/m_mpi_proxy.fpp b/src/pre_process/m_mpi_proxy.fpp index b45526164c..a67b5b44e4 100644 --- a/src/pre_process/m_mpi_proxy.fpp +++ b/src/pre_process/m_mpi_proxy.fpp @@ -59,7 +59,8 @@ contains & 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'hyperelasticity', 'pre_stress', 'elliptic_smoothing', 'viscous',& & 'bubbles_lagrange', 'bc_io', 'mhd', 'relativity', 'cont_damage', & - & 'igr', 'down_sample','fft_wrt' ] + & 'igr', 'down_sample','fft_wrt', & + & 'periodic_ibs', 'store_levelset', 'slab_domain_decomposition' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor call MPI_BCAST(fluid_rho(1), num_fluids_max, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) diff --git a/src/pre_process/m_start_up.fpp b/src/pre_process/m_start_up.fpp index 2129ae74ec..555d43acd6 100644 --- a/src/pre_process/m_start_up.fpp +++ b/src/pre_process/m_start_up.fpp @@ -152,7 +152,8 @@ contains elliptic_smoothing, elliptic_smoothing_iters, & viscous, bubbles_lagrange, bc_x, bc_y, bc_z, num_bc_patches, & patch_bc, Bx0, relativity, cont_damage, igr, igr_order, & - down_sample, recon_type, muscl_order, fft_wrt + down_sample, recon_type, muscl_order, fft_wrt, & + periodic_ibs, store_levelset, slab_domain_decomposition ! 
Inquiring the status of the pre_process.inp file file_loc = 'pre_process.inp' diff --git a/src/simulation/m_additional_forcing.fpp b/src/simulation/m_additional_forcing.fpp new file mode 100644 index 0000000000..a6b391cc67 --- /dev/null +++ b/src/simulation/m_additional_forcing.fpp @@ -0,0 +1,181 @@ +#:include 'macros.fpp' + +module m_additional_forcing + use m_derived_types + + use m_global_parameters + + use m_ibm + + use m_mpi_proxy + + use m_volume_filtering + + implicit none + + private; public :: s_initialize_additional_forcing_module, & + s_finalize_additional_forcing_module, s_compute_periodic_forcing + + type(scalar_field), allocatable, dimension(:) :: q_periodic_force + real(wp) :: volfrac_phi + integer :: N_x_total_glb + real(wp) :: avg_coeff + real(wp) :: spatial_rho, spatial_u, spatial_eps + real(wp), allocatable, dimension(:) :: rho_window, u_window, eps_window + real(wp) :: sum_rho, sum_u, sum_eps + real(wp) :: phase_rho, phase_u, phase_eps + real(wp) :: tau_dt + integer :: forcing_window, window_loc, window_fill + + $:GPU_DECLARE(create='[q_periodic_force, volfrac_phi, N_x_total_glb, avg_coeff, tau_dt]') + $:GPU_DECLARE(create='[spatial_rho, spatial_u, spatial_eps, phase_rho, phase_u, phase_eps]') + +contains + + subroutine s_initialize_additional_forcing_module + integer :: i + + @:ALLOCATE(q_periodic_force(1:3)) + do i = 1, 3 + @:ALLOCATE(q_periodic_force(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(q_periodic_force(i)) + end do + + volfrac_phi = num_ibs*4._wp/3._wp*pi*patch_ib(1)%radius**3/((x_domain%end - x_domain%beg)*(y_domain%end - y_domain%beg)*(z_domain%end - z_domain%beg)) + $:GPU_UPDATE(device='[volfrac_phi]') + + N_x_total_glb = (m_glb + 1)*(n_glb + 1)*(p_glb + 1) + $:GPU_UPDATE(device='[N_x_total_glb]') + + avg_coeff = 1._wp/(real(N_x_total_glb, wp)*(1._wp - volfrac_phi)) + $:GPU_UPDATE(device='[avg_coeff]') + + forcing_window = 1 ! forcing time window size + window_loc = 0 + window_fill = 0 + + @:ALLOCATE(rho_window(forcing_window)) + @:ALLOCATE(u_window(forcing_window)) + @:ALLOCATE(eps_window(forcing_window)) + + rho_window = 0.0_wp + u_window = 0.0_wp + eps_window = 0.0_wp + + sum_rho = 0.0_wp + sum_u = 0.0_wp + sum_eps = 0.0_wp + + phase_rho = 0._wp + phase_u = 0._wp + phase_eps = 0._wp + + tau_dt = 1._wp/(0.5_wp*dt) + $:GPU_UPDATE(device='[tau_dt]') + + end subroutine s_initialize_additional_forcing_module + + !< compute the space and time average of quantities, compute the periodic forcing terms described in Khalloufi and Capecelatro + subroutine s_compute_periodic_forcing(rhs_vf, q_cons_vf, q_prim_vf, t_step) + type(scalar_field), dimension(sys_size), intent(in) :: rhs_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + integer, intent(in) :: t_step + real(wp) :: spatial_rho_glb, spatial_u_glb, spatial_eps_glb + integer :: i, j, k + + ! zero spatial averages + spatial_rho = 0._wp + spatial_u = 0._wp + spatial_eps = 0._wp + $:GPU_UPDATE(device='[spatial_rho, spatial_u, spatial_eps]') + + ! compute spatial averages + $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[spatial_rho, spatial_u, spatial_eps]]', reductionOp='[+]') + do i = 0, m + do j = 0, n + do k = 0, p + spatial_rho = spatial_rho + (q_cons_vf(1)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k)) ! rho + spatial_u = spatial_u + (q_cons_vf(2)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k)) ! 
rho*u + spatial_eps = spatial_eps + ((q_cons_vf(5)%sf(i, j, k) - 0.5_wp*( & + q_cons_vf(2)%sf(i, j, k)**2 + & + q_cons_vf(3)%sf(i, j, k)**2 + & + q_cons_vf(4)%sf(i, j, k)**2)/q_cons_vf(1)%sf(i, j, k))*fluid_indicator_function%sf(i, j, k)) ! rho*e + end do + end do + end do + + $:GPU_UPDATE(host='[spatial_rho, spatial_u, spatial_eps]') + + ! reduction sum across entire domain + call s_mpi_allreduce_sum(spatial_rho, spatial_rho_glb) + call s_mpi_allreduce_sum(spatial_u, spatial_u_glb) + call s_mpi_allreduce_sum(spatial_eps, spatial_eps_glb) + + spatial_rho_glb = spatial_rho_glb*avg_coeff + spatial_u_glb = spatial_u_glb*avg_coeff + spatial_eps_glb = spatial_eps_glb*avg_coeff + + ! update time average window location + window_loc = 1 + mod(t_step - 1, forcing_window) + + ! update time average sum + sum_rho = sum_rho - rho_window(window_loc) + spatial_rho_glb + sum_u = sum_u - u_window(window_loc) + spatial_u_glb + sum_eps = sum_eps - eps_window(window_loc) + spatial_eps_glb + + ! update window arrays + rho_window(window_loc) = spatial_rho_glb + u_window(window_loc) = spatial_u_glb + eps_window(window_loc) = spatial_eps_glb + + ! update number of time samples + if (window_fill < forcing_window) window_fill = window_fill + 1 + + ! compute phase averages + phase_rho = sum_rho/real(window_fill, wp) + phase_u = sum_u/real(window_fill, wp) + phase_eps = sum_eps/real(window_fill, wp) + $:GPU_UPDATE(device='[phase_rho, phase_u, phase_eps]') + + ! compute periodic forcing terms for mass, momentum, energy + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + ! f_rho + q_periodic_force(1)%sf(i, j, k) = (rho_inf_ref - phase_rho)*tau_dt + + ! f_u + q_periodic_force(2)%sf(i, j, k) = (rho_inf_ref*u_inf_ref - phase_u)*tau_dt + + ! f_E + q_periodic_force(3)%sf(i, j, k) = (P_inf_ref*gammas(1) - phase_eps)*tau_dt & + + q_cons_vf(2)%sf(i, j, k)*q_periodic_force(2)%sf(i, j, k)/q_cons_vf(1)%sf(i, j, k) + end do + end do + end do + + ! add the forcing terms to the RHS + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + rhs_vf(1)%sf(i, j, k) = rhs_vf(1)%sf(i, j, k) + q_periodic_force(1)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) ! continuity + rhs_vf(2)%sf(i, j, k) = rhs_vf(2)%sf(i, j, k) + q_periodic_force(2)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) ! x momentum + rhs_vf(5)%sf(i, j, k) = rhs_vf(5)%sf(i, j, k) + q_periodic_force(3)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) ! energy + end do + end do + end do + + end subroutine s_compute_periodic_forcing + + subroutine s_finalize_additional_forcing_module + integer :: i + do i = 1, 3 + @:DEALLOCATE(q_periodic_force(i)%sf) + end do + @:DEALLOCATE(q_periodic_force) + end subroutine s_finalize_additional_forcing_module + +end module m_additional_forcing diff --git a/src/simulation/m_checker.fpp b/src/simulation/m_checker.fpp index da82e0f37b..c0ca8e851a 100644 --- a/src/simulation/m_checker.fpp +++ b/src/simulation/m_checker.fpp @@ -401,6 +401,14 @@ contains impure subroutine s_check_inputs_misc @:PROHIBIT(probe_wrt .and. fd_order == dflt_int, "fd_order must be specified for probe_wrt") @:PROHIBIT(integral_wrt .and. (.not. bubbles_euler)) + #:for X in ['x', 'y', 'z'] + #:for BOUND in ['beg', 'end'] + @:PROHIBIT(periodic_forcing .and. bc_${X}$%${BOUND}$ /= BC_PERIODIC, & + "Periodic forcing requires all BCs to be periodic") + @:PROHIBIT(volume_filtering_momentum_eqn .and. 
bc_${X}$%${BOUND}$ /= BC_PERIODIC, &
+                "Explicit filtering of flow data requires all BCs to be periodic due to the Fourier transform")
+        #:endfor
+    #:endfor
 end subroutine s_check_inputs_misc
 impure subroutine s_check_inputs_mhd
diff --git a/src/simulation/m_compute_statistics.fpp b/src/simulation/m_compute_statistics.fpp
new file mode 100644
index 0000000000..a8ef036fa4
--- /dev/null
+++ b/src/simulation/m_compute_statistics.fpp
@@ -0,0 +1,316 @@
+#:include 'macros.fpp'
+
+module m_compute_statistics
+    use m_derived_types
+
+    use m_global_parameters
+
+    use m_mpi_proxy
+
+    use m_additional_forcing
+
+    use m_nvtx
+
+    implicit none
+
+    private; public :: s_initialize_statistics_module, s_finalize_statistics_module, &
+ s_compute_statistics_momentum_unclosed_terms, s_update_statistics, &
+ s_compute_statistical_moments
+
+    ! running accumulators for computing 1st, 2nd, 3rd, and 4th order statistical moments
+    type(vector_field), allocatable, dimension(:) :: Msn_reynolds_stress
+    type(vector_field), allocatable, dimension(:) :: Msn_eff_visc
+    type(vector_field), allocatable, dimension(:) :: Msn_int_mom_exch
+    type(vector_field), allocatable, dimension(:) :: Msn_q_cons_filtered
+    type(scalar_field), allocatable, dimension(:) :: Msn_filtered_pressure
+
+    ! 1st, 2nd, 3rd, and 4th statistical moments of the unclosed terms in the volume-filtered momentum equation
+    type(vector_field), allocatable, dimension(:), public :: stat_reynolds_stress
+    type(vector_field), allocatable, dimension(:), public :: stat_eff_visc
+    type(vector_field), allocatable, dimension(:), public :: stat_int_mom_exch
+    type(vector_field), allocatable, dimension(:), public :: stat_q_cons_filtered
+    type(scalar_field), allocatable, dimension(:), public :: stat_filtered_pressure
+
+    $:GPU_DECLARE(create='[Msn_reynolds_stress, Msn_eff_visc, Msn_int_mom_exch, Msn_q_cons_filtered, Msn_filtered_pressure]')
+
+    $:GPU_DECLARE(create='[stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch, stat_q_cons_filtered, stat_filtered_pressure]')
+
+contains
+
+    subroutine s_initialize_statistics_module
+        integer :: i, j
+
+        @:ALLOCATE(Msn_reynolds_stress(1:9))
+        do i = 1, 9
+            @:ALLOCATE(Msn_reynolds_stress(i)%vf(1:4))
+        end do
+        do i = 1, 9
+            do j = 1, 4
+                @:ALLOCATE(Msn_reynolds_stress(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(Msn_reynolds_stress(i))
+        end do
+
+        @:ALLOCATE(Msn_eff_visc(1:9))
+        do i = 1, 9
+            @:ALLOCATE(Msn_eff_visc(i)%vf(1:4))
+        end do
+        do i = 1, 9
+            do j = 1, 4
+                @:ALLOCATE(Msn_eff_visc(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(Msn_eff_visc(i))
+        end do
+
+        @:ALLOCATE(Msn_int_mom_exch(1:3))
+        do i = 1, 3
+            @:ALLOCATE(Msn_int_mom_exch(i)%vf(1:4))
+        end do
+        do i = 1, 3
+            do j = 1, 4
+                @:ALLOCATE(Msn_int_mom_exch(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(Msn_int_mom_exch(i))
+        end do
+
+        @:ALLOCATE(Msn_q_cons_filtered(1:sys_size-1))
+        do i = 1, sys_size - 1
+            @:ALLOCATE(Msn_q_cons_filtered(i)%vf(1:4))
+        end do
+        do i = 1, sys_size - 1
+            do j = 1, 4
+                @:ALLOCATE(Msn_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(Msn_q_cons_filtered(i))
+        end do
+
+        @:ALLOCATE(Msn_filtered_pressure(1:4))
+        do i = 1, 4
+            @:ALLOCATE(Msn_filtered_pressure(i)%sf(0:m, 0:n, 0:p))
+            @:ACC_SETUP_SFs(Msn_filtered_pressure(i))
+        end do
+
+        @:ALLOCATE(stat_reynolds_stress(1:9))
+        do i = 1, 9
+            @:ALLOCATE(stat_reynolds_stress(i)%vf(1:4))
+        end do
+        do i = 1, 9
+            do j = 1, 4
+                @:ALLOCATE(stat_reynolds_stress(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(stat_reynolds_stress(i))
+        end do
+
+        @:ALLOCATE(stat_eff_visc(1:9))
+        do i = 1, 9
+            @:ALLOCATE(stat_eff_visc(i)%vf(1:4))
+        end do
+        do i = 1, 9
+            do j = 1, 4
+                @:ALLOCATE(stat_eff_visc(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(stat_eff_visc(i))
+        end do
+
+        @:ALLOCATE(stat_int_mom_exch(1:3))
+        do i = 1, 3
+            @:ALLOCATE(stat_int_mom_exch(i)%vf(1:4))
+        end do
+        do i = 1, 3
+            do j = 1, 4
+                @:ALLOCATE(stat_int_mom_exch(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(stat_int_mom_exch(i))
+        end do
+
+        @:ALLOCATE(stat_q_cons_filtered(1:sys_size-1))
+        do i = 1, sys_size - 1
+            @:ALLOCATE(stat_q_cons_filtered(i)%vf(1:4))
+        end do
+        do i = 1, sys_size - 1
+            do j = 1, 4
+                @:ALLOCATE(stat_q_cons_filtered(i)%vf(j)%sf(0:m, 0:n, 0:p))
+            end do
+            @:ACC_SETUP_VFs(stat_q_cons_filtered(i))
+        end do
+
+        @:ALLOCATE(stat_filtered_pressure(1:4))
+        do i = 1, 4
+            @:ALLOCATE(stat_filtered_pressure(i)%sf(0:m, 0:n, 0:p))
+            @:ACC_SETUP_SFs(stat_filtered_pressure(i))
+        end do
+
+    end subroutine s_initialize_statistics_module
+
+    subroutine s_compute_statistics_momentum_unclosed_terms(t_step, t_step_stat_start, reynolds_stress, eff_visc, int_mom_exch, q_cons_filtered, filtered_pressure)
+        type(vector_field), dimension(1:3), intent(in) :: reynolds_stress
+        type(vector_field), dimension(1:3), intent(in) :: eff_visc
+        type(scalar_field), dimension(1:3), intent(in) :: int_mom_exch
+        type(scalar_field), dimension(sys_size - 1), intent(in) :: q_cons_filtered
+        type(scalar_field), intent(in) :: filtered_pressure
+        integer, intent(in) :: t_step
+        integer, intent(in) :: t_step_stat_start
+
+        real(wp) :: ns
+        integer :: i, j
+
+        ! number of samples accumulated so far (assumes t_step > t_step_stat_start)
+        ns = real(t_step - t_step_stat_start, wp)
+
+        ! update M1, M2, M3, M4
+        do i = 1, 3
+            do j = 1, 3
+                call s_update_statistics(ns, reynolds_stress(i)%vf(j), Msn_reynolds_stress((i - 1)*3 + j)%vf)
+                call s_update_statistics(ns, eff_visc(i)%vf(j), Msn_eff_visc((i - 1)*3 + j)%vf)
+            end do
+            call s_update_statistics(ns, int_mom_exch(i), Msn_int_mom_exch(i)%vf)
+        end do
+        do i = 1, sys_size - 1
+            call s_update_statistics(ns, q_cons_filtered(i), Msn_q_cons_filtered(i)%vf)
+        end do
+        call s_update_statistics(ns, filtered_pressure, Msn_filtered_pressure)
+
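+        ! The Msn(1:4) arrays hold one-pass (Welford/Pebay-type) central-moment
+        ! accumulators; with delta = x - M1 at sample n:
+        !     M1 <- M1 + delta/n
+        !     M4 <- M4 + delta_f*delta_n2*(n**2 - 3*n + 3) + 6*delta_n2*M2 - 4*delta_n*M3
+        !     M3 <- M3 + delta_f*delta_n*(n - 2) - 3*delta_n*M2
+        !     M2 <- M2 + delta_f
+        ! where delta_n = delta/n, delta_n2 = delta_n**2, and
+        ! delta_f = delta*delta_n*(n - 1), as implemented in s_update_statistics below.
+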
+        ! compute 1st, 2nd, 3rd, 4th order statistical moments
+        if (t_step == t_step_stop - 1) then ! only compute at final time
+            do i = 1, 3
+                do j = 1, 3
+                    call s_compute_statistical_moments(ns, Msn_reynolds_stress((i - 1)*3 + j)%vf, stat_reynolds_stress((i - 1)*3 + j)%vf)
+                    call s_compute_statistical_moments(ns, Msn_eff_visc((i - 1)*3 + j)%vf, stat_eff_visc((i - 1)*3 + j)%vf)
+                end do
+                call s_compute_statistical_moments(ns, Msn_int_mom_exch(i)%vf, stat_int_mom_exch(i)%vf)
+            end do
+            do i = 1, sys_size - 1
+                call s_compute_statistical_moments(ns, Msn_q_cons_filtered(i)%vf, stat_q_cons_filtered(i)%vf)
+            end do
+            call s_compute_statistical_moments(ns, Msn_filtered_pressure, stat_filtered_pressure)
+        end if
+
+    end subroutine s_compute_statistics_momentum_unclosed_terms
+
+    subroutine s_update_statistics(ns, q_temp, Msn)
+        type(scalar_field), intent(in) :: q_temp
+        type(scalar_field), dimension(1:4), intent(inout) :: Msn
+
+        real(wp), intent(in) :: ns
+        real(wp) :: delta, delta_n, delta_n2, delta_f
+        integer :: i, j, k
+
+        ! delta is measured from the pre-update mean; M4 and M3 are advanced
+        ! before M2 because their updates use the previous-step M2 (and M3)
+        $:GPU_PARALLEL_LOOP(collapse=3, copyin='[ns]', private='[delta, delta_n, delta_n2, delta_f]')
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    delta = q_temp%sf(i, j, k) - Msn(1)%sf(i, j, k)
+                    delta_n = delta/ns
+                    delta_n2 = delta_n**2
+                    delta_f = delta*delta_n*(ns - 1._wp)
+
+                    Msn(1)%sf(i, j, k) = Msn(1)%sf(i, j, k) + delta_n
+                    Msn(4)%sf(i, j, k) = Msn(4)%sf(i, j, k) + delta_f*delta_n2*(ns**2 - 3._wp*ns + 3._wp) + 6._wp*delta_n2*Msn(2)%sf(i, j, k) - 4._wp*delta_n*Msn(3)%sf(i, j, k)
+                    Msn(3)%sf(i, j, k) = Msn(3)%sf(i, j, k) + delta_f*delta_n*(ns - 2._wp) - 3._wp*delta_n*Msn(2)%sf(i, j, k)
+                    Msn(2)%sf(i, j, k) = Msn(2)%sf(i, j, k) + delta_f
+                end do
+            end do
+        end do
+
+    end subroutine s_update_statistics
+
+    subroutine s_compute_statistical_moments(ns, Msn, q_stat)
+        type(scalar_field), dimension(1:4), intent(in) :: Msn
+        type(scalar_field), dimension(1:4), intent(inout) :: q_stat
+
+        real(wp), intent(in) :: ns
+        integer :: i, j, k
+
+        ! convert the accumulators to mean, unbiased variance, and bias-corrected
+        ! (adjusted) skewness and excess kurtosis; meaningful for ns >= 4
+        $:GPU_PARALLEL_LOOP(collapse=3, copyin='[ns]')
+        do i = 0, m
+            do j = 0, n
+                do k = 0, p
+                    q_stat(1)%sf(i, j, k) = Msn(1)%sf(i, j, k)
+                    q_stat(2)%sf(i, j, k) = Msn(2)%sf(i, j, k)/(ns - 1._wp)
+                    q_stat(3)%sf(i, j, k) = sqrt(ns - 1._wp)/(ns - 2._wp)*ns*Msn(3)%sf(i, j, k)/(Msn(2)%sf(i, j, k)**1.5_wp)
+                    q_stat(4)%sf(i, j, k) = (ns - 1._wp)/((ns - 2._wp)*(ns - 3._wp))*((ns + 1._wp)*(ns*Msn(4)%sf(i, j, k)/(Msn(2)%sf(i, j, k)**2) - 3._wp) + 6._wp)
+                end do
+            end do
+        end do
+
+    end subroutine s_compute_statistical_moments
+
+    subroutine s_finalize_statistics_module
+        integer :: i, j
+
+        do i = 1, 9
+            do j = 1, 4
+                @:DEALLOCATE(Msn_reynolds_stress(i)%vf(j)%sf)
+            end do
+            @:DEALLOCATE(Msn_reynolds_stress(i)%vf)
+        end do
+        @:DEALLOCATE(Msn_reynolds_stress)
+
+        do i = 1, 9
+            do j = 1, 4
+                @:DEALLOCATE(Msn_eff_visc(i)%vf(j)%sf)
+            end do
+            @:DEALLOCATE(Msn_eff_visc(i)%vf)
+        end do
+        @:DEALLOCATE(Msn_eff_visc)
+
+        do i = 1, 3
+            do j = 1, 4
+                @:DEALLOCATE(Msn_int_mom_exch(i)%vf(j)%sf)
+            end do
+            @:DEALLOCATE(Msn_int_mom_exch(i)%vf)
+        end do
+        @:DEALLOCATE(Msn_int_mom_exch)
+
+        do i = 1, sys_size - 1
+            do j = 1, 4
+                @:DEALLOCATE(Msn_q_cons_filtered(i)%vf(j)%sf)
+            end do
+            @:DEALLOCATE(Msn_q_cons_filtered(i)%vf)
+        end do
+        @:DEALLOCATE(Msn_q_cons_filtered)
+
+        do i = 1, 4
+            @:DEALLOCATE(Msn_filtered_pressure(i)%sf)
+        end do
+        @:DEALLOCATE(Msn_filtered_pressure)
+
+        do i = 1, 9
+            do j = 1, 4
+                @:DEALLOCATE(stat_reynolds_stress(i)%vf(j)%sf)
+            end do
+            @:DEALLOCATE(stat_reynolds_stress(i)%vf)
+        end do
+        @:DEALLOCATE(stat_reynolds_stress)
+
+        do i = 1, 9
+            do j = 1, 4
+                @:DEALLOCATE(stat_eff_visc(i)%vf(j)%sf)
+            end do
+            @:DEALLOCATE(stat_eff_visc(i)%vf)
+        end do
+ 
@:DEALLOCATE(stat_eff_visc) + + do i = 1, 3 + do j = 1, 4 + @:DEALLOCATE(stat_int_mom_exch(i)%vf(j)%sf) + end do + @:DEALLOCATE(stat_int_mom_exch(i)%vf) + end do + @:DEALLOCATE(stat_int_mom_exch) + + do i = 1, sys_size - 1 + do j = 1, 4 + @:DEALLOCATE(stat_q_cons_filtered(i)%vf(j)%sf) + end do + @:DEALLOCATE(stat_q_cons_filtered(i)%vf) + end do + @:DEALLOCATE(stat_q_cons_filtered) + + do i = 1, 4 + @:DEALLOCATE(stat_filtered_pressure(i)%sf) + end do + @:DEALLOCATE(stat_filtered_pressure) + + end subroutine s_finalize_statistics_module + +end module m_compute_statistics diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 9e46bdd775..c2568ab415 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -82,7 +82,10 @@ contains !! @param q_cons_vf Conservative variables !! @param q_prim_vf Primitive variables !! @param t_step Current time step - impure subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta) + impure subroutine s_write_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta, & + filtered_fluid_indicator_function, & + stat_q_cons_filtered, stat_filtered_pressure, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) type(scalar_field), & dimension(sys_size), & @@ -104,10 +107,20 @@ contains dimension(1:num_dims, -1:1), & intent(in) :: bc_type + type(scalar_field), intent(inout), optional :: filtered_fluid_indicator_function + type(vector_field), dimension(1:9), intent(inout), optional :: stat_reynolds_stress + type(vector_field), dimension(1:9), intent(inout), optional :: stat_eff_visc + type(vector_field), dimension(1:3), intent(inout), optional :: stat_int_mom_exch + type(vector_field), dimension(1:5), intent(inout), optional :: stat_q_cons_filtered + type(scalar_field), dimension(1:4), intent(inout), optional :: stat_filtered_pressure + if (.not. parallel_io) then call s_write_serial_data_files(q_cons_vf, q_T_sf, q_prim_vf, t_step, bc_type, beta) else - call s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta) + call s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta, & + filtered_fluid_indicator_function, & + stat_q_cons_filtered, stat_filtered_pressure, & + stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch) end if end subroutine s_write_data_files @@ -783,11 +796,21 @@ contains !! @param q_cons_vf Cell-average conservative variables !! @param t_step Current time-step !! 
@param beta Eulerian void fraction from lagrangian bubbles
-    impure subroutine s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta)
+    impure subroutine s_write_parallel_data_files(q_cons_vf, t_step, bc_type, beta, &
+                                                  filtered_fluid_indicator_function, &
+                                                  stat_q_cons_filtered, stat_filtered_pressure, &
+                                                  stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch)
 type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf
 integer, intent(in) :: t_step
 type(scalar_field), intent(inout), optional :: beta
+        type(scalar_field), intent(inout), optional :: filtered_fluid_indicator_function
+        type(vector_field), dimension(1:sys_size - 1), intent(inout), optional :: stat_q_cons_filtered
+        type(scalar_field), dimension(1:4), intent(inout), optional :: stat_filtered_pressure
+        type(vector_field), dimension(1:9), intent(inout), optional :: stat_reynolds_stress
+        type(vector_field), dimension(1:9), intent(inout), optional :: stat_eff_visc
+        type(vector_field), dimension(1:3), intent(inout), optional :: stat_int_mom_exch
+
 type(integer_field), &
     dimension(1:num_dims, -1:1), &
     intent(in) :: bc_type
@@ -823,6 +846,8 @@ contains
         if (present(beta)) then
             alt_sys = sys_size + 1
+        else if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then
+            alt_sys = sys_size + 1 + 9*4 + 9*4 + 3*4 + 6*4 ! 109 extra filtered/statistics fields
         else
             alt_sys = sys_size
         end if
@@ -930,6 +955,11 @@ contains
         if (ib) then
             call s_initialize_mpi_data(q_cons_vf, ib_markers, levelset, levelset_norm)
+            if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then
+                call s_initialize_mpi_data_filtered(filtered_fluid_indicator_function, &
+                                                    stat_q_cons_filtered, stat_filtered_pressure, &
+                                                    stat_reynolds_stress, stat_eff_visc, stat_int_mom_exch)
+            end if
         elseif (present(beta)) then
             call s_initialize_mpi_data(q_cons_vf, beta=beta)
         else
@@ -984,6 +1014,18 @@ contains
                         mpi_p, status, ierr)
                 end do
             end if
+        else if (volume_filtering_momentum_eqn) then
+            do i = 1, alt_sys
+                var_MOK = int(i, MPI_OFFSET_KIND)
+
+                ! Initial displacement to skip at beginning of file
+                disp = m_MOK*max(MOK, n_MOK)*max(MOK, p_MOK)*WP_MOK*(var_MOK - 1)
+
+                call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_DATA%view(i), &
+                                       'native', mpi_info_int, ierr)
+                call MPI_FILE_WRITE_ALL(ifile, MPI_IO_DATA%var(i)%sf, data_size, &
+                                        mpi_p, status, ierr)
+            end do
         else
             do i = 1, sys_size !TODO: check if correct (sys_size
                 var_MOK = int(i, MPI_OFFSET_KIND)
diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp
index b5fb9a35a3..bf12f8b3b4 100644
--- a/src/simulation/m_global_parameters.fpp
+++ b/src/simulation/m_global_parameters.fpp
@@ -549,6 +549,21 @@ module m_global_parameters
     $:GPU_DECLARE(create='[tau_star,cont_damage_s,alpha_bar]')
     !> @}
+    logical :: periodic_ibs
+    logical :: compute_particle_drag
+    real(wp) :: u_inf_ref !< reference freestream velocity
+    real(wp) :: rho_inf_ref !< reference freestream density
+    real(wp) :: P_inf_ref !< reference freestream pressure
+    logical :: periodic_forcing
+    logical :: volume_filtering_momentum_eqn
+    logical :: store_levelset
+    logical :: slab_domain_decomposition
+    integer :: t_step_stat_start
+    real(wp) :: filter_width
+    logical :: q_filtered_wrt
+
+    $:GPU_DECLARE(create='[u_inf_ref, rho_inf_ref, P_inf_ref, filter_width]')
+
 contains
 !> Assigns default values to the user inputs before reading
@@ -835,6 +850,19 @@ contains
         relativity = .false.
 #:endif
+        periodic_ibs = .false.
+        compute_particle_drag = .false.
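+
+        ! Reference free-stream state used by the periodic forcing source terms;
+        ! dflt_real marks these as unset until the case file provides values.
+        ! A hypothetical case-dictionary sketch, mirroring the other real-valued
+        ! inputs (names as defined above; values are case-dependent):
+        !     "u_inf_ref"    : v1,    # free-stream velocity
+        !     "rho_inf_ref"  : rho,   # free-stream density
+        !     "P_inf_ref"    : P,     # free-stream pressure
+        !     "filter_width" : ...,   # filtering kernel width, set per case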
+ u_inf_ref = dflt_real + rho_inf_ref = dflt_real + P_inf_ref = dflt_real + periodic_forcing = .false. + volume_filtering_momentum_eqn = .false. + store_levelset = .true. + slab_domain_decomposition = .false. + t_step_stat_start = dflt_int + filter_width = dflt_real + q_filtered_wrt = .false. + end subroutine s_assign_default_values_to_user_inputs !> The computation of parameters, the allocation of memory, @@ -1203,6 +1231,9 @@ contains elseif (bubbles_lagrange) then allocate (MPI_IO_DATA%view(1:sys_size + 1)) allocate (MPI_IO_DATA%var(1:sys_size + 1)) + else if (volume_filtering_momentum_eqn) then + allocate (MPI_IO_DATA%view(1:sys_size + 109)) + allocate (MPI_IO_DATA%var(1:sys_size + 109)) else allocate (MPI_IO_DATA%view(1:sys_size)) allocate (MPI_IO_DATA%var(1:sys_size)) @@ -1224,6 +1255,11 @@ contains allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) MPI_IO_DATA%var(i)%sf => null() end do + else if (volume_filtering_momentum_eqn) then + do i = sys_size + 1, sys_size + 109 + allocate (MPI_IO_DATA%var(i)%sf(0:m, 0:n, 0:p)) + MPI_IO_DATA%var(i)%sf => null() + end do end if ! Configuring the WENO average flag that will be used to regulate @@ -1425,6 +1461,10 @@ contains do i = 1, sys_size + 1 MPI_IO_DATA%var(i)%sf => null() end do + else if (volume_filtering_momentum_eqn) then + do i = 1, sys_size + 109 + MPI_IO_DATA%var(i)%sf => null() + end do else do i = 1, sys_size MPI_IO_DATA%var(i)%sf => null() diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 6fe4932574..444b4ab8d5 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -54,6 +54,9 @@ module m_ibm logical :: moving_immersed_boundary_flag + real(wp) :: x_domain_beg_glb, x_domain_end_glb, y_domain_beg_glb, y_domain_end_glb, z_domain_beg_glb, z_domain_end_glb !< global domain beginning/end + $:GPU_DECLARE(create='[x_domain_beg_glb, x_domain_end_glb, y_domain_beg_glb, y_domain_end_glb, z_domain_beg_glb, z_domain_end_glb]') + contains !> Allocates memory for the variables in the IBM module @@ -90,6 +93,14 @@ contains integer :: i, j, k integer :: max_num_gps, max_num_inner_gps + call s_mpi_allreduce_min(x_domain%beg, x_domain_beg_glb) + call s_mpi_allreduce_max(x_domain%end, x_domain_end_glb) + call s_mpi_allreduce_min(y_domain%beg, y_domain_beg_glb) + call s_mpi_allreduce_max(y_domain%end, y_domain_end_glb) + call s_mpi_allreduce_min(z_domain%beg, z_domain_beg_glb) + call s_mpi_allreduce_max(z_domain%end, z_domain_end_glb) + $:GPU_UPDATE(device='[x_domain_beg_glb, x_domain_end_glb, y_domain_beg_glb, y_domain_end_glb, z_domain_beg_glb, z_domain_end_glb]') + moving_immersed_boundary_flag = .false. do i = 1, num_ibs if (patch_ib(i)%moving_ibm /= 0) then @@ -136,6 +147,7 @@ contains end subroutine s_ibm_setup subroutine s_populate_ib_buffers() + integer :: j, k, l #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] #:for LOCC, LOCI in [('beg', -1), ('end', 1)] @@ -145,6 +157,83 @@ contains #:endfor #:endfor + if (periodic_ibs) then + ! Population of Buffers in x-direction + if (bc_x%beg == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = 0, n + do j = 1, buff_size + ib_markers%sf(-j, k, l) = & + ib_markers%sf(m - (j - 1), k, l) + end do + end do + end do + end if + + if (bc_x%end == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = 0, n + do j = 1, buff_size + ib_markers%sf(m + j, k, l) = & + ib_markers%sf(j - 1, k, l) + end do + end do + end do + end if + + ! 
Population of Buffers in y-direction + if (bc_y%beg == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = -buff_size, m + buff_size + do j = 1, buff_size + ib_markers%sf(k, -j, l) = & + ib_markers%sf(k, n - (j - 1), l) + end do + end do + end do + end if + + if (bc_y%end == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = 0, p + do k = -buff_size, m + buff_size + do j = 1, buff_size + ib_markers%sf(k, n + j, l) = & + ib_markers%sf(k, j - 1, l) + end do + end do + end do + end if + + ! Population of Buffers in z-direction + if (bc_z%beg == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + do j = 1, buff_size + ib_markers%sf(k, l, -j) = & + ib_markers%sf(k, l, p - (j - 1)) + end do + end do + end do + end if + + if (bc_z%end == BC_PERIODIC) then + $:GPU_PARALLEL_LOOP(collapse=3) + do l = -buff_size, n + buff_size + do k = -buff_size, m + buff_size + do j = 1, buff_size + ib_markers%sf(k, l, p + j) = & + ib_markers%sf(k, l, j - 1) + end do + end do + end do + end if + end if + end subroutine s_populate_ib_buffers !> Subroutine that updates the conservative variables at the ghost points @@ -397,6 +486,13 @@ contains integer :: dir integer :: index + real(wp) :: radius, x_centroid, y_centroid, z_centroid + real(wp) :: x_pcen, y_pcen, z_pcen + real(wp) :: dist_calc + real(wp), dimension(3) :: dist_vec + real(wp), dimension(7, 3) :: dist_vec_per + real(wp), dimension(7) :: dist_per + do q = 1, num_gps gp = ghost_points_in(q) i = gp%loc(1) @@ -412,8 +508,106 @@ contains ! Calculate and store the precise location of the image point patch_id = gp%ib_patch_id - dist = abs(levelset_in%sf(i, j, k, patch_id)) - norm(:) = levelset_norm_in%sf(i, j, k, patch_id, :) + if (store_levelset) then + dist = abs(levelset%sf(i, j, k, patch_id)) + norm(:) = levelset_norm%sf(i, j, k, patch_id, :) + else ! compute levelset and levelset_norm on the fly + radius = patch_ib(patch_id)%radius + x_centroid = patch_ib(patch_id)%x_centroid + y_centroid = patch_ib(patch_id)%y_centroid + z_centroid = patch_ib(patch_id)%z_centroid + dist_vec(1) = x_cc(i) - x_centroid + dist_vec(2) = y_cc(j) - y_centroid + dist_vec(3) = z_cc(k) - z_centroid + dist_calc = sqrt(sum(dist_vec**2)) + ! 
all permutations of periodically projected ib + if (periodic_ibs) then + if ((x_centroid - x_domain_beg_glb) <= radius) then + x_pcen = x_domain_end_glb + (x_centroid - x_domain_beg_glb) + else if ((x_domain_end_glb - x_centroid) <= radius) then + x_pcen = x_domain_beg_glb - (x_domain_end_glb - x_centroid) + else + x_pcen = x_centroid + end if + if ((y_centroid - y_domain_beg_glb) <= radius) then + y_pcen = y_domain_end_glb + (y_centroid - y_domain_beg_glb) + else if ((y_domain_end_glb - y_centroid) <= radius) then + y_pcen = y_domain_beg_glb - (y_domain_end_glb - y_centroid) + else + y_pcen = y_centroid + end if + if ((z_centroid - z_domain_beg_glb) <= radius) then + z_pcen = z_domain_end_glb + (z_centroid - z_domain_beg_glb) + else if ((z_domain_end_glb - z_centroid) <= radius) then + z_pcen = z_domain_beg_glb - (z_domain_end_glb - z_centroid) + else + z_pcen = z_centroid + end if + dist_vec_per(1, 1) = x_cc(i) - x_pcen + dist_vec_per(1, 2) = y_cc(j) - y_pcen + dist_vec_per(1, 3) = z_cc(k) - z_pcen + dist_per(1) = sqrt(sum(dist_vec_per(1, :)**2)) + if (dist_per(1) < dist_calc) then + dist_calc = dist_per(1) + dist_vec = dist_vec_per(1, :) + end if + dist_vec_per(2, 1) = x_cc(i) - x_pcen + dist_vec_per(2, 2) = y_cc(j) - y_centroid + dist_vec_per(2, 3) = z_cc(k) - z_pcen + dist_per(2) = sqrt(sum(dist_vec_per(2, :)**2)) + if (dist_per(2) < dist_calc) then + dist_calc = dist_per(2) + dist_vec = dist_vec_per(2, :) + end if + dist_vec_per(3, 1) = x_cc(i) - x_pcen + dist_vec_per(3, 2) = y_cc(j) - y_pcen + dist_vec_per(3, 3) = z_cc(k) - z_centroid + dist_per(3) = sqrt(sum(dist_vec_per(3, :)**2)) + if (dist_per(3) < dist_calc) then + dist_calc = dist_per(3) + dist_vec = dist_vec_per(3, :) + end if + dist_vec_per(4, 1) = x_cc(i) - x_pcen + dist_vec_per(4, 2) = y_cc(j) - y_centroid + dist_vec_per(4, 3) = z_cc(k) - z_centroid + dist_per(4) = sqrt(sum(dist_vec_per(4, :)**2)) + if (dist_per(4) < dist_calc) then + dist_calc = dist_per(4) + dist_vec = dist_vec_per(4, :) + end if + dist_vec_per(5, 1) = x_cc(i) - x_centroid + dist_vec_per(5, 2) = y_cc(j) - y_pcen + dist_vec_per(5, 3) = z_cc(k) - z_pcen + dist_per(5) = sqrt(sum(dist_vec_per(5, :)**2)) + if (dist_per(5) < dist_calc) then + dist_calc = dist_per(5) + dist_vec = dist_vec_per(5, :) + end if + dist_vec_per(6, 1) = x_cc(i) - x_centroid + dist_vec_per(6, 2) = y_cc(j) - y_pcen + dist_vec_per(6, 3) = z_cc(k) - z_centroid + dist_per(6) = sqrt(sum(dist_vec_per(6, :)**2)) + if (dist_per(6) < dist_calc) then + dist_calc = dist_per(6) + dist_vec = dist_vec_per(6, :) + end if + dist_vec_per(7, 1) = x_cc(i) - x_centroid + dist_vec_per(7, 2) = y_cc(j) - y_centroid + dist_vec_per(7, 3) = z_cc(k) - z_pcen + dist_per(7) = sqrt(sum(dist_vec_per(7, :)**2)) + if (dist_per(7) < dist_calc) then + dist_calc = dist_per(7) + dist_vec = dist_vec_per(7, :) + end if + end if + dist = abs(dist_calc - radius) + if (dist_calc == 0) then + norm(:) = (/1, 0, 0/) + else + norm(:) = dist_vec(:)/dist_calc + end if + end if ! end store_levelset if statement + ghost_points_in(q)%ip_loc(:) = physical_loc(:) + 2*dist*norm(:) ! Find the closest grid point to the image point @@ -422,13 +616,13 @@ contains ! s_cc points to the dim array we need if (dim == 1) then s_cc => x_cc - bound = m + buff_size - 1 + bound = m + buff_size elseif (dim == 2) then s_cc => y_cc - bound = n + buff_size - 1 + bound = n + buff_size else s_cc => z_cc - bound = p + buff_size - 1 + bound = p + buff_size end if if (f_approx_equal(norm(dim), 0._wp)) then @@ -447,7 +641,7 @@ contains .or. 
temp_loc > s_cc(index + 1))) index = index + dir if (index < -buff_size .or. index > bound) then - print *, "temp_loc=", temp_loc, " s_cc(index)=", s_cc(index), " s_cc(index+1)=", s_cc(index + 1) + print *, "proc_rank=", proc_rank, "temp_loc=", temp_loc, " index=", index, "ib=", patch_id, "dim", dim, "dir", dir, "i, j, k", i, j, k print *, "Increase buff_size further in m_helper_basic (currently set to a minimum of 10)" error stop "Increase buff_size" end if @@ -475,6 +669,9 @@ contains :: subsection_2D integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & :: subsection_3D + integer, dimension(2*gp_layers + 1) :: subsection_x + integer, dimension(2*gp_layers + 1) :: subsection_y + integer, dimension(2*gp_layers + 1) :: subsection_z integer :: i, j, k!< Iterator variables num_gps_out = 0 @@ -496,11 +693,22 @@ contains else do k = 0, p if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then + ! subsection_3D = ib_markers%sf( & + ! i - gp_layers:i + gp_layers, & + ! j - gp_layers:j + gp_layers, & + ! k - gp_layers:k + gp_layers) + ! if (any(subsection_3D == 0)) then + ! num_gps_out = num_gps_out + 1 + ! else + ! num_inner_gps_out = num_inner_gps_out + 1 + ! end if + + subsection_x = ib_markers%sf(i - gp_layers:i + gp_layers, j, k) + subsection_y = ib_markers%sf(i, j - gp_layers:j + gp_layers, k) + subsection_z = ib_markers%sf(i, j, k - gp_layers:k + gp_layers) + if (any(subsection_x == 0) .or. & + any(subsection_y == 0) .or. & + any(subsection_z == 0)) then num_gps_out = num_gps_out + 1 else num_inner_gps_out = num_inner_gps_out + 1 @@ -522,6 +730,9 @@ contains :: subsection_2D integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & :: subsection_3D + integer, dimension(2*gp_layers + 1) :: subsection_x + integer, dimension(2*gp_layers + 1) :: subsection_y + integer, dimension(2*gp_layers + 1) :: subsection_z integer :: i, j, k !< Iterator variables integer :: count, count_i integer :: patch_id @@ -578,11 +789,19 @@ contains ! 3D do k = 0, p if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then + ! subsection_3D = ib_markers%sf( & + ! i - gp_layers:i + gp_layers, & + ! j - gp_layers:j + gp_layers, & + ! k - gp_layers:k + gp_layers) + ! if (any(subsection_3D == 0)) then + + subsection_x = ib_markers%sf(i - gp_layers:i + gp_layers, j, k) + subsection_y = ib_markers%sf(i, j - gp_layers:j + gp_layers, k) + subsection_z = ib_markers%sf(i, j, k - gp_layers:k + gp_layers) + if (any(subsection_x == 0) .or. & + any(subsection_y == 0) .or. 
& + any(subsection_z == 0)) then + ghost_points_in(count)%loc = [i, j, k] patch_id = ib_markers%sf(i, j, k) ghost_points_in(count)%ib_patch_id = & @@ -952,8 +1171,10 @@ contains impure subroutine s_finalize_ibm_module() @:DEALLOCATE(ib_markers%sf) - @:DEALLOCATE(levelset%sf) - @:DEALLOCATE(levelset_norm%sf) + if (store_levelset) then + @:DEALLOCATE(levelset%sf) + @:DEALLOCATE(levelset_norm%sf) + end if end subroutine s_finalize_ibm_module diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index 069cd9a8d6..7d21482e6a 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -100,7 +100,7 @@ contains & 'num_probes', 'num_integrals', 'bubble_model', 'thermal', & & 'num_source', 'relax_model', 'num_ibs', 'n_start', & & 'num_bc_patches', 'num_igr_iters', 'num_igr_warm_start_iters', & - & 'adap_dt_max_iters' ] + & 'adap_dt_max_iters', 't_step_stat_start' ] call MPI_BCAST(${VAR}$, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -116,7 +116,10 @@ contains & 'bc_z%grcbc_in', 'bc_z%grcbc_out', 'bc_z%grcbc_vel_out', & & 'cfl_adap_dt', 'cfl_const_dt', 'cfl_dt', 'surface_tension', & & 'shear_stress', 'bulk_stress', 'bubbles_lagrange', & - & 'hyperelasticity', 'down_sample', 'int_comp','fft_wrt' ] + & 'hyperelasticity', 'down_sample', 'int_comp','fft_wrt', & + & 'periodic_ibs', 'compute_particle_drag', 'periodic_forcing', & + & 'volume_filtering_momentum_eqn', 'store_levelset', & + & 'slab_domain_decomposition', 'q_filtered_wrt' ] call MPI_BCAST(${VAR}$, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -156,7 +159,8 @@ contains & 'z_domain%beg', 'z_domain%end', 'x_a', 'x_b', 'y_a', 'y_b', 'z_a', & & 'z_b', 't_stop', 't_save', 'cfl_target', 'Bx0', 'alf_factor', & & 'tau_star', 'cont_damage_s', 'alpha_bar', 'adap_dt_tol', & - & 'ic_eps', 'ic_beta' ] + & 'ic_eps', 'ic_beta', 'u_inf_ref', 'rho_inf_ref', 'P_inf_ref', & + & 'filter_width' ] call MPI_BCAST(${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor @@ -202,8 +206,8 @@ contains end do do i = 1, num_ibs - #:for VAR in [ 'radius', 'length_x', 'length_y', & - & 'x_centroid', 'y_centroid', 'c', 'm', 'p', 't', 'theta', 'slip'] + #:for VAR in [ 'radius', 'length_x', 'length_y', 'length_z', & + & 'x_centroid', 'y_centroid', 'z_centroid', 'c', 'm', 'p', 't', 'theta', 'slip'] call MPI_BCAST(patch_ib(i)%${VAR}$, 1, mpi_p, 0, MPI_COMM_WORLD, ierr) #:endfor #:for VAR in ['vel', 'angular_vel', 'angles'] diff --git a/src/simulation/m_rhs.fpp b/src/simulation/m_rhs.fpp index cdcb418075..af74e780f3 100644 --- a/src/simulation/m_rhs.fpp +++ b/src/simulation/m_rhs.fpp @@ -71,6 +71,8 @@ module m_rhs use m_pressure_relaxation + use m_additional_forcing + implicit none private; public :: s_initialize_rhs_module, & @@ -1028,6 +1030,12 @@ contains if (cont_damage) call s_compute_damage_state(q_cons_qp%vf, rhs_vf) + if (periodic_forcing) then + call nvtxStartRange("COMPUTE-PERIODIC-FORCING") + call s_compute_periodic_forcing(rhs_vf, q_cons_vf, q_prim_vf, t_step + 1) + call nvtxEndRange + end if + ! END: Additional physics and source terms if (run_time_info .or. probe_wrt .or. ib .or.
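The ghost-point search above (both the counting pass and the assembly pass) replaces the full (2*gp_layers+1)^3 cube scan with three axis-aligned 1D windows. The cheaper test can classify a cell differently when its only exposure to fluid is diagonal, as this toy NumPy demonstration shows:

import numpy as np

g = 2  # gp_layers
mk = np.ones((9, 9, 9), dtype=int)   # 1 = solid, 0 = fluid
mk[0, 0, 0] = 0                      # a single fluid cell on a cube corner

i = j = k = 2                        # candidate cell, diagonal to that corner
cube = mk[i-g:i+g+1, j-g:j+g+1, k-g:k+g+1]
axes = (mk[i-g:i+g+1, j, k], mk[i, j-g:j+g+1, k], mk[i, j, k-g:k+g+1])

print(np.any(cube == 0))                  # True: cube test flags a ghost point
print(any(np.any(a == 0) for a in axes))  # False: axis test calls it inner

Whether that corner case matters depends on gp_layers relative to how finely the spheres are resolved; for smooth, well-resolved geometries the set of cells where the two tests disagree is small, and the axis test cuts the per-cell work from (2g+1)^3 to 3(2g+1) reads.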
bubbles_lagrange) then diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp index 91c9dd29eb..1de82fd02c 100644 --- a/src/simulation/m_start_up.fpp +++ b/src/simulation/m_start_up.fpp @@ -95,6 +95,12 @@ module m_start_up use m_igr + use m_additional_forcing + + use m_volume_filtering + + use m_compute_statistics + implicit none private; public :: s_read_input_file, & @@ -189,7 +195,11 @@ contains cont_damage, tau_star, cont_damage_s, alpha_bar, & alf_factor, num_igr_iters, num_igr_warm_start_iters, & int_comp, ic_eps, ic_beta, nv_uvm_out_of_core, & - nv_uvm_igr_temps_on_gpu, nv_uvm_pref_gpu, down_sample, fft_wrt + nv_uvm_igr_temps_on_gpu, nv_uvm_pref_gpu, down_sample, fft_wrt, & + periodic_ibs, compute_particle_drag, u_inf_ref, rho_inf_ref, P_inf_ref, & + periodic_forcing, volume_filtering_momentum_eqn, store_levelset, & + slab_domain_decomposition, t_step_stat_start, & + filter_width, q_filtered_wrt ! Checking that an input file has been provided by the user. If it ! has, then the input file is read in, otherwise, simulation exits. @@ -445,33 +455,35 @@ contains call s_mpi_abort(trim(file_path)//' is missing. Exiting.') end if - ! Read Levelset - write (file_path, '(A)') & - trim(t_step_dir)//'/levelset.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (file_exist) then - open (2, FILE=trim(file_path), & - FORM='unformatted', & - ACTION='read', & - STATUS='old') - read (2) levelset%sf(0:m, 0:n, 0:p, 1:num_ibs); close (2) - ! print*, 'check', STL_levelset(106, 50, 0, 1) - else - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') - end if + if (store_levelset) then + ! Read Levelset + write (file_path, '(A)') & + trim(t_step_dir)//'/levelset.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (file_exist) then + open (2, FILE=trim(file_path), & + FORM='unformatted', & + ACTION='read', & + STATUS='old') + read (2) levelset%sf(0:m, 0:n, 0:p, 1:num_ibs); close (2) + ! print*, 'check', STL_levelset(106, 50, 0, 1) + else + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if - ! Read Levelset Norm - write (file_path, '(A)') & - trim(t_step_dir)//'/levelset_norm.dat' - inquire (FILE=trim(file_path), EXIST=file_exist) - if (file_exist) then - open (2, FILE=trim(file_path), & - FORM='unformatted', & - ACTION='read', & - STATUS='old') - read (2) levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3); close (2) - else - call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + ! Read Levelset Norm + write (file_path, '(A)') & + trim(t_step_dir)//'/levelset_norm.dat' + inquire (FILE=trim(file_path), EXIST=file_exist) + if (file_exist) then + open (2, FILE=trim(file_path), & + FORM='unformatted', & + ACTION='read', & + STATUS='old') + read (2) levelset_norm%sf(0:m, 0:n, 0:p, 1:num_ibs, 1:3); close (2) + else + call s_mpi_abort(trim(file_path)//' is missing. Exiting.') + end if end if do i = 1, num_ibs @@ -740,44 +752,46 @@ contains call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') end if - ! Read Levelset - write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + if (store_levelset) then + ! 
Read Levelset + write (file_loc, '(A)') 'levelset.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') - end if + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if - ! Read Levelset Norm - write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + ! Read Levelset Norm + write (file_loc, '(A)') 'levelset_norm.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if end if end if @@ -889,44 +903,46 @@ contains call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') end if - ! Read Levelset - write (file_loc, '(A)') 'levelset.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + if (store_levelset) then + ! Read Levelset + write (file_loc, '(A)') 'levelset.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelset_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelset_DATA%var%sf, data_size * num_ibs, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. 
Exiting.') - end if + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if - ! Read Levelset Norm - write (file_loc, '(A)') 'levelset_norm.dat' - file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) - inquire (FILE=trim(file_loc), EXIST=file_exist) + ! Read Levelset Norm + write (file_loc, '(A)') 'levelset_norm.dat' + file_loc = trim(case_dir)//'/restart_data'//trim(mpiiofs)//trim(file_loc) + inquire (FILE=trim(file_loc), EXIST=file_exist) - if (file_exist) then + if (file_exist) then - call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) + call MPI_FILE_OPEN(MPI_COMM_WORLD, file_loc, MPI_MODE_RDONLY, mpi_info_int, ifile, ierr) - disp = 0 + disp = 0 - call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & - 'native', mpi_info_int, ierr) - call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & - mpi_p, status, ierr) + call MPI_FILE_SET_VIEW(ifile, disp, mpi_p, MPI_IO_levelsetnorm_DATA%view, & + 'native', mpi_info_int, ierr) + call MPI_FILE_READ(ifile, MPI_IO_levelsetnorm_DATA%var%sf, data_size * num_ibs * 3, & + mpi_p, status, ierr) - else - call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + else + call s_mpi_abort('File '//trim(file_loc)//' is missing. Exiting.') + end if end if end if @@ -1134,6 +1150,25 @@ contains call s_compute_derived_variables(t_step) + ! Volume filter flow variables, compute unclosed terms and their statistics + if (volume_filtering_momentum_eqn) then + if (t_step > t_step_stat_start) then + call nvtxStartRange("VOLUME-FILTER-MOMENTUM-EQUATION") + call s_volume_filter_momentum_eqn(q_cons_ts(1)%vf, q_prim_vf, bc_type) + call nvtxEndRange + + call nvtxStartRange("COMPUTE-STATISTICS") + call s_compute_statistics_momentum_unclosed_terms(t_step, t_step_stat_start, reynolds_stress, eff_visc, int_mom_exch, q_cons_filtered, filtered_pressure) + call nvtxEndRange + + ! Compute explicit x-, y-, z- forces on each particle + if (compute_particle_drag) then + call nvtxStartRange("COMPUTE-PARTICLE-FORCES") + call s_compute_particle_forces() + call nvtxEndRange + end if + end if + end if #ifdef DEBUG print *, 'Computed derived vars' @@ -1226,6 +1261,28 @@ contains call cpu_time(start) call nvtxStartRange("SAVE-DATA") + if (q_filtered_wrt .and. (t_step == 0 .or. t_step == t_step_stop)) then + $:GPU_UPDATE(host='[filtered_fluid_indicator_function%sf]') + do i = 1, 9 + do j = 1, 4 + $:GPU_UPDATE(host='[stat_reynolds_stress(i)%vf(j)%sf]') + $:GPU_UPDATE(host='[stat_eff_visc(i)%vf(j)%sf]') + end do + end do + do i = 1, 3 + do j = 1, 4 + $:GPU_UPDATE(host='[stat_int_mom_exch(i)%vf(j)%sf]') + end do + end do + do i = 1, sys_size-1 + do j = 1, 4 + $:GPU_UPDATE(host='[stat_q_cons_filtered(i)%vf(j)%sf]') + end do + end do + do i = 1, 4 + $:GPU_UPDATE(host='[stat_filtered_pressure(i)%sf]') + end do + end if do i = 1, sys_size $:GPU_UPDATE(host='[q_cons_ts(1)%vf(i)%sf]') do l = 0, p @@ -1266,6 +1323,11 @@ contains call s_write_restart_lag_bubbles(save_count) !parallel if (lag_params%write_bubbles_stats) call s_write_lag_bubble_stats() + else if (volume_filtering_momentum_eqn .and. (t_step == 0 .or. 
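The hook above volume-filters the state once t_step passes t_step_stat_start and hands the unclosed terms to the statistics module. The four device-resident slots per field (the j = 1, 4 update loops) suggest running statistical moments, but their exact definition lives in m_compute_statistics, which is not part of this patch. Purely as an illustration of that online-accumulation pattern (an assumption, not the module's actual contents), a two-slot running mean/variance in NumPy looks like:

import numpy as np

rng = np.random.default_rng(0)
shape = (4, 4, 4)
sums = np.zeros((2,) + shape)   # slot 0: sum(q), slot 1: sum(q**2)
nsamp = 0

for t_step in range(1000):
    q = rng.normal(size=shape)  # stand-in for one filtered field at this step
    sums[0] += q
    sums[1] += q**2
    nsamp += 1

mean = sums[0] / nsamp
var = sums[1] / nsamp - mean**2
print(mean.mean(), var.mean())  # ~0 and ~1 for unit-normal samples

Accumulating per-cell power sums on the device and only copying them back at t_step == 0 or t_step_stop, as the GPU_UPDATE blocks above do, keeps the statistics out of the host-device traffic on every step.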
t_step == t_step_stop)) then + call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, bc_type, & + filtered_fluid_indicator_function=filtered_fluid_indicator_function, & + stat_q_cons_filtered=stat_q_cons_filtered, stat_filtered_pressure=stat_filtered_pressure, & + stat_reynolds_stress=stat_reynolds_stress, stat_eff_visc=stat_eff_visc, stat_int_mom_exch=stat_int_mom_exch) else call s_write_data_files(q_cons_ts(1)%vf, q_T_sf, q_prim_vf, save_count, bc_type) end if @@ -1393,6 +1455,12 @@ contains if (mhd .and. powell) call s_initialize_mhd_powell_module + if (periodic_forcing) call s_initialize_additional_forcing_module() + if (volume_filtering_momentum_eqn) then + call s_initialize_fftw_explicit_filter_module() + call s_initialize_statistics_module() + end if + end subroutine s_initialize_modules impure subroutine s_initialize_mpi_domain @@ -1517,6 +1585,7 @@ contains end if $:GPU_UPDATE(device='[igr, igr_order]') + $:GPU_UPDATE(device='[u_inf_ref, rho_inf_ref, P_inf_ref, filter_width]') end subroutine s_initialize_gpu_vars @@ -1555,6 +1624,9 @@ contains if (bodyForces) call s_finalize_body_forces_module() if (mhd .and. powell) call s_finalize_mhd_powell_module + if (periodic_forcing) call s_finalize_additional_forcing_module() + if (volume_filtering_momentum_eqn) call s_finalize_fftw_explicit_filter_module + ! Terminating MPI execution environment call s_mpi_finalize() end subroutine s_finalize_modules diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index d272c842aa..4651f484d2 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -46,6 +46,10 @@ module m_time_steppers use m_body_forces + use m_volume_filtering + + use m_compute_statistics + implicit none type(vector_field), allocatable, dimension(:) :: q_cons_ts !< @@ -57,7 +61,7 @@ module m_time_steppers type(scalar_field), allocatable, dimension(:) :: rhs_vf !< !! Cell-average RHS variables at the current time-stage - type(integer_field), allocatable, dimension(:, :) :: bc_type !< + type(integer_field), allocatable, dimension(:, :), public :: bc_type !< !! 
Boundary condition identifiers type(vector_field), allocatable, dimension(:) :: q_prim_ts !< diff --git a/src/simulation/m_volume_filtering.fpp b/src/simulation/m_volume_filtering.fpp new file mode 100644 index 0000000000..94f9804955 --- /dev/null +++ b/src/simulation/m_volume_filtering.fpp @@ -0,0 +1,1926 @@ +#:include 'macros.fpp' + +module m_volume_filtering + + use, intrinsic :: iso_c_binding + + use m_derived_types !< Definitions of the derived types + + use m_global_parameters !< Definitions of the global parameters + + use m_mpi_proxy !< Message passing interface (MPI) module proxy + + use m_ibm + + use m_boundary_common + + use m_nvtx + +#ifdef MFC_MPI + use mpi !< Message passing interface (MPI) module +#endif + +#if defined(MFC_OpenACC) && defined(__PGI) + use cufft +#endif + + implicit none + + private; public :: s_initialize_fftw_explicit_filter_module, & + s_initialize_filtering_kernel, s_initialize_fluid_indicator_function, & + s_initialize_filtered_fluid_indicator_function, s_initialize_fluid_indicator_gradient, & + s_finalize_fftw_explicit_filter_module, s_volume_filter_momentum_eqn, s_apply_fftw_filter_scalarfield, s_filter_batch, & + s_compute_viscous_stress_tensor, s_compute_stress_tensor, s_compute_divergence_stress_tensor, s_compute_particle_forces, & + s_mpi_transpose_slabZ2Y, s_mpi_transpose_slabY2Z, s_mpi_transpose_slabZ2Y_batch, s_mpi_transpose_slabY2Z_batch, & + s_mpi_FFT_fwd, s_mpi_FFT_bwd, s_setup_terms_filtering, s_compute_pseudo_turbulent_reynolds_stress, s_compute_effective_viscosity + +#if !defined(MFC_OpenACC) + include 'fftw3.f03' +#endif + + integer :: ierr + + ! fluid indicator function (1 = fluid, 0 = otherwise) + type(scalar_field), public :: fluid_indicator_function + type(scalar_field), public :: filtered_fluid_indicator_function + type(scalar_field), allocatable, dimension(:) :: grad_fluid_indicator + + ! volume filtered conservative variables + type(scalar_field), allocatable, dimension(:), public :: q_cons_filtered + type(scalar_field), public :: filtered_pressure + + ! viscous and pressure+viscous stress tensors + type(vector_field), allocatable, dimension(:) :: visc_stress + type(vector_field), allocatable, dimension(:) :: pres_visc_stress + + ! divergence of stress tensor + type(scalar_field), allocatable, dimension(:) :: div_pres_visc_stress + + ! unclosed terms in volume filtered momentum equation + type(vector_field), allocatable, dimension(:), public :: reynolds_stress + type(vector_field), allocatable, dimension(:), public :: eff_visc + type(scalar_field), allocatable, dimension(:), public :: int_mom_exch + + ! 1/mu + real(wp), allocatable, dimension(:, :) :: Res + + ! x-,y-,z-direction forces on particles + real(wp), allocatable, dimension(:, :) :: particle_forces + + $:GPU_DECLARE(create='[fluid_indicator_function, filtered_fluid_indicator_function, grad_fluid_indicator]') + $:GPU_DECLARE(create='[q_cons_filtered, filtered_pressure, visc_stress, pres_visc_stress, div_pres_visc_stress]') + $:GPU_DECLARE(create='[reynolds_stress, eff_visc, int_mom_exch, Res, particle_forces]') + +#if defined(MFC_OpenACC) + ! GPU plans + integer :: plan_x_fwd_gpu, plan_x_bwd_gpu, plan_y_gpu, plan_z_gpu +#else + ! CPU plans + type(c_ptr) :: plan_x_r2c_fwd, plan_x_c2r_bwd + type(c_ptr) :: plan_y_c2c_fwd, plan_y_c2c_bwd + type(c_ptr) :: plan_z_c2c_fwd, plan_z_c2c_bwd + type(c_ptr) :: plan_x_r2c_kernelG, plan_y_c2c_kernelG, plan_z_c2c_kernelG +#endif + + ! domain size information (global, complex, local) + integer :: Nx, Ny, Nz, NxC, Nyloc, Nzloc + + ! 
1D real and complex vectors for FFT routines + real(c_double), allocatable :: data_real_in1d(:) + complex(c_double_complex), allocatable :: data_cmplx_out1d(:) + complex(c_double_complex), allocatable :: data_cmplx_out1dy(:) + + ! 3D arrays for slab transposes + complex(c_float_complex), allocatable :: data_cmplx_slabz(:, :, :), data_cmplx_slaby(:, :, :) + ! 3D arrays for slab transposes of tensor quantities + complex(c_float_complex), allocatable :: data_cmplx_slabz_batch(:, :, :, :), data_cmplx_slaby_batch(:, :, :, :) + + ! input/output array for FFT routine + real(c_double), allocatable :: data_real_3D_slabz(:, :, :) + + ! filtering kernel in physical space + real(c_double), allocatable :: real_kernelG_in(:, :, :) + + ! FFT of filtering kernel + complex(c_double_complex), allocatable :: cmplx_kernelG1d(:) + + $:GPU_DECLARE(create='[Nx, Ny, Nz, NxC, Nyloc, Nzloc]') + $:GPU_DECLARE(create='[data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy, data_cmplx_slabz, data_cmplx_slaby, data_cmplx_slabz_batch, data_cmplx_slaby_batch]') + $:GPU_DECLARE(create='[data_real_3D_slabz, real_kernelG_in, cmplx_kernelG1d]') + + ! buffers for data transpose + complex(c_float_complex), allocatable :: sendbuf_sf(:), recvbuf_sf(:) + complex(c_float_complex), allocatable :: sendbuf_batch(:), recvbuf_batch(:) + + $:GPU_DECLARE(create='[sendbuf_sf, recvbuf_sf, sendbuf_batch, recvbuf_batch]') + +contains + + !< create fft plans to be used for explicit filtering of data + subroutine s_initialize_fftw_explicit_filter_module + integer :: i, j, k + integer :: size_n(1), inembed(1), onembed(1) + + @:ALLOCATE(q_cons_filtered(1:sys_size-1)) + do i = 1, sys_size - 1 + @:ALLOCATE(q_cons_filtered(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(q_cons_filtered(i)) + end do + + @:ALLOCATE(filtered_pressure%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(filtered_pressure) + + @:ALLOCATE(visc_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(visc_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(visc_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(visc_stress(i)) + end do + + @:ALLOCATE(pres_visc_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(pres_visc_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(pres_visc_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(pres_visc_stress(i)) + end do + + @:ALLOCATE(div_pres_visc_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(div_pres_visc_stress(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(div_pres_visc_stress(i)) + end do + + @:ALLOCATE(reynolds_stress(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(reynolds_stress(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + @:ALLOCATE(reynolds_stress(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(reynolds_stress(i)) + end do + + @:ALLOCATE(eff_visc(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(eff_visc(i)%vf(1:num_dims)) + end do + do i = 1, num_dims + do j = 1, num_dims + 
@:ALLOCATE(eff_visc(i)%vf(j)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + end do + @:ACC_SETUP_VFs(eff_visc(i)) + end do + + @:ALLOCATE(int_mom_exch(1:num_dims)) + do i = 1, num_dims + @:ALLOCATE(int_mom_exch(i)%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(int_mom_exch(i)) + end do + + if (viscous) then + @:ALLOCATE(Res(1:2, 1:maxval(Re_size))) + end if + + if (viscous) then + do i = 1, 2 + do j = 1, Re_size(i) + Res(i, j) = fluid_pp(Re_idx(i, j))%Re(i) + end do + end do + $:GPU_UPDATE(device='[Res]') + end if + + @:ALLOCATE(particle_forces(0:num_ibs, 3)) + + !< global sizes + Nx = m_glb + 1 + Ny = n_glb + 1 + Nz = p_glb + 1 + + !< complex size + NxC = Nx/2 + 1 + + !< local sizes on each processor + Nyloc = Ny/num_procs + Nzloc = p + 1 + + $:GPU_UPDATE(device='[Nx, Ny, Nz, NxC, Nyloc, Nzloc]') + + @:ALLOCATE(data_real_in1d(Nx*Ny*Nzloc)) + @:ALLOCATE(data_cmplx_out1d(NxC*Ny*Nz/num_procs)) + @:ALLOCATE(data_cmplx_out1dy(NxC*Ny*Nz/num_procs)) + @:ALLOCATE(cmplx_kernelG1d(NxC*Nyloc*Nz)) + @:ALLOCATE(real_kernelG_in(Nx, Ny, Nzloc)) + @:ALLOCATE(data_real_3D_slabz(Nx, Ny, Nzloc)) + @:ALLOCATE(data_cmplx_slabz(NxC, Ny, Nzloc)) + @:ALLOCATE(data_cmplx_slaby(NxC, Nyloc, Nz)) + @:ALLOCATE(data_cmplx_slabz_batch(24, NxC, Ny, Nzloc)) + @:ALLOCATE(data_cmplx_slaby_batch(24, NxC, Nyloc, Nz)) + + @:ALLOCATE(sendbuf_sf(NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(recvbuf_sf(NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(sendbuf_batch(24*NxC*Nyloc*Nzloc*num_procs)) + @:ALLOCATE(recvbuf_batch(24*NxC*Nyloc*Nzloc*num_procs)) + +#if defined(MFC_OpenACC) + !< GPU FFT plans + !< X - plans + size_n(1) = Nx + inembed(1) = Nx + onembed(1) = NxC + ierr = cufftPlanMany(plan_x_fwd_gpu, 1, size_n, inembed, 1, Nx, onembed, 1, NxC, CUFFT_D2Z, Ny*Nzloc) + size_n(1) = Nx + inembed(1) = NxC + onembed(1) = Nx + ierr = cufftPlanMany(plan_x_bwd_gpu, 1, size_n, inembed, 1, NxC, onembed, 1, Nx, CUFFT_Z2D, Ny*Nzloc) + !< Y - plans + size_n(1) = Ny + inembed(1) = Ny + onembed(1) = Ny + ierr = cufftPlanMany(plan_y_gpu, 1, size_n, inembed, 1, Ny, onembed, 1, Ny, CUFFT_Z2Z, NxC*Nzloc) + !< Z - plans + size_n(1) = Nz + inembed(1) = Nz + onembed(1) = Nz + ierr = cufftPlanMany(plan_z_gpu, 1, size_n, inembed, 1, Nz, onembed, 1, Nz, CUFFT_Z2Z, NxC*Nyloc) +#else + !< CPU FFT plans + !< X - direction plans + size_n(1) = Nx + inembed(1) = Nx + onembed(1) = NxC + plan_x_r2c_fwd = fftw_plan_many_dft_r2c(1, size_n, Ny*Nzloc, & ! rank, n, howmany + data_real_in1d, inembed, 1, Nx, & ! in, inembed, istride, idist + data_cmplx_out1d, onembed, 1, NxC, & ! out, onembed, ostride, odist + FFTW_MEASURE) ! 
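The batched 1D plans above assemble a distributed 3D FFT in stages: a real-to-complex transform along x, a complex transform along y on the z-slab, a slab transpose, then a complex transform along z on the y-slab. On a single rank the staging collapses to the following NumPy check (the transpose disappears, but the axis ordering is the same):

import numpy as np

rng = np.random.default_rng(1)
f = rng.standard_normal((16, 12, 8))             # (Nx, Ny, Nz)

staged = np.fft.rfft(f, axis=0)                  # r2c along x -> (NxC, Ny, Nz)
staged = np.fft.fft(staged, axis=1)              # c2c along y on the z-slab
staged = np.fft.fft(staged, axis=2)              # c2c along z (after Z->Y transpose)

direct = np.fft.fftn(f)[: f.shape[0] // 2 + 1]   # full 3D FFT, keep kx >= 0
print(np.allclose(staged, direct))               # True

Planning many 1D transforms at once (the `howmany` arguments Ny*Nzloc and NxC*Nyloc) is what lets FFTW and cuFFT batch over all pencils of a slab in a single plan execution.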
sign, flags + size_n(1) = Nx + inembed(1) = NxC + onembed(1) = Nx + plan_x_c2r_bwd = fftw_plan_many_dft_c2r(1, size_n, Ny*Nzloc, & + data_cmplx_out1d, inembed, 1, NxC, & + data_real_in1d, onembed, 1, Nx, & + FFTW_MEASURE) + !< Y - direction plans + size_n(1) = Ny + inembed(1) = Ny + onembed(1) = Ny + plan_y_c2c_fwd = fftw_plan_many_dft(1, size_n, NxC*Nzloc, & + data_cmplx_out1dy, inembed, 1, Ny, & + data_cmplx_out1dy, onembed, 1, Ny, & + FFTW_FORWARD, FFTW_MEASURE) + plan_y_c2c_bwd = fftw_plan_many_dft(1, size_n, NxC*Nzloc, & + data_cmplx_out1dy, inembed, 1, Ny, & + data_cmplx_out1dy, onembed, 1, Ny, & + FFTW_BACKWARD, FFTW_MEASURE) + !< Z - direction plans + size_n(1) = Nz + inembed(1) = Nz + onembed(1) = Nz + plan_z_c2c_fwd = fftw_plan_many_dft(1, size_n, NxC*Nyloc, & + data_cmplx_out1d, inembed, 1, Nz, & + data_cmplx_out1d, onembed, 1, Nz, & + FFTW_FORWARD, FFTW_MEASURE) + plan_z_c2c_bwd = fftw_plan_many_dft(1, size_n, NxC*Nyloc, & + data_cmplx_out1d, inembed, 1, Nz, & + data_cmplx_out1d, onembed, 1, Nz, & + FFTW_BACKWARD, FFTW_MEASURE) + ! forward plans for filtering kernel + ! X kernel plan + size_n(1) = Nx + inembed(1) = Nx + onembed(1) = NxC + plan_x_r2c_kernelG = fftw_plan_many_dft_r2c(1, size_n, Ny*Nzloc, & + data_real_in1d, inembed, 1, Nx, & + cmplx_kernelG1d, onembed, 1, NxC, & + FFTW_MEASURE) + ! Y kernel plan + size_n(1) = Ny + inembed(1) = Ny + onembed(1) = Ny + plan_y_c2c_kernelG = fftw_plan_many_dft(1, size_n, NxC*Nzloc, & + data_cmplx_out1dy, inembed, 1, Ny, & + data_cmplx_out1dy, onembed, 1, Ny, & + FFTW_FORWARD, FFTW_MEASURE) + ! Z kernel plan + size_n(1) = Nz + inembed(1) = Nz + onembed(1) = Nz + plan_z_c2c_kernelG = fftw_plan_many_dft(1, size_n, NxC*Nyloc, & + cmplx_kernelG1d, inembed, 1, Nz, & + cmplx_kernelG1d, onembed, 1, Nz, & + FFTW_FORWARD, FFTW_MEASURE) +#endif + + ! file for particle forces + if (compute_particle_drag) then + if (proc_rank == 0) then + open (unit=100, file='particle_force.bin', status='replace', form='unformatted', access='stream', action='write') + end if + end if + + end subroutine s_initialize_fftw_explicit_filter_module + + !< initialize the gaussian filtering kernel in real space and then compute its DFT + subroutine s_initialize_filtering_kernel + real(wp) :: sigma_stddev + real(wp) :: Lx, Ly, Lz + real(wp) :: x_r, y_r, z_r + real(wp) :: r2 + real(wp) :: G_norm_int, G_norm_int_glb + integer :: i, j, k + + ! gaussian filter + sigma_stddev = filter_width + + Lx = x_domain_end_glb - x_domain_beg_glb + Ly = y_domain_end_glb - y_domain_beg_glb + Lz = z_domain_end_glb - z_domain_beg_glb + + G_norm_int = 0.0_wp + + $:GPU_PARALLEL_LOOP(collapse=3, reduction='[[G_norm_int]]', reductionOp='[+]', copyin='[Lx, Ly, Lz, sigma_stddev]', private='[x_r, y_r, z_r, r2]') + do i = 0, m + do j = 0, n + do k = 0, p + x_r = min(abs(x_cc(i) - x_domain_beg_glb), Lx - abs(x_cc(i) - x_domain_beg_glb)) + y_r = min(abs(y_cc(j) - y_domain_beg_glb), Ly - abs(y_cc(j) - y_domain_beg_glb)) + z_r = min(abs(z_cc(k) - z_domain_beg_glb), Lz - abs(z_cc(k) - z_domain_beg_glb)) + + r2 = x_r**2 + y_r**2 + z_r**2 + + real_kernelG_in(i + 1, j + 1, k + 1) = exp(-r2/(2.0_wp*sigma_stddev**2)) + + G_norm_int = G_norm_int + real_kernelG_in(i + 1, j + 1, k + 1)*dx(i)*dy(j)*dz(k) + end do + end do + end do + + call s_mpi_allreduce_sum(G_norm_int, G_norm_int_glb) + + ! FFT of kernel + ! 
normalize kernel + $:GPU_PARALLEL_LOOP(collapse=3, copyin='[G_norm_int_glb]') + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = real_kernelG_in(i, j, k)/G_norm_int_glb + end do + end do + end do + + ! 3D z-slab -> 1D x, y, z + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do + + ! X FFT +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, cmplx_kernelG1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_kernelG, data_real_in1d, cmplx_kernelG1d) +#endif + + ! 1D x, y, z -> 1D y, x, z (CMPLX) + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = cmplx_kernelG1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) + end do + end do + end do + + ! Y FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_kernelG, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 3D z-slab + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz(i, j, k) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do + + ! transpose z-slab to y-slab + call s_mpi_transpose_slabZ2Y + + ! 3D y-slab -> 1D z, x, y + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_slaby(i, j, k) + end do + end do + end do + + ! Z FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, cmplx_kernelG1d, cmplx_kernelG1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_kernelG, cmplx_kernelG1d, cmplx_kernelG1d) +#endif + + ! normalize FFT + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)/(real(Nx*Ny*Nz, wp)) + end do + end do + end do + + ! return cmplx_kernelG1d: 1D z, x, y + end subroutine s_initialize_filtering_kernel + + !< initialize fluid indicator function and filtered fluid indicator function + subroutine s_initialize_fluid_indicator_function(bc_type) + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + integer :: i, j, k + + @:ALLOCATE(fluid_indicator_function%sf(idwbuff(1)%beg:idwbuff(1)%end, & + idwbuff(2)%beg:idwbuff(2)%end, & + idwbuff(3)%beg:idwbuff(3)%end)) + @:ACC_SETUP_SFs(fluid_indicator_function) + + ! define fluid indicator function + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + if (ib_markers%sf(i, j, k) == 0) then + fluid_indicator_function%sf(i, j, k) = 1.0_wp + else + fluid_indicator_function%sf(i, j, k) = 0.0_wp + end if + end do + end do + end do + + call s_populate_scalarfield_buffers(bc_type, fluid_indicator_function) + + end subroutine s_initialize_fluid_indicator_function + + subroutine s_initialize_filtered_fluid_indicator_function + integer :: i, j, k + + @:ALLOCATE(filtered_fluid_indicator_function%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(filtered_fluid_indicator_function) + + ! 
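The kernel pass above samples a periodic Gaussian at the cell centers (using per-axis minimum-image distances to the domain corner, so no spectral phase shift is needed), normalizes it to unit integral over the domain, and stores its forward transform so that every later filtering pass is a pointwise spectral product, per the convolution theorem. A single-rank, uniform-grid NumPy sketch of the same construction:

import numpy as np

N, L, sigma = 32, 1.0, 0.06
dx = L / N
x = (np.arange(N) + 0.5) * dx                 # cell centers measured from beg
r = np.minimum(x, L - x)                      # periodic distance to the corner
G = np.exp(-np.add.outer(np.add.outer(r**2, r**2), r**2) / (2 * sigma**2))
G /= G.sum() * dx**3                          # unit integral, like G_norm_int_glb
Ghat = np.fft.rfftn(G)                        # reusable spectral kernel

f = np.random.default_rng(2).standard_normal((N, N, N))
filtered = np.fft.irfftn(np.fft.rfftn(f) * Ghat, s=f.shape) * dx**3
print(np.isclose(f.mean(), filtered.mean()))  # True: unit-integral kernel preserves the mean

Mean preservation is the property the normalization by G_norm_int_glb buys; in the Fortran the cell-volume quadrature factor is not applied explicitly but cancels wherever a filtered quantity is divided by the identically-filtered indicator function.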
filter fluid indicator function + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = fluid_indicator_function%sf(i - 1, j - 1, k - 1) + end do + end do + end do + + call s_mpi_FFT_fwd + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)*cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + + call s_mpi_FFT_bwd + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + filtered_fluid_indicator_function%sf(i - 1, j - 1, k - 1) = data_real_3D_slabz(i, j, k)/(real(Nx*Ny*Nz, wp)) + end do + end do + end do + + end subroutine s_initialize_filtered_fluid_indicator_function + + subroutine s_initialize_fluid_indicator_gradient + real(wp), dimension(-4:4) :: fd_coeffs + integer :: i, j, k, l + + fd_coeffs(-4) = 1._wp/280._wp + fd_coeffs(-3) = -4._wp/105._wp + fd_coeffs(-2) = 1._wp/5._wp + fd_coeffs(-1) = -4._wp/5._wp + fd_coeffs(0) = 0._wp + fd_coeffs(1) = 4._wp/5._wp + fd_coeffs(2) = -1._wp/5._wp + fd_coeffs(3) = 4._wp/105._wp + fd_coeffs(4) = -1._wp/280._wp + + @:ALLOCATE(grad_fluid_indicator(1:3)) + do i = 1, 3 + @:ALLOCATE(grad_fluid_indicator(i)%sf(0:m, 0:n, 0:p)) + @:ACC_SETUP_SFs(grad_fluid_indicator(i)) + end do + + $:GPU_PARALLEL_LOOP(collapse=3, copyin='[fd_coeffs]') + do i = 0, m + do j = 0, n + do k = 0, p + $:GPU_LOOP(parallelism='[seq]') + do l = -4, 4 + grad_fluid_indicator(1)%sf(i, j, k) = grad_fluid_indicator(1)%sf(i, j, k) + fd_coeffs(l)*fluid_indicator_function%sf(i + l, j, k)/dx(i) + grad_fluid_indicator(2)%sf(i, j, k) = grad_fluid_indicator(2)%sf(i, j, k) + fd_coeffs(l)*fluid_indicator_function%sf(i, j + l, k)/dy(j) + grad_fluid_indicator(3)%sf(i, j, k) = grad_fluid_indicator(3)%sf(i, j, k) + fd_coeffs(l)*fluid_indicator_function%sf(i, j, k + l)/dz(k) + end do + end do + end do + end do + + end subroutine s_initialize_fluid_indicator_gradient + + !< calculate the unclosed terms present in the volume filtered momentum equation + subroutine s_volume_filter_momentum_eqn(q_cons_vf, q_prim_vf, bc_type) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + integer :: i, j, k + + call s_setup_terms_filtering(q_cons_vf, q_prim_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress, bc_type) + + call s_filter_batch(q_cons_vf, q_cons_filtered, q_prim_vf(E_idx), filtered_pressure, reynolds_stress, visc_stress, eff_visc) + + call s_compute_interphase_momentum_exchange(filtered_fluid_indicator_function, grad_fluid_indicator, pres_visc_stress, int_mom_exch) + + call s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) + call s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, bc_type) + + end subroutine s_volume_filter_momentum_eqn + + !< applies the gaussian filter to an arbitrary scalar field + subroutine s_apply_fftw_filter_scalarfield(filtered_fluid_indicator_function, fluid_quantity, q_temp_in, q_temp_out) + type(scalar_field), intent(in) :: filtered_fluid_indicator_function + type(scalar_field), intent(inout) :: q_temp_in + type(scalar_field), intent(inout), optional :: q_temp_out + + logical, intent(in) :: fluid_quantity !< whether or not convolution integral is over V_f or V_p^(i) - integral over fluid volume or particle 
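The indicator gradient above uses the standard 8th-order central-difference stencil (the fd_coeffs values). One caveat: the accumulation loop reads grad_fluid_indicator before any value has been assigned, so unless @:ALLOCATE zero-fills (Fortran allocation alone does not), the arrays should be explicitly zeroed after allocation. The periodic NumPy equivalent below does that zeroing and verifies the order of accuracy:

import numpy as np

c = np.array([1/280, -4/105, 1/5, -4/5, 0, 4/5, -1/5, 4/105, -1/280])

def ddx_periodic(f, dx, axis):
    """8th-order central first derivative on a periodic uniform grid."""
    out = np.zeros_like(f)                     # explicit zero-initialization
    for l, cl in zip(range(-4, 5), c):
        out += cl * np.roll(f, -l, axis=axis)  # f(i + l) via periodic shift
    return out / dx

N = 64
x = np.arange(N) * 2 * np.pi / N
f = np.sin(x)[:, None, None] * np.ones((N, N, N))
err = np.abs(ddx_periodic(f, 2 * np.pi / N, 0) - np.cos(x)[:, None, None]).max()
print(err)  # ~1e-11: consistent with 8th-order accuracy at this resolution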
volume + + integer :: i, j, k + + if (fluid_quantity) then + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i + 1, j + 1, k + 1) = q_temp_in%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) + end do + end do + end do + else + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i + 1, j + 1, k + 1) = q_temp_in%sf(i, j, k)*(1.0_wp - fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + end if + + call nvtxStartRange("FORWARD-3D-FFT") + call s_mpi_FFT_fwd + call nvtxEndRange + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)*cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + + call nvtxStartRange("BACKWARD-3D-FFT") + call s_mpi_FFT_bwd + call nvtxEndRange + + if (present(q_temp_out)) then + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + q_temp_out%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + else + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + q_temp_in%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + end if + + end subroutine s_apply_fftw_filter_scalarfield + + ! compute viscous stress tensor + subroutine s_compute_viscous_stress_tensor(visc_stress, q_prim_vf, q_cons_filtered) + type(vector_field), dimension(num_dims), intent(inout) :: visc_stress + type(scalar_field), dimension(sys_size), intent(in), optional :: q_prim_vf + type(scalar_field), dimension(sys_size - 1), intent(in), optional :: q_cons_filtered + real(wp) :: dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz ! spatial velocity derivatives + integer :: i, j, k + + if (present(q_prim_vf)) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz]') + do i = 0, m + do j = 0, n + do k = 0, p + ! velocity gradients, local to each process + dudx = (q_prim_vf(2)%sf(i + 1, j, k) - q_prim_vf(2)%sf(i - 1, j, k))/(2._wp*dx(i)) + dudy = (q_prim_vf(2)%sf(i, j + 1, k) - q_prim_vf(2)%sf(i, j - 1, k))/(2._wp*dy(j)) + dudz = (q_prim_vf(2)%sf(i, j, k + 1) - q_prim_vf(2)%sf(i, j, k - 1))/(2._wp*dz(k)) + + dvdx = (q_prim_vf(3)%sf(i + 1, j, k) - q_prim_vf(3)%sf(i - 1, j, k))/(2._wp*dx(i)) + dvdy = (q_prim_vf(3)%sf(i, j + 1, k) - q_prim_vf(3)%sf(i, j - 1, k))/(2._wp*dy(j)) + dvdz = (q_prim_vf(3)%sf(i, j, k + 1) - q_prim_vf(3)%sf(i, j, k - 1))/(2._wp*dz(k)) + + dwdx = (q_prim_vf(4)%sf(i + 1, j, k) - q_prim_vf(4)%sf(i - 1, j, k))/(2._wp*dx(i)) + dwdy = (q_prim_vf(4)%sf(i, j + 1, k) - q_prim_vf(4)%sf(i, j - 1, k))/(2._wp*dy(j)) + dwdz = (q_prim_vf(4)%sf(i, j, k + 1) - q_prim_vf(4)%sf(i, j, k - 1))/(2._wp*dz(k)) + + ! 
viscous stress tensor, visc_stress(row, column) + visc_stress(1)%vf(1)%sf(i, j, k) = (4._wp/3._wp*dudx - 2._wp/3._wp*(dvdy + dwdz))/Res(1, 1) + visc_stress(1)%vf(2)%sf(i, j, k) = (dudy + dvdx)/Res(1, 1) + visc_stress(1)%vf(3)%sf(i, j, k) = (dudz + dwdx)/Res(1, 1) + visc_stress(2)%vf(1)%sf(i, j, k) = (dvdx + dudy)/Res(1, 1) + visc_stress(2)%vf(2)%sf(i, j, k) = (4._wp/3._wp*dvdy - 2._wp/3._wp*(dudx + dwdz))/Res(1, 1) + visc_stress(2)%vf(3)%sf(i, j, k) = (dvdz + dwdy)/Res(1, 1) + visc_stress(3)%vf(1)%sf(i, j, k) = (dwdx + dudz)/Res(1, 1) + visc_stress(3)%vf(2)%sf(i, j, k) = (dwdy + dvdz)/Res(1, 1) + visc_stress(3)%vf(3)%sf(i, j, k) = (4._wp/3._wp*dwdz - 2._wp/3._wp*(dudx + dvdy))/Res(1, 1) + end do + end do + end do + else if (present(q_cons_filtered)) then + $:GPU_PARALLEL_LOOP(collapse=3, private='[dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz]') + do i = 0, m + do j = 0, n + do k = 0, p + ! velocity gradients, local to each process + dudx = (q_cons_filtered(2)%sf(i + 1, j, k)/q_cons_filtered(1)%sf(i + 1, j, k) - q_cons_filtered(2)%sf(i - 1, j, k)/q_cons_filtered(1)%sf(i - 1, j, k))/(2._wp*dx(i)) + dudy = (q_cons_filtered(2)%sf(i, j + 1, k)/q_cons_filtered(1)%sf(i, j + 1, k) - q_cons_filtered(2)%sf(i, j - 1, k)/q_cons_filtered(1)%sf(i, j - 1, k))/(2._wp*dy(j)) + dudz = (q_cons_filtered(2)%sf(i, j, k + 1)/q_cons_filtered(1)%sf(i, j, k + 1) - q_cons_filtered(2)%sf(i, j, k - 1)/q_cons_filtered(1)%sf(i, j, k - 1))/(2._wp*dz(k)) + + dvdx = (q_cons_filtered(3)%sf(i + 1, j, k)/q_cons_filtered(1)%sf(i + 1, j, k) - q_cons_filtered(3)%sf(i - 1, j, k)/q_cons_filtered(1)%sf(i - 1, j, k))/(2._wp*dx(i)) + dvdy = (q_cons_filtered(3)%sf(i, j + 1, k)/q_cons_filtered(1)%sf(i, j + 1, k) - q_cons_filtered(3)%sf(i, j - 1, k)/q_cons_filtered(1)%sf(i, j - 1, k))/(2._wp*dy(j)) + dvdz = (q_cons_filtered(3)%sf(i, j, k + 1)/q_cons_filtered(1)%sf(i, j, k + 1) - q_cons_filtered(3)%sf(i, j, k - 1)/q_cons_filtered(1)%sf(i, j, k - 1))/(2._wp*dz(k)) + + dwdx = (q_cons_filtered(4)%sf(i + 1, j, k)/q_cons_filtered(1)%sf(i + 1, j, k) - q_cons_filtered(4)%sf(i - 1, j, k)/q_cons_filtered(1)%sf(i - 1, j, k))/(2._wp*dx(i)) + dwdy = (q_cons_filtered(4)%sf(i, j + 1, k)/q_cons_filtered(1)%sf(i, j + 1, k) - q_cons_filtered(4)%sf(i, j - 1, k)/q_cons_filtered(1)%sf(i, j - 1, k))/(2._wp*dy(j)) + dwdz = (q_cons_filtered(4)%sf(i, j, k + 1)/q_cons_filtered(1)%sf(i, j, k + 1) - q_cons_filtered(4)%sf(i, j, k - 1)/q_cons_filtered(1)%sf(i, j, k - 1))/(2._wp*dz(k)) + + ! 
viscous stress tensor, visc_stress(row, column) + visc_stress(1)%vf(1)%sf(i, j, k) = (4._wp/3._wp*dudx - 2._wp/3._wp*(dvdy + dwdz))/Res(1, 1) + visc_stress(1)%vf(2)%sf(i, j, k) = (dudy + dvdx)/Res(1, 1) + visc_stress(1)%vf(3)%sf(i, j, k) = (dudz + dwdx)/Res(1, 1) + visc_stress(2)%vf(1)%sf(i, j, k) = (dvdx + dudy)/Res(1, 1) + visc_stress(2)%vf(2)%sf(i, j, k) = (4._wp/3._wp*dvdy - 2._wp/3._wp*(dudx + dwdz))/Res(1, 1) + visc_stress(2)%vf(3)%sf(i, j, k) = (dvdz + dwdy)/Res(1, 1) + visc_stress(3)%vf(1)%sf(i, j, k) = (dwdx + dudz)/Res(1, 1) + visc_stress(3)%vf(2)%sf(i, j, k) = (dwdy + dvdz)/Res(1, 1) + visc_stress(3)%vf(3)%sf(i, j, k) = (4._wp/3._wp*dwdz - 2._wp/3._wp*(dudx + dvdy))/Res(1, 1) + end do + end do + end do + end if + + end subroutine s_compute_viscous_stress_tensor + + subroutine s_compute_stress_tensor(pres_visc_stress, visc_stress, q_prim_vf) + type(vector_field), dimension(num_dims), intent(inout) :: pres_visc_stress + type(vector_field), dimension(num_dims), intent(in) :: visc_stress + type(scalar_field), dimension(sys_size), intent(in) :: q_prim_vf + integer :: i, j, k + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + pres_visc_stress(1)%vf(1)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k) - visc_stress(1)%vf(1)%sf(i, j, k) + pres_visc_stress(1)%vf(2)%sf(i, j, k) = -visc_stress(1)%vf(2)%sf(i, j, k) + pres_visc_stress(1)%vf(3)%sf(i, j, k) = -visc_stress(1)%vf(3)%sf(i, j, k) + pres_visc_stress(2)%vf(1)%sf(i, j, k) = -visc_stress(2)%vf(1)%sf(i, j, k) + pres_visc_stress(2)%vf(2)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k) - visc_stress(2)%vf(2)%sf(i, j, k) + pres_visc_stress(2)%vf(3)%sf(i, j, k) = -visc_stress(2)%vf(3)%sf(i, j, k) + pres_visc_stress(3)%vf(1)%sf(i, j, k) = -visc_stress(3)%vf(1)%sf(i, j, k) + pres_visc_stress(3)%vf(2)%sf(i, j, k) = -visc_stress(3)%vf(2)%sf(i, j, k) + pres_visc_stress(3)%vf(3)%sf(i, j, k) = q_prim_vf(E_idx)%sf(i, j, k) - visc_stress(3)%vf(3)%sf(i, j, k) + end do + end do + end do + + end subroutine s_compute_stress_tensor + + !< compute the divergence of the pressure-viscous stress tensor + subroutine s_compute_divergence_stress_tensor(div_stress_tensor, stress_tensor) + type(scalar_field), dimension(num_dims), intent(inout) :: div_stress_tensor + type(vector_field), dimension(num_dims), intent(in) :: stress_tensor + integer :: i, j, k + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + div_stress_tensor(1)%sf(i, j, k) = (stress_tensor(1)%vf(1)%sf(i + 1, j, k) - stress_tensor(1)%vf(1)%sf(i - 1, j, k))/(2._wp*dx(i)) & + + (stress_tensor(1)%vf(2)%sf(i, j + 1, k) - stress_tensor(1)%vf(2)%sf(i, j - 1, k))/(2._wp*dy(j)) & + + (stress_tensor(1)%vf(3)%sf(i, j, k + 1) - stress_tensor(1)%vf(3)%sf(i, j, k - 1))/(2._wp*dz(k)) + + div_stress_tensor(2)%sf(i, j, k) = (stress_tensor(2)%vf(1)%sf(i + 1, j, k) - stress_tensor(2)%vf(1)%sf(i - 1, j, k))/(2._wp*dx(i)) & + + (stress_tensor(2)%vf(2)%sf(i, j + 1, k) - stress_tensor(2)%vf(2)%sf(i, j - 1, k))/(2._wp*dy(j)) & + + (stress_tensor(2)%vf(3)%sf(i, j, k + 1) - stress_tensor(2)%vf(3)%sf(i, j, k - 1))/(2._wp*dz(k)) + + div_stress_tensor(3)%sf(i, j, k) = (stress_tensor(3)%vf(1)%sf(i + 1, j, k) - stress_tensor(3)%vf(1)%sf(i - 1, j, k))/(2._wp*dx(i)) & + + (stress_tensor(3)%vf(2)%sf(i, j + 1, k) - stress_tensor(3)%vf(2)%sf(i, j - 1, k))/(2._wp*dy(j)) & + + (stress_tensor(3)%vf(3)%sf(i, j, k + 1) - stress_tensor(3)%vf(3)%sf(i, j, k - 1))/(2._wp*dz(k)) + end do + end do + end do + + end subroutine s_compute_divergence_stress_tensor + + !< setup for calculation of unclosed 
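The nine assignments in each branch above are the compressible Newtonian stress under the Stokes hypothesis, tau = mu*(grad u + (grad u)^T) - (2/3)*mu*(div u)*I, with the 1/Res(1, 1) factor playing the role of mu, followed by a row-wise divergence; everything uses 2nd-order central differences. A vectorized periodic NumPy sketch (uniform grid assumed):

import numpy as np

def central(f, dx, axis):
    return (np.roll(f, -1, axis) - np.roll(f, 1, axis)) / (2 * dx)

N, dx, mu = 32, 1.0 / 32, 1.0e-3
rng = np.random.default_rng(3)
u = rng.standard_normal((3, N, N, N))          # velocity components

g = np.array([[central(u[a], dx, b) for b in range(3)] for a in range(3)])
div = np.trace(g)                              # du/dx + dv/dy + dw/dz
tau = mu * (g + g.transpose(1, 0, 2, 3, 4))    # symmetric part, times 2*mu
for a in range(3):
    tau[a, a] -= 2.0 / 3.0 * mu * div          # Stokes-hypothesis trace term

# row-wise divergence, as in s_compute_divergence_stress_tensor
div_tau = np.stack([sum(central(tau[a, b], dx, b) for b in range(3))
                    for a in range(3)])
print(np.allclose(tau, tau.transpose(1, 0, 2, 3, 4)))  # True: tau is symmetric
print(tau.shape, div_tau.shape)                        # (3, 3, N, N, N) (3, N, N, N)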
terms in volume filtered momentum eqn + subroutine s_setup_terms_filtering(q_cons_vf, q_prim_vf, reynolds_stress, visc_stress, pres_visc_stress, div_pres_visc_stress, bc_type) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(sys_size), intent(inout) :: q_prim_vf + type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress + type(vector_field), dimension(1:num_dims), intent(inout) :: pres_visc_stress + type(scalar_field), dimension(1:num_dims), intent(inout) :: div_pres_visc_stress + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + + integer :: i, j, k, l, q + + ! pseudo turbulent reynolds stress setup + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + $:GPU_LOOP(parallelism='[seq]') + do l = 1, num_dims + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_dims + reynolds_stress(l)%vf(q)%sf(i, j, k) = q_cons_vf(1)%sf(i, j, k)*(q_prim_vf(momxb - 1 + l)%sf(i, j, k)*q_prim_vf(momxb - 1 + q)%sf(i, j, k)) ! rho*(u x u) + end do + end do + end do + end do + end do + + ! set density and momentum buffers + do i = contxb, momxe + call s_populate_scalarfield_buffers(bc_type, q_prim_vf(i)) + end do + + ! effective viscosity setup, return viscous stress tensor + call s_compute_viscous_stress_tensor(visc_stress, q_prim_vf=q_prim_vf) + + call s_compute_stress_tensor(pres_visc_stress, visc_stress, q_prim_vf) + + ! set stress tensor buffers for taking divergence + do i = 1, 3 + do j = 1, 3 + call s_populate_scalarfield_buffers(bc_type, pres_visc_stress(i)%vf(j)) + end do + end do + + ! interphase momentum exchange term setup + call s_compute_divergence_stress_tensor(div_pres_visc_stress, pres_visc_stress) + + end subroutine s_setup_terms_filtering + + subroutine s_compute_pseudo_turbulent_reynolds_stress(q_cons_filtered, reynolds_stress) + type(scalar_field), dimension(sys_size - 1), intent(in) :: q_cons_filtered + type(vector_field), dimension(1:num_dims), intent(inout) :: reynolds_stress + integer :: i, j, k, l, q + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + $:GPU_LOOP(parallelism='[seq]') + do l = 1, num_dims + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_dims + reynolds_stress(l)%vf(q)%sf(i, j, k) = reynolds_stress(l)%vf(q)%sf(i, j, k) & + - (q_cons_filtered(momxb - 1 + l)%sf(i, j, k)*q_cons_filtered(momxb - 1 + q)%sf(i, j, k)/q_cons_filtered(1)%sf(i, j, k)) + end do + end do + end do + end do + end do + + end subroutine s_compute_pseudo_turbulent_reynolds_stress + + subroutine s_compute_effective_viscosity(q_cons_filtered, eff_visc, visc_stress, bc_type) + type(scalar_field), dimension(1:sys_size - 1), intent(inout) :: q_cons_filtered + type(vector_field), dimension(1:num_dims), intent(inout) :: eff_visc + type(vector_field), dimension(1:num_dims), intent(inout) :: visc_stress + type(integer_field), dimension(1:num_dims, -1:1), intent(in) :: bc_type + + integer :: i, j, k, l, q + + ! set buffers for filtered momentum quantities and density + do i = contxb, momxe + call s_populate_scalarfield_buffers(bc_type, q_cons_filtered(i)) + end do + + ! calculate stress tensor with filtered quantities + call s_compute_viscous_stress_tensor(visc_stress, q_cons_filtered=q_cons_filtered) + + ! 
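s_compute_pseudo_turbulent_reynolds_stress above completes a filtered-product-minus-product-of-filtered decomposition, R_lq = bar(rho*u_l*u_q) - bar(rho*u_l)*bar(rho*u_q)/bar(rho), i.e. a Favre (density-weighted) splitting: the first term is seeded before filtering in s_setup_terms_filtering and the second is subtracted after. With a simple periodic box filter standing in for the Gaussian, the same assembly in NumPy:

import numpy as np

def bar(f, w=1):
    """Periodic (2w+1)^3 box filter as a stand-in for the Gaussian."""
    out = np.zeros_like(f)
    for sx in range(-w, w + 1):
        for sy in range(-w, w + 1):
            for sz in range(-w, w + 1):
                out += np.roll(f, (sx, sy, sz), axis=(0, 1, 2))
    return out / (2 * w + 1) ** 3

rng = np.random.default_rng(4)
N = 32
rho = 1.0 + 0.1 * rng.random((N, N, N))
u = rng.standard_normal((3, N, N, N))

R = np.empty((3, 3, N, N, N))
for l in range(3):
    for q in range(3):
        R[l, q] = bar(rho * u[l] * u[q]) - bar(rho * u[l]) * bar(rho * u[q]) / bar(rho)

print(np.allclose(R[0, 1], R[1, 0]))  # True: the tensor is symmetric by construction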
calculate eff_visc + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + $:GPU_LOOP(parallelism='[seq]') + do l = 1, num_dims + $:GPU_LOOP(parallelism='[seq]') + do q = 1, num_dims + eff_visc(l)%vf(q)%sf(i, j, k) = eff_visc(l)%vf(q)%sf(i, j, k) - visc_stress(l)%vf(q)%sf(i, j, k) + end do + end do + end do + end do + end do + + end subroutine s_compute_effective_viscosity + + subroutine s_compute_interphase_momentum_exchange(filtered_fluid_indicator_function, grad_fluid_indicator, pres_visc_stress, int_mom_exch) + type(scalar_field), intent(in) :: filtered_fluid_indicator_function + type(scalar_field), dimension(1:3), intent(in) :: grad_fluid_indicator + type(vector_field), dimension(1:3), intent(in) :: pres_visc_stress + type(scalar_field), dimension(1:3), intent(inout) :: int_mom_exch + + integer :: i, j, k, l + + ! x-, y-, z- component loop + do l = 1, 3 + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i + 1, j + 1, k + 1) = pres_visc_stress(l)%vf(1)%sf(i, j, k)*grad_fluid_indicator(1)%sf(i, j, k) & + + pres_visc_stress(l)%vf(2)%sf(i, j, k)*grad_fluid_indicator(2)%sf(i, j, k) & + + pres_visc_stress(l)%vf(3)%sf(i, j, k)*grad_fluid_indicator(3)%sf(i, j, k) + end do + end do + end do + + call nvtxStartRange("FORWARD-3D-FFT") + call s_mpi_FFT_fwd + call nvtxEndRange + + ! convolution with filtering kernel + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)*cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + + call nvtxStartRange("BACKWARD-3D-FFT") + call s_mpi_FFT_bwd + call nvtxEndRange + + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + int_mom_exch(l)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)) + end do + end do + end do + end do ! end component loop + + end subroutine s_compute_interphase_momentum_exchange + + ! computes x-,y-,z-direction forces on particles + subroutine s_compute_particle_forces + real(wp), dimension(num_ibs, 3) :: force_glb + real(wp) :: dvol + integer :: i, j, k, l + + ! zero particle forces + particle_forces = 0.0_wp + $:GPU_UPDATE(device='[particle_forces]') + + $:GPU_PARALLEL_LOOP(collapse=3, private='[dvol]') + do i = 0, m + do j = 0, n + do k = 0, p + dvol = dx(i)*dy(j)*dz(k) + $:GPU_ATOMIC(atomic='update') + particle_forces(ib_markers%sf(i, j, k), 1) = particle_forces(ib_markers%sf(i, j, k), 1) - (div_pres_visc_stress(1)%sf(i, j, k)*dvol) + $:GPU_ATOMIC(atomic='update') + particle_forces(ib_markers%sf(i, j, k), 2) = particle_forces(ib_markers%sf(i, j, k), 2) - (div_pres_visc_stress(2)%sf(i, j, k)*dvol) + $:GPU_ATOMIC(atomic='update') + particle_forces(ib_markers%sf(i, j, k), 3) = particle_forces(ib_markers%sf(i, j, k), 3) - (div_pres_visc_stress(3)%sf(i, j, k)*dvol) + end do + end do + end do + + $:GPU_UPDATE(host='[particle_forces]') + + ! reduce particle forces across processors + do i = 1, num_ibs + call s_mpi_allreduce_sum(particle_forces(i, 1), force_glb(i, 1)) + call s_mpi_allreduce_sum(particle_forces(i, 2), force_glb(i, 2)) + call s_mpi_allreduce_sum(particle_forces(i, 3), force_glb(i, 3)) + end do + + ! 
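The atomic updates in s_compute_particle_forces above bin -div(sigma)*dvol by each cell's immersed-boundary marker (marker 0 is the carrier fluid, so row 0 is a discard bin) before the per-particle allreduce across ranks. np.add.at is the serial analogue of those atomic adds, as in this NumPy sketch:

import numpy as np

rng = np.random.default_rng(5)
N, num_ibs, dvol = 16, 2, (1.0 / 16) ** 3
markers = rng.integers(0, num_ibs + 1, size=(N, N, N))   # 0..num_ibs per cell
div_sigma = rng.standard_normal((3, N, N, N))            # stand-in divergence field

forces = np.zeros((num_ibs + 1, 3))                      # row 0: fluid (discarded)
for c in range(3):
    np.add.at(forces[:, c], markers.ravel(), -div_sigma[c].ravel() * dvol)

print(forces[1:])   # x-, y-, z-force on each particle

Binning every cell unconditionally and discarding row 0 avoids a branch in the hot loop, which is the same trade the GPU_ATOMIC version above makes.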
write particle forces to file + if (proc_rank == 0) then + write (100) force_glb + flush (100) + end if + + end subroutine s_compute_particle_forces + + !< transpose domain from z-slabs to y-slabs on each processor + subroutine s_mpi_transpose_slabZ2Y + integer :: dest_rank, src_rank + integer :: i, j, k + + $:GPU_PARALLEL_LOOP(collapse=4) + do dest_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + sendbuf_sf(i + (j - 1)*NxC + (k - 1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slabz(i, j + dest_rank*Nyloc, k) + end do + end do + end do + end do + + $:GPU_UPDATE(host='[sendbuf_sf]') + + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) + + $:GPU_UPDATE(device='[recvbuf_sf]') + + $:GPU_PARALLEL_LOOP(collapse=4) + do src_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + data_cmplx_slaby(i, j, k + src_rank*Nzloc) = recvbuf_sf(i + (j - 1)*NxC + (k - 1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabZ2Y + + !< transpose domain from y-slabs to z-slabs on each processor + subroutine s_mpi_transpose_slabY2Z + integer :: dest_rank, src_rank + integer :: i, j, k + + $:GPU_PARALLEL_LOOP(collapse=4) + do dest_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + sendbuf_sf(i + (j - 1)*NxC + (k - 1)*NxC*Nyloc + dest_rank*NxC*Nyloc*Nzloc) = data_cmplx_slaby(i, j, k + dest_rank*Nzloc) + end do + end do + end do + end do + + $:GPU_UPDATE(host='[sendbuf_sf]') + + call MPI_Alltoall(sendbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_sf, NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) + + $:GPU_UPDATE(device='[recvbuf_sf]') + + $:GPU_PARALLEL_LOOP(collapse=4) + do src_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + data_cmplx_slabz(i, j + src_rank*Nyloc, k) = recvbuf_sf(i + (j - 1)*NxC + (k - 1)*NxC*Nyloc + src_rank*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabY2Z + + !< transpose domain from z-slabs to y-slabs on each processor for batched 24 element tensors + subroutine s_mpi_transpose_slabZ2Y_batch + integer :: dest_rank, src_rank + integer :: i, j, k, l + + $:GPU_PARALLEL_LOOP(collapse=5) + do dest_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 24 + sendbuf_batch(l + (i - 1)*24 + (j - 1)*24*NxC + (k - 1)*24*NxC*Nyloc + dest_rank*24*NxC*Nyloc*Nzloc) = data_cmplx_slabz_batch(l, i, j + dest_rank*Nyloc, k) + end do + end do + end do + end do + end do + + $:GPU_UPDATE(host='[sendbuf_batch]') + + call MPI_Alltoall(sendbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) + + $:GPU_UPDATE(device='[recvbuf_batch]') + + $:GPU_PARALLEL_LOOP(collapse=5) + do src_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 24 + data_cmplx_slaby_batch(l, i, j, k + src_rank*Nzloc) = recvbuf_batch(l + (i - 1)*24 + (j - 1)*24*NxC + (k - 1)*24*NxC*Nyloc + src_rank*24*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabZ2Y_batch + + !< transpose domain from y-slabs to z-slabs on each processor for batched 24 element tensors + subroutine s_mpi_transpose_slabY2Z_batch + integer :: dest_rank, src_rank + integer :: i, j, k, l + + $:GPU_PARALLEL_LOOP(collapse=5) + do dest_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 
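Each slab transpose above packs per-destination contiguous blocks, exchanges them with a single MPI_Alltoall, and unpacks into the complementary slab; the batched variants fuse 24 fields into one exchange to amortize latency. (Note the exchange arrays are single-precision complex, c_float_complex/MPI_COMPLEX, while the FFT work arrays are double precision, presumably to halve transpose bandwidth.) A serial NumPy emulation of the pack/exchange/unpack round trip for P ranks:

import numpy as np

P, NxC, Ny, Nz = 4, 5, 8, 8
Nyloc, Nzloc = Ny // P, Nz // P
rng = np.random.default_rng(6)
glb = rng.standard_normal((NxC, Ny, Nz)) + 1j * rng.standard_normal((NxC, Ny, Nz))

zslab = [glb[:, :, r * Nzloc:(r + 1) * Nzloc] for r in range(P)]  # input layout

# pack: on rank r, block d holds the y-range owned by destination rank d
send = [[zslab[r][:, d * Nyloc:(d + 1) * Nyloc, :] for d in range(P)]
        for r in range(P)]
# MPI_Alltoall: block d sent by rank s arrives as block s on rank d
recv = [[send[s][d] for s in range(P)] for d in range(P)]
# unpack: received blocks stack along z, in source-rank order
yslab = [np.concatenate(recv[d], axis=2) for d in range(P)]

print(all(np.allclose(yslab[r], glb[:, r * Nyloc:(r + 1) * Nyloc, :])
          for r in range(P)))  # True: Z-slabs transposed into Y-slabs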
1, NxC + do l = 1, 24 + sendbuf_batch(l + (i - 1)*24 + (j - 1)*24*NxC + (k - 1)*24*NxC*Nyloc + dest_rank*24*NxC*Nyloc*Nzloc) = data_cmplx_slaby_batch(l, i, j, k + dest_rank*Nzloc) + end do + end do + end do + end do + end do + + $:GPU_UPDATE(host='[sendbuf_batch]') + + call MPI_Alltoall(sendbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, & + recvbuf_batch, 24*NxC*Nyloc*Nzloc, MPI_COMPLEX, MPI_COMM_WORLD, ierr) + + $:GPU_UPDATE(device='[recvbuf_batch]') + + $:GPU_PARALLEL_LOOP(collapse=5) + do src_rank = 0, num_procs - 1 + do k = 1, Nzloc + do j = 1, Nyloc + do i = 1, NxC + do l = 1, 24 + data_cmplx_slabz_batch(l, i, j + src_rank*Nyloc, k) = recvbuf_batch(l + (i - 1)*24 + (j - 1)*24*NxC + (k - 1)*24*NxC*Nyloc + src_rank*24*NxC*Nyloc*Nzloc) + end do + end do + end do + end do + end do + + end subroutine s_mpi_transpose_slabY2Z_batch + + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d + subroutine s_filter_batch(q_cons_vf, q_cons_filtered, pressure, filtered_pressure, reynolds_stress, visc_stress, eff_visc) + type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf + type(scalar_field), dimension(5), intent(inout) :: q_cons_filtered + type(scalar_field), intent(inout) :: pressure + type(scalar_field), intent(inout) :: filtered_pressure + type(vector_field), dimension(3), intent(inout) :: reynolds_stress + type(vector_field), dimension(3), intent(inout) :: visc_stress + type(vector_field), dimension(3), intent(inout) :: eff_visc + integer :: i, j, k, l, q + + ! cons vars + do l = 1, 5 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i + 1, j + 1, k + 1) = q_cons_vf(l)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_batch(l, i, j, k) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do + end do + + ! 
pressure + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i + 1, j + 1, k + 1) = pressure%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_batch(6, i, j, k) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do + + ! reynolds stress + do l = 1, 3 + do q = 1, 3 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i + 1, j + 1, k + 1) = reynolds_stress(l)%vf(q)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_batch(6 + 3*(l - 1) + q, i, j, k) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do + end do + end do + + ! 
effective viscosity + do l = 1, 3 + do q = 1, 3 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + data_real_3D_slabz(i + 1, j + 1, k + 1) = visc_stress(l)%vf(q)%sf(i, j, k)*fluid_indicator_function%sf(i, j, k) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz_batch(15 + 3*(l - 1) + q, i, j, k) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do + end do + end do + + call s_mpi_transpose_slabZ2Y_batch + + ! cons vars + do l = 1, 5 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_slaby_batch(l, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)*cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_batch(l, i, j, k) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + end do + + ! pressure + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_slaby_batch(6, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)*cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_batch(6, i, j, k) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + + ! 
reynolds stress + do l = 1, 3 + do q = 1, 3 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_slaby_batch(6 + 3*(l - 1) + q, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)*cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_batch(6 + 3*(l - 1) + q, i, j, k) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + end do + end do + + ! effective viscosity + do l = 1, 3 + do q = 1, 3 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_slaby_batch(15 + 3*(l - 1) + q, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC)*cmplx_kernelG1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby_batch(15 + 3*(l - 1) + q, i, j, k) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + end do + end do + + call s_mpi_transpose_slabY2Z_batch + + ! 
cons vars + do l = 1, 5 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_slabz_batch(l, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + q_cons_filtered(l)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + end do + + ! pressure + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_slabz_batch(6, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + filtered_pressure%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + + ! 
reynolds stress + do l = 1, 3 + do q = 1, 3 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_slabz_batch(6 + 3*(l - 1) + q, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + reynolds_stress(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + end do + end do + + ! effective viscosity + do l = 1, 3 + do q = 1, 3 + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_slabz_batch(15 + 3*(l - 1) + q, i, j, k) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) + end do + end do + end do + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 0, m + do j = 0, n + do k = 0, p + eff_visc(l)%vf(q)%sf(i, j, k) = data_real_3D_slabz(i + 1, j + 1, k + 1)/(real(Nx*Ny*Nz, wp)*filtered_fluid_indicator_function%sf(i, j, k)) + end do + end do + end do + end do + end do + + end subroutine s_filter_batch + + !< compute forward FFT, input: data_real_3D_slabz, output: data_cmplx_out1d + subroutine s_mpi_FFT_fwd + integer :: i, j, k + + ! 3D z-slab -> 1D x, y, z + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) = data_real_3D_slabz(i, j, k) + end do + end do + end do + + ! X FFT +#if defined(MFC_OpenACC) + ierr = cufftExecD2Z(plan_x_fwd_gpu, data_real_in1d, data_cmplx_out1d) +#else + call fftw_execute_dft_r2c(plan_x_r2c_fwd, data_real_in1d, data_cmplx_out1d) +#endif + + ! 1D x, y, z -> 1D y, x, z (CMPLX) + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) + end do + end do + end do + + ! 
Y FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_y_c2c_fwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 3D z-slab + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_slabz(i, j, k) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do + + ! transpose z-slab to y-slab + call nvtxStartRange("SLAB-MPI-TRANSPOSE-Z2Y") + call s_mpi_transpose_slabZ2Y + call nvtxEndRange + + ! 3D y-slab -> 1D z, x, y + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) = data_cmplx_slaby(i, j, k) + end do + end do + end do + + ! Z FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_FORWARD) +#else + call fftw_execute_dft(plan_z_c2c_fwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! return data_cmplx_out1d: 1D z, x, y + end subroutine s_mpi_FFT_fwd + + !< compute inverse FFT, input: data_cmplx_out1d, output: data_real_3D_slabz + subroutine s_mpi_FFT_bwd + integer :: i, j, k + + ! Z inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_z_gpu, data_cmplx_out1d, data_cmplx_out1d, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_z_c2c_bwd, data_cmplx_out1d, data_cmplx_out1d) +#endif + + ! 1D z, x, y -> 3D y-slab + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Nyloc + do k = 1, Nz + data_cmplx_slaby(i, j, k) = data_cmplx_out1d(k + (i - 1)*Nz + (j - 1)*Nz*NxC) + end do + end do + end do + + ! transpose y-slab to z-slab + call nvtxStartRange("SLAB-MPI-TRANSPOSE-Y2Z") + call s_mpi_transpose_slabY2Z + call nvtxEndRange + + ! 3D z-slab -> 1D y, x, z + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) = data_cmplx_slabz(i, j, k) + end do + end do + end do + + ! Y inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2Z(plan_y_gpu, data_cmplx_out1dy, data_cmplx_out1dy, CUFFT_INVERSE) +#else + call fftw_execute_dft(plan_y_c2c_bwd, data_cmplx_out1dy, data_cmplx_out1dy) +#endif + + ! 1D y, x, z -> 1D x, y, z + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, NxC + do j = 1, Ny + do k = 1, Nzloc + data_cmplx_out1d(i + (j - 1)*NxC + (k - 1)*NxC*Ny) = data_cmplx_out1dy(j + (i - 1)*Ny + (k - 1)*Ny*NxC) + end do + end do + end do + + ! X inv FFT +#if defined(MFC_OpenACC) + ierr = cufftExecZ2D(plan_x_bwd_gpu, data_cmplx_out1d, data_real_in1d) +#else + call fftw_execute_dft_c2r(plan_x_c2r_bwd, data_cmplx_out1d, data_real_in1d) +#endif + + ! 
1D x, y, z -> 3D z-slab + $:GPU_PARALLEL_LOOP(collapse=3) + do i = 1, Nx + do j = 1, Ny + do k = 1, Nzloc + data_real_3D_slabz(i, j, k) = data_real_in1d(i + (j - 1)*Nx + (k - 1)*Nx*Ny) + end do + end do + end do + + end subroutine s_mpi_FFT_bwd + + subroutine s_finalize_fftw_explicit_filter_module + integer :: i, j + + @:DEALLOCATE(fluid_indicator_function%sf) + @:DEALLOCATE(filtered_fluid_indicator_function%sf) + do i = 1, 3 + @:DEALLOCATE(grad_fluid_indicator(i)%sf) + end do + @:DEALLOCATE(grad_fluid_indicator) + + do i = 1, sys_size - 1 + @:DEALLOCATE(q_cons_filtered(i)%sf) + end do + @:DEALLOCATE(q_cons_filtered) + + @:DEALLOCATE(filtered_pressure%sf) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(visc_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(visc_stress(i)%vf) + end do + @:DEALLOCATE(visc_stress) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(pres_visc_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(pres_visc_stress(i)%vf) + end do + @:DEALLOCATE(pres_visc_stress) + + do i = 1, num_dims + @:DEALLOCATE(div_pres_visc_stress(i)%sf) + end do + @:DEALLOCATE(div_pres_visc_stress) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(reynolds_stress(i)%vf(j)%sf) + end do + @:DEALLOCATE(reynolds_stress(i)%vf) + end do + @:DEALLOCATE(reynolds_stress) + + do i = 1, num_dims + do j = 1, num_dims + @:DEALLOCATE(eff_visc(i)%vf(j)%sf) + end do + @:DEALLOCATE(eff_visc(i)%vf) + end do + @:DEALLOCATE(eff_visc) + + do i = 1, num_dims + @:DEALLOCATE(int_mom_exch(i)%sf) + end do + @:DEALLOCATE(int_mom_exch) + + @:DEALLOCATE(Res) + @:DEALLOCATE(particle_forces) + + @:DEALLOCATE(data_real_in1d, data_cmplx_out1d, data_cmplx_out1dy) + @:DEALLOCATE(cmplx_kernelG1d, real_kernelG_in) + @:DEALLOCATE(data_real_3D_slabz, data_cmplx_slabz, data_cmplx_slaby) + @:DEALLOCATE(data_cmplx_slabz_batch, data_cmplx_slaby_batch) + + @:DEALLOCATE(sendbuf_sf, recvbuf_sf) + @:DEALLOCATE(sendbuf_batch, recvbuf_batch) + +#if defined(MFC_OpenACC) + ierr = cufftDestroy(plan_x_fwd_gpu) + ierr = cufftDestroy(plan_x_bwd_gpu) + ierr = cufftDestroy(plan_y_gpu) + ierr = cufftDestroy(plan_z_gpu) +#else + call fftw_destroy_plan(plan_x_r2c_fwd) + call fftw_destroy_plan(plan_x_c2r_bwd) + call fftw_destroy_plan(plan_y_c2c_fwd) + call fftw_destroy_plan(plan_y_c2c_bwd) + call fftw_destroy_plan(plan_z_c2c_fwd) + call fftw_destroy_plan(plan_z_c2c_bwd) + call fftw_destroy_plan(plan_x_r2c_kernelG) + call fftw_destroy_plan(plan_y_c2c_kernelG) + call fftw_destroy_plan(plan_z_c2c_kernelG) +#endif + + if (compute_particle_drag) then + if (proc_rank == 0) then + close (100) + end if + end if + + end subroutine s_finalize_fftw_explicit_filter_module + +end module m_volume_filtering diff --git a/src/simulation/p_main.fpp b/src/simulation/p_main.fpp index 29cb3b8281..dcd11525eb 100644 --- a/src/simulation/p_main.fpp +++ b/src/simulation/p_main.fpp @@ -22,6 +22,8 @@ program p_main use m_nvtx + use m_volume_filtering + implicit none integer :: t_step !< Iterator for the time-stepping loop @@ -54,6 +56,13 @@ program p_main call s_initialize_gpu_vars() call nvtxEndRange + if (volume_filtering_momentum_eqn .or. periodic_forcing) call s_initialize_fluid_indicator_function(bc_type) + if (volume_filtering_momentum_eqn) then + call s_initialize_filtering_kernel() + call s_initialize_filtered_fluid_indicator_function() + call s_initialize_fluid_indicator_gradient() + end if + ! 
Setting the time-step iterator to the first time-step + if (cfl_dt) then + t_step = 0 diff --git a/toolchain/mfc/run/case_dicts.py b/toolchain/mfc/run/case_dicts.py index 80eb30cc34..4fd05780b6 100644 --- a/toolchain/mfc/run/case_dicts.py +++ b/toolchain/mfc/run/case_dicts.py @@ -66,6 +66,9 @@ def analytic(self): 'down_sample': ParamType.LOG, 'recon_type': ParamType.INT, 'muscl_order': ParamType.INT, + 'periodic_ibs': ParamType.LOG, + 'store_levelset': ParamType.LOG, + 'slab_domain_decomposition': ParamType.LOG, } PRE_PROCESS = COMMON.copy() @@ -109,7 +112,7 @@ def analytic(self): 'fft_wrt': ParamType.LOG, }) -for ib_id in range(1, 10+1): +for ib_id in range(1, 1000+1): for real_attr, ty in [("geometry", ParamType.INT), ("radius", ParamType.REAL), ("theta", ParamType.REAL), ("slip", ParamType.LOG), ("c", ParamType.REAL), ("p", ParamType.REAL), @@ -324,6 +327,15 @@ def analytic(self): 'nv_uvm_igr_temps_on_gpu': ParamType.INT, 'nv_uvm_pref_gpu': ParamType.LOG, 'fft_wrt': ParamType.LOG, + 'compute_particle_drag': ParamType.LOG, + 'u_inf_ref': ParamType.REAL, + 'rho_inf_ref': ParamType.REAL, + 'P_inf_ref': ParamType.REAL, + 'periodic_forcing': ParamType.LOG, + 'volume_filtering_momentum_eqn': ParamType.LOG, + 't_step_stat_start': ParamType.INT, + 'filter_width': ParamType.REAL, + 'q_filtered_wrt': ParamType.LOG, }) for var in [ 'heatTransfer_model', 'massTransfer_model', 'pressure_corrector', @@ -343,7 +355,7 @@ def analytic(self): for var in [ 'gamma_method' ]: SIMULATION[f'chem_params%{var}'] = ParamType.INT -for ib_id in range(1, 10+1): +for ib_id in range(1, 1000+1): for real_attr, ty in [("geometry", ParamType.INT), ("radius", ParamType.REAL), ("theta", ParamType.REAL), ("slip", ParamType.LOG), ("c", ParamType.REAL), ("p", ParamType.REAL), @@ -489,6 +501,7 @@ def analytic(self): 'lag_mg_wrt': ParamType.LOG, 'lag_betaT_wrt': ParamType.LOG, 'lag_betaC_wrt': ParamType.LOG, + 'q_filtered_wrt': ParamType.LOG, }) for cmp in ["x", "y", "z"]: diff --git a/toolchain/templates/delta.mako b/toolchain/templates/delta.mako index 694f22c457..52246fd334 100644 --- a/toolchain/templates/delta.mako +++ b/toolchain/templates/delta.mako @@ -16,7 +16,7 @@ % endif % if gpu: #SBATCH --gpus-per-node=${tasks_per_node} -#SBATCH --mem=208G +#SBATCH --mem=240G #SBATCH --gpu-bind=closest % endif #SBATCH --output="${name}.out" diff --git a/voronoi/gen_voronoi_2D.py b/voronoi/gen_voronoi_2D.py new file mode 100644 index 0000000000..73beb4b8d7 --- /dev/null +++ b/voronoi/gen_voronoi_2D.py @@ -0,0 +1,99 @@ +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import freud + + +# lloyd relaxation +def compute_simplex_centroid(simplex_vertices): + # v1, v2, v3 are the x, y, z coordinate columns of the three vertices of each simplex + v1 = simplex_vertices[:, :, 0] + v2 = simplex_vertices[:, :, 1] + v3 = simplex_vertices[:, :, 2] + + v1_mean = np.mean(v1, axis=1) + v2_mean = np.mean(v2, axis=1) + v3_mean = np.mean(v3, axis=1) + + simplex_centroids = np.array([v1_mean, v2_mean, v3_mean]) + + return simplex_centroids + +def compute_simplex_area(simplex_vertices): + v1 = simplex_vertices[:, :, 0] + v2 = simplex_vertices[:, :, 1] + v3 = simplex_vertices[:, :, 2] + + area = 0.5 * np.linalg.norm( np.cross(v2 - v1, v3 - v1), axis=1 ) + + return area + +def lloyd_relaxation_2d(initial_points, box, w=1.0, iterations=20): + points = np.copy(initial_points) # work on a copy so the caller's array is not mutated in place + + for _ in range(iterations): + voro = freud.locality.Voronoi() + voro_data = voro.compute((box, points)) # recompute the diagram from the current (not the initial) points + vertices = voro_data.polytopes + + for i in range(len(points)): + n = len(vertices[i]) + + simplex_vertices = np.array( [(points[i, :], vertices[i][j-1], vertices[i][j]) for j in range(n)] ) + + simplex_centroids = compute_simplex_centroid(simplex_vertices) + simplex_areas = compute_simplex_area(simplex_vertices) + + centroid = (1/np.sum(simplex_areas)) * (np.sum(simplex_centroids*simplex_areas, axis=1)) + + dist = centroid - points[i, :] + + points[i, :] += w * dist + + points = box.wrap(points) + + return points
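For reference, the weighted-centroid step above fans each Voronoi cell into triangles anchored at its seed and area-averages the triangle centroids. Note that compute_simplex_centroid and compute_simplex_area slice coordinate columns rather than vertices; this gives the same centroid, and (because the 2x2 minors of a matrix and its transpose agree) the same area as the usual vertex form. A standalone sketch with a hypothetical square cell and off-center seed:

import numpy as np

# area-weighted centroid via a triangle fan, mirroring lloyd_relaxation_2d
seed = np.array([0.2, 0.1, 0.0])
cell = np.array([[1, 1, 0], [-1, 1, 0], [-1, -1, 0], [1, -1, 0]], dtype=float)

n = len(cell)
tris = np.array([(seed, cell[j-1], cell[j]) for j in range(n)])  # fan around the seed
cents = tris.mean(axis=1)                                        # triangle centroids
areas = 0.5*np.linalg.norm(np.cross(tris[:, 1] - tris[:, 0],
                                    tris[:, 2] - tris[:, 0]), axis=1)
centroid = (areas[:, None]*cents).sum(axis=0)/areas.sum()
print(centroid)   # ~ (0, 0, 0): the cell centroid, independent of the seed location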
if (__name__ == '__main__'): + print('running 2D...') + + # setup + phi = 0.4 + D = 0.1 + L = 10*D + + N = int( 4*phi*L**2 / (np.pi*D**2) ) + print(f'volume fraction phi: {phi}, number of circles: {N}') + + x_i = L/2 * np.random.uniform(-1, 1, N) + y_i = L/2 * np.random.uniform(-1, 1, N) + z_i = np.zeros(N) # freud expects 3D coordinates; z = 0 in a 2D box + + initial_points = np.stack((x_i, y_i, z_i), axis=1) + + box = freud.box.Box.square(L) + voro = freud.locality.Voronoi() + + voro.compute((box, initial_points)) + + # plot initial distribution + plt.figure() + ax = plt.gca() + voro.plot(ax=ax, cmap='RdBu') + ax.scatter(initial_points[:, 0], initial_points[:, 1], s=5, c='k') + plt.show() + plt.close() + + # calculate relaxed points + relaxed_points = lloyd_relaxation_2d(initial_points, box, w=1.5, iterations=25) + voro.compute((box, relaxed_points)) + + # plot relaxed distribution + plt.figure() + ax = plt.gca() + voro.plot(ax=ax, cmap='RdBu') + ax.scatter(relaxed_points[:, 0], relaxed_points[:, 1], s=5, c='k') + plt.show() + plt.close()
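Both Voronoi scripts size the array from a target packing fraction: N = 4 phi L^2 / (pi D^2) circles in 2D and N = 6 phi L^3 / (pi D^3) spheres in 3D, with the int() truncation pulling the realized fraction slightly below target. A quick check with the values used in these scripts:

import numpy as np

# particle counts implied by the target packing fractions above
D = 0.1
L = 10*D

phi2 = 0.4                              # 2D script
N2 = int(4*phi2*L**2/(np.pi*D**2))      # phi = N*(pi D^2/4)/L^2  ->  50 circles
phi3 = 0.1                              # 3D script
N3 = int(6*phi3*L**3/(np.pi*D**3))      # phi = N*(pi D^3/6)/L^3  ->  190 spheres

print(N2, N3)
print(N3*(np.pi*D**3/6)/L**3)           # realized 3D fraction ~0.0995 after truncation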
diff --git a/voronoi/gen_voronoi_3D.py b/voronoi/gen_voronoi_3D.py new file mode 100644 index 0000000000..ecb08eb36c --- /dev/null +++ b/voronoi/gen_voronoi_3D.py @@ -0,0 +1,98 @@ +import os +import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import freud + + +# lloyd relaxation +def compute_tetrahedron_centroid(tetrahedron_vertices): + return np.mean(tetrahedron_vertices, axis=0) + +def compute_tetrahedron_volume(tetrahedron_vertices): + v0, v1, v2, v3 = tetrahedron_vertices + matrix = np.vstack([v1 - v0, v2 - v0, v3 - v0]).T + volume = np.abs(np.linalg.det(matrix)) / 6 + + return volume + +def lloyd_relaxation_3d(initial_points, box, w=1, iterations=10): + points = np.copy(initial_points) # work on a copy so the caller's array (and the 'initial points' plot below) is not mutated + + for _ in range(iterations): + voro = freud.locality.Voronoi() + voro_data = voro.compute((box, points)) + vertices = voro_data.polytopes + + for i in range(len(points)): + n = len(vertices[i]) + + # approximate decomposition of the cell: a fan of tetrahedra over consecutive vertex triples + tetrahedra = [] + for j in range(n): + tetrahedra.append([points[i, :], vertices[i][j], vertices[i][(j+1) % n], vertices[i][(j+2) % n]]) + + centroids = np.array([compute_tetrahedron_centroid(t) for t in tetrahedra]) + volumes = np.array([compute_tetrahedron_volume(t) for t in tetrahedra]) + + weighted_centroid = np.sum(centroids * volumes[:, np.newaxis], axis=0) + total_volume = np.sum(volumes) + + if total_volume > 1.0e-12: + centroid = weighted_centroid / total_volume + dist = centroid - points[i, :] + + points[i, :] += w * dist + + points = box.wrap(points) + + return points + +if (__name__ == '__main__'): + print('running 3D...') + + # setup + phi = 0.1 + str_phi = '01' + + D = 0.1 + L = 10*D + + output_dir = '../runs/phi'+str_phi + if not os.path.exists(output_dir): + os.mkdir(output_dir) + + N_sphere = int( 6*phi*L**3 / (np.pi*D**3) ) + print(f'volume fraction phi: {phi}, number of spheres: {N_sphere}') + print(f'actual phi value: {N_sphere*4/3*np.pi*(D/2)**3/(L**3)}') + + x_i = L/2 * np.random.uniform(-1, 1, N_sphere) + y_i = L/2 * np.random.uniform(-1, 1, N_sphere) + z_i = L/2 * np.random.uniform(-1, 1, N_sphere) + + initial_points = np.stack((x_i, y_i, z_i), axis=1) + box = freud.box.Box.cube(L) + + relaxed_points = lloyd_relaxation_3d(initial_points, box, iterations=40) + print(np.shape(relaxed_points)) + + np.savetxt(output_dir+'/sphere_array_locations.txt', relaxed_points) + + # check that no spheres are overlapping (each pair tested once) + for i in range(N_sphere): + for j in range(i + 1, N_sphere): + dist = np.sqrt((relaxed_points[i, 0] - relaxed_points[j, 0])**2 + (relaxed_points[i, 1] - relaxed_points[j, 1])**2 + (relaxed_points[i, 2] - relaxed_points[j, 2])**2) + if (dist <= 1.05*D): + print(f'spheres overlapping, dist={dist}, spheres #: {i}, {j}') + print(f'locations: ({relaxed_points[i, :]}), ({relaxed_points[j, :]})') + + fig = plt.figure(figsize=(10,5)) + ax1 = fig.add_subplot(121, projection='3d') + ax1.scatter(initial_points[:, 0], initial_points[:, 1], initial_points[:, 2], color='blue', s=10) + ax1.set_title('initial points') + ax2 = fig.add_subplot(122, projection='3d') + ax2.scatter(relaxed_points[:, 0], relaxed_points[:, 1], relaxed_points[:, 2], color='red', s=10) + ax2.set_title('relaxed points') + plt.show() + plt.close()
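One caveat on the overlap check above: it measures plain Euclidean distance, so near-contacts across the periodic boundary are missed even though box.wrap treats the domain as periodic. A minimum-image variant (a sketch, assuming the same cubic box of side L) would catch those:

import numpy as np

# minimum-image overlap check for a periodic cube of side L, complementing the
# Euclidean check in gen_voronoi_3D.py, which ignores boundary images
def check_overlaps(points, L, D, tol=1.05):
    n = len(points)
    for i in range(n):
        d = points[i+1:] - points[i]
        d -= L*np.round(d/L)                 # wrap each component into [-L/2, L/2]
        dist = np.linalg.norm(d, axis=1)
        for j in np.nonzero(dist <= tol*D)[0]:
            print(f'overlap: spheres {i} and {i+1+j}, dist={dist[j]:.4g}')

# e.g. check_overlaps(relaxed_points, L=1.0, D=0.1)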