From a1409f6d292f216fb90ac47631bef6bde8987595 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 18 Jun 2026 15:49:51 -0600 Subject: [PATCH 1/5] Adding timer atm_advance_scalars_4857 --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 227fbde862..bad87b79d5 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4854,6 +4854,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc enter data create(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') + MPAS_ACC_TIMER_START('atm_advance_scalars_4857') !$acc parallel wait !$acc loop gang worker private(scalar_tend_column, wdtn) do iCell=cellSolveStart,cellSolveEnd @@ -4932,6 +4933,8 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & end do !$acc end parallel + MPAS_ACC_TIMER_STOP('atm_advance_scalars_4857') + MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc exit data delete(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') From 7686fa29384e994199760756232a3664787e057d Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 18 Jun 2026 16:04:20 -0600 Subject: [PATCH 2/5] Fix timer --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index bad87b79d5..ef347f55e9 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4854,7 +4854,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc enter data create(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - MPAS_ACC_TIMER_START('atm_advance_scalars_4857') + call mpas_timer_start('atm_advance_scalars_4857') !$acc parallel wait !$acc loop gang worker private(scalar_tend_column, wdtn) do iCell=cellSolveStart,cellSolveEnd @@ -4933,7 +4933,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & end do !$acc end parallel - MPAS_ACC_TIMER_STOP('atm_advance_scalars_4857') + call mpas_timer_stop('atm_advance_scalars_4857') MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') !$acc exit data delete(scalar_tend_column) From 8c1d8825dc6738aa9b22556b7ace686709975de8 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 18 Jun 2026 16:29:02 -0600 Subject: [PATCH 3/5] OPT 1 - Swap the seq and vector collapse(2) loops --- .../dynamics/mpas_atm_time_integration.F | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index ef347f55e9..2564f0ef95 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4871,20 +4871,19 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & end do end do - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - + !$acc loop vector collapse(2) + do k=1,nVertLevels + do iScalar=1,num_scalars ! here we add the horizontal flux divergence into the scalar tendency. ! note that the scalar tendency is modified. - !$acc loop vector collapse(2) - do k=1,nVertLevels - do iScalar=1,num_scalars + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) scalar_tend_column(iScalar,k) = scalar_tend_column(iScalar,k) & - edgesOnCell_sign(i,iCell) * uhAvg(k,iEdge)*horiz_flux_arr(iScalar,k,iEdge) - end do + end do - + end do end do !$acc loop vector collapse(2) From d77f2f0a87f73d999d7c292f0f087e8ed7c978ae Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 18 Jun 2026 16:51:57 -0600 Subject: [PATCH 4/5] OPT 2 fuse all scalar_tend_column loops --- .../dynamics/mpas_atm_time_integration.F | 22 +++++-------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 2564f0ef95..2e3c15f074 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4868,30 +4868,18 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & #ifndef DO_PHYSICS scalar_tend_save(iScalar,k,iCell) = 0.0_RKIND ! testing purposes - we have no sources or sinks #endif - end do - end do - - !$acc loop vector collapse(2) - do k=1,nVertLevels - do iScalar=1,num_scalars ! here we add the horizontal flux divergence into the scalar tendency. ! note that the scalar tendency is modified. !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) - scalar_tend_column(iScalar,k) = scalar_tend_column(iScalar,k) & - - edgesOnCell_sign(i,iCell) * uhAvg(k,iEdge)*horiz_flux_arr(iScalar,k,iEdge) - + scalar_tend_column(iScalar,k) = scalar_tend_column(iScalar,k) & + - edgesOnCell_sign(i,iCell) * uhAvg(k,iEdge)*horiz_flux_arr(iScalar,k,iEdge) end do - end do - end do - !$acc loop vector collapse(2) - do k=1,nVertLevels - do iScalar=1,num_scalars - scalar_tend_column(iScalar,k) = scalar_tend_column(iScalar,k) * invAreaCell(iCell) & - + scalar_tend_save(iScalar,k,iCell) - end do + scalar_tend_column(iScalar,k) = scalar_tend_column(iScalar,k) * invAreaCell(iCell) & + + scalar_tend_save(iScalar,k,iCell) + end do end do From 6588495013c6ac76c4b6c8da3e1638e76c0ab9a9 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 18 Jun 2026 17:16:29 -0600 Subject: [PATCH 5/5] OPT 3 - Replace scalar_tend_column by scalar local to each gang This change also involves rearranging loops so wdtn is computed before the computation of scalar_tend_column and rho_zz_new_inv and scalar_new. The latter three of which can be fused together. --- .../dynamics/mpas_atm_time_integration.F | 57 ++++++++----------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 2e3c15f074..a64347f9b9 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4724,8 +4724,9 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & logical :: local_advance_density real (kind=RKIND) :: weight_time_old, weight_time_new - real (kind=RKIND), dimension(num_scalars,nVertLevels) :: scalar_tend_column ! local storage to accumulate tendency + !real (kind=RKIND), dimension(num_scalars,nVertLevels) :: scalar_tend_column ! local storage to accumulate tendency real (kind=RKIND) :: u_direction, u_positive, u_negative + real (kind=RKIND) :: scalar_tend_column_tmp flux4(q_im2, q_im1, q_i, q_ip1, ua) = & ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0 @@ -4851,42 +4852,18 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & ! MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc enter data create(scalar_tend_column) + !!$acc enter data create(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') call mpas_timer_start('atm_advance_scalars_4857') !$acc parallel wait - !$acc loop gang worker private(scalar_tend_column, wdtn) + !$acc loop gang worker private(wdtn) do iCell=cellSolveStart,cellSolveEnd if(bdyMaskCell(iCell) <= nRelaxZone) then ! specified zone for regional_MPAS is not updated in this routine - - !$acc loop vector collapse(2) - do k=1,nVertLevels - do iScalar=1,num_scalars - scalar_tend_column(iScalar,k) = 0.0_RKIND -#ifndef DO_PHYSICS - scalar_tend_save(iScalar,k,iCell) = 0.0_RKIND ! testing purposes - we have no sources or sinks -#endif - ! here we add the horizontal flux divergence into the scalar tendency. - ! note that the scalar tendency is modified. - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - scalar_tend_column(iScalar,k) = scalar_tend_column(iScalar,k) & - - edgesOnCell_sign(i,iCell) * uhAvg(k,iEdge)*horiz_flux_arr(iScalar,k,iEdge) - end do - - scalar_tend_column(iScalar,k) = scalar_tend_column(iScalar,k) * invAreaCell(iCell) & - + scalar_tend_save(iScalar,k,iCell) - end do - end do - - ! ! vertical flux divergence and update of the scalars ! - !$acc loop vector do iScalar=1,num_scalars wdtn(iScalar,1) = 0.0 @@ -4908,11 +4885,27 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc loop vector collapse(2) do k=1,nVertLevels - do iScalar=1,num_scalars - rho_zz_new_inv = 1.0_RKIND / (weight_time_old*rho_zz_old(k,iCell) + weight_time_new*rho_zz_new(k,iCell)) - scalar_new(iScalar,k,iCell) = ( scalar_old(iScalar,k,iCell)*rho_zz_old(k,iCell) & - + dt*( scalar_tend_column(iScalar,k) -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) ) * rho_zz_new_inv + do iScalar=1,num_scalars + scalar_tend_column_tmp = 0.0_RKIND +#ifndef DO_PHYSICS + scalar_tend_save(iScalar,k,iCell) = 0.0_RKIND ! testing purposes - we have no sources or sinks +#endif + ! here we add the horizontal flux divergence into the scalar tendency. + ! note that the scalar tendency is modified. + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + scalar_tend_column_tmp = scalar_tend_column_tmp & + - edgesOnCell_sign(i,iCell) * uhAvg(k,iEdge)*horiz_flux_arr(iScalar,k,iEdge) end do + + scalar_tend_column_tmp = scalar_tend_column_tmp * invAreaCell(iCell) & + + scalar_tend_save(iScalar,k,iCell) + + rho_zz_new_inv = 1.0_RKIND / (weight_time_old*rho_zz_old(k,iCell) + weight_time_new*rho_zz_new(k,iCell)) + scalar_new(iScalar,k,iCell) = ( scalar_old(iScalar,k,iCell)*rho_zz_old(k,iCell) & + + dt*( scalar_tend_column_tmp -rdnw(k)*(wdtn(iScalar,k+1)-wdtn(iScalar,k)) ) ) * rho_zz_new_inv + end do end do end if ! specified zone regional_MPAS test @@ -4923,7 +4916,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & call mpas_timer_stop('atm_advance_scalars_4857') MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc exit data delete(scalar_tend_column) + !!$acc exit data delete(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') end subroutine atm_advance_scalars_work