module m_phase1

  implicit none

contains

  subroutine phase1( nx, ny, ival1, oval0, oval1, oval2 )

  use Math_Const
  use ffts

  implicit none

  integer, intent(in) :: nx, ny
  real, dimension(nx,ny), intent(in) :: ival1
  real, dimension(nx,ny), intent(out) :: oval0, oval1, oval2

  integer, parameter :: ifft=5
  integer :: i, j
  integer, dimension(ifft) :: pxfact, pyfact
  complex, dimension(nx,ny) :: cpval1, cpval2, sp1, sp2, spi1, spi2
  complex, allocatable, dimension(:,:) :: omegaxbr, omegaybr, omegaxbi, omegaybi
  complex, allocatable, dimension(:,:) :: omegaxnr, omegaynr, omegaxni, omegayni

  call rotate_array_f()
  call prim_calc( nx, pxfact(1:4), pxfact(5) )
  call prim_calc( ny, pyfact(1:4), pyfact(5) )

  allocate(omegaxnr(0:nx-1,0:nx-1))
  allocate(omegaynr(0:ny-1,0:ny-1))
  allocate(omegaxni(0:nx-1,0:nx-1))
  allocate(omegayni(0:ny-1,0:ny-1))
  allocate(omegaxbr(0:pxfact(5)-1,0:pxfact(5)-1))
  allocate(omegaybr(0:pyfact(5)-1,0:pyfact(5)-1))
  allocate(omegaxbi(0:pxfact(5)-1,0:pxfact(5)-1))
  allocate(omegaybi(0:pyfact(5)-1,0:pyfact(5)-1))

  call rotate_calc( nx, 'r', pxfact,  &
  &               omegaxbr(0:pxfact(5)-1,0:pxfact(5)-1),  &
  &               omegaxnr(0:nx-1,0:nx-1) )
  call rotate_calc( nx, 'i', pxfact,  &
  &               omegaxbi(0:pxfact(5)-1,0:pxfact(5)-1),  &
  &               omegaxni(0:nx-1,0:nx-1) )
  call rotate_calc( ny, 'r', pyfact,  &
  &               omegaybr(0:pyfact(5)-1,0:pyfact(5)-1),  &
  &               omegaynr(0:ny-1,0:ny-1) )
  call rotate_calc( ny, 'i', pyfact,  &
  &               omegaybi(0:pyfact(5)-1,0:pyfact(5)-1),  &
  &               omegayni(0:ny-1,0:ny-1) )

  do j=1,ny
     do i=1,nx
        cpval1(i,j)=ival1(i,j)
     end do
  end do

  call ffttp_2d( nx, ny, cpval1(1:nx,1:ny), cpval2(1:nx,1:ny),  &
  &              'r', 'o', prim_factx=pxfact, prim_facty=pyfact,  &
  &              omegax_fix=omegaxbr, omegaxn_fix=omegaxnr,  &
  &              omegay_fix=omegaybr, omegayn_fix=omegaynr )

  sp1=0.0
  sp2=0.0

!-- WN0
  sp1(1,1)=cpval2(1,1)
  sp1(nx,ny)=cpval2(nx,ny)

!-- WN0
  sp2(1,1)=cpval2(1,1)
  sp2(nx,ny)=cpval2(nx,ny)

!-- WNK1
  sp2(2,1)=cpval2(2,1)
  sp2(nx,1)=cpval2(nx,1)
!-- WNL1
  sp2(1,2)=cpval2(1,2)
  sp2(1,ny)=cpval2(1,ny)

  call ffttp_2d( nx, ny, cpval2(1:nx,1:ny), cpval1(1:nx,1:ny),  &
  &              'i', 'o', prim_factx=pxfact, prim_facty=pyfact,  &
  &              omegax_fix=omegaxbi, omegaxn_fix=omegaxni,  &
  &              omegay_fix=omegaybi, omegayn_fix=omegayni )

  call ffttp_2d( nx, ny, sp1(1:nx,1:ny), spi1(1:nx,1:ny),  &
  &              'i', 'o', prim_factx=pxfact, prim_facty=pyfact,  &
  &              omegax_fix=omegaxbi, omegaxn_fix=omegaxni,  &
  &              omegay_fix=omegaybi, omegayn_fix=omegayni )

  call ffttp_2d( nx, ny, sp2(1:nx,1:ny), spi2(1:nx,1:ny),  &
  &              'i', 'o', prim_factx=pxfact, prim_facty=pyfact,  &
  &              omegax_fix=omegaxbi, omegaxn_fix=omegaxni,  &
  &              omegay_fix=omegaybi, omegayn_fix=omegayni )

  do j=1,ny
     do i=1,nx
        oval0(i,j)=real(cpval1(i,j))
        oval1(i,j)=real(spi1(i,j))
        oval2(i,j)=real(spi2(i,j))
     end do
  end do

  end subroutine phase1

end module m_phase1

program phase4

!-- phase3 ν¿ز.
!-- ǡΤ phase3 Ʊˤ, ƥץǡ¿ز.
!-- ꥸʥǡκ,  root ץǹԤ.

  use m_phase1
  use basis
  use mpi
  use dcl
  use dcl_automatic

  implicit none

  integer, parameter :: nx=200, ny=300, nz=50
  integer, parameter :: npx=2, npy=2
  integer, parameter :: root=0

  integer :: i, j, k, icounter, ip, jp
  integer :: MY_RANK, PETOT, IERROR   ! For MPI variables
  real :: x(nx), y(ny)
  real, allocatable, dimension(:,:,:) :: valp1
  real, allocatable, dimension(:) :: totsp1, totsp2
  real, dimension(nx,ny,nz) :: ival1
  real, dimension(nx,ny) :: oval0, oval1, oval2

!-- Initializing MPI

  call MPI_INIT( IERROR )

!-- Getting total process number and oneself process ID.

  call MPI_COMM_RANK( MPI_COMM_WORLD, MY_RANK, IERROR )
  call MPI_COMM_SIZE( MPI_COMM_WORLD, PETOT, IERROR )

  allocate(totsp1(nx*ny*nz))
  allocate(totsp2((nx/npx)*(ny/npy)*nz))

  if(MY_RANK==root)then

     do k=1,nz
        do j=1,ny
           do i=1,nx
              ival1(i,j,k)=exp(-(((real(i)-0.5*real(nx))/real(nx))**2  &
  &                             +((real(j)-0.5*real(ny))/real(ny))**2  &
  &                             +((real(k)-0.5*real(nz))/real(nz))**2))
           end do
        end do
     end do

     x=(/((real(i)/real(nx)),i=1,nx)/)
     y=(/((real(j)/real(ny)),j=1,ny)/)

!-- rearranging from data array to comm array

     icounter=0

     do jp=1,npy
        do ip=1,npx
           do k=1,nz
              do j=1,ny/npy
                 do i=1,nx/npx
                    icounter=icounter+1
                    totsp1(icounter)=ival1((ip-1)*nx/npx+i,(jp-1)*ny/npy+j,k)
                 end do
              end do
           end do
        end do
     end do

  end if

  call MPI_BARRIER( MPI_COMM_WORLD, IERROR )

!-- splitting the whole data to partial data

  call MPI_SCATTER( totsp1, (nx/npx)*(ny/npy)*nz, MPI_REAL,  &
  &                 totsp2, (nx/npx)*(ny/npy)*nz, MPI_REAL,  &
  &                 root, MPI_COMM_WORLD, IERROR )

  call MPI_BARRIER( MPI_COMM_WORLD, IERROR )

!-- rearranging from comm array to data array

  if(MY_RANK==root)then
     totsp1=0.0
     ival1=0.0
  end if

  allocate(valp1(nx/npx,ny/npy,nz))
  icounter=0

  do k=1,nz
     do j=1,ny/npy
        do i=1,nx/npx
           icounter=icounter+1
           valp1(i,j,k)=totsp2(icounter)
        end do
     end do
  end do

  open(unit=10+MY_RANK,  &
  &    file='pe'//trim(adjustl( i2c_convert(MY_RANK,forma='(i4.4)') ))//'.bin',  &
  &    recl=4*(nx/npx)*(ny/npy),access='direct',status='replace')

     do k=1,nz
        write(10+MY_RANK,rec=k) ((valp1(i,j,k),i=1,nx/npx),j=1,ny/npy)
     end do

  close(unit=10+MY_RANK)

  call MPI_BARRIER( MPI_COMM_WORLD, IERROR )

  valp1=0.0

  call MPI_BARRIER( MPI_COMM_WORLD, IERROR )

  open(unit=1000+MY_RANK,  &
  &    file='pe'//trim(adjustl( i2c_convert(MY_RANK,forma='(i4.4)') ))//'.bin',  &
  &    recl=4*(nx/npx)*(ny/npy),access='direct',status='old')

     do k=1,nz
        read(1000+MY_RANK,rec=k) ((valp1(i,j,k),i=1,nx/npx),j=1,ny/npy)
     end do

  close(unit=1000+MY_RANK)

  icounter=0

  do k=1,nz
     do j=1,ny/npy
        do i=1,nx/npx
           icounter=icounter+1
           totsp2(icounter)=valp1(i,j,k)
        end do
     end do
  end do

  call MPI_BARRIER( MPI_COMM_WORLD, IERROR )

!-- gathering the partial data to the whole data

  call MPI_GATHER( totsp2, (nx/npx)*(ny/npy)*nz, MPI_REAL,  &
  &                totsp1, (nx/npx)*(ny/npy)*nz, MPI_REAL,  &
  &                root, MPI_COMM_WORLD, IERROR )

  call MPI_BARRIER( MPI_COMM_WORLD, IERROR )

  if(MY_RANK==root)then
!------------- DCL
     call SGISET('IFONT',2 )
     CALL GLRSET( 'RMISS', -999.0 )
     CALL GLLSET( 'LMISS', .TRUE. )
     call UZFACT(0.75)

     call color_setting( 80, (/0.0, 1.0/), min_tab=15999,  &
  &                      max_tab=95999, col_min=10, col_max=99 )

     call DclOpenGraphics(1)

!-- rearranging from comm array to data array

     icounter=0

     do jp=1,npy
        do ip=1,npx
           do k=1,nz
              do j=1,ny/npy
                 do i=1,nx/npx
                    icounter=icounter+1
                    ival1((ip-1)*nx/npx+i,(jp-1)*ny/npy+j,k)=totsp1(icounter)
                 end do
              end do
           end do
        end do
     end do

     do k=1,nz

        call phase1( nx, ny, ival1(:,:,k), oval0, oval1, oval2 )

        call Dcl_2D_cont_shade( 'Full(z='//  &
  &                             trim(adjustl(i2c_convert(k,forma='(i3.3)')))//')',  &
  &                             x, y, ival1(:,:,k), oval0,  &
  &                             (/0.0, 1.0/), (/0.0, 1.0e-4/),  &
  &                             (/'X (km)', 'Y (km)'/),  &
  &                             (/'(f6.1)', '(f6.1)'/),  &
  &                             c_num=(/10, 80/), no_tone=.true. )

        call Dcl_2D_cont_shade( 'WN0(z='//  &
  &                             trim(adjustl(i2c_convert(k,forma='(i3.3)')))//')',  &
  &                             x, y, ival1(:,:,k), oval1,  &
  &                             (/0.0, 1.0/), (/0.0, 1.0e-4/),  &
  &                             (/'X (km)', 'Y (km)'/),  &
  &                             (/'(f6.1)', '(f6.1)'/),  &
  &                             c_num=(/10, 80/), no_tone=.true. )

        call Dcl_2D_cont_shade( 'WN1(z='//  &
  &                             trim(adjustl(i2c_convert(k,forma='(i3.3)')))//')',  &
  &                             x, y, ival1(:,:,k), oval2,  &
  &                             (/0.0, 1.0/), (/0.0, 1.0e-4/),  &
  &                             (/'X (km)', 'Y (km)'/),  &
  &                             (/'(f6.1)', '(f6.1)'/),  &
  &                             c_num=(/10, 80/), no_tone=.true. )

     end do

     call DclCloseGraphics

  end if

!-- finishing MPI process

  call MPI_FINALIZE( IERROR )

end program phase4
