C                        lpm1 - parmacs version
C                        ----------------------
C            benctl.f - benchmark control, running on all nodes,
C                  
C                       master processor (mynum .eq. 0)
C                       slave processors (mynum .ne. 0)
C
C ----------------------------------------------------------------------
C
C
      SUBROUTINE MASTPROG()
C
C  top-level benchmark driver, executed by every task.
C  the task with mynum .eq. 0 (master) wraps the lpm1ma call with
C  screen output (clear_screen, draw_disc, progress prints); every
C  other task (slave) calls lpm1ma directly.  both kinds of task
C  end with bentim(4).
C
      COMMON /GRAFIX/ MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,
     1       IBLK,IWHT,IRED,IGRN,IBLU,IYEL,IMAG,ICYA,SRPORT
      INTEGER MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,SRPORT
      COMMON /MTINY/ MYNUM,NSLAV,NTASK,ISLMIN(128),ISLMAX(128)
C
      IF (MYNUM .NE. 0) THEN
C  ---- code for slaves ----
C  label 1001 is the re-entry point of the (disabled) repeat loop
C  that avoids a program reload for new cases
 1001   CONTINUE
C
C  lpm1ma called instead of lpm1sl because there is no i/o
        CALL LPM1MA
C
C  make sure all processors finish together
C  quit or start new case
        CALL BENTIM(4)
C
C  temporarily removed for testing, restore for continuous running
C       if(.true.) goto 1001
C
      ELSE
C  ---- code for master ----
C  label 100 is the re-entry point of the (disabled) continuous
C  loop, which would be left by control c
 100    CONTINUE
C
        CALL CLEAR_SCREEN(IBLK)
C
C  second argument shared by both draw_disc calls
C  (presumably the vertical screen position - confirm)
        IYDISC = 910
        CALL DRAW_DISC(500,IYDISC,10,ICYA)
C
        PRINT *,'immediately before  call lpm1ma'
        CALL LPM1MA
        PRINT *,'immediately after   call lpm1ma'
C
        CALL DRAW_DISC(550,IYDISC,10,IRED)
C
C  make sure all processors finish together
C  quit or start new case
        CALL BENTIM(4)
C
C  temporarily removed for testing, make true for running
C       if(.true.) goto 100
C
      ENDIF
C
      END
C
       SUBROUTINE SNDATA(K)
C  set up run parameters on the master (mynum .eq. 0), reading some
C  of them from the keyboard, and pass them to all other tasks with
C  brdcint (master side) / recvint (receiving side).
C
C  k = 1 : new-run setup.  nlres is cleared and the flag value is
C          sent to the other tasks.
C  k = 2 : calls bentim(0), chooses nrun and timrun, derives the
C          size factor ifac from nben3, scales the problem by ifac,
C          splits the lmax mesh columns into nslav slabs
C          (islmin/islmax) and sends everything to the other tasks.
C  any other value of k does nothing.
C
C  message layout for k = 2 (both sides must agree):
C    1) 4 words : nrun, ifac, timrun (real bits, via the buff/ibuff
C                 equivalence), nrun again (read as irun1 by the
C                 receivers)
C    2) 128 words : islmin
C    3) 128 words : islmax
C
C---------------------------------------------------------------------
Cl                  c2.2     physical state
       COMMON/COMSTT/
     R   B3    ,   BAV   ,   CD1   ,   CD2   ,   E1    ,   E2    ,
     R   P1    ,   P2    ,   Q1    ,   Q2    ,   REALTN,   TIMRUN
       DIMENSION
     R   B3(605,40),        BAV(605,40),       CD1(605,40),
     R   CD2(605,40),       E1(605,40),        E2(605,40),
     R   P1(20000),          P2(20000),          Q1(20000),
     R   Q2(20000)
C---------------------------------------------------------------------
Cl                  c3.1     numerical parameters
       COMMON/COMNUM/
     R   C1    ,   C2    ,   CD0   ,   CURANT,   DT    ,   DX1   ,
     R   DX1DX2,   DX1OX2,   DX2   ,   DX2OX1,   ELPERP,   HLFDX1,
     R   HLFDX2,   TCACO ,
     I   LMAX  ,   LMAXP ,   LMAXP2,   MMAX  ,   MMAXP ,   MMAXP2,
     I   NEMIT ,   NP
C
C---------------------------------------------------------------------
       COMMON/COMBAS/
     +   ALTIME,   CPTIME,   NLEDGE,   NLEND,    NLRES,    NONLIN,
     +   NOUT,     NPRINT,   NREAD,    NREC,     NRESUM,   NSTEP,
     +   STIME,    LABEL1,   LABEL2,   LABEL3,   LABEL4,   LABEL5,
     +   LABEL6,   LABEL7,   LABEL8,   NDIARY,   NIN,      NPUNCH,
     +   NRUN
       REAL        ALTIME,   CPTIME,   STIME
       LOGICAL     NLEND,    NLRES
       DIMENSION
     H   LABEL1(12),         LABEL2(12),         LABEL3(12),
     H   LABEL4(12),         LABEL5(12),         LABEL6(12),
     H   LABEL7(12),         LABEL8(12)
C
C       include 'comdev.inc'
C---------------------------------------------------------------------
Cl                  c2.3     device description
       COMMON/COMDEV/
     R   B3RHS ,   B3RHSD,   BAPLY ,   BAPLYD,   CAVEAM,   CAVEFR,
     R   CAVIC ,   CAVRES,   CAVTC ,   DEPTH ,   DEVHYT,   DEVLEN,
     R   DEVRAD,   EAPLYD,   EEMIT ,   EEMITD,   ENIT  ,   ENITD ,
     R   GAP   ,   RESAX ,   RESE  ,   RESN  ,   RESS  ,   RESW  ,
     R   RINNER,   SPACE ,   VOLTAG,   WIDTH ,
     I   LCATH ,   LCAV  ,   LLEFT ,   LWVANE,   MDEPTH,   MGAP  ,
     I   NBCAX ,   NBCCAV,   NBCE  ,   NBCN  ,   NBCS  ,   NBCW  ,
     I   NCAV  ,   NDEV
       DIMENSION
     R   CAVEAM(100),        CAVEFR(100),        CAVIC(4,100),
     R   CAVRES(100),        CAVTC(100),
     I   NBCCAV(100)
C---------------------------------------------------------------------
C       include 'c:\tinyv2r0\include\tiny.inc'
C
      COMMON /GRAFIX/ MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,
     1       IBLK,IWHT,IRED,IGRN,IBLU,IYEL,IMAG,ICYA,SRPORT
      INTEGER MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,SRPORT
      COMMON /MTINY/ MYNUM,NSLAV,NTASK,ISLMIN(128),ISLMAX(128)
      COMMON/BENINT/NBENSW,NBENOR,NBENCL,NBEN1,NBEN2,NBEN3,NBEN4,NBEN5
     1 ,NBEN6,NBEN7,NBEN8,NBEN9
C
C  message buffer; buff overlays ibuff so that a real value can be
C  carried inside an integer message
      INTEGER IBUFF(256)
      REAL BUFF(256)
      EQUIVALENCE(IBUFF(1),BUFF(1))
C
      IF(MYNUM .EQ. 0) THEN
C ==================== program for master ==========================
      IF(K .EQ. 1) THEN
       PRINT *,'new run setup'
C  i selects nlres (0 -> .false., 1 -> .true.); it is fixed at 0 here
       I=0
       IF(I .EQ. 0) NLRES=.FALSE.
       IF(I .EQ. 1) NLRES=.TRUE.
C  broadcast i to slaves
       IBUFF(1)=I
       CALL BRDCINT(MSPORT,IBUFF,1)
C
      ELSEIF(K .EQ. 2) THEN
C  set off benchmark timing
       CALL BENTIM(0)
       NRUN=10
       TIMRUN=1.0
C  timrun is only prompted for when nbensw is outside 1..4
       IF(NBENSW .LT. 1 .OR. NBENSW .GT. 4) THEN
       PRINT *,'type nanosec to be simulated this run'
       PRINT *,'     1.0 is a good value'
       PRINT *,'type 0.0 to specify number of steps instead'
       READ *,TIMRUN
       ENDIF
       IF(TIMRUN .EQ. 0.0) THEN
         PRINT *,'type number of extra timesteps in this run'
         READ *,NEXTRA
         NRUN=NSTEP+NEXTRA
       ENDIF
C
       IFAC=1
       IF(NBEN3 .GE. 2 .AND. NBEN3 .LE. 4) THEN
C  scale problem for different benchmark sizes
         IFAC=2**(NBEN3-1)
       ENDIF
C  broadcast values to slaves.  four words are sent because the
C  receivers ask for four and copy ibuff(4) into irun1; word 4
C  carries nrun so that irun1 is always defined on the receivers.
       IBUFF(1)=NRUN
       IBUFF(2)=IFAC
       BUFF(3)=TIMRUN
       IBUFF(4)=NRUN
       CALL BRDCINT(MSPORT,IBUFF,4)
C  scale down 'electrons per superparticle'
         ELPERP=ELPERP/IFAC
C  scale up injection rate
         NEMIT=NEMIT*IFAC
C  scale up number of mesh cells in one direction
         LMAX=LMAX*IFAC
C  scale up device dimensions in cell widths
         LLEFT=LLEFT*IFAC
         LCAV=LCAV*IFAC
         LWVANE=LWVANE*IFAC
         LCATH=LCATH*IFAC
C  initialise boundary indices for sections of device
C  one section per processor
       DO 210 I=1,128
         ISLMIN(I)=1
         ISLMAX(I)=LMAX
 210   CONTINUE
C
C  nslav is used as a divisor and as an index into islmin/islmax,
C  which hold 128 entries
       IF(NSLAV .LT. 1 .OR. NSLAV .GT. 128) THEN
         PRINT *,' number of processors must be 1 to 128, not ',
     1           NSLAV
         STOP
       ENDIF
C  zflen is the (fractional) number of mesh columns per processor
       ZFLEN=FLOAT(LMAX)/FLOAT(NSLAV)
       IF(ZFLEN .LT. 1.0) THEN
         PRINT *,' too many processors for problem size'
         PRINT *,' maximum processors allowed is ',LMAX
         STOP
       ENDIF
C  slab j ends at the truncated value of j*zflen; the last slab
C  always ends at lmax
       ISLMIN(1)=1
       DO 212 J=1,NSLAV-1
         ISLMAX(J)=J*ZFLEN
         ISLMIN(J+1)=ISLMAX(J)+1
 212   CONTINUE
       ISLMAX(NSLAV)=LMAX
C
C  broadcast values to slaves
       DO 300 I=1,128
         IBUFF(I)=ISLMIN(I)
 300   CONTINUE
       CALL BRDCINT(MSPORT,IBUFF,128)
       DO 301 I=1,128
         IBUFF(I)=ISLMAX(I)
 301   CONTINUE
       CALL BRDCINT(MSPORT,IBUFF,128)
C
C  no test particles when nbensw selects a benchmark case (1..4)
       IF(NBENSW .GE. 1 .AND. NBENSW .LE. 4) GOTO 250
C  insert some test particles to test boundary exchange
C  calc boundary values for this processor (mynum=its taskid)
       JMIN=ISLMIN(MYNUM+1)
       JMAX=ISLMAX(MYNUM+1)
C  jmin,jmax are mesh point numbers; zxmin,zxmax are cell boundaries
       ZXMIN=JMIN
       ZXMAX=JMAX+1.0
       ZMID=(ZXMAX+ZXMIN)/2.0
C  first particle: 4 cells in from the lower boundary, but not
C  beyond the middle of the slab
       ZXPTC=ZXMIN+4.0
       IF(ZXPTC .GT. ZMID) ZXPTC=ZMID
C
       NP=NP+1
       Q1(NP)=ZXPTC
       Q2(NP)=12.5+0.4*MYNUM
       P1(NP)=-2.0
       P2(NP)=0.0
C
C  second particle: 4 cells in from the upper boundary, but not
C  below the middle of the slab
       ZXPTC=ZXMAX-4.0
       IF(ZXPTC .LT. ZMID) ZXPTC=ZMID
       NP=NP+1
       Q1(NP)=ZXPTC
       Q2(NP)=13.5+0.4*MYNUM
       P1(NP)=2.0
       P2(NP)=0.0
C
 250   CONTINUE
      ENDIF
C
C  end of master program
      ELSE
C ==================== program for slaves ==========================
C
C ............................................................
      IF(K .EQ. 1) THEN
C  default of initial run
       IBUFF(1)=0
C  receive i from master
       CALL RECVINT(MSPORT,0,IBUFF,1)
       I=IBUFF(1)
       IF(I .EQ. 0) NLRES=.FALSE.
       IF(I .EQ. 1) NLRES=.TRUE.
C ..............................................................
      ELSEIF(K .EQ. 2) THEN
C  start and initialise benchmark
       CALL BENTIM(0)
       IF(NBENSW .EQ. 99) NBENSW=0
C  receive values from master (see message layout in the header)
       IBUFF(1)=10
       CALL RECVINT(MSPORT,0,IBUFF,4)
       NRUN=IBUFF(1)
       IFAC=IBUFF(2)
       TIMRUN=BUFF(3)
       IRUN1=IBUFF(4)
C  the task with mynum .eq. nslav takes its step count from irun1
C  when the run length is given in steps (timrun .eq. 0.0)
       IF(TIMRUN .EQ. 0.0 .AND. MYNUM .EQ. NSLAV) NRUN=IRUN1
C  scale down 'electrons per superparticle'
         ELPERP=ELPERP/IFAC
C  scale up injection rate
         NEMIT=NEMIT*IFAC
C  scale up number of mesh cells in one direction
         LMAX=LMAX*IFAC
C  scale up device dimensions in cell widths
         LLEFT=LLEFT*IFAC
         LCAV=LCAV*IFAC
         LWVANE=LWVANE*IFAC
         LCATH=LCATH*IFAC
C
C  initialise boundary indices for sections of device
C  one section per processor
C  receive boundary indices from master
       CALL RECVINT(MSPORT,0,IBUFF,128)
       DO 3001 I=1,128
         ISLMIN(I)=IBUFF(I)
 3001  CONTINUE
       CALL RECVINT(MSPORT,0,IBUFF,128)
       DO 3011 I=1,128
         ISLMAX(I)=IBUFF(I)
 3011   CONTINUE
C
       IF(NBENSW .GE. 1 .AND. NBENSW .LE. 4) GOTO 2501
C  test-particle insertion is switched off on the slaves by the
C  unconditional jump below; the code is kept so that it can be
C  switched back on
       IF(.TRUE.) GOTO 2501
C  insert some test particles to test boundary exchange
C  calc boundary values for this processor (mynum=its taskid)
       JMIN=ISLMIN(MYNUM+1)
       JMAX=ISLMAX(MYNUM+1)
C  jmin,jmax are mesh point numbers; zxmin,zxmax are cell boundaries
       ZXMIN=JMIN
       ZXMAX=JMAX+1.0
       ZMID=(ZXMAX+ZXMIN)/2.0
       ZXPTC=ZXMIN+4.0
       IF(ZXPTC .GT. ZMID) ZXPTC=ZMID
C
       NP=NP+1
       Q1(NP)=ZXPTC
       Q2(NP)=12.5+0.4*MYNUM
       P1(NP)=-2.0
       P2(NP)=0.0
C
       ZXPTC=ZXMAX-4.0
       IF(ZXPTC .LT. ZMID) ZXPTC=ZMID
       NP=NP+1
       Q1(NP)=ZXPTC
       Q2(NP)=13.5+0.4*MYNUM
       P1(NP)=2.0
       P2(NP)=0.0
C
 2501  CONTINUE
      ENDIF
C
C  end of slave program
       ENDIF
C
       END
C
C/ module c0s5
C
         SUBROUTINE RECORD(KLEDGE,KCALL,KRET)
C
C 0.5  read and write restart records
C
C  kledge : unit number of the unformatted restart file
C  kcall  : 2 selects the read; 1 (and, through the computed goto,
C           any value outside 1..2) selects the write
C  kret   : set on the read path only: 1 success, 2 end of file,
C           3 read error
C
C  note: the routine is currently switched off by the unconditional
C  return below, so no i/o is done and kret is left untouched.
C  everything after that return is kept for when restart is
C  switched back on.
C
C  each common block is declared here as plain real/integer/logical
C  arrays so that it can be transferred as a whole.
C
       IMPLICIT COMPLEX(C),  DOUBLE PRECISION(D), LOGICAL(L)
C
       CHARACTER S1148(16)*48,S118(5)*8
C
       COMMON/CHABAS/ S1148,S118
       COMMON/COMBAS/ R11(3),I11(108),L11(2)
       COMMON/COMDDP/ I19(65),L19(161)
       COMMON/COMCON/ R21(8)
       COMMON/COMSTT/ R22(225202)
       COMMON/COMDEV/ R23(823),I23(113)
       COMMON/COMDIA/ R24(327),I24(202)
       COMMON/COMKLY/ R25(4)
       COMMON/COMNUM/ R31(14),I31(8)
       COMMON/COMSCA/ R32(16)
       COMMON/COMHOK/ R41(36),I41(10)
       COMMON/COMGEO/ R42(13312),I42(21506)
       COMMON/COMOUT/ R51(17),I51(9)
C
C
      COMMON /GRAFIX/ MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,
     1       IBLK,IWHT,IRED,IGRN,IBLU,IYEL,IMAG,ICYA,SRPORT
      INTEGER MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,SRPORT
      COMMON /MTINY/ MYNUM,NSLAV,NTASK,ISLMIN(128),ISLMAX(128)
C
C  br11 overlays the start of /combas/ (3+108+2 = 113 words)
      DIMENSION BR11(113)
      EQUIVALENCE(R11(1),BR11(1))
C---------------------------------------------------------------------
C  restart records are disabled: return at once
         IF(.TRUE.) RETURN
         GO TO (100,200),KCALL
C---------------------------------------------------------------------
Cl              1.         write record
C
  100    CONTINUE
         WRITE(KLEDGE)
     .   S1148,S118,
     .   R11,I11,L11,I19,L19,R21,R22,R23,I23,R24,I24,R25,R31,I31,R32,
     .   S1148,S118,
     .   R41,I41,R42,I42,R51,I51
         RETURN
C
C---------------------------------------------------------------------
Cl              2.         read record
C
C  the item list must stay identical to the one in the write above
  200    CONTINUE
         READ(KLEDGE,END=202,ERR=203)
     .   S1148,S118,
     .   R11,I11,L11,I19,L19,R21,R22,R23,I23,R24,I24,R25,R31,I31,R32,
     .   S1148,S118,
     .   R41,I41,R42,I42,R51,I51
C
       IF(MYNUM .EQ. 0) THEN
C  master program
C  broadcast restart information to slaves
         CALL BRDCINT(MSPORT,R25,4)
       ELSE
C  slave program
C  receive restart information from master.  the second argument
C  is the literal 0 used by the other recvint calls in this file;
C  an unassigned local variable was passed here before.
         CALL RECVINT(MSPORT,0,R25,4)
      ENDIF
C
C     success
  201    KRET=1
         RETURN
C
C     end of file
  202    KRET=2
         RETURN
C
C     error condition
  203    KRET=3
         RETURN
C
         END
C
         SUBROUTINE TMANAL
C
C  turn the time stamps sntime(point,column) of the calling task
C  into time differences sntdif(point,column) and a loop time
C  tloop(column), all held in /comtim/.
C
C  column used: 20 when mynum .eq. nslav (the 1-processor
C  calculation), otherwise mynum+1 limited to 19.
C
C  every tenth step (mod(nstep,10) .eq. 0) the task with mynum .eq. 0
C  and the task with mynum .eq. nslav each write one progress line
C  to unit 6.
C
C  zlst10 keeps the time stamp of point 10 from the previous call;
C  it starts at 0.0 and must survive between calls.
C-----------------------------------------------------------------------
       COMMON/COMBAS/
     +   ALTIME,   CPTIME,   NLEDGE,   NLEND,    NLRES,    NONLIN,
     +   NOUT,     NPRINT,   NREAD,    NREC,     NRESUM,   NSTEP,
     +   STIME,    LABEL1,   LABEL2,   LABEL3,   LABEL4,   LABEL5,
     +   LABEL6,   LABEL7,   LABEL8,   NDIARY,   NIN,      NPUNCH,
     +   NRUN
       REAL        ALTIME,   CPTIME,   STIME
       LOGICAL     NLEND,    NLRES
       DIMENSION
     H   LABEL1(12),         LABEL2(12),         LABEL3(12),
     H   LABEL4(12),         LABEL5(12),         LABEL6(12),
     H   LABEL7(12),         LABEL8(12)
C
C
      COMMON /GRAFIX/ MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,
     1       IBLK,IWHT,IRED,IGRN,IBLU,IYEL,IMAG,ICYA,SRPORT
      INTEGER MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,SRPORT
      COMMON /MTINY/ MYNUM,NSLAV,NTASK,ISLMIN(128),ISLMAX(128)
      COMMON/BENINT/NBENSW,NBENOR,NBENCL,NBEN1,NBEN2,NBEN3,NBEN4,NBEN5
     1 ,NBEN6,NBEN7,NBEN8,NBEN9
C
         COMMON/COMTIM/ SNTIME(20,20),SNTDIF(20,20),TLOOP(20)
C  standard-conforming initialisation (data statement) of the saved
C  value, in place of an initialiser inside the type statement
         REAL ZLST10
         SAVE ZLST10
         DATA ZLST10/0.0/
C
C  output channel for screen
         IOUT=6
C
C  sntime(timpnt,mynum+1) time point timpnt, processor number mynum
C  1-proc calc is in mynum=nslav, times in position 20
C
         IF(MYNUM .EQ. NSLAV) THEN
           INUM1=20
         ELSE
           INUM1=MYNUM+1
           IF(INUM1 .GT. 19) INUM1=19
         ENDIF
C
C  first pass: difference between consecutive time points
         DO 100 I=2,20
           ZTIME=SNTIME(I,INUM1)-SNTIME(I-1,INUM1)
           SNTDIF(I,INUM1)=ZTIME
C          print *,'time difference at point ',i,'=',ztime,' seconds'
 100     CONTINUE
C  point 1 and the whole loop are measured from point 10 of the
C  previous call
         SNTDIF(1,INUM1)=SNTIME(1,INUM1)-ZLST10
         TLOOP(INUM1)=SNTIME(10,INUM1)-ZLST10
         ZLST10=SNTIME(10,INUM1)
C  the time points are not taken in numerical order, so the
C  differences below replace those of the first pass
C  patch up time differences due to insertion of point 11,15,16,17
         SNTDIF(11,INUM1)=SNTIME(11,INUM1)-SNTIME(19,INUM1)
         SNTDIF(15,INUM1)=SNTIME(15,INUM1)-SNTIME(11,INUM1)
         SNTDIF(5,INUM1)=SNTIME(5,INUM1)-SNTIME(16,INUM1)
         SNTDIF(17,INUM1)=SNTIME(17,INUM1)-SNTIME(6,INUM1)
         SNTDIF(7,INUM1)=SNTIME(7,INUM1)-SNTIME(17,INUM1)
C  patch up time differences due to insertion of point 18,19
         SNTDIF(18,INUM1)=SNTIME(18,INUM1)-SNTIME(4,INUM1)
         SNTDIF(20,INUM1)=SNTIME(20,INUM1)-SNTIME(2,INUM1)
         SNTDIF(3,INUM1)=SNTIME(3,INUM1)-SNTIME(20,INUM1)
C  patch up time differences due to insertion of point 12
         SNTDIF(12,INUM1)=SNTIME(12,INUM1)-SNTIME(3,INUM1)
C  patch up time differences due to insertion of point 14
         SNTDIF(4,INUM1)=SNTIME(4,INUM1)-SNTIME(14,INUM1)
C
C  monitor progress to control screen every ten steps
         IF(MYNUM .EQ. 0 .AND. MOD(NSTEP,10) .EQ. 0)
     1     WRITE(IOUT,50) NSLAV,NSTEP
 50      FORMAT(1X,I2,'-processor  step = ',I4)
         IF(MYNUM .EQ. NSLAV .AND. MOD(NSTEP,10) .EQ. 0)
     1     WRITE(IOUT,51)NSTEP
 51      FORMAT(31X,'1-processor  step = ',I4)
C
         END
C
C
         SUBROUTINE TMOUT(K,IOUT)
C
C  print the loop timing analysis held in /comtim/ (sntdif, tloop).
C
C  k    : processor number (i.e. value of mynum) whose timings are
C         to be printed.  k .eq. nslav selects column 20 (the
C         1-processor calculation), otherwise column k+1 limited
C         to 19.
C  iout : output channel, =6 for screen and nout for paper
C
C  nothing is printed unless the caller is the master (mynum .eq. 0)
C  and full output is selected (nben6 .lt. 1).
C
C  output: a heading, one line per timed phase in milliseconds, the
C  loop time in seconds, and the share of the loop time spent in
C  arithmetic, communication, graphics and output.  the shares are
C  printed as 0.000 when the loop time is zero, so that no division
C  by zero takes place.
C-----------------------------------------------------------------------
       COMMON/COMBAS/
     +   ALTIME,   CPTIME,   NLEDGE,   NLEND,    NLRES,    NONLIN,
     +   NOUT,     NPRINT,   NREAD,    NREC,     NRESUM,   NSTEP,
     +   STIME,    LABEL1,   LABEL2,   LABEL3,   LABEL4,   LABEL5,
     +   LABEL6,   LABEL7,   LABEL8,   NDIARY,   NIN,      NPUNCH,
     +   NRUN
       REAL        ALTIME,   CPTIME,   STIME
       LOGICAL     NLEND,    NLRES
       DIMENSION
     H   LABEL1(12),         LABEL2(12),         LABEL3(12),
     H   LABEL4(12),         LABEL5(12),         LABEL6(12),
     H   LABEL7(12),         LABEL8(12)
C
      COMMON /MTINY/ MYNUM,NSLAV,NTASK,ISLMIN(128),ISLMAX(128)
      COMMON/BENINT/NBENSW,NBENOR,NBENCL,NBEN1,NBEN2,NBEN3,NBEN4,NBEN5
     1 ,NBEN6,NBEN7,NBEN8,NBEN9
C
         COMMON/COMTIM/ SNTIME(20,20),SNTDIF(20,20),TLOOP(20)
C
C  table of printed phases: label and time point, in print order
         INTEGER NPHASE
         PARAMETER (NPHASE=19)
         CHARACTER*6 PHNAME(NPHASE),CLNAME(4)
         INTEGER IPHASE(NPHASE)
         REAL ZCLASS(4)
         SAVE PHNAME,CLNAME,IPHASE
         DATA PHNAME/'output','scaplt','movcur','snxptc','cnorm ',
     1     'snxcd ','bef.eb','surcur','snxcd0','stepe ','snxe12',
     2     'stepb3','snxb3 ','bextrp','snxbav','surden','emitel',
     3     'accel ','ebjcls'/
         DATA IPHASE/1,2,20,12,13,14,4,18,19,11,
     1     15,16,5,6,17,7,8,9,10/
         DATA CLNAME/'arith ','comms ','graph ','output'/
C
C  sntime(timpnt,mynum+1) time point timpnt, processor number mynum
C  1-proc calc is in mynum=nslav, measured times are in position 20
C
         IF(MYNUM .NE. 0) RETURN
         IF(K .EQ. NSLAV) THEN
           INUM1=20
         ELSE
           INUM1=K+1
           IF(INUM1 .GT. 19) INUM1=19
         ENDIF
C
C  printout at end for master and 1-proc only
         IF(NBEN6 .GE. 1) RETURN
C
C  calculate distribution of time to arith, comms, graph, i/o
         ZLOOP=TLOOP(INUM1)
         ZARITH=SNTDIF(20,INUM1)+SNTDIF(13,INUM1)
     1   +SNTDIF(4,INUM1)+SNTDIF(11,INUM1)+SNTDIF(16,INUM1)
     2   +SNTDIF(6,INUM1)+SNTDIF(7,INUM1)+SNTDIF(8,INUM1)
     3   +SNTDIF(9,INUM1)+SNTDIF(10,INUM1)+SNTDIF(18,INUM1)
         ZCOMMS=SNTDIF(12,INUM1)+SNTDIF(14,INUM1)+SNTDIF(15,INUM1)
     1   +SNTDIF(5,INUM1)+SNTDIF(17,INUM1)+SNTDIF(19,INUM1)
         ZGRAPH=SNTDIF(2,INUM1)+SNTDIF(3,INUM1)
         ZOUT=SNTDIF(1,INUM1)
         ZCLASS(1)=ZARITH
         ZCLASS(2)=ZCOMMS
         ZCLASS(3)=ZGRAPH
         ZCLASS(4)=ZOUT
C
C  print results
         WRITE(IOUT,991) K
 991     FORMAT(1X,'     loop timing for processor :',I4,/
     1             '     -------------------------------'/)
 1000    FORMAT(1X,A6,' time =',F10.3,' ms')
 1051    FORMAT(1X,'-----------------------')
 1050    FORMAT(1X,'loop   time =',F10.3,' seconds')
 1060    FORMAT(1X,A6,' time =',F10.3,' s,  =',F6.3,'%')
 1054    FORMAT(/)
C
C  one line per phase, seconds converted to milliseconds
         DO 20 J=1,NPHASE
           ZT=SNTDIF(IPHASE(J),INUM1)
C  scaplt is reported as the sum of time points 2 and 3
           IF(J .EQ. 2) ZT=ZT+SNTDIF(3,INUM1)
           WRITE(IOUT,1000) PHNAME(J),1000.0*ZT
 20      CONTINUE
C
         WRITE(IOUT,1051)
         WRITE(IOUT,1050) ZLOOP
         WRITE(IOUT,1051)
C
C  time and percentage of the loop time for each class of work
         DO 30 J=1,4
           ZPCT=0.0
           IF(ZLOOP .NE. 0.0) ZPCT=100.0*ZCLASS(J)/ZLOOP
           WRITE(IOUT,1060) CLNAME(J),ZCLASS(J),ZPCT
 30      CONTINUE
         WRITE(IOUT,1054)
C
         END
C
         SUBROUTINE ENDRUN
C
C 4.2  terminate the run
C
C  calls expert(4,2,1), flags the state for restart (nlres true,
C  nlend false) and then calls blines(2), runtim and page.
C  writing of the restart record is commented out; as a result no
C  statement branches to the error exit at label 200 any more.
C
C---------------------------------------------------------------------
Cl                  c1.1.    basic system parameters
       COMMON/COMBAS/
     +   ALTIME,   CPTIME,   NLEDGE,   NLEND,    NLRES,    NONLIN,
     +   NOUT,     NPRINT,   NREAD,    NREC,     NRESUM,   NSTEP,
     +   STIME,    LABEL1,   LABEL2,   LABEL3,   LABEL4,   LABEL5,
     +   LABEL6,   LABEL7,   LABEL8,   NDIARY,   NIN,      NPUNCH,
     +   NRUN
       REAL        ALTIME,   CPTIME,   STIME
       LOGICAL     NLEND,    NLRES
       DIMENSION
     H   LABEL1(12),         LABEL2(12),         LABEL3(12),
     H   LABEL4(12),         LABEL5(12),         LABEL6(12),
     H   LABEL7(12),         LABEL8(12)
C       include 'chabas.inc'
C---------------------------------------------------------------------
Cl                  c1.2.    basic system character parameters
C     chabas introduces character type for labels in fortran77
       CHARACTER*48
     +   CHLAB1,   CHLAB2,   CHLAB3,   CHLAB4,   CHLAB5,   CHLAB6,
     +   CHLAB7,   CHLAB8,   CHLAB9,   CHLB10,   CHLB11,   CHLB12,
     +   CHLB13,   CHLB14,   CHLB15,   CHLB16
       CHARACTER*8
     +   CHRDAT,   CHRTIM,   CHDATO,   CHTIMO,   CHREFN
       COMMON/CHABAS/
     +   CHLAB1,   CHLAB2,   CHLAB3,   CHLAB4,   CHLAB5,   CHLAB6,
     +   CHLAB7,   CHLAB8,   CHLAB9,   CHLB10,   CHLB11,   CHLB12,
     +   CHLB13,   CHLB14,   CHLB15,   CHLB16,   CHRDAT,   CHRTIM,
     +   CHDATO,   CHTIMO,   CHREFN
C
C       include 'comddp.inc'
C--------------------------------------------------------------------
Cl                  c1.9.    development and diagnostic parameters
       COMMON/COMDDP/
     I   MAXDUM,   MXDUMP,   NADUMP,   NCLASS,   NPDUMP,   NPOINT,
     I   NSUB,     NVDUMP,
     L   NLCHED,   NLHEAD,   NLOMT1,   NLOMT2,   NLOMT3,   NLREPT
       LOGICAL
     L   NLCHED,   NLHEAD,   NLOMT1,   NLOMT2,   NLOMT3,   NLREPT
       DIMENSION
     I   NADUMP(20),         NPDUMP(20),         NVDUMP(20),
     L   NLHEAD(9),          NLOMT1(50),         NLOMT2(50),
     L   NLOMT3(50)
C---------------------------------------------------------------------
C
      COMMON /MTINY/ MYNUM,NSLAV,NTASK,ISLMIN(128),ISLMAX(128)
C
C  class and subprogram numbers handed to expert
       DATA        ICLASS/4/,   ISUB/2/
C
         CALL EXPERT(ICLASS,ISUB,1)
C
C---------------------------------------------------------------------
Cl              1.         write restart coordinates
C
C     set the flags so that the saved state describes a restart
         NLEND=.FALSE.
         NLRES=.TRUE.
C     output of the data to channel nledge is disabled:
C        open(unit=nledge,file=chlab8,err=200,form='unformatted')
C        call record(nledge,1,iret)
C        close(unit=nledge)
C        callmesage('  *****restart coordinates written*****         ' )
C
C     final times
         CALL BLINES(2)
C        call daytim
C        print *,' time for processor 0: n-processor run'
         CALL RUNTIM
         CALL PAGE
C
         RETURN
C---------------------------------------------------------------------
C     error exit for the (disabled) open of the restart file
 200  CALL MESAGE('  *****error opening restart file*****         ')
      CALL MESAGE('  *****run abandoned*****         ')
         STOP
C---------------------------------------------------------------------
         END
C
         SUBROUTINE BENTIM(K)
C  assemble and calculate genesis dmmp benchmark timing results
C
C---------------------------------------------------------------------
Cl                  c2.2     physical state
       COMMON/COMSTT/
     R   B3    ,   BAV   ,   CD1   ,   CD2   ,   E1    ,   E2    ,
     R   P1    ,   P2    ,   Q1    ,   Q2    ,   REALTN,   TIMRUN
       DIMENSION
     R   B3(605,40),        BAV(605,40),       CD1(605,40),
     R   CD2(605,40),       E1(605,40),        E2(605,40),
     R   P1(20000),          P2(20000),          Q1(20000),
     R   Q2(20000)
C
C---------------------------------------------------------------------
Cl                  c3.1     numerical parameters
       COMMON/COMNUM/
     R   C1    ,   C2    ,   CD0   ,   CURANT,   DT    ,   DX1   ,
     R   DX1DX2,   DX1OX2,   DX2   ,   DX2OX1,   ELPERP,   HLFDX1,
     R   HLFDX2,   TCACO ,
     I   LMAX  ,   LMAXP ,   LMAXP2,   MMAX  ,   MMAXP ,   MMAXP2,
     I   NEMIT ,   NP
C
C---------------------------------------------------------------------
Cl                  c1.1.    basic system parameters
       COMMON/COMBAS/
     +   ALTIME,   CPTIME,   NLEDGE,   NLEND,    NLRES,    NONLIN,
     +   NOUT,     NPRINT,   NREAD,    NREC,     NRESUM,   NSTEP,
     +   STIME,    LABEL1,   LABEL2,   LABEL3,   LABEL4,   LABEL5,
     +   LABEL6,   LABEL7,   LABEL8,   NDIARY,   NIN,      NPUNCH,
     +   NRUN
       REAL        ALTIME,   CPTIME,   STIME
       LOGICAL     NLEND,    NLRES
       DIMENSION
     H   LABEL1(12),         LABEL2(12),         LABEL3(12),
     H   LABEL4(12),         LABEL5(12),         LABEL6(12),
     H   LABEL7(12),         LABEL8(12)
C---------------------------------------------------------------------
C
      COMMON /GRAFIX/ MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,
     1       IBLK,IWHT,IRED,IGRN,IBLU,IYEL,IMAG,ICYA,SRPORT
      INTEGER MASKSZ,MSPORT,SSPORT,GPORT,GBOOT,G_TASKID,SRPORT
      COMMON /MTINY/ MYNUM,NSLAV,NTASK,ISLMIN(128),ISLMAX(128)
      COMMON/BENCOM/BENT0,BENT1,BENT2,BENT3,BENT4,BENT5,BENT6
     1 ,BENT7,BENT8,BENT9,T1P,TNP,SPEDUP
      COMMON/BENINT/NBENSW,NBENOR,NBENCL,NBEN1,NBEN2,NBEN3,NBEN4,NBEN5
     1 ,NBEN6,NBEN7,NBEN8,NBEN9
      COMMON/BENARR/NPART(1000),NPRNUM(1000),NPART1(1000)
         COMMON/COMTIM/ SNTIME(20,20),SNTDIF(20,20),TLOOP(20)
      COMMON/READIN/NBENSH,NBEN4H
C
      INTEGER IBUFF(1000)
      REAL BUFF(1000)
      EQUIVALENCE(IBUFF(1),BUFF(1))
      CHARACTER*80 CHARS,BLANK,QUIT
      CHARACTER STRNG1*1,STRNG2*8,FILOUT*15,ADJLFT*8
      EXTERNAL FUNCTION ADJLFT
      DATA BLANK/' '/,QUIT/'q'/
C        save bencom
C
         DIMENSION ZTARR(2)
C
         IF(MYNUM .EQ. 0) THEN
C  master program
         IF(K .EQ. 0) THEN
C  initialise benchmark
           CALL SECOWA(BENT0,ZTARR)
C  set defaults
           NBENSW=0
           NBENOR=1
C  nbencl=0 plot dots as black, i.e. erase them
C  nbencl=1 plot dots as colours, i.e. normal plotting
           NBENCL=1
C  nben1=0 single display, don't plot mynum=nslav (e.g. in movie demo)
C  nben1=1 split display, plot mynum=nslav (e.g. when testing)
           NBEN1=1
C  nben2=1 selects pseudo calculation without communication
C       =0 normal calculation
           NBEN2=0
C  nben3=1,2,3,4 selects problem size
           NBEN3=1
C  nben4=0 omit 1-processor calculation
C       =1 perform 1-processor calculation
           NBEN4=1
C  control extent of output
C  nben6=0 full output including time analysis of main loop
C       =1 performance metrics only, i.e. normal benchmark output
           NBEN6=1
C
           NBENSW=0
C  get input from /readin/ common
           NBENSW=NBENSH
C
           IF(NBENSW .EQ. 0) THEN
C  fixed movie display on whole screen of multi-proc only
             NBENOR=0
             NBEN1=0
             NBEN2=0
             NBEN3=1
             NBEN4=1
C
           ELSEIF(NBENSW .GE. 1 .AND. NBENSW .LE. 4) THEN
             NBEN3=NBENSW
             NBEN2=0
             NBEN4=1
C  get input from /readin/ common
             NBEN4=NBEN4H
C
           ELSEIF(NBENSW .EQ. 99) THEN
C  for testing, allow setting of orbits & split display as required
           PRINT *,' type 1 for case=1, mesh= 75x33'
           PRINT *,' type 2         =2,     =150x33'
           PRINT *,' type 3         =3,     =300x33'
           PRINT *,' type 4         =4,     =600x33'
           NBEN3=1
           READ(*,'(a80)') CHARS
           IF(CHARS .EQ. QUIT) STOP
           IF(CHARS .NE. BLANK) READ(CHARS,'(i10)') NBEN3
             PRINT *,' type 0 for movie'
             PRINT *,' type 1 for orbits'
             PRINT *,' type q to quit'
             NBENOR=0
             READ(*,'(a80)') CHARS
             IF(CHARS .EQ. QUIT) STOP
             IF(CHARS .NE. BLANK) READ(CHARS,'(i10)') NBENOR
             PRINT *,' type 0: full screen display of multi-proc only'
             PRINT *,' type 1: split display with single-proc added'
             PRINT *,' type q to quit'
             NBEN1=1
             READ(*,'(a80)') CHARS
             IF(CHARS .EQ. QUIT) STOP
             IF(CHARS .NE. BLANK) READ(CHARS,'(i10)') NBEN1
             PRINT *,' type 0: proper calculation with communication'
             PRINT *,' type 1: artificial calculation with no comms.'
             PRINT *,' type q to quit'
             NBEN2=0
             READ(*,'(a80)') CHARS
             IF(CHARS .EQ. QUIT) STOP
             IF(CHARS .NE. BLANK) READ(CHARS,'(i10)') NBEN2
             PRINT *,' type 0: omit 1-processor check calculation'
             PRINT *,' type 1: perform 1-processor check calculation'
             PRINT *,' type q to quit'
             NBEN4=1
             READ(*,'(a80)') CHARS
             IF(CHARS .EQ. QUIT) STOP
             IF(CHARS .NE. BLANK) READ(CHARS,'(i10)') NBEN4
         PRINT *,' type 0: full output with timing analysis of loop'
         PRINT *,' type 1: benchmark performance metrics only'
         PRINT *,' type q to quit'
             NBEN6=1
             READ(*,'(a80)') CHARS
             IF(CHARS .EQ. QUIT) STOP
             IF(CHARS .NE. BLANK) READ(CHARS,'(i10)') NBEN6
             NBENSW=0
           ENDIF
C
           IBUFF(1)=NBENSW
           IBUFF(2)=NBENOR
           IBUFF(3)=NBEN1
           IBUFF(4)=NBEN2
           IBUFF(5)=NBEN3
           IBUFF(6)=NBEN4
           IBUFF(7)=NBEN6
C  broadcast value to slaves
           CALL BRDCINT(MSPORT,IBUFF,7)
C  open different output file for each case
           CLOSE(UNIT=NOUT)
           WRITE(STRNG1,66) NBEN3
 66        FORMAT(I1)
           WRITE(STRNG2,67) NSLAV
 67        FORMAT(I8)
           FILOUT='lpm1c'//STRNG1//'p'//ADJLFT(STRNG2)
           OPEN(UNIT=NOUT,FILE=FILOUT)
C
         ELSEIF(K .EQ. 1) THEN
C  called just before entering timestep loop
         IALPHA=2**(NBEN3-1)
         WRITE(6,1100) NBEN3,IALPHA,LMAX,MMAX,NSLAV,NRUN
         CALL HEADER(NOUT)
         WRITE(NOUT,1102)
         WRITE(NOUT,1101)
         WRITE(NOUT,1100) NBEN3,IALPHA,LMAX,MMAX,NSLAV,NRUN
 1102    FORMAT(
     3 ' this benchmark is the simulation of an electronic device'/
     4 ' using a particle-mesh (pm) method, often also called a'/
     5 ' particle-in-cell (pic) simulation. in each timestep the'/
     6 ' electric and magnetic fields on an (lmax x mmax) mesh are'/
     7 ' advanced explicitly in time using maxwell''s equations, and'/
     8 ' the particles (electrons) are advanced in the fields using'/
     9 ' newton''s equations. '//
     1 ' the benchmark is described as local because the time scale'/
     1 ' is such that the fields may be computed explicitly, using'/
     2 ' fields only local to each mesh point. four benchmark cases'/
     3 ' are provided (nben3=1,2,3,4), giving four problem sizes'/
     4 ' described by the size factor alpha=1,2,4,8 and mesh numbers'/
     5 ' (75*alpha,33). the number of particles at the end of the'/
     6 ' run of 1 picosecond is given empirically by'/
     7 '                  628*alpha**1.172.  '/)
 1101    FORMAT(
     8 ' as the number of mesh-points increases for the same physical'/
     9 ' dimension, the time-step must be reduced to satisfy the cfl'/
     1 ' stability criterion.  this effect has an important influence'/
     2 ' on the meaning of the performance metrics. the performance'/
     3 ' is expressed in several different metrics (and units) for'/
     4 ' comparison purposes.  as well as the traditional speedup and'/
     5 ' efficiency, we give the temporal (tstep/s), simulation'/
     6 ' (sim-ps/s), and benchmark (mflop/s(lpm1)) performance, which'/
     1 ' are much more meaningful and useful measures.'//
     2 ' parallelisation is by one-dimensional domain decomposition,'/
     3 ' in the first coordinate. each processor is responsible for'/
     4 ' a slab of space, and stores the mesh-ponts and coordinates'/
     5 ' of particles in its region of space. during each timestep'/
     6 ' particle coordinates are transferred between processors as'/
     7 ' the particles move from region to region.'/)
C .................................................................
C  open files for 1-processor results
           OPEN(11,FILE='res1p.size1')
           OPEN(12,FILE='res1p.size2')
           OPEN(13,FILE='res1p.size3')
           OPEN(14,FILE='res1p.size4')
C  start of calculational benchmark
C  reset and start racetrack
           IF(NBENSW .LT. 1 .OR. NBENSW .GT. 4) CALL MARKST(1)
C  synchronise start of multi and uni-processor runs
           CALL BARRI(NSLAV)
           CALL SECOWA(BENT1,ZTARR)
C .................................................................
         ELSEIF(K .EQ. 2) THEN
C  called at end of stepon each timestep
C  remember number of particles in this processor this step
           ISTEP=NSTEP+1
           IF(ISTEP .GT. 1000) ISTEP=1000
           IF(ISTEP .LT. 2) ISTEP=2
           NPART(1)=MYNUM
           NPART(ISTEP)=NP
           NPRNUM(1)=MYNUM
           NPRNUM(ISTEP)=MYNUM
C  no racetrack when benchmarking
           IF(NBENSW .GE. 1 .AND. NBENSW .LE.4) RETURN
C  update racetrack
           CALL MARKST(2)
C .................................................................
         ELSEIF(K .EQ. 3) THEN
C  end of calculational benchmark
C  bent3 is multi-processor end time
           CALL SECOWA(BENT3,ZTARR)
C  finish multiprocessor race
           CALL MARKST(3)
C  count number of particles in multi-proc run
C  assume that 1-proc ends last
C  each processor sends its own number of particles as barrier signal
C  and its time to execute
      BAVNP=0.0
      J1=ISLMIN(MYNUM+1)
      J2=ISLMAX(MYNUM+1)
      DO 810 J=J1,J2
        BAVNP=BAVNP+BAV(J,12)
 810  CONTINUE
      INNP=NP
      DO 800 I=1,NSLAV-1
        CALL RECVINT(MSPORT,0,IBUFF,3)
        INNP=INNP+IBUFF(1)
C  time in ibuff(2) not used        
        BAVNP=BAVNP+BUFF(3)
 800  CONTINUE
C  1-processor calculation omitted if nben4=0
      IF(NBEN4 .EQ. 1) THEN
C  wait for uni-processor(nslav) to finish
        CALL RECVINT(MSPORT,0,IBUFF,3)
C  bent4 is uni-processor end time
           CALL SECOWA(BENT4,ZTARR)
        IN1P=IBUFF(1)
        T1P=BUFF(2)
        BAV1P=BUFF(3)
C  write 1-processor result to file
        WRITE(NBEN3+10,*) IN1P,T1P,BAV1P
      ELSEIF(NBEN4 .EQ. 0) THEN
        IN1P=1
        T1P=0.0
        BAV1P=0.0
        READ(NBEN3+10,*) IN1P,T1P,BAV1P
      ENDIF
C
        ZFAC=1.0/LMAX
        BAV1P=BAV1P*ZFAC
        BAVNP=BAVNP*ZFAC
C
C  send continuation signal to all slaves
      CALL BRDCINT(MSPORT,IBUFF,1)
C
C  ################################################### 1
C
C  receive number particles each step from slaves
        DO 840 J=1,NSLAV
          CALL RECVINT(MSPORT,0,IBUFF,NRUN+1)
C
          NPART1(1)=0
          NPRNUM(1)=0
          DO 850 JJ=2,NRUN+1
            IF(IBUFF(1) .NE. NSLAV) THEN
              IF (IBUFF(JJ) .GT. NPART(JJ)) THEN
C             replace particle number if newprocessor has more
C             ibuff(jj) is # particles, ibuff(1) is processor mynum
                NPART(JJ)=IBUFF(JJ)
                NPRNUM(JJ)=IBUFF(1)
              ENDIF
            ELSE
              NPART1(JJ)=IBUFF(JJ)
            ENDIF
 850      CONTINUE
C
 840    CONTINUE
C
C  send continuation signal to all slaves
      CALL BRDCINT(MSPORT,IBUFF,1)
C
C  ################################################### 2
C
C  calculate average number particles per step
        ZNPAR=0.0
        ZNPAR1=0.0
        DO 860 JJ=2,NRUN+1
          ZNPAR=ZNPAR+NPART(JJ)
          ZNPAR1=ZNPAR1+NPART1(JJ)
 860    CONTINUE
        ZNPAR=ZNPAR/NRUN
        ZNPAR1=ZNPAR1/NRUN
        ZPARAT=ZNPAR1/ZNPAR
C
      IF(NBEN6 .GE. 1) GOTO 875
      WRITE(NOUT,872) INNP,IN1P,ZNPAR,ZNPAR1,ZPARAT
      WRITE(6,872) INNP,IN1P,ZNPAR,ZNPAR1,ZPARAT
 872  FORMAT(/,' particle numbers'/
     1         ' ----------------'/
     1 '  number of particles in last step:',I6,' (n-proc),',
     1 I6,' (1-proc)'//
     1 '  average number of particles per timestep',/,
     1       1X,'n-proc =',F12.3,4X,'1-proc =',F12.3,4X,
     1          'ratio =',F12.3)
C
      WRITE(NOUT,871)
      WRITE(NOUT,870)(J-1,NPRNUM(J),NPART(J),NPART1(J),J=1,NRUN+1)
      WRITE(NOUT,*)
      WRITE(6,*)
 870  FORMAT(1X,I6,3I9)
 871  FORMAT(///' critical processor data'/
     1          ' -----------------------'/
     2 ' crit.proc is processor with most particles that',
     3 ' determines timing'//,22X,
     2 '  number particles each timestep'/
     1 '   step',4X,'crit.proc',4X,'n-proc',4X,'1-proc')
 875  CONTINUE
C
C
C  ################################################### 3
C
C
C  receive tmanal analysis from nslav (1-proc)
          CALL RECVINT(MSPORT,0,SNTDIF(1,20),20)
          CALL RECVINT(MSPORT,0,TLOOP(20),1)
C  print out timing analysis for n-proc using proc-0 analysis
      CALL TMOUT(0,6)
      CALL TMOUT(0,NOUT)
C  print out timing analysis for 1-proc using proc-nslav analysis
      CALL TMOUT(NSLAV,6)
      CALL TMOUT(NSLAV,NOUT)
C
C  send continuation signal to all slaves
      CALL BRDCINT(MSPORT,IBUFF,1)
C
C  ################################################### 4
C
C  calculate benchmark performance numbers and output
         PRINT *
C        print *,'bent0=',bent0,'bent1=',bent1,'bent3=',bent3,
C    1           'bent4=',bent4
C  temporal performance
           TNP=BENT3-BENT1
           T1PERF=NRUN/T1P
           TNPERF=NRUN/TNP
           ZTMRAT=TNPERF/T1PERF
C  speedup and efficiency
           SPEDUP=T1P/TNP
           EFFPCT=100.0*SPEDUP/NSLAV
C  simulation performance
           ZTRUN=1.0E+12*NRUN*DT
           SIMR1P=ZTRUN/T1P
           SIMRNP=ZTRUN/TNP
           ZSMRAT=SIMRNP/SIMR1P
C benchmark performance
           IALPHA=2**(NBEN3-1)
           INEND=628*IALPHA**1.172
           ZFLOPM=46*LMAX*MMAX
           ZFLOPP=58*INEND
           ZFLOPT=ZFLOPM+ZFLOPP
           ZMFPS1=1.0E-06*ZFLOPT*NRUN/T1P
           ZMFPSN=1.0E-06*ZFLOPT*NRUN/TNP
           ZBNRAT=ZMFPSN/ZMFPS1
C  errors
           PCTNP=100.0*FLOAT(INNP-IN1P)/FLOAT(INNP)
           PCTBAV=100.0*(BAVNP-BAV1P)/BAVNP
         PRINT *
 1100    FORMAT(10X,'basic run parameters'/
     1          10X,'===================='//
     3   ' case number,          nben3 =',I4/
     3   ' problem size factor,  alpha =',I4/
     4   '     mesh points in z,  lmax =',I4/
     5   '     mesh points in r,  mmax =',I4/
     7   ' number of processors,   p   =',I4/
     8   ' number of timesteps,   nrun =',I4//
     1          10X,'===================='/)
         WRITE(NOUT,1002) IN1P,INNP,BAV1P,BAVNP,PCTNP,PCTBAV
 1002    FORMAT(' error check'/
     1          ' -----------'/
     2   ' because the simulation uses random numbers, the multi-',
     3   'processor'/
     4   ' calculation cannot be expected to give identical results',
     5   ' to the'/
     6   ' uni-processor calculation. however, the percentage',
     7   ' difference'/
     8   ' in particle number, np, and average b-field, bav, ',
     9   ' in the last'/
     1   ' timestep, should not exceed a few percent:'//
     6   '     number particles,   np  =',I9,' (1-proc)',I9,' (p-proc)'/
     6   '     average b-field,   bav  =',1PE9.2,' (1-proc)'
     6                                   ,1PE9.2,' (p-proc)'//
     2   '     % difference np =',0PF8.3,'      % difference bav =',
     3   0PF8.3/'calculations are accepted if differences < 10%'//)
C
         WRITE(6,1022) IN1P,INNP,BAV1P,BAVNP,PCTNP,PCTBAV
 1022    FORMAT(' error check'/
     1          ' -----------'/
     6   '     number particles,   np  =',I9,' (1-proc)',I9,' (p-proc)'/
     6   '     average b-field,   bav  =',1PE9.2,' (1-proc)'
     6                                   ,1PE9.2,' (p-proc)'//
     2   '     % difference np =',0PF8.3,'      % difference bav =',
     3   0PF8.3/'calculations are accepted if differences < 10%'//)
C
         ZTOL=10.0
         IF(ABS(PCTNP) .LE. ZTOL .AND. ABS(PCTBAV) .LE. ZTOL) THEN
         WRITE(6,1010) 
         WRITE(NOUT,1010) 
 1010    FORMAT(' $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'//
     1          '        benchmark calculation acceptable'/
     2          '            you may use the results',//
     3          ' $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'//)
         ELSE
         WRITE(6,1011) 
         WRITE(NOUT,1011) 
 1011    FORMAT(' $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'//
     1          '     benchmark calculation seriously in error'/
     2          '               no results reported',//
     3          ' $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'//)
         RETURN
         ENDIF
C
         WRITE(NOUT,1003) T1P,TNP,T1PERF,TNPERF,ZTMRAT
 1003    FORMAT( ' temporal performance'/
     1           ' --------------------'/
     2 ' temporal performance is the inverse of the execution time,'/
     3 ' here expressed in units of timestep per second (tstep/s).'/
     4 ' this is the fundamental metric of performance, because it'/ 
     5 ' is in absolute units and one can guarantee that the code with'/
     5 ' the highest temporal performance executes in the least time:'//
     2   '  uniprocessor   time,          t1p =',F12.4,' s'/
     4   '  multiprocessor time,          tnp =',F12.4,' s'/
     6   '                                -------------'/
     5   '  temporal perf. (1-proc) = t1p**-1 =',F12.4,' tstep/s'/
     5   '  temporal perf. (n-proc) = tnp**-1 =',F12.4,' tstep/s'/
     6   '                              ratio =',F12.4,//)
         WRITE(NOUT,1030) T1P,TNP,SPEDUP,EFFPCT
 1030    FORMAT( ' speedup and efficiency'/
     1           ' ----------------------'/
     2   ' speedup, sp, has the traditional definition of the ratio of'/
     3   ' 1-proc to n-proc. execution time, and efficiency, ep, is'/
     4   ' speedup per processor. because speedup is a relative'/ 
     5   ' measure, the program with the highest speedup may not'/ 
     6   ' execute in the least time! be warned.'//
     2   '   uniprocessor   time,  t1p =',F12.3,' s'/
     4   '   multiprocessor time,  tnp =',F12.3,' s'/
     6   '                             -------------'/
     7   '    speedup,    sp = t1p/tnp =',F12.3/
     8   '    efficiency, ep = sp/p    =',F12.3,'%'/
     9   '                             -------------'/)
         ZDT=1.0E+12*DT
         WRITE(NOUT,1001) ZDT,ZTRUN,TIMRUN,SIMR1P,SIMRNP,ZSMRAT
 1001    FORMAT(' simulation performance'/
     1          ' ----------------------'/
     2   ' this metric measures the amount of simulated time computed'/
     3   ' in one real wall-clock second. it is the most meaningful'/
     4 ' metric for a simulation because it is what the user actually'/
     5   ' wishes to maximise. for this benchmark, the units are '/
     6   ' simulated picosecond per second (sim-ps/s). in this metric'/
     7   ' larger problems with more mesh points run slower (which in'/
     8   ' fact they do), although they generate more speedup and'/
     9   ' mflop/s! this metric also includes the fact problems with'/
     1   ' a smaller space step often must use a smaller timestep,'/
     2   ' and therefore take more timesteps to cover the same amount'/
     3   ' of simulated time '//
     2   '    timestep,   dt =',F12.3,' ps'/
     3   '    simulated time  =  nrun*dt =',F10.3,' sim-ps;',
     4   '  requested =',F8.3,' ns'/
     5   '    simulation performance(1-proc) =',F12.3,' sim-ps/s'/
     6   '    simulation performance(n-proc) =',F12.3,' sim-ps/s'/
     6   '                             ratio =',F12.3,//)
         WRITE(NOUT,1013)
         WRITE(NOUT,1012) ZFLOPM,ZFLOPP,ZFLOPT,ZMFPS1,ZMFPSN,ZBNRAT
 1013    FORMAT(' benchmark performance'/
     1          ' ---------------------'/
     2 ' this metric is calculated from the nominal number of'/
     3 ' floating-point operations needed to perform the benchmark'/
     4 ' on a single processor.  for the one-nanosecond benchmark'/
     5 ' setup here, the average number of floating-point operations'/
     6 ' per timestep is defined to be:'/
     5 '         f_b(alpha) = 46*75*33*alpha + 58*628*alpha**1.172'/
     6 ' where the size factor alpha=1,2,4,8 for cases nben3=1,2,3,4.'/
     7 ' the first term above is the work to update the fields on the'/
     7 ' mesh, and the second term is the work to move the particles.'/
     7 ' then the benchmark performance is'/
     8 '         r_b(alpha,p) = f_b(alpha)/tp(alpha,p)'/
     9 ' performance calculated in this way has the units '/
     1 ' mflop/s(lpm1). different parallel implementations may,'/
     2 ' in fact, perform more or fewer operations than the above, but'/
     3 ' they are only credited with the number given by the formula.'/
     4 ' because f_b is fixed for all codes, we can quarantee that the'/
     5 ' code with the highest benchmark performance executes in the'/
     6 ' least time.'/)
 1012    FORMAT(
     2   '    floating-point operations per timestep:'/
     3   '       mesh =',E10.3,'  particles =',E10.3,
     3   '  total =',E10.3,' flop',/
     4   '    floating-point operations per second (all steps):'/
     5 '       benchmark performance(1-proc) =',F12.3,' mflop/s(lpm1)'/
     6 '       benchmark performance(n-proc) =',F12.3,' mflop/s(lpm1)'/
     6 '                               ratio =',F12.3,//)
C
         WRITE(6,1040)
         WRITE(6,1041) TNP,T1P,NRUN,TNPERF,T1PERF,SPEDUP,EFFPCT,
     1                 SIMRNP,SIMR1P,ZMFPSN,ZMFPS1
         WRITE(NOUT,1040)
         WRITE(NOUT,1041) TNP,T1P,NRUN,TNPERF,T1PERF,SPEDUP,EFFPCT,
     1                 SIMRNP,SIMR1P,ZMFPSN,ZMFPS1
 1040    FORMAT(
     1 ' **********************************************************'/
     1 ' *                  performance summary                   *'/
     2 ' **********************************************************'/
     3 ' *             particle-mesh (pic) simulation             *'/
     3 ' *       of one nanosecond of electronic device time      *'/
     3 ' *         parallelised by 1d domain decomposition        *'/
     3 ' *--------------------------------------------------------*'/
     3 ' *                                                        *')
 1041    FORMAT(
     3 ' *                     n-proc      1-proc                 *'/
     3 ' *   elap. time :',2F12.3,              ' s               *'/
     3 ' *   numb. step :',I6,                  ' tstep           *'/
     3 ' *     temporal :',2F12.3,              ' tstep/s         *'/
     4 ' *     speedup  :',F12.3,   '                             *'/
     5 ' *   efficiency :',F12.3,   ' %                           *'/
     6 ' *   simulation :',2F12.3,              ' sim-ps/s        *'/
     7 ' *   benchmark  :',2F12.3,              ' mflop/s(lpm1)   *'/
     3 ' *                                                        *'/
     3 ' *--------------------------------------------------------*'/
     3 ' *   tstep/s    -  timestep per second                    *'/
     3 ' *   sim-ps/s   -  simulated picosec per second           *'/
     3 ' *   mflop/s    -  10**6 floating-point op. per second    *'/
     2 ' **********************************************************'//)
C
         IF(NBENSW .GE. 1 .AND. NBENSW .LE. 4) THEN
C  plot all particles at end of benchmark run on split screen
           NBEN1=1
           CALL SCAPLT(2)
         ENDIF
C
         CALL BARRI(NSLAV)
         CALL CLEAR_SCREEN(0)
         CALL BARRI(NSLAV)
C  set to full screen
         NBEN1=0
         CALL SNPFLD(1) 
         CALL SNPFLD(2) 
C ...............................................................
         ELSEIF(K .EQ. 4) THEN
C  tidy up at end of program, this is last action of program
C  close files for 1-processor reslts
           CLOSE(11)
           CLOSE(12)
           CLOSE(13)
           CLOSE(14)
C  make sure all processors leave together
           PRINT *,' ******************************'
           PRINT *,' Output for all 4 cases are sent'
           PRINT *,' to different files,'
           PRINT *,' This Benchmark Output File is:'
           PRINT *,FILOUT
           PRINT *,' copy it now from another window'
           PRINT *,' if you want it somewhere else'
           PRINT *
           PRINT *,'    cp ',FILOUT,'  yourfilename'
           PRINT *
C          print *,' Control c to finish'
           PRINT *,' ******************************'
C          read(*,'(a80)') chars
           ISTOP=0
           IF(CHARS .EQ. QUIT) ISTOP=1
           IBUFF(1)=ISTOP
           CALL BRDCINT(MSPORT,IBUFF,1)
           IF(ISTOP .EQ. 1) STOP
           CALL BARRI(NSLAV)
         ENDIF
C
         ELSE
C  program for slaves
C
         IF(K .EQ. 0) THEN
C  initialise benchmark
C  set defaults 
           NBENSW=0
           NBENOR=1
           NBENCL=1 
           NBEN1=1 
           NBEN2=0
           NBEN3=1
C  read in values from master
           CALL RECVINT(MSPORT,0,IBUFF,7) 
           NBENSW=IBUFF(1)
           NBENOR=IBUFF(2)
           NBEN1=IBUFF(3)
           NBEN2=IBUFF(4)
           NBEN3=IBUFF(5)
           NBEN4=IBUFF(6)
           NBEN6=IBUFF(7)
C ...........................................................
         ELSEIF(K .EQ. 1) THEN
           IF(NBEN4 .EQ. 0 .AND. MYNUM .EQ. NSLAV) NRUN=1
C  start of calculational benchmark
C  reset and start racetrack
           IF(NBENSW .LT. 1 .OR. NBENSW .GT. 4) CALL MARKST(1)
C  synchronise start of multi and uni-processor runs
           CALL BARRI(NSLAV)
           CALL SECOWA(BENT1,ZTARR)
C ............................................................
         ELSEIF(K .EQ. 2) THEN
C  called at end of stepon each timestep
C  remember number of particles in this processor this step
           ISTEP=NSTEP+1
           IF(ISTEP .GT. 1000) ISTEP=1000
           IF(ISTEP .LT. 2) ISTEP=2
           NPART(1)=MYNUM
           NPART(ISTEP)=NP
C  no racetrack when benchmarking
           IF(NBENSW .GE. 1 .AND. NBENSW .LE. 4) RETURN
C  update racetrack
           CALL MARKST(2)
C ............................................................
         ELSEIF(K .EQ. 3) THEN
C  end of calculational benchmark
C  finish uniprocessor race
           CALL SECOWA(BENT4,ZTARR)
           CALL MARKST(3)
C  number of particles, np, time and bav sent as barrier signal
      BAVNP=0.0
      J1=ISLMIN(MYNUM+1)
      J2=ISLMAX(MYNUM+1)
      DO 8101 J=J1,J2
       BAVNP=BAVNP+BAV(J,12)
 8101 CONTINUE
      IBUFF(1)=NP
      BUFF(2)=BENT4-BENT1
      BUFF(3)=BAVNP
      IF(NBEN4 .EQ. 0 .AND. MYNUM .EQ. NSLAV) GOTO 830
      CALL SENDINT(MSPORT,0,IBUFF,3)
 830  CONTINUE
C
C  wait for continuation signal
      CALL RECVINT(MSPORT,0,IBUFF,1)
C
C  ################################################# 1
C
C  send number particles in each step to master
      CALL SENDINT(MSPORT,0,NPART,NRUN+1)
C
C  master (proc0) works out which is critical processor
C  and prints critical number of particles
C
C  wait for continuation signal
      CALL RECVINT(MSPORT,0,IBUFF,1)
C
C  ################################################# 2
C
C  master prints average number of critical particles/step
C
C  ################################################# 3
C
C  send tmanal timing analysis of 1-proc (#nslav) to master
      IF(MYNUM .EQ. NSLAV) THEN
        CALL SENDINT(MSPORT,0,SNTDIF(1,20),20)
        CALL SENDINT(MSPORT,0,TLOOP(20),1)
      ENDIF
C
C  master prints out timing analysis
C
C  wait for continuation signal
      CALL RECVINT(MSPORT,0,IBUFF,1)
C
C  ################################################# 4
C
         IF(NBENSW .GE. 1 .AND. NBENSW .LE. 4) THEN
C  plot all particles at end of benchmark run on split screen
           NBEN1=1
           CALL SCAPLT(2)
         ENDIF
C  wait for show-field signal
           CALL BARRI(NSLAV)  
C  wait for clear-screen to finish
           CALL BARRI(NSLAV)  
C  set to full screen
           NBEN1=0
           IF(.FALSE.) GOTO 205
           CALL SNPFLD(1) 
           CALL SNPFLD(2) 
 205       CONTINUE
C ................................................................
         ELSEIF(K .EQ. 4) THEN
C  tidy up at end of program, this is last action of program
C  make sure all processors leave sn together
           CALL RECVINT(MSPORT,0,IBUFF,1)
           ISTOP=IBUFF(1)
           IF(ISTOP .EQ. 1) STOP
           CALL BARRI(NSLAV)
         ENDIF
C
C  end program for slaves
         ENDIF
C
         END
C
         FUNCTION ADJLFT(STRING)
C  removes leading blanks from string (left-adjusts it)
C
C  string - 8-character input, not modified
C  adjlft - 8-character result: the characters of string from the
C           first non-blank onwards, padded on the right with
C           blanks.  blanks enclosed between non-blank characters
C           are kept.  an all-blank input gives an all-blank
C           result.
         CHARACTER*8 ADJLFT,STRING
         CHARACTER ST1*8,BLANK*1
         PARAMETER (BLANK=' ')
C
C  clear the work string on every call.  a declaration-time
C  initialiser is applied only once, so text left by an earlier
C  call would otherwise show through behind a shorter result.
         ST1=BLANK
C  isw=0 means leading blank, isw=1 means enclosed blank
C  (isw becomes 1 as soon as the first non-blank is copied)
         ISW=0
C  i is the next free position in the output string
         I=1
         DO 100 J=1,8
           IF(STRING(J:J) .NE. BLANK .OR. ISW .EQ. 1) THEN
             ST1(I:I)=STRING(J:J)
C  advance the output position; the do variable j must not be
C  redefined inside its own loop
             I=I+1
             ISW=1
           ENDIF
 100     CONTINUE
         ADJLFT=ST1

         END

