Fortran DVM - contents Part 1 (1-4) Part 2 (5-6) Part 3 (7-12) Part 4 (Annexes)
created: april 2001 - last edited 03.05.01 -

Annex 1. Syntax rules

2.2. Syntax of FDVM directives

directive-line is      CDVM$ dvm-directive
  or    *DVM$ dvm-directive
   
dvm-directive is      specification-directive
  or     executable-directive
   
specification-directive is      processors-directive
  or     align-directive
  or     distribute-directive
  or     template-directive
  or     pointer-directive
  or     shadow-directive
  or     dynamic-directive
  or     inherit-directive
  or     remote-group-directive
  or     reduction-group-directive
  or     task-directive
  or     heap-directive
  or     asyncid-directive
   
executable-directive is      realign-directive
  or     redistribute-directive
  or     parallel-directive
  or     remote-access-directive
  or     shadow-group-directive
  or     shadow-start-directive
  or     shadow-wait-directive
  or     reduction-start-directive
  or     reduction-wait-directive
  or     new-value-directive
  or     prefetch-directive
  or     reset-directive
  or     parallel-task-loop-directive
  or     map-directive
  or     task-region-directive
  or     end-task-region-directive
  or     on-directive
  or     end-on-directive
  or     f90-directive
  or     asynchronous-directive
  or     end-asynchronous-directive
  or     asyncwait-directive

Constraints:

Definition. A specification expression is an expression where each primary is:

  1. A constant,
  2. A variable that is a dummy argument,
  3. A variable that is in a common block,
  4. An intrinsic function reference where each argument is a specification expression,
  5. A specification expression enclosed in parentheses.

3. Virtual processor arrangements. PROCESSORS directive

processors-directive is      PROCESSORS processors-decl-list
   
processors-decl is      processors-name ( explicit-shape-spec-list )
   
explicit-shape-spec is      [ lower-bound : ] upper-bound
   
lower-bound is      int-expr
   
upper-bound is      int-expr

4.1. DISTRIBUTE and REDISTRIBUTE directives

distribute-directive is      dist-action distributee dist-directive-stuff
  or     dist-action [ dist-directive-stuff ] :: distributee-list
   
dist-action is      DISTRIBUTE
  or     REDISTRIBUTE
   
dist-directive-stuff is      dist-format-list [ dist-onto-clause ]
   
distributee is      array-name
  or     pointer-name
  or     template-name
   
dist-format is      BLOCK
  or     GEN_BLOCK ( block-size-array )
  or     WGT_BLOCK ( block-weight-array , nblock )
  or     *
   
dist-onto-clause is      ONTO dist-target
   
dist-target is      processors-name [(processors-section-subscript-list )]
  or     task-name ( task-index )
   
processors-section-subscript is      [ subscript ] : [ subscript ]
   
subscript is      int-expr
   
nblock is      int-expr
   
block-size-array is      array-name
   
block-weight-array is      array-name

Constraints:

4.2.2. Dynamic arrays in FDVM model. POINTER directive

pointer-directive is      type , POINTER ( dimension-list ) :: pointer-name-list
   
dimension is      :
   
pointer-name is      scalar-int-variable-name
  or     int-array-name
   
heap-directive is      HEAP array-name-list

4.3.1. ALIGN and REALIGN directives

align-directive is      align-action alignee align-directive-stuff
  or     align-action [ align-directive-stuff ] :: alignee-list
align-action is      ALIGN
  or     REALIGN
   
align-directive-stuff is      ( align-source-list ) align-with-clause
   
alignee is      array-name
  or     pointer-name
   
align-source is      *
  or     align-dummy
   
align-dummy is      scalar-int-variable
   
align-with-clause is      WITH align-spec
   
align-spec is      align-target ( align-subscript-list )
   
align-target is      array-name
  or      template-name
  or      pointer-name
   
align-subscript is       int-expr
  or      align-dummy-use
  or      *
   
align-dummy-use is      [ primary-expr * ] align-dummy
         [ add-op primary-expr ]
   
primary-expr is      int-constant
  or     int-variable
  or     ( int-expr )
   
add-op is      +
  or     -

Constraints:

4.3.2. TEMPLATE directive

template-directive is      TEMPLATE template-decl-list
   
template-decl is      template-name [ ( explicit-shape-spec-list ) ]

4.4. DYNAMIC and NEW_VALUE directives

dynamic-directive is      DYNAMIC alignee-or-distributee-list
   
alignee-or-distributee is      alignee
  or    distributee
   
new-value-directive is      NEW_VALUE

5.1.2. Distribution of loop iterations. PARALLEL directive

parallel-directive is      PARALLEL ( do-variable-list )
                ON iteration-align-spec
        [ , new-clause ] [ , reduction-clause]
        [ , shadow-renew-clause] [ , shadow-compute-clause]
        [ , remote-access-clause ] [ , across-clause ]
   
iteration-align-spec is      align-target ( iteration-align-subscript-list )
   
iteration-align-subscript is      int-expr
  or    do-variable-use
  or    *
   
do-variable-use is      [ primary-expr * ] do-variable
         [ add-op primary-expr]

5.1.3. Private variables. NEW clause

new-clause is      NEW ( new-variable-list )
   
new-variable is      array-name
  or    scalar-variable-name

Constraint: The distributed arrays cannot be used as NEW-variables.

5.1.4. Reduction operations and variables. REDUCTION clause

reduction-clause is      REDUCTION
                ( [ reduction-group-name : ] reduction-op-list )
   
reduction-op is      reduction-op-name ( reduction-variable )
  or    reduction-loc-name ( reduction-variable ,
                                           location-variable, int-expr)
   
reduction-variable is      array-name
  or    scalar-variable-name
   
location-variable is      array-name
   
reduction-op-name is      SUM
  or    PRODUCT
  or    MAX
  or    MIN
  or    AND
  or    OR
  or    EQV
  or    NEQV
   
reduction-loc-name is      MAXLOC
  or    MINLOC

Constraints:

6.2.1. Specification of array with shadow edges

shadow-directive is      SHADOW dist-array ( shadow-edge-list )
  or     SHADOW ( shadow-edge-list ) :: dist-array-list
   
dist-array is      array-name
  or     pointer-name
   
shadow-edge is      width
  or     low-width : high-width
   
width is      int-expr
low-width is      int-expr
high-width is      int-expr

Constraints:

6.2.2. Synchronous specification of independent references of SHADOW type for single loop

shadow-renew-clause is      SHADOW_RENEW ( renewee-list )
  or     shadow-start-directive
  or     shadow-wait-directive
   
renewee is      dist-array-name [ ( shadow-edge-list )] [ (CORNER) ]

Constraints:

6.2.3. Computing values in shadow edges. SHADOW_COMPUTE clause

shadow-compute-clause is      SHADOW_COMPUTE

6.2.4. ACROSS specification of dependent references of SHADOW type for single loop

across-clause is      ACROSS ( dependent-array-list )
   
dependent-array is      dist-array-name ( dependence-list ) [(section-spec-list)]
   
dependence is      flow-dep-length : anti-dep-length
   
flow-dep-length is      int-constant
   
anti-dep-length is      int-constant
   
section-spec is      SECTION (section-subscript-list)

Constraint:

6.2.5. Asynchronous specification of independent references of SHADOW type

shadow-group-directive is      SHADOW_GROUP shadow-group-name
                                      ( renewee-list )
   
shadow-start-directive is      SHADOW_START shadow-group-name
   
shadow-wait-directive is      SHADOW_WAIT shadow-group-name

Constraints:

6.3.1. REMOTE_ACCESS directive

remote-access-directive is      REMOTE_ACCESS
         ( [ remote-group-name : ] regular-reference-list)
   
regular-reference is      dist-array-name [( regular-subscript-list )]
   
regular-subscript is      int-expr
  or     do-variable-use
  or     :
   
remote-access-clause is     remote-access-directive

6.3.3. Asynchronous specification of REMOTE type references

remote-group-directive is      REMOTE_GROUP remote-group-name-list

Constraint:

prefetch-directive is      PREFETCH remote-group-name
   
reset-directive is      RESET remote-group-name

Constraints:

6.3.4.2.1. ASYNCID directive

asyncid-directive is      ASYNCID async-name-list

6.3.4.2.2. F90 directive

f90-directive is      F90 copy-statement
   
copy-statement is      array-section = array-section
   
array-section is      array-name [ ( section-subscript-list ) ]
   
section-subscript is      subscript
  or     subscript-triplet
   
subscript-triplet is      [ subscript ] : [ subscript ] [ : stride]
   
subscript is      int-expr
   
stride is      int-expr

6.3.4.2.3. ASYNCHRONOUS and END ASYNCHRONOUS directives

asynchronous-construct is      asynchronous-directive
                       f90-directive
                     [ f90-directive ]
                     copy-loop
                     [ copy-loop ]
           end-asynchronous-directive
   
asynchronous-directive is      ASYNCHRONOUS async-name
   
end-asynchronous-directive is      END ASYNCHRONOUS

6.3.4.2.4. ASYNCWAIT directive

asyncwait-directive is      ASYNCWAIT async-name

6.4.2. Asynchronous specification of REDUCTION type references

reduction-group-directive is      REDUCTION_GROUP reduction-group-name-list
   
reduction-start-directive is      REDUCTION_START reduction-group-name
   
reduction-wait-directive is      REDUCTION_WAIT reduction-group-name

Constraints.

7.1. Declaration of task array

task-directive is      TASK task-list
   
task is      task-name ( max-task )
   

7.2. Mapping tasks on processors. MAP directive

map-directive is      MAP task-name ( task-index )
      ONTO processors-name(processors-section-subscript-list)

7.4. Distribution of computations. TASK_REGION directive

block-task-region is      task-region-directive
                   on-block
                 [ on-block ]...
           end-task-region-directive
   
task-region-directive is      TASK_REGION task-name [ , reduction-clause ]
   
end-task-region-directive is      END TASK_REGION
   
on-block is      on-directive
                  block
           end-on-directive
   
on-directive is      ON task-name ( task-index ) [ , new-clause ]
   
end-on-directive is      END ON
   
loop-task-region is      task-region-directive
                   parallel-task-loop
           end-task-region-directive
   
parallel-task-loop is      parallel-task-loop-directive
                        do-loop
   
parallel-task-loop-directive is      PARALLEL ( do-variable )
    ON task-name ( do-variable ) [ , new-clause ]

9. Procedures

inherit-directive is      INHERIT dummy-array-name-list

 

Annex2. Code examples

Seven small scientific programs are presented to illustrate Fortran DVM language features. They are intended for solving a system of linear equations:

A x = b

where:
A - matrix of coefficients,
b - vector of free members,
x - vector of unknowns.

The following basic methods are used for solving this system.

Direct methods. The well-known Gaussian Elimination method is the most commonly used algorithm of this class. The main idea of this algorithm is to reduce the matrix A to upper triangular form and then to use backward substitution to diagonalize the matrix.

Explicit iteration methods. Jacobi Relaxation is the most known algorithm of this class. The algorithm perform the following computation iteratively

xi,jnew = (xi-1,jold + xi,j-1old + xi+1,jold + xi,j+1old ) / 4

Implicit iteration methods. Successive Over Relaxation (SOR) refers to this class. The algorithm performs the following calculation iteratively

xi,jnew = ( w / 4 ) * (xi-1,jnew + xi,j-1new + xi+1,jold + xi,j+1old ) + (1-w) * xi,jold

By using “red-black” coloring of variables each step of SOR consists of two half Jacobi steps. One processes “red”variables and the other processes “black” variables. Coloring of variables allows to overlap calculation and communication.

Example 1. Gauss elimination algorithm

	PROGRAM GAUSS
C	Solving linear equation system  Ax = b
	PARAMETER  ( N = 100 )
	REAL  A( N, N+1 ), X( N )
C	A : Coefficient matrix with dimension (N,N+1)
C	Right hand side vector of linear equations is stored 
C	into last column (N+1)-th, of matrix A
C	X : Unknown vector
C	N : Number of linear equations
*DVM$	DISTRIBUTE A (BLOCK,*)  
*DVM$	ALIGN X(I) WITH A(I,N+1)
C
C	Initialization
C
*DVM$	PARALLEL ( I ) ON  A(I,*)
	DO  100  I = 1, N
	DO  100  J = 1, N+1
	  IF  (( I .EQ. J )  THEN
	      A(I,J) = 2.0
	  ELSE
	    IF ( J .EQ. N+1)  THEN
	      A(I,J) = 0.0
	    ENDIF
	  ENDIF
100	CONTINUE
C
C	Elimination
C
	DO  1  I = 1, N
C	the I-th row of array A will be buffered before
C	execution of I-th iteration, and references A(I,K), A(I,I) 
C	will be replaced with corresponding reference to buffer 
*DVM$	PARALLEL ( J ) ON  A(J,*), REMOTE_ACCESS ( A(I,:) )
	   DO  5  J = I+1, N
	   DO  5  K = I+1, N+1
	      A(J,K) = A(J,K) - A(J,I) * A(I,K) / A(I,I)
5	   CONTINUE
1	CONTINUE
C	First calculate X(N)
	X(N) = A(N,N+1) / A(N,N)
C
C	Solve X(N-1), X(N-2), ...,X(1) by backward substitution
C
	DO  6  J = N-1, 1, -1
C	the (J+1)-th elements of array X will be buffered before
C	execution of J-th iteration, and reference X(J+1) 
C	will be replaced with reference to temporal variable
*DVM$	PARALLEL  ( I )  ON  A(I,*),  REMOTE_ACCESS ( X(J+1) )
	   DO  7  I = 1, J
	     A(I,N+1) = A(I,N+1) - A(I,J+1) * X(J+1)
7	   CONTINUE
	   X(J) = A(J,N+1) / A(J,J)
6	CONTINUE
	PRINT *,  X
	END

Example 2. Jacobi algorithm

	PROGRAM   JACOB
	PARAMETER  (K=8,  ITMAX=20)
	REAL  A(K,K), B(K,K), EPS, MAXEPS
CDVM$	DISTRIBUTE  A  (BLOCK, BLOCK) 
CDVM$	ALIGN  B(I,J)  WITH  A(I,J) 
C	 arrays A and B  with block distribution
	PRINT *,  '**********  TEST_JACOBI   **********'
	MAXEPS = 0.5E - 7
CDVM$	PARALLEL  (J,I)  ON  A(I,J)
C	nest of two parallel loops, iteration (i,j) will be executed on 
C	processor, which is owner of element A(i,j) 
	DO  1  J = 1, K
	DO  1  I = 1, K
	   A(I,J) = 0.
	   IF(I.EQ.1 .OR. J.EQ.1 .OR. I.EQ.K .OR. J.EQ.K) THEN
		B(I,J) = 0.
	   ELSE
		B(I,J)  = 1. + I + J 
	   ENDIF
1	CONTINUE
	DO  2  IT = 1, ITMAX
	EPS = 0.
CDVM$	PARALLEL  (J,I)  ON  A(I,J),  REDUCTION ( MAX( EPS ))
C	variable EPS is used for calculation of maximum value
	DO  21  J = 2, K-1
	DO  21  I = 2, K-1
		EPS = MAX ( EPS, ABS( B(I,J) - A(I,J)))
		A(I,J) = B(I,J)
21	CONTINUE
CDVM$	PARALLEL  (J,I)  ON  B(I,J),  SHADOW_RENEW  (A)
C	copying shadow elements of array A from 
C	neighboring processors before loop execution
	DO  22  J = 2, K-1
	DO  22  I = 2, K-1
		B(I,J) = (A(I-1,J) + A(I,J-1) + A(I+1,J) + A(I,J+1)) / 4
22	CONTINUE
	PRINT *,  'IT = ', IT,  '   EPS = ', EPS
	IF ( EPS . LT . MAXEPS )  GO TO  3
2	CONTINUE
3	OPEN (3,  FILE='JACOBI.DAT',  FORM='FORMATTED')
	WRITE (3,*)  B
	CLOSE (3)
	END

Example 3. Jacobi algorithm (asynchronous version)

	PROGRAM   JACOB1
	PARAMETER   (K=8,  ITMAX=20)
	REAL   A(K,K), B(K,K), EPS, MAXEPS
CDVM$	DISTRIBUTE  A  (BLOCK, BLOCK) 
CDVM$	ALIGN  B(I,J)  WITH  A(I,J) 
C	arrays A and B  with block distribution
CDVM$	REDUCTION_GROUP  REPS 
	PRINT *,  '**********  TEST_JACOBI_ASYNCHR   **********'
CDVM$	SHADOW_GROUP  SA (A)
C	creation of shadow edge group
	MAXEPS = 0.5E - 7
CDVM$	PARALLEL  (J,I)  ON  A(I,J)
C	nest of two parallel loops, iteration (i,j) will be executed on 
C	processor, which is owner of element A(i,j) 
	DO  1  J = 1, K
	DO  1  I = 1, K
	   A(I,J) = 0.
	   IF(I.EQ.1 .OR. J.EQ.1 .OR. I.EQ.K .OR. J.EQ.K) THEN
		B(I,J) = 0.
	   ELSE
		B(I,J) = 1. + I + J 
	ENDIF
1	CONTINUE
	DO  2  IT = 1, ITMAX
	EPS = 0.
C	group of reduction operations is created 
C	and  initial values of reduction variables are stored
CDVM$	PARALLEL  (J,I)  ON  A(I,J),  SHADOW_START  SA,
CDVM$*	REDUCTION_GROUP  ( REPS : MAX( EPS ))
C	the loops iteration order is changed: 
C	at first boundary elements of A are calculated and sent,
C	then internal elements of array A are calculated 
	DO  21  J = 2, K-1
	DO  21  I = 2, K-1
		EPS = MAX ( EPS, ABS( B(I,J) - A(I,J)))
		A(I,J) = B(I,J)
21	CONTINUE
CDVM$	REDUCTION_START   REPS
C	start of reduction operation to accumulate the partial results
C	calculated in  copies of variable EPS on every processor
CDVM$	PARALLEL  (J,I)  ON  B(I,J),  SHADOW_WAIT  SA
C	the loops iteration order is changed: 
C	at first internal elements of B are calculated,
C	then shadow edge elements of array A from neighboring processors
C	are received, then boundary elements of array B are calculated
	DO  22  J = 2, K-1
	DO  22  I = 2, K-1
		B(I,J) = (A(I-1,J) + A(I,J-1) + A(I+1,J) + A(I,J+1)) / 4
22	CONTINUE
CDVM$	REDUCTION_WAIT   REPS
C	waiting completion of reduction operation
	PRINT *,  'IT = ', IT,  '   EPS = ', EPS
	IF ( EPS . LT . MAXEPS )   GO TO  3
2	CONTINUE
3	OPEN (3,  FILE='JACOBI.DAT',  FORM='FORMATTED')
	WRITE (3,*)  B
	CLOSE (3)
	END

Example 4. Successive over-relaxation

	PROGRAM  SOR
	PARAMETER  ( N = 100 )
	REAL   A( N, N ),  EPS,  MAXEPS, W
	INTEGER   ITMAX
*DVM$	DISTRIBUTE  A (BLOCK,BLOCK)
	ITMAX = 20
	MAXEPS = 0.5E - 5
	W = 0.5
*DVM$	PARALLEL  (I,J)  ON  A(I,J)
	DO  1  I = 1, N
	DO  1  J = 1, N
	   IF ( I .EQ.J)   THEN
		A(I,J) = N + 2
	   ELSE
		A(I,J) = -1.0
	   ENDIF 
1	CONTINUE
	DO  2  IT = 1, ITMAX
	EPS = 0.
*DVM$	PARALLEL  (I,J)  ON  A(I,J),  NEW (S),  
*DVM$*	REDUCTION ( MAX( EPS )),  ACROSS  (A(1:1,1:1))
C	S variable – private variable
C	(its usage is localized in the range of one iteration)
C	EPS variable is used for maximum calculation

	DO  21  I = 2, N-1
	DO  21  J = 2, N-1
		S = A(I,J)
		A(I,J) = (W / 4) * (A(I-1,J) + A(I+1,J) + A(I,J-1) +
     *		A(I,J+1)) + ( 1-W ) * A(I,J)
		EPS = MAX ( EPS,  ABS( S - A(I,J)))
21	CONTINUE
	PRINT *,  'IT = ',  IT, '   EPS = ',  EPS
	IF  (EPS  .LT.  MAXEPS )   GO TO  4
2	CONTINUE
4	PRINT *, A
	END

Example 5. Red-black successive over-relaxation

	PROGRAM  REDBLACK
	PARAMETER  ( N = 100 )
	REAL   A( N, N ),  EPS,  MAXEPS, W
	INTEGER   ITMAX
*DVM$	DISTRIBUTE  A (BLOCK,BLOCK)  
	ITMAX = 20
	MAXEPS = 0.5E - 5
	W = 0.5
*DVM$	PARALLEL  (I,J)  ON  A(I,J)
	DO  1  I = 1, N
	DO  1  J = 1, N
	   IF ( I .EQ.J)   THEN
		A(I,J) = N + 2
	   ELSE
		A(I,J) = -1.0
	   ENDIF 
1	CONTINUE
	DO  2  IT = 1, ITMAX
	EPS = 0.
C	loop for red and black variables 
	DO  3  IRB = 1,2
*DVM$	PARALLEL  (I,J)  ON  A(I,J),  NEW (S),  
*DVM$*	REDUCTION ( MAX( EPS )),  SHADOW_RENEW  (A)
C	variable S - private variable in loop iterations
C	variable EPS is used for calculation of maximum value 

C	Exception : iteration space is not rectangular

	DO  21  I = 2, N-1
	DO  21  J = 2 + MOD( I+ IRB, 2 ), N-1, 2
		S = A(I,J)
		A(I,J) = (W / 4) * (A(I-1,J) + A(I+1,J) + A(I,J-1) +
     *		A(I,J+1)) + ( 1-W ) * A(I,J)
		EPS = MAX ( EPS,  ABS( S - A(I,J)))
21	CONTINUE
3	CONTINUE
	PRINT *,  'IT = ',  IT, '   EPS = ',  EPS
	IF  (EPS  .LT.  MAXEPS )    GO TO  4
2	CONTINUE
4	PRINT *, A
	END

Example 6. Static tasks (parallel sections)

	PROGRAM    TASKS
C	rectangular grid is subdivided on two blocks
C   K
C N1 A1, B1
C N2 A2, B2
C	
	PARAMETER    (K=100,  N1 = 50,  ITMAX=10, N2 = K – N1 )
CDVM$	PROCESSORS  P(NUMBER_OF_PROCESSORS( ))
	REAL   A1(N1+1,K), A2(N2+1,K), B1(N1+1,K), B2(N2+1,K) 
	INTEGER  LP(2),  HP(2)
CDVM$	TASK  MB( 2 )
CDVM$	ALIGN  B1(I,J)  WITH  A1(I,J) 
CDVM$	ALIGN  B2(I,J)  WITH  A2(I,J) 
CDVM$	DISTRIBUTE  ::  A1, A2 
CDVM$	REMOTE_GROUP  BOUND
	CALL  DPT(LP, HP, 2)

C	Task (block) distribution over processors
C	Array distribution over tasks
CDVM$	MAP  MB( 1 ) ONTO  P( LP(1) : HP(1) )
CDVM$	REDISTRIBUTE  A1( *, BLOCK )  ONTO  MB( 1 )
CDVM$	MAP  MB( 2 )   ONTO   P( LP(2) : HP(2) )
CDVM$	REDISTRIBUTE  A2(*,BLOCK)  ONTO  MB( 2 )
C		Initialization
CDVM$	PARALLEL  (J,I)  ON  A1(I,J)
	DO  10   J  =  1, K
	DO  10   I  =  1, N1
	   IF(I.EQ.1 .OR. J.EQ.1 .OR. J.EQ.K) THEN
		A1(I,J) = 0.
		B1(I,J) = 0.
	   ELSE
		B1(I,J)  = 1. + I + J 
		A1(I,J) = B1(I, J)
	   ENDIF
10	CONTINUE
CDVM$	PARALLEL  (J,I)  ON  A2(I,J)
	DO  20   J  =  1, K
	DO  20   I  =  2, N2+1
	   IF(I.EQ.N2+1 .OR. J.EQ.1 .OR. J.EQ.K) THEN
		A2(I,J) = 0.
		B2(I,J) = 0.
	   ELSE
		B2(I,J)  = 1. + ( I + N1 – 1 ) + J 
		A2(I,J) = B2(I,J)
	   ENDIF
20	CONTINUE
	DO  2   IT  =  1, ITMAX
CDVM$	PREFETCH   BOUND

C	exchange of edges 
CDVM$	PARALLEL    ( J )   ON   A1(N1+1, J),
CDVM$*	REMOTE_ACCESS  (BOUND : B2(2,J) )
	DO  30   J  =  1, K
30	   A1(N1+1, J) = B2(2, J)
CDVM$	PARALLEL  ( J )  ON   A2(1,J),
CDVM$*	REMOTE_ACCESS  (BOUND : B1(N1,J) )
	DO  40   J  =  1, K
40	   A2(1,J) = B1(N1,J)
CDVM$	TASK_REGION  MB
CDVM$	ON   MB( 1 )
CDVM$	PARALLEL  (J,I)  ON  B1(I,J),
CDVM$*	SHADOW_RENEW ( A1 )
	DO  50   J  =  2, K-1
	DO  50   I  =  2, N1
50	   B1(I,J) = (A1(I-1,J) + A1(I,J-1) + A1(I+1,J) + A1(I,J+1)) / 4
CDVM$	PARALLEL  (J,I)  ON  A1(I,J)
	DO  60   J  =  2, K-1
	DO  60   I  =  2, N1
60	   A1(I,J) = B1(I,J)
CDVM$	END ON
CDVM$	ON  MB( 2 )
CDVM$	PARALLEL  (J,I)  ON  B2(I,J),
CDVM$*	SHADOW_RENEW ( A2 )
	DO  70   J  =  2, K-1
	DO  70   I  =  2, N2
70	   B2(I,J) = (A2(I-1,J) + A2(I,J-1) + A2(I+1,J) + A2(I,J+1)) / 4
CDVM$	PARALLEL  (J,I)  ON  A2(I,J)
	DO  80   J  =  2, K-1
	DO  80   I  =  2, N2
80	   A2(I,J) = B2(I,J)
CDVM$	END ON
CDVM$	END  TASK_REGION
2	CONTINUE
	PRINT *, 'A1 '
	PRINT *,  A1
	PRINT *, 'A2 '
	PRINT *, A2
	END

	SUBROUTINE  DPT( LP, HP, NT )
C	processor distribution for NT tasks  (NT = 2)
	INTEGER  LP(2), HP(2)
	NUMBER_OF_PROCESSORS( ) = 1
	NP = NUMBER_OF_PROCESSORS( )
	NTP = NP/NT
	IF(NP.EQ.1) THEN
		LP(1) = 1
		HP(1) = 1
		LP(2) = 1
		HP(2) = 1
	ELSE
		LP(1) = 1
		HP(1) = NTP
		LP(2) = NTP+1
		HP(2) = NP
	END IF
	END

Example 7. Dynamic tasks (task loop)

	PROGRAM  MULTIBLOCK
C	Model of multi-block task.
C	The number of blocks, size of each block,
C	external and internal edges 
C	are defined during program execution.
C	Test of following FDVM constructs: dynamic arrays,
C	dynamic tasks, asynchronous REMOTE_ACCESS for dynamic
C	arrays (formal arguments)
*DVM$	PROCESSORS  MBC100( NUMBER_OF_PROCESSORS( ) )
	PARAMETER (M = 8, N =8, NTST = 1)
C	MXSIZE –  dynamic memory size
C	MXBL –  maximal number of blocks
	PARAMETER ( MXS=10000 )
	PARAMETER ( MXBL=2 )
C	HEAP –  dynamic memory
	REAL  HEAP(MXS)
C	PA,PB – arrays of pointers for dynamic arrays
C	PA(I),PB(I) – function value on previous and current step 
C			in I-th block
*DVM$	REAL, POINTER (:,:) :: PA, PB, P1, P2
*DVM$	DYNAMIC  PA, PB, P1, P2
	INTEGER  PA(MXBL), PB(MXBL), P1, P2
C	SIZE( 1:2, I) –  sizes of dimensions of I-th block 
	INTEGER  SIZE( 2, MXBL ) , ALLOCATE
C	TINB( :,I ) –  table of internal edges of I-th block
C	TINB( 1,I ) - - the number of  edges (from 1 till 4)
C	TINB( 2,I ) = J  - adjacent block number
C	TINB( 3,I ),TINB( 4,I ) -  edges of one-dimensional section
C	TINB( 5,I ) -  dimension number in I-th block (1 or 2)
C	TINB( 6,I ) -  dimension coordinate in I-th block
C	TINB( 7,I ) - dimension number in J-th block (1 or 2)
C	TINB( 8,I ) -  dimension coordinate in J-th block
	INTEGER  TINB( 29, MXBL )
C	TEXB( :,I ) – table of external edges of I-th block
C	TEXB( 1,I ) - ( 1  4) edges amount (from 1 to 4)
C	TEXB( 2,I ),TEXB( 3,I ) - coordinates of one-dimensional array 
C						section for 1-th edge
C	TEXB( 4,I ) - dimension number (1 or 2)
C	TEXB( 5,I ) - coordinate of given dimension
	INTEGER  TEXB(17,MXBL)
C	NBL -  the number of blocks
C	NTST – the number of steps
	INTEGER  NBL, NTST
C	IDM – pointer to free dynamic memory
	INTEGER  IDM
	COMMON IDM,MXSIZE
C	postponed distribution of arrays on each block
*DVM$	DISTRIBUTE :: PA, P1
*DVM$	ALIGN :: PB, P2
C	task array
*DVM$	TASK  TSA ( MXBL )
C	name of group exchange of internal edges
*DVM$	REMOTE_GROUP  GRINB
C	LP( I ), HP( I ) – edges of processor array section of I-th block
	INTEGER  LP(MXBL), HP(MXBL)
C	TGLOB( :, I ) – table of global coordinates
C			in Jacobi algorithm grid for I-th block
C	TGLOB( 1, I ) – 1-th dimension coordinate
C	TGLOB( 2, I ) – 2-th dimension coordinate
	INTEGER TGLOB(2,MXBL)
	MXSIZE = MXS
C	subdividing M*N block on sub-blocks
	CALL DISDOM(NBL,TGLOB,TEXB,TINB,SIZE,M,N,MXBL)
C	Dividing processor array on blocks
	CALL MPROC(LP,HP,SIZE,NBL)
C	Distribution of tasks (blocks) over processors.
C	Array distribution over tasks
	IDM = 1
	DO  10  IB = 1, NBL
*DVM$	MAP  TSA( IB )  ONTO  MBC100( LP(IB) : HP(IB) )
	PA(IB) = ALLOCATE ( SIZE(1,IB))
	P1 = PA(IB)
*DVM$	REDISTRIBUTE  (*,BLOCK)  ONTO  TSA(IB) :: P1
	PB(IB) = ALLOCATE ( SIZE(1,IB))
	P2 = PB(I)
*DVM$	REALIGN  P2(I,J)  WITH  P1(I,J)
10	CONTINUE
C	External edge initialization
	DO 20 IB=1,NBL
	LS = 0
	DO 20 IS = 1,TEXB(1,IB)
	CALL INEXB (HEAP(PA(IB)), HEAP(PB(IB)), SIZE(1,IB), SIZE(2,IB),
     *	     TEXB(LS+2,IB), TEXB(LS+3,IB), TEXB(LS+4,IB), TEXB(LS+5,IB) )
	LS = LS+4
20	CONTINUE
C	Initialization of blocks
	DO 25 IB = 1,NBL
	CALL INDOM (HEAP(PA(IB)), HEAP(PB(IB)), SIZE(1,IB), SIZE(2,IB),
     *       TGLOB(1,IB), TGLOB(2,IB))
	LS = LS+4
25	CONTINUE
	DO 65  IB = 1,NBL
	CALL PRTB(HEAP(PA(IB)), SIZE(1,IB), SIZE(2,IB ),IB)
65	CONTINUE
C	Iteration loop
	DO  30  IT = 1, NTST
C	surpassed pumping of buffers for internal edges
*DVM$	PREFETCH  GRINB
C	value calculation on internal edges
	DO  40  IB = 1, NBL
	LS = 0
	DO  40  IS = 1, TINB(1,IB)
	J = TINB(LS+2, IB)
	CALL CMPINB (HEAP(PA(IB)), HEAP(PA(J)),     
     *        SIZE(1,IB), SIZE(2,IB), SIZE(1,J), SIZE(2,J),
     *        TINB(LS+3,IB), TINB(LS+4,IB), TINB(LS+5,IB),
     *        TINB(LS+6,IB), TINB(LS+7,IB), TINB(LS+8,IB) )
	LS = LS+7
40	CONTINUE
C	value calculation inside blocks 
C	each block is a task
*DVM$	TASK_REGION   TSA
*DVM$	PARALLEL  ( IB )  ON  TSA( IB )
	DO 50  IB = 1,NBL
	CALL JACOBI(HEAP(PA(IB)), HEAP(PB(IB)), SIZE(1,IB), SIZE(2,IB))
50	CONTINUE
*DVM$	END  TASK_REGION
	
30	CONTINUE
C	end of iterations
C	output of array values
	DO 60  IB = 1,NBL
	CALL PRTB(HEAP(PA(IB)), SIZE(1,IB), SIZE(2,IB ),IB)
60	CONTINUE
	END

	INTEGER  FUNCTION ALLOCATE( SIZE )
C	dynamic array distribution for sequential execution
	INTEGER SIZE(2)
	COMMON IDM,MXSIZE
	ALLOCATE = IDM
	IDM = IDM + SIZE(1)*SIZE(2)
	IF(IDM.GT.MXSIZE) THEN
		PRINT *, 'NO MEMORY'
		STOP
	ENDIF
	RETURN
	END

	SUBROUTINE  CMPINB ( AI, AJ, N1, N2, M1, M2, S1, S2, 
     *                                    ID, INDI, JD, INDJ)
C	value calculation on internal edges
	DIMENSION AI(N1,N2), AJ(M1, M2)
	INTEGER S1, S2
*DVM$	INHERIT  AI, AJ
*DVM$	REMOTE_GROUP  GRINB
	IF ( ID .EQ. 1 )  THEN
	IF ( JD .EQ. 1 )  THEN
*DVM$	PARALLEL  ( K )  ON  AI(INDI,K),
*DVM$*	REMOTE_ACCESS (GRINB : AJ(INDJ,K) )
	DO 10  K = S1,S2
10	AI(INDI,K) = AJ(INDJ,K)
	ELSE
*DVM$	PARALLEL  ( K )  ON  AI( INDI, K ),
*DVM$*	REMOTE_ACCESS (GRINB : AJ(K,INDJ) )
	DO 20  K = S1, S2
20	AI(INDI,K) = AJ(K,INDJ)
	ENDIF
	ELSE
	IF ( JD .EQ. 1 )  THEN
*DVM$	PARALLEL  ( K )  ON  AI(K,INDI),
*DVM$*	REMOTE_ACCESS (GRINB : AJ(INDJ,K) )
	DO 30  K = S1,S2
30	AI(K, INDI) = AJ(INDJ,K)
	ELSE
*DVM$	PARALLEL  ( K )  ON  AI(K,INDI),
*DVM$*	REMOTE_ACCESS (GRINB : AJ(K,INDJ) )
	DO 40  K = S1, S2
40	AI(K,INDI) = AJ(K,INDJ)
	ENDIF
	ENDIF
	END

	SUBROUTINE  MPROC(LP,HP,SIZE,NBL)
C	processor distribution  over blocks
	INTEGER LP(NBL),HP(NBL),SIZE(2,NBL)
C	distribution for two blocks NBL=2
	NUMBER_OF_PROCESSORS( ) = 1
	NP = NUMBER_OF_PROCESSORS( )
	NPT = NP/NBL
	IF(NP.EQ.1) THEN
		LP(1) = 1
		HP(1) = 1
		LP(2) = 1
		HP(2) = 1
	ELSE
		LP(1) = 1
		HP(1) = NPT
		LP(2) = NPT+1
		HP(2) = NP
	ENDIF
	END

	SUBROUTINE  INEXB(A,B,N1,N2,S1,S2,ID,INDI)
C	external edge initialization
	DIMENSION A(N1,N2),B(N1,N2)
	INTEGER S1,S2
*DVM$	INHERIT A,B
	IF(ID.EQ.1)  THEN
*DVM$	PARALLEL  (K)  ON  A(INDI,K)
	DO 10 K = S1,S2
	A(INDI,K) = 0
	B(INDI,K) = 0
10	CONTINUE
	ELSE
*DVM$	PARALLEL  (K)  ON  A(K,INDI)
	DO 20 K = S1,S2
	A(K,INDI) = 0
	B(K,INDI) = 0
20	CONTINUE	
	ENDIF
	END

	SUBROUTINE  INDOM(A,B,M,N,X1,X2)
C	block initialization
	DIMENSION A(M,N), B(M,N)
	INTEGER X1,X2
*DVM$	INHERIT A,B
*DVM$	PARALLEL (I,J) ON A(I,J)
	DO 10 I = 2,M-1
	DO 10 J = 2,N-1
	A(I,J) = I+J+X1+X2-3
	B(I,J) = A(I,J)
10 	CONTINUE
	END

	SUBROUTINE  JACOBI(A,B,N,M)
	DIMENSION A(N,M), B(N,M)
*DVM$	INHERIT  A,B
*DVM$	PARALLEL  (I,J)  ON  B(I,J)
	DO 10 I = 2,N-1
	DO 10 J = 2,M-1
10	B(I,J) = (A(I-1,J)+A(I+1,J)+A(I,J-1)+A(I,J+1))/4
*DVM$	PARALLEL  (I,J)  ON  A(I,J)
	DO 20 I = 2,N-1
	DO 20 J = 2,M-1
20	A(I,J) = B(I,J)
	END

	SUBROUTINE  PRTB(B,N,M,IB)
C	print data for IB block
	DIMENSION B(N,M)
*DVM$	INHERIT B
	PRINT *, 'BLOCK', IB
	PRINT *, B
	END

	SUBROUTINE DISDOM (NBL,TGL,TEXB,TINB,SIZE,M,N,MXBL)
	INTEGER TGL(2,MXBL), TEXB(17,MXBL), TINB(29,MXBL), SIZE(2,MXBL)
	INTEGER DM(20), DN(20),KDM,KDN,S,GM,GN
C	subdividing M*N block on two sub-blocks M*(N/2) and M* (N-N/2)
	DM(1) = M
	KDM = 1
	DN(1) = N/2
	DN(2) = N - N/2
	KDN = 2
	S = 0
	DO 10 I = 1,KDM
10	S = S + DM(I)
	IF(S.NE.M) THEN
		PRINT *, 'wrong division M'
		STOP
	ENDIF
	DO 15 IB = 1,MXBL
	TEXB(1,IB) = 0
	TINB(1,IB) = 0
15	CONTINUE       
	S = 0
	DO 20 J = 1,KDN
20	S = S + DN(J)
	IF(S.NE.N) THEN
		PRINT *, 'wrong division N'
		STOP
	ENDIF
	DM(1) = DM(1) - 1
	DN(1) = DN(1) - 1
	DM(KDM) = DM(KDM) - 1
	DN(KDN) = DN(KDN) - 1
C	producing tables (graphs) of external and internal edges
	IB = 1
	GM = 2
	GN = 2
	DO 30 J = 1,KDN
	DO 40 I = 1,KDM
	IF (I.EQ.1) THEN
		L = TEXB(1,IB)*4
		TEXB(L+2,IB) = 1 
		TEXB(L+3,IB) = DN(J)+2
		TEXB(L+4,IB) = 1  
		TEXB(L+5,IB) = 1 
		TEXB(1,IB) = TEXB(1,IB)+1
	ELSE
		L = TINB(1,IB)*7
		TINB(L+2,IB) = IB-1 
		TINB(L+3,IB) = 1
		TINB(L+4,IB) = DN(J)+2
		TINB(L+5,IB) = 1
		TINB(L+6,IB) = 1
		TINB(L+7,IB) = 1 
		TINB(L+8,IB) = DM(I-1)+1
		TINB(1,IB) = TINB(1,IB)+1
	ENDIF
	IF (I.EQ.KDM) THEN
		L = TEXB(1,IB)*4
		TEXB(L+2,IB) = 1 
		TEXB(L+3,IB) = DN(J)+2
		TEXB(L+4,IB) = 1  
		TEXB(L+5,IB) = DM(I)+2 
		TEXB(1,IB) = TEXB(1,IB)+1
	ELSE
		L = TINB(1,IB)*7
		TINB(2,IB) = IB+1 
		TINB(3,IB) = 1
		TINB(4,IB) = DN(J)+2
		TINB(5,IB) = 1
		TINB(6,IB) = DM(I)+2
		TINB(7,IB) = 1 
		TINB(8,IB) = 2
		TINB(1,IB) = TINB(1,IB)+1
	ENDIF
	IF (J.EQ.1) THEN
		L = TEXB(1,IB)*4
		TEXB(L+2,IB) = 1 
		TEXB(L+3,IB) = DM(I)+2
		TEXB(L+4,IB) = 2  
		TEXB(L+5,IB) = 1 
		TEXB(1,IB) = TEXB(1,IB)+1
	ELSE
		L = TINB(1,IB)*7
		TINB(L+2,IB) = IB-KDM 
		TINB(L+3,IB) = 1
		TINB(L+4,IB) = DM(I)+2            
		TINB(L+5,IB) = 2
		TINB(L+6,IB) = 1
		TINB(L+7,IB) = 2 
		TINB(L+8,IB) = DN(J-1)+1
		TINB(1,IB) = TINB(1,IB)+1
	ENDIF
	IF (J.EQ.KDN) THEN
		L = TEXB(1,IB)*4
		TEXB(L+2,IB) = 1 
		TEXB(L+3,IB) = DM(I)+2
		TEXB(L+4,IB) = 2  
		TEXB(L+5,IB) = DN(J)+2 
		TEXB(1,IB) = TEXB(1,IB)+1
	ELSE
		L = TINB(1,IB)*7
		TINB(L+2,IB) = IB+KDM 
		TINB(L+3,IB) = 1
		TINB(L+4,IB) = DM(I)+2            
		TINB(L+5,IB) = 2
		TINB(L+6,IB) = DN(J)+2
		TINB(L+7,IB) = 2 
		TINB(L+8,IB) = 2
		TINB(1,IB) = TINB(1,IB)+1
	ENDIF
	SIZE(1,IB) = DM(I)+2
	SIZE(2,IB) = DN(J)+2
	TGL(1,IB) = GM
	TGL(2,IB) = GN
	GM = GM+DM(I)
	IB = IB+1
40	CONTINUE
	GM = 2
	GN = GN+DN(J)
30	CONTINUE
	NBL = IB-1
	END

Fortran DVM - contents Part 1 (1-4) Part 2 (5-6) Part 3 (7-12) Part 4 (Annexes)