C-DVM - оглавление Часть 1 (1-4) Часть 2 (5-11) Часть 3 (Приложения)
создан:   апрель 2001 - последнее обновление 03.05.01 -

Приложение 1. Примеры программ

Шесть небольших программ из научной области приводятся для иллюстрации свойств языка C-DVM. Они предназначены для решения систем линейных уравнений:

A x = b

где
A – матрица коэффициентов,
b – вектор свободных членов,
x – вектор неизвестных.

Для решения этой системы используются следующие основные методы.

Прямые методы. Хорошо известный метод исключения Гаусса является наиболее широко используемым алгоритмом этого класса. Основная идея алгоритма заключается в преобразовании матрицы А в верхнетреугольную матрицу и использовании затем обратной подстановки, чтобы привести ее к диагональной форме.

Явные итерационные методы. Наиболее известным алгоритмом этого класса является метод релаксации Якоби. Алгоритм выполняет следующие итерационные вычисления

xi,jnew = ( xi-1,jold + xi,j-1old + xi+1,jold + xi,j+1old ) / 4

Неявные итерационные методы. К этому классу относится метод последовательной верхней релаксации. Итерационное приближение вычисляется по формуле

xi,jnew = ( w / 4 ) * ( xi-1,jnew + xi,j-1new + xi+1,jold + xi,j+1old ) + (1-w) * xi,jold

При использовании "красно-черного" упорядочивания переменных каждый итерационный шаг разделяется на два полушага Якоби. На одном полушаге вычисляются значения "красных" переменных, на другом – "черных" переменных. "Красно-черное" упорядочивание позволяет совместить вычисления и обмены данными.

Пример 1. Алгоритм метода исключения Гаусса

Коэффициенты матрицы A хранятся в секции массива A[0:N-1][0:N-1], а вектор B – в секции A[0:N-1][N] того же массива.

/* GAUSS  program */

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)
#define N 10

int main (int argn, char **args)
{
long  i, j, k;

/* declaration of dynamic distributed arrays */
DVM(DISTRIBUTE [BLOCK] [])  float (*A)[N+1];
DVM(DISTRIBUTE [BLOCK])  float (*X);
/* creation of arrays */
A = malloc( N*(N+1)*sizeof(float));
X = malloc( N*sizeof(float));
/* initialize array A*/
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,N-1,1)
DO(j,0,N,1)
	if (i==j || j==N)  A[i][j] = 1.f;
	else A[i][j]=0.f;

/* elimination */
for (i=0; i<N-1; i++)
{
DVM(PARALLEL [k][j] ON A[k][j]; REMOTE_ACCESS A[i][])
DO (k,i+1,N-1,1)
DO (j,i+1,N,1)
	A[k][j] = A[k][j]-A[k][i]*A[i][j]/A[i][i];
}
/* reverse substitution */
X[N-1] = A[N-1][N]/A[N-1][N-1];
for (j=N-2; j>=0; j-=1)
{
DVM(PARALLEL [k] ON A[k][]; REMOTE_ACCESS X[j+1])
DO (k,0,j,1)
    	A[k][N] = A[k][N]-A[k][j+1]*X[j+1];
X[j]=A[j][N]/A[j][j];
DVM(REMOTE_ACCESS X[j]) 
	printf(“j=%4i  X[j]=%3.3E\n”,j,X[j]);
}
return 0;
}

Пример 2. Алгоритм Якоби

/* JACOBI program */

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#define Max(a,b) ((a)>(b)?(a): (b))

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)

#define  L  8
#define  ITMAX 20

int i,j,it,k;
double eps;
double MAXEPS = 0.5;
FILE *f;

/* 2D arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L];
DVM(ALIGN[i][j] WITH A[i][j]) double B[L][L];

int main(int argn, char **args)
{
/* 2D loop with base array A */
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,L-1,1)
DO(j,0,L-1,1)
      {A[i][j]=0.;
      B[i][j]=1.+i+j;
      }

/******  iteration loop  *************************/
DO(it,1,ITMAX,1)
{
  eps= 0.;
/* Parallel loop with base array A             */
/* calculating maximum in variable eps          */
DVM(PARALLEL [i][j] ON A[i][j]; REDUCTION MAX(eps))
DO(i,1,L-2,1)
DO(j,1,L-2,1)
	{eps =  Max(fabs(B[i][j]-A[i][j]),eps);
	A[i][j] = B[i][j];
	}
/* Parallel loop with base array  B  and         */
/* with prior updating shadow elements of array A  */
DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_RENEW A)
DO(i,1,L-2,1)
DO(j,1,L-2,1)
    B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4.;

  printf(“it=%4i   eps=%3.3E\n”, it,eps);
  if (eps < MAXEPS) break;
  }/*DO it*/
f=fopen("jacobi.dat","wb");
fwrite(B,sizeof(double),L*L,f);
return 0;
}

Пример 3. Асинхронная версия алгоритма Якоби

/* Asynchronous JACOBI */ 
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#define Max(a,b) ((a)>(b)? (a): (b))

#define DVM(dvmdir)
#define DO(v,l,u,s)   for(v=l; v<=u; v+=s)

#define  L 8
#define  ITMAX 20

int i,j,it,k;
double eps;
double MAXEPS   = 0.5;
FILE *f;
/* declare groups for shadow and reduction operations */
DVM(SHADOW_GROUP) void *grshad;
DVM(REDUCTION_GROUP) void *emax;
/* 2D arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L];
DVM(ALIGN [i][j] WITH A[i][j]) double B[L][L];

int main(int argn, char **args)
{
/* 2D parallel loop with base array A */
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,L-1,1)
DO(j,0,L-1,1)
        {A[i][j]=0.;
         B[i][j]=1.+i+j;}
/* Create group for shadow operation */
DVM(CREATE_SHADOW_GROUP grshad: A);

/************  iteration loop  *************************/
DO(it,1,ITMAX,1)
{
  eps= 0.;
/* Parallel loop with base array A: */
/* at first elements of array A exported by the */
/* neighbor processor are calculated and sent and */
/* then internal elements of array A are calculated */
DVM(PARALLEL [i][j] ON A[i][j]; 
	SHADOW_START grshad; REDUCTION emax : MAX(eps))
DO(i,1,L-2,1)
DO(j,1,L-2,1)
         {eps = max(fabs(B[i][j]-A[i][j]),eps);
          A[i][j] = B[i][j];
         }
/* Start asynchronous calculation of maximum */
DVM(REDUCTION_START emax);
/* Parallel loop with base array B: */
/* internal elements of array B are calculated at first */
/* then completion of array A shadow edges update is */ 
/* awaited and the loop iterations, requiring shadow */ 
/* elements of array A are calculated */
DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_WAIT grshad)
DO(i,1,L-2,1)
DO(j,1,L-2,1)
    B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4;
/* Wait for completion of reduction */
DVM(REDUCTION_WAIT emax);

printf( "it=%4i  eps=%3.3E\n”, it,eps);
if (eps < MAXEPS) break;
}/*DO it*/

f=fopen("jacobi.dat","wb");
fwrite(B,sizeof(double),L*L,f);
return 0;
}

Пример 4. "Красно-черная" последовательная верхняя релаксация

Точки обсчитываются в шахматном порядке: сначала “красные”, потом “черные”. Чтобы задать соответствующее (непрямоугольное) индексное пространство, допускается особая форма заголовка цикла: DO (var, a+b%2, ub, 2).

/* RED-BLACK program */

#include <stdlib.h>
#include <math.h>

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)

#define Max(a,b) ((a)>(b)?(a):(b))
#define ITMAX 10

int main(int an, char ** as)
{long i, j, it, irb;
float eps=0.;
float MAXEPS = 0.5E-5;
float w = 0.5;

/* Groups of asynchronous operations */
DVM(SHADOW_GROUP) void *sha;
DVM(REDUCTION_GROUP) void *rg;

int  N;

/* redefine N as constant */
#define     N 8
DVM(DISTRIBUTE [BLOCK] [BLOCK])  float (*A)[N];
#undef      N /* restore N */

/* "Calculate"  of array size (=8!). Create array */
N = 8;
A = malloc( N*N*sizeof(float));

/* Specification of members of shadow group  */
DVM(CREATE_SHADOW_GROUP sha : A);

/* Initialization: parallel loop with surpassing */
/* calculation and sending of exported data */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha)
DO (i,0,N-1,1)
DO (j,0,N-1,1)
    if(i==0 || i==N-1 || j==0 || j==N-1) A[i][j]=0.;
    else A[i][j] = 1.+i+j;

DVM(SHADOW_WAIT sha);

/* iteration loop */
DO (it,0,ITMAX,1)
{
eps = 0.;

/* Parallel loop with reduction on RED points */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha;
                                REDUCTION rg: MAX(eps) )
DO (i,1,N-2,1)
DO (j,1+i%2,N-2,2)
    {float s;
    s = A[i][j];
    A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+
             A[i][j-1]+A[i][j+1])+(1-w)*A[i][j];
    eps = Max (fabs(s-A[i][j]), eps);
    }

/* Start red reduction -- as early as possible */
/* Then wait shadows */
DVM(REDUCTION_START rg);
DVM(SHADOW_WAIT sha);

/* Parallel loop on BLACK points (without reduction) */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha)
DO (i,1,N-2,1)
DO (j,1+(i+1)%2,N-2,2)
    {A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+
             A[i][j-1]+A[i][j+1])+(1-w)*A[i][j];
    }
/* Wait shadows, then */
/* wait red point's reduction -- as late as possible */
DVM(SHADOW_WAIT sha);
DVM(REDUCTION_WAIT rg);

printf("it=%d eps=%e\n",it,eps);
if (eps < MAXEPS) break;

} /* DO it */

free(A);
return 0;
}

Пример 5. Многосеточная задача

/* MULTIGRID  program */

#include <stdio.h>
#include <stdlib.h>

#define DVM(dvm)
#define DO(v,l,h,s) for(v=(l); v<=(h); v+=(s))

void oper(
	DVM(*DISTRIBUTE [*][*][*])  float *AA,
	int AAN, int AAM, int AAK,
	DVM(*DISTRIBUTE [*][*][*])  float *BB,
	int BBN, int BBM, int BBK)
/*Parameters: two distributed 3D arrays and */
/* their  actual dimensions */
{
#define AA(i,j,k) AA[((i)*AAM+(j))*AAK+(k)]
#define BB(i,j,k) AA[((i)*BBM+(j))*BBK+(k)]
int i, j,k;
/* Alignment of array BB with array AA using stretching*/
DVM(REALIGN BB[i][j][k] WITH AA[2*i][2*j][2*k] NEW);
/* forming array BB from elements of array AA with even indexes */
DVM(PARALLEL [i][j][k] ON BB[i][j][k])
FOR(i,BBN)
FOR(j,BBM)
FOR(k,BBK)
	BB(i,j,k)=AA(i*2,j*2,k*2);
#undef AA
#undef BB
}

int main(int argn, char **args)
{
int N1=8,M1=12,K1=16;
int N2,M2,K2;
int Narr=5,Nit=5;
int grid=0;
int ACM,ACK;
int i,j,k;
int step_grid=1;

/* Up to 20 distributed arrays */
DVM(DISTRIBUTE[BLOCK][BLOCK][]) float *A[20];
/* Pointer to current distributed array */
DVM(*DISTRIBUTE[*][*][*]) float *AC;
	/* creation of array  A[0] */
A[0]=malloc(N1*M1*K1*sizeof(float));
AC=A[0];
ACM=M1;
ACK=K1;
#define AC(i,j,k) AC[((i)*ACM+(j))*ACK+(k)]
	/* initialization of source array */
DVM(PARALLEL [i][j][k] ON AC[i][j][k])
DO(i,0,N1-1,1)
DO(j,0,M1-1,1)
DO(k,0,K1-1,1)
	AC(i,j,k)=1.+i+j+k ;
#undef AC

do{
printf("N1=%d M1=%d K1=%d \n”,N1,M1,K1);
N2=N1/2;
M2=M1/2;
K2=K1/2;
grid++;
/* creation of array A[grid] */
A[grid]=malloc(N2*M2*K2*sizeof(float));
oper(A[grid-1],N1,M1,K1,A[grid],N2,M2,K2);
N1=N2;
M1=M2;
K1=K2;
} while (N2>2 && grid<Narr)

for(i=0;i<=grid;i++)
	free(A[i]);
return 0;
}

Пример 6. Многообластная задача

/*  PROGRAM TASKS                                 */
/* rectangular grid is distributed on two blocks  */
/*                                                */
/*                     <------- A2,B2 ----->      */
/*                     0    1   2    ... N2       */
/*       <--- A1,B1 -------->                     */
/*       0   1   ...  N1-1  N1  N1+1 ... N1+N2-1  */
/*------------------------------------------------*/
/* 0   | .   .         .    .   .    ...  .       */
/* ... |                                          */
/* K-1 | .   .         .    .   .    ...  .       */
/**************************************************/
#include <stdlib.h>
#include <math.h>

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)
#define FOR(v,n)  for(v=0; v<n; v++)
#define Max(a,b) ((a)>(b)?(a):(b))

#define NUMBER_OF_PROCESSORS() 1

#define K   8
#define N1  4
#define N2  K-N1
#define ITMAX 10

DVM(PROCESSORS)  void * P[NUMBER_OF_PROCESSORS()];
DVM(TASK) void * MB[2];
double eps0;
DVM(DISTRIBUTE) float (*A1)[K], (*A2)[K];
DVM(ALIGN)      float (*B1)[K], (*B2)[K];

int main(int argn, char** args)
{
int i,j, it;
int NP;

printf("---------- starting --------------\n");

DVM(DEBUG 1 -d0)
{
NP = NUMBER_OF_PROCESSORS() / 2;
}

DVM(MAP MB[0] ONTO P[0:(NP?NP-1:0)]);
DVM(MAP MB[1] ONTO P[NP:(NP?2*NP-1:NP)]);

A1=malloc((N1+1)*K*sizeof(float));
DVM(REDISTRIBUTE A1[][BLOCK] ONTO MB[0]);
B1=malloc((N1+1)*K*sizeof(float));
DVM(REALIGN B1[i][j] WITH A1[i][j]);

A2=malloc((N2+1)*K*sizeof(float));
DVM(REDISTRIBUTE A2[][BLOCK] ONTO MB[1]);
B2=malloc((N2+1)*K*sizeof(float));
DVM(REALIGN B2[i][j] WITH A2[i][j]);

    /* Initialization */
DVM(PARALLEL [i][j] ON A1[i][j])
FOR(i,N1+1)
FOR(j,K)
{if (i==0 || j==0 || j==K-1) {
    A1[i][j] = 0.f;
    B1[i][j] = 0.f;
} else  {
    B1[i][j] = 1.f + i + j ;
    A1[i][j] = B1[i][j];
}}
DVM(PARALLEL [i][j] ON A2[i][j])
FOR(i,N2+1)
FOR(j,K)
{if(i == N2  || j==0 || j==K-1) {
    A2[i][j] = 0.f;
    B2[i][j] = 0.f;
} else {
    B2[i][j] = 1.f + ( i + N1 - 1 ) + j ;
    A2[i][j] = B2[i][j];
}}

DO(it,1,ITMAX,1)
{
    eps0 = 0.;

/*     exchange bounds */

DVM(PARALLEL [j] ON A1[(N1)][j]; REMOTE_ACCESS B2[1][])
    FOR(j,K) A1[N1][j] = B2[1][j];

DVM(PARALLEL [j] ON A2[0][j]; REMOTE_ACCESS B1[N1-1][])
    FOR(j,K) A2[0][j] = B1[N1-1][j];


/* Block task region */
DVM(TASK_REGION MB; REDUCTION MAX(eps0))
{
DVM(ON MB[0])
{
double eps=0.; /* private reduction variable */
DVM(PARALLEL [i][j] ON B1[i][j]; SHADOW_RENEW A1[][])
DO(i,1, N1-1, 1)
DO(j,1, K-2, 1)
   B1[i][j]=(A1[i-1][j]+A1[i][j-1]+A1[i+1][j]+A1[i][j+1])/4.f;

DVM(PARALLEL [i][j] ON A1[i][j]; REDUCTION MAX(eps))
DO(i,1,N1-1,1)
DO(j,1,K-2,1)
        {eps =  Max(fabs(B1[i][j]-A1[i][j]),eps);
        A1[i][j] = B1[i][j];
        }
eps0=Max(eps0,eps);
}

DVM(ON MB[1])
{
double eps=0.; /* private reduction variable */
DVM(PARALLEL [i][j] ON B2[i][j]; SHADOW_RENEW A2[][])
DO(i,1,N2-1,1)
DO(j,1,K-2,1)
  B2[i][j]=(A2[i-1][j]+A2[i][j-1]+A2[i+1][j]+A2[i][j+1])/4.f;

DVM(PARALLEL [i][j] ON A2[i][j]; REDUCTION MAX(eps))
DO(i,1,N2-1,1)
DO(j,1,K-2,1)
        {eps =  Max(fabs(B2[i][j]-A2[i][j]),eps);
        A2[i][j] = B2[i][j];
        }
eps0=Max(eps0,eps);
}
} /* TASK_REGION */

printf("it=%d eps0=%f\n",it,eps0);
}/* DO IT */
return 0;
}

Приложение 2. Сводный синтаксис директив C-DVM

directive ::=     DVM ( DVM-directive [ ; DVM-directive ] )
DVM-directive ::= specification-directive
  | executable-directive
specification-directive ::= processors-directive
  | align-directive
  | distribute-directive
  | template-directive
  | shadow-directive
  | shadow-group-directive
  | reduction-group-directive
  | remote-group-directive
  | task-directive
executable-directive ::= realign-directive
  | redistribute-directive
  | create-template-directive
  | parallel-directive
  | remote-access-directive
  | create-shadow-group-directive
  | shadow-start-directive
  | shadow-wait-directive
  | reduction-start-directive
  | reduction-wait-directive
  | prefetch-directive
  | reset-directive
  | map-directive
  | task-region-directive
  | on-directive
processors-directive ::= PROCESSORS
distribute-directive ::= DISTRIBUTE [ dist-directive-stuff ]
redistribute-directive ::= REDISTRIBUTE
    distributee dist-directive-stuff [ NEW ]
dist-directive-stuff ::= dist-format... [ dist-onto-clause ]
distributee ::= array-name
dist-format ::= [BLOCK]
  | [GENBLOCK ( block-array-name )]
  | [WGTBLOCK ( block-array-name,nblock )]
  | []
dist-onto-clause ::= ONTO dist-target
dist-target ::= processors-name [ section-subscript ]…
align-directive ::= ALIGN [ align-directive-stuff ]
realign-directive ::= REALIGN
    alignee align-directive-stuff [ NEW ]
align-directive-stuff ::= align-source... align-with-clause
alignee ::= array-name
align-source ::= [ ]
  | [ align-dummy ]
align-dummy ::= scalar-int-variable
align-with-clause ::= WITH align-spec
align-spec ::= align-target [ align-subscript ]…
align-target ::= array-name
  | template-name
align-subscript ::= [ int-expr ]
  | [ align-subscript-use ]
  | [ ]
align-subscript-use ::= [ primary-expr * ] align-dummy
[ add-op primary-expr ]
primary-expr ::= int-constant
  | int-variable
  | ( int-expr )
add-op ::= +
  | -
create-template-directive ::= CREATE_TEMPLATE
template_name size...
parallel-directive ::= PARALLEL loop-variable...
ON iteration-align-spec
      [ ; reduction-clause]
      [ ; shadow-renew-clause]
      [ ; remote-access-clause ]
      [ ; across-clause ]
iteration-align-spec ::= align-target iteration-align-subscript...
iteration-align-subscript ::= [ int-expr ]
  | [ do-variable-use ]
  | []
loop-variable ::= [ do-variable ]
do-variable-use ::= [ primary-expr * ] do-variable
                             [ add-op primary-expr ]
reduction-clause ::= REDUCTION [ group_name : ] reduction-op...
reduction-op ::= reduction-op-name ( reduction-variable )
  | reduction-loc-name ( reduction-variable, loc-variable )
reduction-variable ::= array-name
  | scalar-variable-name
reduction-op-name ::= SUM
  | PRODUCT
  | MAX
  | MIN
  | AND
  | OR
reduction-loc-name ::= MAXLOC
  | MINLOC
across-clause ::= ACROSS dependent-array...
dependent-array ::= dist-array-name dependence...
dependence ::= [ flow-dep-length : anti-dep-length ]
flow-dep-length ::= int-constant
anti-dep-length ::= int-constant
shadow-directive ::= SHADOW shadow-array...
shadow-array ::= array-name shadow-edge...
shadow-edge ::= [ width ]
  | [ low-width : high-width ]
width ::= int-expr
low-width ::= int-expr
high-width ::= int-expr
shadow-renew-clause ::= SHADOW_RENEW renewee...
renewee ::= dist-array-name [ shadow-edge ]… [CORNER]
remote-access-directive ::= REMOTE_ACCESS
      [ remote-group-name : ] regular-reference...
regular-reference ::= dist-array-name [ regular-subscript ]…
regular-subscript ::= [ int-expr ]
  | [ do-variable-use ]
  | [ ]
remote-access-clause ::= remote-access-directive
prefetch-directive ::= PREFETCH group-name
reset-directive ::= RESET group-name
shadow-group-directive ::= CREATE_SHADOW_GROUP
         shadow-group-name : renewee...
shadow-start-directive ::= SHADOW_START shadow-group-name
shadow-wait-directive ::= SHADOW_WAIT shadow-group-name
shadow-renew-clause ::= .   .   .
  | | shadow-start-directive
  | | shadow-wait-directive
reduction-group-directive ::= REDUCTION_GROUP
reduction-start-directive ::= REDUCTION_START reduction-group-name
reduction-wait-directive ::= REDUCTION_WAIT reduction-group-name
task-directive ::= TASK
map-directive ::= MAP task-name [ task-index ]
    ONTO processors-name [ section-subscript ]…
dist-target ::= . . .
  | task-name [ task-index ]
block-task-region ::= DVM( task-region-directive ) {
               on-block...
    }
task-region-directive ::= TASK_REGION task-name [ ; reduction-clause]
on-block ::= DVM( on-directive ) operator
on-directive ::= ON task-name [ task-index ]
loop-task-region ::= DVM( task-region-directive ) {
               parallel-task-loop
    }
parallel-task-loop   DVM( parallel-task-loop-directive )
               do-loop
parallel-task-loop-directive ::= PARALLEL [ do-variable ]
ON
task-name [ do-variable ]
     
copy-flag-directive ::= COPY_FLAG
copy-start-directive ::= COPY_START flag_addr
copy-wait-directive ::= COPY_WAIT flag_addr
copy-directive ::= COPY

C-DVM - оглавление Часть 1 (1-4) Часть 2 (5-11) Часть 3 (Приложения)