home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 10 Tools
/
10-Tools.zip
/
adaptor.zip
/
adapt.zip
/
adaptor
/
dalib
/
pvm3
/
schedul1.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-11-29
|
52KB
|
1,567 lines
/**************************************************************************
* *
* Author : Dr. Thomas Brandes, GMD, I1.HR *
* Copyright : GMD St. Augustin, Germany *
* Date : Apr 92 *
* Last Update : May 92 *
* *
* This Module is part of the DALIB *
* *
* Module : schedule.c *
* *
* Function : Operations for indirect addressing of distributed arrays *
* *
* Export : FORTRAN Interface *
* *
* - A, P, Q, ... are conform and are distributed *
* - B is indirect accessed, rank determines number of P, Q, ... *
* - A and B have same type *
* *
* A = B [P] : A[j] = B[P[j]] for all j *
* *
* void dalib_global_getm1_ (a, a_size, a_elems, *
* b, b_N1, p, mask ) *
* *
* B [P] = A : B[P[j]] = A[j] for all j *
* *
* void dalib_global_setm1_ (op, b, b_N1, p, *
* a, a_size, a_elems, mask) *
* *
* A = B [P,Q] : A[j] = B[P[j],Q[j]] for all j *
* *
* void dalib_global_getm2_ (a, a_size, a_elems, *
* b, b_N1, b_N2, p, q, mask ) *
* *
* B [P,Q] = A : B[P[j],Q[j]] = A[j] for all j *
* *
* void dalib_global_setm2_ (op, b, b_N1, b_N2, p, q, *
* a, a_size, a_elems, mask) *
* *
**************************************************************************/
#undef DEBUG
#include "system.h"
#include <stdio.h>
/*********************************************************
* *
* Global data for a schedule *
* *
*********************************************************/
/******************************************************************
* *
* send_schedule *
* ------------------------------------------------------------- *
* | s_no[0] | s_no[1] | | s_no[k-1] | *
* ------------------------------------------------------------- *
* /\ /\ /\ /\ *
* | s_ptr[0] | s_ptr[1] ....... | s_ptr[k-1] *
* *
* *
* s_no[p-1] : number of indexes that point to process p *
* *
* send_schedule [s_ptr[p-1]] index values *
* send_schedule [s_ptr[p-1]+1] that point to *
* send_schedule [s_ptr[p-1]+2] process p *
* ............................ *
* send_schedule [s_ptr[p-1]+s_no[p-1]-1] *
* *
* send_values : size of array send_schedule *
* *
* index[i] is in send_schedule[local_index[i]] *
* *
******************************************************************/
int index_values; /* number of elements in index, local part */
int send_values; /* number of true index elements, mask! */
int *send_schedule; /* size is send_values */
int *s_ptr; /* size is nproc() */
int *s_no; /* size is nproc() */
int *local_index; /* size is send_values */
/******************************************************************
* *
* recv_schedule *
* ------------------------------------------------------------- *
* | r_no[0] | r_no[1] | | r_no[k-1] | *
* ------------------------------------------------------------- *
* /\ /\ /\ /\ *
* | r_ptr[0] | r_ptr[1] ....... | r_ptr[k-1] *
* *
* *
* r_no[p-1] : number of indexes of process p that point to me *
* *
* recv_schedule [r_ptr[p-1]] index values *
* recv_schedule [r_ptr[p-1]+1] of process p that *
* recv_schedule [r_ptr[p-1]+2] point to values *
* ............................ belonging to *
* recv_schedule [r_ptr[p-1]+r_no[p-1]-1] this process *
* *
* recv_values : size of array recv_schedule *
* *
******************************************************************/
int recv_values;
int *recv_schedule;
int *r_ptr;
int *r_no;
int operation; /* operation for global set, 0 - 18 */
/*********************************************************
* *
* ERROR - Handling *
* *
*********************************************************/
void dalib_schedule_error (s)
char s[];
{ printf ("FATAL ERROR : dalib_schedule_...\n");
printf ("Message : %s\n", s);
exit (-1);
}
#ifdef DEBUG
/*********************************************************
* *
* P R I N T I N G T E S T O U T P U T *
* *
*********************************************************/
void dalib_print_send_schedule ()
{ int k;
/* lock (&sm->global_lock); */
/* print the send_schedule */
printf ("Process %d has %d of %d pointers to other values\n",
pcb.i, send_values, index_values);
for (k=0; k<pcb.p; k++)
printf ("Process %d needs %d values of process %d\n",
pcb.i, s_no[k], k+1);
/* unlock (&sm->global_lock); */
}
void dalib_print_recv_schedule ()
{ int k;
/* lock (&sm->global_lock); */
/* print the recv_schedule */
printf ("Process %d has %d pointers from other ones to its values\n",
pcb.i, recv_values);
for (k=0; k<pcb.p; k++)
printf ("Process %d has %d indexes of process %d\n",
pcb.i, r_no[k], k+1);
/* unlock (&sm->global_lock); */
}
void dalib_print_send_sched_values ()
{ /* print the send_schedule */
int j;
/* lock (&sm->global_lock); */
printf ("send_schedule of process %d \n", pcb.i);
printf ("=========================== \n");
printf ("\n");
printf ("send values = %d\n", send_values);
printf ("\n");
for (j=0;j<pcb.p;j++)
printf ("proc %d: ptr = %d, no = %d \n", j+1, s_ptr[j], s_no[j]);
printf ("\n");
for (j=0;j<send_values;j++)
printf ("send_schedule (%d) = %d\n", j, send_schedule[j]);
printf ("\n");
/* unlock (&sm->global_lock); */
} /* dalib_print_send_sched_values */
void dalib_print_recv_sched_values ()
{ /* print the recv schedule */
int j;
/* lock (&sm->global_lock); */
printf ("recv_schedule of process %d \n", pcb.i);
printf ("=========================== \n");
printf ("\n");
for (j=0;j<pcb.p;j++)
printf ("proc %d: ptr = %d , no = %d \n", j+1, r_ptr[j], r_no[j]);
printf ("\n");
printf ("recv_values = %d\n", recv_values);
printf ("\n");
for (j=0;j<recv_values;j++)
printf ("recv_schedule(%d,%d) = %d\n", pcb.i, j, recv_schedule[j]);
printf ("\n");
/* unlock (&sm->global_lock); */
} /* dalib_print_recv_sched_values */
#endif
/*********************************************************
* *
* Help function to determine fictive zero of array *
* *
*********************************************************/
unsigned char * dalib_array_zero1 (a, a_size, a_N1)
unsigned char *a;
int a_size, a_N1;
{ int lb, ub, offset;
/* determination of offset */
dalib_local_extensions (pcb.i, a_N1, &lb, &ub);
offset = (lb - 1) * a_size;
return (a - offset);
}
unsigned char *dalib_array_zero2 (a, a_size, a_N1, a_N2)
unsigned char *a;
int a_size, a_N1, a_N2;
{ int lb, ub, offset;
/* determination of offset */
dalib_local_extensions (pcb.i, a_N2, &lb, &ub);
offset = a_N1 * (lb - 1);
offset = offset * a_size;
return (a - offset);
}
/*********************************************************
* *
* PROCEDURES for global SET *
* *
*********************************************************/
/*************************************
* *
* C O P Y *
* *
*************************************/
void dalib_set_copy (a_zero, values, size)
unsigned char *a_zero, *values;
int size;
{ int i, j;
unsigned char *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = values[j] */
ptr = a_zero + recv_schedule[j] * size;
for (i=0;i<size;i++)
ptr[i] = values[j*size + i];
}
} /* dalib_set_copy */
/*************************************
* *
* M I N V A L *
* *
*************************************/
void dalib_set_min_ints (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = min (a[recv-schedule[j], values[j]) */
ptr = a_zero + recv_schedule[j];
if (values[j] < *ptr) *ptr = values[j];
}
} /* dalib_set_min_ints */
void dalib_set_min_reals (a_zero, values)
float *a_zero, *values;
{ int j;
float *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = min (a[recv-schedule[j], values[j]) */
ptr = a_zero + recv_schedule[j];
if (values[j] < *ptr) *ptr = values[j];
}
} /* dalib_set_min_ints */
void dalib_set_min_doubles (a_zero, values)
double *a_zero, *values;
{ int j;
double *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = min (a[recv-schedule[j], values[j]) */
ptr = a_zero + recv_schedule[j];
if (values[j] < *ptr) *ptr = values[j];
}
} /* dalib_set_min_doubles */
/*************************************
* *
* M A X V A L *
* *
*************************************/
void dalib_set_max_ints (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = max (a[recv-schedule[j], values[j]) */
ptr = a_zero + recv_schedule[j];
if (values[j] > *ptr) *ptr = values[j];
}
} /* dalib_set_max_ints */
void dalib_set_max_reals (a_zero, values)
float *a_zero, *values;
{ int j;
float *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = max (a[recv-schedule[j], values[j]) */
ptr = a_zero + recv_schedule[j];
if (values[j] > *ptr) *ptr = values[j];
}
} /* dalib_set_max_ints */
void dalib_set_max_doubles (a_zero, values)
double *a_zero, *values;
{ int j;
double *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = max (a[recv-schedule[j], values[j]) */
ptr = a_zero + recv_schedule[j];
if (values[j] > *ptr) *ptr = values[j];
}
} /* dalib_set_max_doubles */
/*************************************
* *
* S U M *
* *
*************************************/
void dalib_set_add_ints (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] + values[j] */
ptr = a_zero + recv_schedule[j];
*ptr += values[j];
}
} /* dalib_set_add_ints */
void dalib_set_add_reals (a_zero, values)
float *a_zero, *values;
{ int j;
float *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] + values[j] */
ptr = a_zero + recv_schedule[j];
*ptr += values[j];
}
} /* dalib_set_add_reals */
void dalib_set_add_doubles (a_zero, values)
double *a_zero, *values;
{ int j;
double *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] + values[j] */
ptr = a_zero + recv_schedule[j];
*ptr += values[j];
}
} /* dalib_set_add_doubles */
/*************************************
* *
* P R O D U C T *
* *
*************************************/
void dalib_set_mult_ints (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] * values[j] */
ptr = a_zero + recv_schedule[j];
*ptr *= values[j];
}
} /* dalib_set_mult_ints */
void dalib_set_mult_reals (a_zero, values)
float *a_zero, *values;
{ int j;
float *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] * values[j] */
ptr = a_zero + recv_schedule[j];
*ptr *= values[j];
}
} /* dalib_set_mult_reals */
void dalib_set_mult_doubles (a_zero, values)
double *a_zero, *values;
{ int j;
double *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] * values[j] */
ptr = a_zero + recv_schedule[j];
*ptr *= values[j];
}
} /* dalib_set_mult_doubles */
/*************************************
* *
* AND, OR, EOR *
* *
*************************************/
void dalib_set_and_ints (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] & values[j] */
ptr = a_zero + recv_schedule[j];
*ptr = *ptr | values[j];
}
} /* dalib_set_and_ints */
void dalib_set_or_ints (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] | values[j] */
ptr = a_zero + recv_schedule[j];
*ptr = *ptr | values[j];
}
} /* dalib_set_or_ints */
void dalib_set_eor_ints (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] ^ values[j] */
ptr = a_zero + recv_schedule[j];
*ptr = *ptr ^ values[j];
}
} /* dalib_set_eor_ints */
/*************************************
* *
* ALL, ANY, PARITY *
* *
*************************************/
void dalib_set_or_bools (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] || values[j] */
ptr = a_zero + recv_schedule[j];
*ptr = *ptr || values[j];
}
} /* dalib_set_or_bools */
void dalib_set_and_bools (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] || values[j] */
ptr = a_zero + recv_schedule[j];
*ptr = *ptr && values[j];
}
} /* dalib_set_and_bools */
void dalib_set_neq_bools (a_zero, values)
int *a_zero, *values;
{ int j;
int *ptr;
for (j=0; j<recv_values; j++)
{ /* a[recv_schedule[j]] = a[recv-schedule[j] || values[j] */
ptr = a_zero + recv_schedule[j];
if (values[j]) *ptr = !(*ptr);
}
} /* dalib_set_neq_bools */
/*******************************************************************
* *
* dalib_schedule_init_ (b_N, index, index_size, mask) *
* *
* Initialization of a schedule *
* *
* in: b_N (number of elements in the distributed dimension of b) *
* index (index values for array b) *
* index_size (number of elements on this processor) *
* mask (mask for values to be indexed, mask == index pos) *
* *
* out: index_values, send_values, recv_values *
* s_no, r_no *
* *
*******************************************************************/
void dalib_schedule_init_ (b_N, index, index_elems, mask)
/* index must be pointer to distributed integer*4 array
index_elems is number of elements of index locally
b_N is number of elements in the distributed dimension
mask must be pointer to distributed logical*4 array
index == mask means there is no mask */
int *b_N, *index, *index_elems, *mask;
{ int b_elems;
int id, nproc, p; /* processor numbers */
int j, k;
id = pcb.i;
nproc = pcb.p;
index_values = *index_elems;
#ifdef DEBUG
printf ("Process %d starts schedule_init, b_N = %d, indexes = %d\n",
id, *b_N, index_values);
#endif
/* intialize s_no */
s_no = (int *) malloc (4 * nproc);
#ifdef MEIKO_CS2
ew_touchBuf(s_no, sizeof(int)*nproc);
#endif
for (k=0; k<nproc; k++)
s_no [k] = 0;
local_index = (int *) malloc (4 * index_values);
/* saves the processor ids */
/* count number of sending values */
b_elems = *b_N;
if (index == mask) /* no mask */
{ for (j=0; j < index_values;j++)
{ /* which processor has b[index[j]] */
p = (index[j] * nproc - 1) / b_elems + 1;
local_index[j] = p;
/* make sure that p is between 1 and nproc */
if ((p<=0) || (p>nproc))
{ printf ("Index value %d out of range\n", index[j]);
printf ("Fatal ERROR, will exit\n");
exit(-1);
}
s_no [p-1] += 1;
}
send_values = index_values;
}
else /* there is a mask */
{ send_values = 0;
for (j=0; j< index_values;j++)
{ if (mask[j])
{ /* which processor has b[index[j]] */
p = (index[j] * nproc - 1) / b_elems + 1;
local_index[j] = p;
/* make sure that p is between 1 and nproc */
if ((p<=0) || (p>nproc))
{ printf ("Index value %d out of range\n", index[j]);
printf ("Fatal ERROR, will exit\n");
exit(-1);
}
s_no [p-1] += 1;
send_values += 1;
}
else local_index[j] = -1; /* not existing processor */
} /* for */
} /* with mask */
/* now it is known how many elements of index have to be send */
for (k=0; k<nproc; k++)
asend (id, k+1, s_no + k, 4);
# ifdef DEBUG
dalib_print_send_schedule ();
# endif
/* now it will be known how many elements other processors need */
r_no = (int*) malloc (4 * nproc);
#ifdef MEIKO_CS2
ew_touchBuf(r_no, nproc*sizeof(int));
#endif
recv_values = 0;
for (k=0; k<nproc; k++)
{ areceive (id, k+1, r_no + k, 4);
recv_values += r_no[k];
}
# ifdef DEBUG
dalib_print_recv_schedule ();
# endif
/* now every process knows
- how many of my indexes point to the other processors
- how many indexes of other processors point to me
*/
} /* dalib_schedule_init */
/*********************************************************
* *
* dalib_schedule_send_ (b_N, index) *
* *
* Definition of send_schedule, sending it *
* *
* in: b_N (number of elements of b in distrib. dim) *
* index (index values for array b) *
* *
* out: s_ptr, send_schedule *
* indexes will be send *
* *
*********************************************************/
void dalib_schedule_send_ (b_N, index)
int *b_N, *index;
{ int id, nproc, p; /* processor numbers */
int j, k;
id = pcb.i;
nproc = pcb.p;
/* make the s_ptr array */
s_ptr = (int *) malloc (4 * nproc);
s_ptr[0] = 0;
for (k=1; k<nproc; k++)
{ s_ptr [k] = s_ptr[k-1] + s_no [k-1];
}
/* make the send_schedule and local_index array */
send_schedule = (int *) malloc (4 * send_values);
#ifdef MEIKO_CS2
ew_touchBuf(send_schedule, sizeof(int)*send_values);
#endif
for (j=0; j<index_values;j++)
{ /* collect indexes in send_schedule, local_index */
p = local_index[j];
if (p >= 0) /* mask is hidden in local_index */
{ local_index[j] = s_ptr[p-1];
send_schedule [s_ptr[p-1]] = index[j] - 1;
s_ptr[p-1] += 1;
}
}
/* reset the s_ptr array */
s_ptr[0] = 0;
for (k=1; k<nproc; k++)
s_ptr [k] = s_ptr[k-1] + s_no [k-1];
/* send the index values that point to other processors */
for (k=0; k<nproc; k++)
if (s_no[k] > 0)
asend (id, k+1, send_schedule + s_ptr[k], s_no[k] * 4);
# ifdef DEBUG
dalib_print_send_sched_values ();
# endif
free (send_schedule);
} /* dalib_schedule_send */
/*********************************************************
* *
* dalib_schedule_send2_ (b_N1, b_N2, index1, index2 ) *
* *
* Definition of send_schedule, sending it *
* *
* in: b_N (description of indexed array, rank = 2) *
* index1 (index values for array b) *
* index2 (index values for array b) *
* *
* out: s_ptr, send_schedule *
* indexes will be send *
* *
*********************************************************/
void dalib_schedule_send2_ (b_N1, b_N2, index1, index2)
int *b_N1, *b_N2, *index1, *index2;
{ int id, nproc, p; /* processor numbers */
int j, k;
id = pcb.i;
nproc = pcb.p;
/* make the s_ptr array */
s_ptr = (int *) malloc (4 * nproc);
s_ptr[0] = 0;
for (k=1; k<nproc; k++)
{ s_ptr [k] = s_ptr[k-1] + s_no [k-1];
}
/* make the send_schedule and local_index array */
send_schedule = (int *) malloc (4 * send_values);
#ifdef MEIKO_CS2
ew_touchBuf(send_schedule, sizeof(int)*send_values);
#endif
for (j=0; j<index_values;j++)
{ /* collect indexes in send_schedule, local_index */
p = local_index[j];
if (p >= 0) /* mask is hidden in local_index */
{ local_index[j] = s_ptr[p-1];
send_schedule [s_ptr[p-1]] = (index2[j] - 1) * *b_N1
+ (index1[j] - 1) ;
s_ptr[p-1] += 1;
}
}
/* reset the s_ptr array */
s_ptr[0] = 0;
for (k=1; k<nproc; k++)
s_ptr [k] = s_ptr[k-1] + s_no [k-1];
/* send the index values that point to other processors */
for (k=0; k<nproc; k++)
if (s_no[k] > 0)
asend (id, k+1, send_schedule + s_ptr[k], s_no[k] * 4);
# ifdef DEBUG
dalib_print_send_sched_values ();
# endif
free (send_schedule);
} /* dalib_schedule_send2 */
/*********************************************************
* *
* dalib_schedule_recv_ () *
* *
* Definition of recv_schedule, receiving it *
* *
* out: s_ptr, send_schedule *
* indexes will be send *
* *
*********************************************************/
void dalib_schedule_recv_ ()
{ int id, nproc;
int j, k;
id = pcb.i;
nproc = pcb.p;
/* make the r_ptr array */
r_ptr = (int *) malloc (4 * nproc);
/* set r_ptr by using numbers of r_no */
r_ptr[0] = 0;
for (k=1; k<nproc; k++)
r_ptr [k] = r_ptr[k-1] + r_no [k-1];
/* make the recv_schedule array */
recv_schedule = (int *) malloc (recv_values * 4);
#ifdef MEIKO_CS2
ew_touchBuf(recv_schedule, sizeof(int)*recv_values);
#endif
/* receive the indexes */
for (k=0; k<nproc; k++)
if (r_no[k] > 0)
areceive (id, k+1, recv_schedule + r_ptr[k], r_no[k] * 4);
# ifdef DEBUG
dalib_print_recv_sched_values ();
# endif
} /* dalib_schedule_recv */
/*********************************************************
* *
* dalib_switching_init_ (b_N, index, index_elems) *
* *
* Initialization of a schedule of switching *
* *
* in: b_N (siye of b in distributed dimension) *
* index (index values for array b) *
* index_elems (number of elems of A, index) *
* *
* out: send_values, recv_values *
* s_no, r_no *
* *
*********************************************************/
void dalib_switching_init_ (b_N, index, index_elems)
int *b_N;
int *index;
int *index_elems;
/* index must be pointer to distributed integer*4 array */
{ int id, nproc, p; /* processor numbers */
int j, k;
int b_elems; /* is equal *b_N */
id = pcb.i;
nproc = pcb.p;
index_values = *index_elems;
send_values = index_values; /* no mask in this case */
/* intialize s_no */
s_no = (int *) malloc (4 * nproc);
for (k=0; k<nproc; k++)
s_no [k] = 0;
local_index = (int *) malloc (4 * index_values);
/* saves the processor ids */
/* count number of sending values */
b_elems = *b_N;
for (j=0; j<index_values;j++)
{ /* which processor has b[index[j]] */
p = (index[j] * nproc - 1) / b_elems + 1;
local_index[j] = p;
/* make sure that p is between 1 and nproc */
if ((p<=0) || (p>nproc))
{ printf ("Index value %d out of range\n", index[j]);
printf ("Fatal ERROR, will exit\n");
exit(-1);
}
s_no [p-1] += 1;
}
/* now it is known how many elements of index have to be send and recvd */
/* now it will be known how many elements other processors need */
r_no = (int*) malloc (4 * nproc);
recv_values = send_values ;
for (k=0; k<nproc; k++)
r_no[k] = s_no[k];
} /* dalib_switching_init */
/*********************************************************
* *
* dalib_schedule_get_send (b_zero, b_size) *
* *
* - send the requrested values of b to other processes *
* *
* in : - b_zero : my fiktive zero address of b *
* - b_size : number of bytes of one elem in b *
* *
* - uses recv_schedule that tells which elements *
* are needed by the other processors *
* *
* - done by building array of values and sending it *
* *
*********************************************************/
void dalib_schedule_get_send_ (b_zero, b_size)
unsigned char *b_zero; /* fiktive zero address of b */
int *b_size; /* number of bytes for one element of b */
{ int id, nproc;
int j, k, i;
int size;
unsigned char *values; /* arrays with the values */
unsigned char *ptr; /* pointer for copy values */
id = pcb.i;
nproc = pcb.p;
/* Step 1: use recv_schedudule to get values needed by other processes */
/* size of one element of array b */
size = *b_size;
values = (unsigned char *) malloc (recv_values * size);
if (size == 4)
dalib_memget4 (values, b_zero, recv_schedule, recv_values);
else if (size == 8)
dalib_memget8 (values, b_zero, recv_schedule, recv_values);
else
{ for (j=0;j<recv_values;j++)
{ /* values [j] = b [recv_schedule[j]] */
ptr = b_zero + recv_schedule[j] * size;
for (i=0;i<size;i++)
values [j*size+i] = ptr[i];
}
}
/* Step 2: send these values to other processors */
for (k=0; k<nproc; k++)
if (r_no[k] > 0)
asend (id, k+1, values + r_ptr[k]*size, r_no[k] * size);
free (values);
} /* dalib_schedule_get_send_ */
/*********************************************************
* *
* dalib_schedule_get_recv (a, a_size) *
* *
* - get the requested values and put in in my a *
* *
* in : - a : my address of array a *
* - a_size : number of bytes of one elem in a *
* *
* - uses send_schedule that tells which elements *
* processor got by the other processors *
* *
* - use also local_index that tells where to put *
* the received values in my array a *
* *
*********************************************************/
void dalib_schedule_get_recv_ (a, a_size)
int *a_size; /* number of bytes for one element */
unsigned char *a; /* pointer to first element of a */
{ int id, nproc;
int i, j, k;
int size;
unsigned char *values; /* arrays with the received values */
unsigned char *ptr; /* pointer for copy values */
id = pcb.i;
nproc = pcb.p;
/* size of one element of array a */
size = *a_size;
/* Step 1: recv my values from other processors */
values = (unsigned char *) malloc (send_values * size);
#ifdef MEIKO_CS2
ew_touchBuf(values, send_values*size);
#endif
for (k=0; k<nproc; k++)
if (s_no[k] > 0)
areceive (id, k+1, values + s_ptr[k]*size, s_no[k] * size);
/* Step 2: put the values to the right place in array a */
if (size == 4)
dalib_memgetm4 (a, values, local_index, index_values);
else if (size == 8)
dalib_memgetm8 (a, values, local_index, index_values);
else
{ for (j=0; j<index_values; j++)
{ /* a[j] = b [index[j]] */
if (local_index[j] >= 0) /* only if mask */
{ ptr = values + local_index[j] * size;
for (i=0;i<size;i++)
a[j*size+i] = ptr[i];
}
}
}
free (values);
}
/*********************************************************
* *
* dalib_schedule_set_send (a, a_size) *
* *
* - send the values of a to the processors that get it *
* *
* in : - a : my address of array a *
* - a_size : number of bytes of one elem in a *
* *
* - uses local_index that tells how to pack my *
* elements to be sent to other processors *
* *
*********************************************************/
void dalib_schedule_set_send_ (a, a_size)
int *a_size;
unsigned char *a;
{ int id, nproc;
int j, k, i;
int size;
unsigned char *values; /* arrays with the values */
unsigned char *ptr; /* pointer for copy values */
id = pcb.i;
nproc = pcb.p;
/* Step 1: use recv_schedudule to get values needed by other processes */
/* size of one element of array a */
size = *a_size;
values = (unsigned char *) malloc (send_values * size);
#ifdef MEIKO_CS2
ew_touchBuf(values, send_values*size);
#endif
for (j=0;j<index_values;j++)
{ /* values [local_index[j]] = a [j] */
if (local_index[j] >= 0)
{ ptr = values + local_index[j]*size;
for (i=0;i<size;i++)
ptr[i] = a[j*size+i];
}
}
/* Step 2: send these values to other processors */
for (k=0; k<nproc; k++)
if (s_no[k] > 0)
asend (id, k+1, values + s_ptr[k]*size, s_no[k] * size);
free (values);
} /* dalib_schedule_set_send_ */
/*********************************************************
* *
* dalib_schedule_set_recv_ (b_zero, b_size) *
* *
* receive values from other processors and are *
* stored/scattered in local part of array b *
* *
* in : - b_zero is fiktive zero address of my b *
* - b_size is number of bytes for one elem of b *
* *
* - uses recv_schedule that tells where I have *
* to put the received values *
* *
*********************************************************/
void dalib_schedule_set_recv_ (b_zero, b_size)
int *b_size;
unsigned char *b_zero;
{ int id, nproc;
int i, j, k;
int size;
unsigned char *values; /* arrays with the received values */
unsigned char *ptr; /* pointer for copy values */
id = pcb.i;
nproc = pcb.p;
/* size of one element of array b */
size = *b_size;
/* Step 1: recv my values from other processors */
values = (unsigned char *) malloc (recv_values * size);
#ifdef MEIKO_CS2
ew_touchBuf(values, recv_values*size);
#endif
for (k=0; k<nproc; k++)
if (r_no[k] > 0)
areceive (id, k+1, values + r_ptr[k]*size, r_no[k] * size);
/* Step 2: put the values to the right place in local array b */
switch (operation) {
case 0 : dalib_set_copy (b_zero, values, size); break;
case 1 : dalib_set_min_ints (b_zero, values); break;
case 2 : dalib_set_min_reals (b_zero, values); break;
case 3 : dalib_set_min_doubles (b_zero, values); break;
case 4 : dalib_set_max_ints (b_zero, values); break;
case 5 : dalib_set_max_reals (b_zero, values); break;
case 6 : dalib_set_max_doubles (b_zero, values); break;
case 7 : dalib_set_add_ints (b_zero, values); break;
case 8 : dalib_set_add_reals (b_zero, values); break;
case 9 : dalib_set_add_doubles (b_zero, values); break;
case 10 : dalib_set_mult_ints (b_zero, values); break;
case 11 : dalib_set_mult_reals (b_zero, values); break;
case 12 : dalib_set_mult_doubles (b_zero, values); break;
case 13 : dalib_set_and_ints (b_zero, values); break;
case 14 : dalib_set_or_ints (b_zero, values); break;
case 15 : dalib_set_eor_ints (b_zero, values); break;
case 16 : dalib_set_and_bools (b_zero, values); break;
case 17 : dalib_set_or_bools (b_zero, values); break;
case 18 : dalib_set_neq_bools (b_zero, values); break;
default : dalib_schedule_error ("unknown op for global set"); break;
}
free (values);
}
/*********************************************************
* *
* dalib_schedule_exit_ () *
* *
* Reset of a schedule, frees the memory *
* *
*********************************************************/
void dalib_schedule_exit_ ()
{
free (recv_schedule);
free (r_ptr);
free (r_no);
free (local_index);
/* free (send_schedule) has already been done */
free (s_ptr);
free (s_no);
} /* dalib_schedule_exit */
/***********************************************************************
* *
* Check Routines *
* *
* check1 : A , B [P], MASK *
* check2 : A , B [P,Q], MASK *
* *
* - P, Q must be integer*4 *
* - MASK must be logical*4 *
* - A, B must be of same type *
* - A, P, Q, MASK must be of same shape *
* - rank of B must be 1 (check1) or 2 (check2) *
* *
***********************************************************************/
/*******************************************************************
* *
* MASK: A = B [P] : A[j] = B[P[j]] for all j with MASK(j) *
* *
* void dalib_global_getm1_ (a, a_size, a_elems, *
* b, b_N1, p, mask ) *
* *
*******************************************************************/
void dalib_global_getm1_ (a, a_size, a_elems, b, b_N1, p, mask)
int *a_size; /* number of local elements of a and p */
int *a_elems; /* number of elemens in a, p */
int *b_N1; /* number of elements of b */
int *p, *mask; /* p == mask if mask is not available */
unsigned char *a, *b;
{ unsigned char *b_zero;
/* Step 1: init schedule */
/* a_elems is also number of local elements for index array p */
dalib_schedule_init_ (b_N1, p, a_elems, mask);
/* Step 2: define schedule */
dalib_schedule_send_ (b_N1, p);
dalib_schedule_recv_ ();
/* Step 3: make get with schedule */
b_zero = dalib_array_zero1 (b, *a_size, *b_N1);
dalib_schedule_get_send_ (b_zero, a_size); /* send requested values of b */
dalib_schedule_get_recv_ (a, a_size);
/* Step 4: release schedule */
dalib_schedule_exit_ ();
} /* dalib_global_getm1 */
/*******************************************************************
* *
* MASK: B [P] = A : B[P[j]] = A[j] for all j with MASK(j) *
* *
* void dalib_global_setm1_ (op, b, b_N1, p, *
* a, a_size, a_elems, mask) *
* *
*******************************************************************/
void dalib_global_setm1_ (op, b, b_N1, p, a, a_size, a_elems, mask)
unsigned char *a, *b;
int *op;
int *a_size, *a_elems;
int *p, *mask;
int *b_N1;
{ unsigned char *b_zero;
/* make assertions */
operation = *op; /* global set for operation */
/* Step 1: init schedule */
/* a_elems is also number of local elements for index array p */
dalib_schedule_init_ (b_N1, p, a_elems, mask);
/* Step 2: define schedule */
dalib_schedule_send_ (b_N1, p);
dalib_schedule_recv_ ();
/* Step 3: make set with schedule */
dalib_schedule_set_send_ (a, a_size);
b_zero = dalib_array_zero1 (b, *a_size, *b_N1);
dalib_schedule_set_recv_ (b_zero, a_size);
/* Step 4: release schedule */
dalib_schedule_exit_ ();
} /* dalib_global_setm1 */
/***********************************************************************
* *
* MASK: A = B [P,Q] : A[j] = B[P[j],Q[j]] for all j with MASK(j) *
* *
* void dalib_global_getm2_ (a, a_size, a_elems, *
* b, b_N1, b_N2, p, q, mask ) *
* *
***********************************************************************/
void dalib_global_getm2_ (a, a_size, a_elems, b, b_N1, b_N2, p, q, mask)
int *a_size; /* number of bytes for elem size of a and b */
int *a_elems; /* number of local elements in a, p, q */
int *b_N1, *b_N2; /* global dimensions of b */
int *p, *q, *mask; /* q == mask if mask is not available */
unsigned char *a, *b;
{ unsigned char *b_zero;
/* make assertions */
#ifdef DEBUG
printf ("Process %d starts getm2\n", pcb.i);
#endif
/* Step 1: init schedule */
/* a_elems is also number of local elements for index array q */
dalib_schedule_init_ (b_N2, q, a_elems, mask);
/* Step 2: define schedule */
dalib_schedule_send2_ (b_N1, b_N2, p, q);
dalib_schedule_recv_ ();
/* Step 3: make get with schedule */
b_zero = dalib_array_zero2 (b, *a_size, *b_N1, *b_N2);
dalib_schedule_get_send_ (b_zero, a_size);
dalib_schedule_get_recv_ (a, a_size);
/* Step 4: release schedule */
dalib_schedule_exit_ ();
} /* dalib_global_getm2 */
/***********************************************************************
* *
* MASK: B [P,Q] = A : B[P[j],Q[j]] = A[j] for all j with MASK(j) *
* *
* void dalib_global_setm2_ (op, b, b_N1, b-N2, p, q *
* a, a_size, a_elems, mask) *
* *
***********************************************************************/
void dalib_global_setm2_ (op, b, b_N1, b_N2, p, q,
a, a_size, a_elems, mask)
int *op;
int *a_size; /* number of bytes for elem size of a and b */
int *a_elems; /* number of local elements in a, p, q */
int *b_N1, *b_N2; /* global dimensions of b */
int *p, *q, *mask; /* q == mask if mask is not available */
unsigned char *a, *b;
{ unsigned char *b_zero;
operation = *op; /* global set of scatter operation */
/* Step 1: init schedule */
/* a_elems is also number of local elements for index array q */
dalib_schedule_init_ (b_N2, q, a_elems, mask);
/* Step 2: define schedule */
dalib_schedule_send2_ (b_N1, b_N2, p, q);
dalib_schedule_recv_ ();
/* Step 3: make set with schedule */
dalib_schedule_set_send_ (a, a_size); /* values of b will be send */
b_zero = dalib_array_zero2 (b, *a_size, *b_N1, *b_N2);
dalib_schedule_set_recv_ (b_zero, a_size); /* recv values in b */
/* Step 4: release schedule */
dalib_schedule_exit_ ();
} /* dalib_global_setm2 */
/*******************************************************************
* *
* FORTRAN INTERFACE *
* *
* void dalib_global_switch_ (a, a_size, a_elems, b, b_N1, p) *
* *
*******************************************************************/
void dalib_global_switch_ (a, a_size, a_elems, b, b_N1, p)
unsigned char *a, *b;
int *a_size, *a_elems, *b_N1;
int *p;
{ unsigned char *b_zero;
/* Step 1: init schedule */
dalib_switching_init_ (b_N1, p, a_elems);
/* Step 2: define schedule */
dalib_schedule_send_ (b_N1, p);
dalib_schedule_recv_ ();
/* Step 3: make get with schedule */
b_zero = dalib_array_zero1 (b, *a_size, *b_N1);
dalib_schedule_get_send_ (b_zero, a_size);
dalib_schedule_get_recv_ (a, a_size);
/* Step 4: release schedule */
dalib_schedule_exit_ ();
} /* dalib_global_switch */
/*******************************************************************
* *
* FORTRAN INTERFACE *
* *
* - subroutines without a mask parameter *
* *
*******************************************************************/
void dalib_global_get1_ (a, a_size, a_elems, b, b_N1, p)
int *a_size; /* size of elements of a and b in Bytes */
int *a_elems; /* number of elements in a and p */
int *b_N1; /* number of elements of b */
int *p;
unsigned char *a, *b;
{ /* mask = p means no mask */
dalib_global_getm1_ (a, a_size, a_elems, b, b_N1, p, p);
}
void dalib_global_get2_ (a, a_size, a_elems, b, b_N1, b_N2, p, q)
int *a_size; /* number of bytes for elem size of a and b */
int *a_elems; /* number of local elements in a, p, q */
int *b_N1, b_N2; /* global dimensions of b */
int *p, *q;
unsigned char *a, *b;
{ /* mask = p means no mask */
dalib_global_getm2_ (a, a_size, a_elems, b, b_N1, b_N2, p, q, q);
}
/*******************************************************************
* *
* FORTRAN INTERFACE *
* *
* - splitted subroutines to reuse a fixed schedule *
* *
*******************************************************************/
void dalib_schedule_fix1__ (b_N1, p, p_elems, mask)
int *p_elems; /* number of elemens in p */
int *b_N1; /* number of elements of b */
int *p, *mask; /* p == mask if mask is not available */
{ dalib_schedule_init_ (b_N1, p, p_elems, mask);
dalib_schedule_send_ (b_N1, p);
dalib_schedule_recv_ ();
}
void dalib_schedule_fix2__ (b_N1, b_N2, p, q, q_elems, mask)
int *q_elems; /* number of elemens in q */
int *b_N1; /* number of elements of b */
int *b_N2; /* number of elements of b */
int *p, *q, *mask; /* q == mask if mask is not available */
{ dalib_schedule_init_ (b_N1, q, q_elems, mask);
dalib_schedule_send2_ (b_N1, b_N2, p, q);
dalib_schedule_recv_ ();
}
void dalib_schedule_get1__ (b, b_N1, a, size)
unsigned char *b, *a;
int *b_N1, *size;
{ unsigned char *b_zero;
b_zero = dalib_array_zero1 (b, *size, *b_N1);
dalib_schedule_get_send_ (b_zero, size);
dalib_schedule_get_recv_ (a, size);
}
void dalib_schedule_get2__ (b, b_N1, b_N2, a, size)
unsigned char *b, *a;
int *b_N1, *b_N2, *size;
{ unsigned char *b_zero;
b_zero = dalib_array_zero2 (b, *size, *b_N1, *b_N2);
dalib_schedule_get_send_ (b_zero, size);
dalib_schedule_get_recv_ (a, size);
}
void dalib_schedule_set1__ (op, b, b_N1, a, a_size)
unsigned char *a, *b;
int *op;
int *a_size, *b_N1;
{ unsigned char *b_zero;
operation = *op; /* global set for operation */
dalib_schedule_set_send_ (a, a_size);
b_zero = dalib_array_zero1 (b, *a_size, *b_N1);
dalib_schedule_set_recv_ (b_zero, a_size);
} /* dalib_schedule_set1 */
void dalib_schedule_set2__ (op, b, b_N1, b_N2, a, a_size)
unsigned char *a, *b;
int *op;
int *a_size, *b_N1, *b_N2;
{ unsigned char *b_zero;
operation = *op; /* global set for operation */
dalib_schedule_set_send_ (a, a_size);
b_zero = dalib_array_zero1 (b, *a_size, *b_N1, *b_N2);
dalib_schedule_set_recv_ (b_zero, a_size);
} /* dalib_schedule_set2 */