Here is a first simple program to compute Pi, extracted from the book "Using MPI" by W. Gropp, E. Lusk, and A. Skjellum, p. 25. It makes use of the broadcast (Bcast) and reduce (Reduce) collective operations.
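The method is numerical integration by the midpoint rule. Since \pi = \int_0^1 \frac{4}{1+x^2}\,dx, the program evaluates h \sum_{i=1}^{n} \frac{4}{1+x_i^2} with h = 1/n and x_i = h(i - 1/2). The loop stride of size deals the indices i = rank+1, rank+1+size, ... out to the processes in round-robin fashion, and the final Reduce with MPI.SUM collects the partial sums on rank 0.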
import p2pmpi.mpi.*;
public class Pi {
public static void main(String[] args) {
int rank, size, i;
double PI25DT = 3.141592653589793238462643;
double h, sum, x;
MPI.Init(args);
double startTime = MPI.Wtime();
size = MPI.COMM_WORLD.Size();
rank = MPI.COMM_WORLD.Rank();
int[] n = new int[1];
double[] mypi = new double[1];
double[] pi = new double[1];
if(rank == 0) {
n[0] = 1000000; // number of intervals
}
MPI.COMM_WORLD.Bcast(n, 0, 1, MPI.INT, 0);
h = 1.0 / (double)n[0];
sum = 0.0;
for(i = rank + 1; i <= n[0]; i+= size) {
x = h * ((double)i - 0.5);
sum += (4.0/(1.0 + x*x));
}
mypi[0] = h * sum;
MPI.COMM_WORLD.Reduce(mypi, 0, pi, 0, 1, MPI.DOUBLE, MPI.SUM, 0);
if(rank == 0) {
System.out.println("Pi is approximately " + pi[0]);
System.out.println("Error is " + (pi[0] - PI25DT));
double stopTime = MPI.Wtime();
System.out.println("Time usage = " + (stopTime - startTime) + " ms");
}
MPI.Finalize();
}
}
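Note the one-element arrays n, mypi, and pi above: Java has no pointers or out-parameters, so P2P-MPI passes every communication buffer as an array plus an explicit offset, even for scalar values. The same idiom recurs throughout the examples below.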
This example, a master/worker matrix multiplication, demonstrates the use of the point-to-point operations Send and Recv.
import p2pmpi.mpi.*;
public class MatrixPar {
public static void main(String[] args) {
int N = 1500;
int MASTER = 0;
int FROM_MASTER = 1;
int FROM_WORKER = 2;
int numtasks, /* number of tasks in partition */
taskid, /* a task identifier */
numworkers, /* number of worker tasks */
source, /* task id of message source */
dest, /* task id of message destination */
nbytes, /* number of bytes in message */
mtype, /* message type */
intsize, /* size of an integer in bytes */
dbsize, /* size of a double float in bytes */
averow, extra, /* used to determine rows sent to each worker */
i, j, k, /* misc */
count;
int[] a = new int[N*N]; /* matrix A to be multiplied */
int[] b = new int[N*N]; /* matrix B to be multiplied */
int[] c = new int[N*N]; /* result matrix C */
int[] offset = new int[1];
int[] rows = new int[1]; /* rows of matrix A sent to each worker */
long[] computeTime = new long[1];
long[] maxComputeTime = new long[1];
MPI.Init(args);
taskid = MPI.COMM_WORLD.Rank();
numtasks = MPI.COMM_WORLD.Size();
numworkers = numtasks - 1;
/* *************** Master Task ****************** */
if(taskid == MASTER) {
//Init matrix A,B
for(i = 0; i < N; i++) {
for(j = 0; j < N; j++) {
a[(i*N)+j] = 1;
b[(i*N)+j] = 2;
}
}
// Send matrix data to worker tasks
long start = System.currentTimeMillis();
averow = N / numworkers;
extra = N % numworkers;
offset[0] = 0;
mtype = FROM_MASTER;
long startsend = System.currentTimeMillis();
for(dest = 1; dest <= numworkers; dest++) {
if(dest <= extra) {
rows[0] = averow + 1;
} else {
rows[0] = averow;
}
MPI.COMM_WORLD.Send(offset, 0, 1, MPI.INT, dest, mtype);
MPI.COMM_WORLD.Send(rows, 0, 1, MPI.INT, dest, mtype);
count = rows[0] * N;
MPI.COMM_WORLD.Send(a, (offset[0]*N), count, MPI.INT, dest, mtype);
count = N*N;
MPI.COMM_WORLD.Send(b, 0, count, MPI.INT, dest, mtype);
offset[0] = offset[0] + rows[0];
}
long stopsend = System.currentTimeMillis();
// Wait for results from all worker tasks
computeTime[0] = 0;
mtype = FROM_WORKER;
for(i = 1; i <= numworkers; i++) {
source = i;
MPI.COMM_WORLD.Recv(computeTime, 0, 1, MPI.LONG, source, mtype);
System.out.println("Rank " + i + " uses " + computeTime[0] + " for computing");
MPI.COMM_WORLD.Recv(offset, 0, 1, MPI.INT, source, mtype);
MPI.COMM_WORLD.Recv(rows, 0, 1, MPI.INT, source, mtype);
count = rows[0] * N;
MPI.COMM_WORLD.Recv(c, offset[0]*N, count, MPI.INT, source, mtype);
}
long stop = System.currentTimeMillis();
//System.out.println("Result of matrix c[0] = " + c[0] + ", c[1000*1000] = " + c[100*100]);
System.out.println("Time Usage = " + (stop - start));
System.out.println("Sending Time Usage = " + (stopsend - startsend));
}
/* *************************** worker task *********************************** */
if(taskid > MASTER) {
mtype = FROM_MASTER;
source = MASTER;
MPI.COMM_WORLD.Recv(offset, 0, 1, MPI.INT, source, mtype);
MPI.COMM_WORLD.Recv(rows, 0, 1, MPI.INT, source, mtype);
count = rows[0] * N;
MPI.COMM_WORLD.Recv(a, 0, count, MPI.INT, source, mtype);
count = N * N;
MPI.COMM_WORLD.Recv(b, 0, count, MPI.INT, source, mtype);
long startCompute = System.currentTimeMillis();
for(i = 0; i < rows[0]; i++) {
for(k = 0; k < N; k++) {
c[(i*N)+k] = 0;
for(j = 0; j < N; j++) {
c[(i*N)+k] = c[(i*N)+k] + a[(i*N)+j] * b[(j*N)+k];
}
}
}
long stopCompute = System.currentTimeMillis();
computeTime[0] = (stopCompute - startCompute);
mtype = FROM_WORKER;
MPI.COMM_WORLD.Send(computeTime, 0, 1, MPI.LONG, MASTER, mtype);
MPI.COMM_WORLD.Send(offset, 0, 1, MPI.INT, MASTER, mtype);
MPI.COMM_WORLD.Send(rows, 0, 1, MPI.INT, MASTER, mtype);
MPI.COMM_WORLD.Send(c, 0, rows[0]*N, MPI.INT, MASTER, mtype);
}
MPI.COMM_WORLD.Reduce(computeTime, 0, maxComputeTime, 0, 1, MPI.LONG, MPI.MAX, 0);
if(taskid == 0) {
System.out.println("Max compute time/machine = " + maxComputeTime[0]);
}
MPI.Finalize();
}
}
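For reference, the point-to-point signatures used above are Send(buffer, offset, count, datatype, dest, tag) and the matching Recv(buffer, offset, count, datatype, source, tag). The following minimal round-trip sketch isolates that pattern; it assumes the same p2pmpi API as the listing above (the class name PingPong and the tag value 7 are our own choices):
import p2pmpi.mpi.*;
public class PingPong {
public static void main(String[] args) {
MPI.Init(args);
int rank = MPI.COMM_WORLD.Rank();
int[] buf = new int[1];
if(rank == 0) {
buf[0] = 42;
MPI.COMM_WORLD.Send(buf, 0, 1, MPI.INT, 1, 7); // buffer, offset, count, datatype, dest, tag
MPI.COMM_WORLD.Recv(buf, 0, 1, MPI.INT, 1, 7); // blocks until the echo arrives
System.out.println("Rank 0 received back " + buf[0]);
} else if(rank == 1) {
MPI.COMM_WORLD.Recv(buf, 0, 1, MPI.INT, 0, 7);
MPI.COMM_WORLD.Send(buf, 0, 1, MPI.INT, 0, 7); // echo the value back to rank 0
}
MPI.Finalize();
}
}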
This program is an implementation of the algorithm called LCR, which solves the problem of distributed leader election on a ring. It uses Send, Irecv, and Wait, with equivalents of MPI_ANY_TAG and MPI_ANY_SOURCE.
/**
* Election algorithm on a ring, "LCR" after the names of Le Lann, Chang, and Roberts.
*
* Communication is unidirectional. The size of the ring is not known.
* Only the leader performs output as leader.
* The algorithm relies on comparing the UIDs of the nodes:
* the process with the largest UID is elected leader.
* Each process sends its identifier around the ring; when a process receives
* an incoming identifier, it compares that identifier to its own.
* If the incoming identifier is greater than its own, it keeps passing the identifier;
* if it is less than its own, it discards it;
* and if it is equal to its own, the process declares itself leader.
*
* The leader declares itself leader after n rounds (n = size of the ring)
**/
import p2pmpi.mpi.*;
import java.util.Random;
public class ElectionLCR {
public static void main(String[] args) {
int rank, size, i, nbround;
int TAGUID=1;
int TAGSTATE=2;
MPI.Init(args);
double startTime;
Request req = null;
Status status = null;
size = MPI.COMM_WORLD.Size();
rank = MPI.COMM_WORLD.Rank();
int mynum;
int[] r = new int[1];
int[] s = new int[1];
int e;
for (e=0;e<200;e++) {
System.out.println("\nElection number : "+e);
System.out.println("=======================================================");
// generates a uid (assumed to be unique)
mynum = MPI.Rand(1000);
s[0] = mynum;
System.out.println("[rank " + rank + "] generates uid="+mynum+". Let us start.");
nbround=1;
startTime = MPI.Wtime();
MPI.COMM_WORLD.Send(s, 0, 1, MPI.INT, (rank+1)%size,TAGUID);
// loop receiving messages from the left neighbour on the ring,
while (true) {
req = MPI.COMM_WORLD.Irecv(r,0,1, MPI.INT, (rank == 0 ? size-1 : rank-1), MPI.ANY_TAG);
status = req.Wait();
// ----- Election phase -------
if (status.tag == TAGUID) {
if ( r[0] > s[0] ) {
MPI.COMM_WORLD.Send(r, 0, 1, MPI.INT, (rank+1)%size,TAGUID);
}
else {
if (r[0]==s[0]) {
System.out.println("[rank " + rank + "] After "+nbround+" rounds, I know I am the (unique) leader with "+s[0]);
// I am the unique leader: now initiate another round to broadcast a halting state
MPI.COMM_WORLD.Send(r, 0, 1, MPI.INT, (rank+1)%size,TAGSTATE);
// OK, the message will eventually come back. Consume the message and stop after this.
MPI.COMM_WORLD.Recv(r, 0, 1, MPI.INT, (rank == 0 ? size-1 : rank-1),TAGSTATE);
break;
}
// else ( r < s ) do nothing
}
}
// ---- Halting phase -------
if (status.tag == TAGSTATE) {
System.out.println("[rank " + rank + "] i just get informed "+r[0]+" is elected.");
MPI.COMM_WORLD.Send(r, 0, 1, MPI.INT, (rank+1)%size,TAGSTATE);
break;
}
nbround++;
}
double stopTime = MPI.Wtime();
System.out.println("Time usage = " + (stopTime - startTime) + " ms");
System.out.println("Number of iterations: "+nbround);
}
MPI.Finalize();
}
}
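The election loop above shows the nonblocking pattern: Irecv returns a Request, and calling Wait() on that request blocks until the message arrives and returns a Status whose tag field identifies the matched message. Here is that pattern in isolation, a minimal sketch assuming the same p2pmpi API; the status.source field is an assumption by analogy with status.tag, and MPI.ANY_SOURCE is taken from the description above (the election code itself only uses MPI.ANY_TAG):
import p2pmpi.mpi.*;
public class WildcardRecv {
public static void main(String[] args) {
MPI.Init(args);
int rank = MPI.COMM_WORLD.Rank();
int[] buf = new int[1];
if(rank == 0) {
// post a nonblocking receive matching any source and any tag
Request req = MPI.COMM_WORLD.Irecv(buf, 0, 1, MPI.INT, MPI.ANY_SOURCE, MPI.ANY_TAG);
Status status = req.Wait(); // blocks until a matching message arrives
// status.source is assumed here by analogy with the status.tag field used above
System.out.println("Got " + buf[0] + " (tag " + status.tag + ", source " + status.source + ")");
} else if(rank == 1) {
buf[0] = 99;
MPI.COMM_WORLD.Send(buf, 0, 1, MPI.INT, 0, 5);
}
MPI.Finalize();
}
}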
This program is a Java translation of the C implementation of IS (Integer Sort) from the NAS Parallel Benchmarks (NPB 3.2). It uses the Alltoall, Alltoallv, Allreduce, and Reduce collective operations.
import p2pmpi.mpi.*;
public class Is {
double[] start;
double[] elapsed;
static final char CLASS = 'A';
static final int NUM_PROCS = 4;
static final int MAX_PROCS = 512;
static final int MAX_ITERATIONS = 10;
static final int TEST_ARRAY_SIZE = 5;
int TOTAL_KEYS_LOG_2;
int MAX_KEY_LOG_2;
int NUM_BUCKETS_LOG_2;
int TOTAL_KEYS;
int MAX_KEY;
int NUM_BUCKETS;
int NUM_KEYS;
int SIZE_OF_BUFFERS;
int my_rank, comm_size;
// Some Global Info
//int[] key_buff_ptr_global;
int total_local_keys, total_lesser_keys;
int passed_verification;
// These are the three main arrays
// See SIZE_OF_BUFFERS def above
int[] key_array;
int[] key_buff1;
int[] key_buff2;
int[] bucket_size;
int[] bucket_size_totals;
int[] bucket_ptrs;
int[] process_bucket_distrib_ptr1;
int[] process_bucket_distrib_ptr2;
int[] send_count;
int[] recv_count;
int[] send_displ;
int[] recv_displ;
int min_key_val;
// Partial verif info
int[] test_index_array;
int[] test_rank_array;
int[] S_test_index_array = {48427, 17148, 23627, 62548, 4431};
int[] S_test_rank_array = {0, 18, 346, 64917, 65463};
int[] W_test_index_array = {357773,934767,875723,898999,404505};
int[] W_test_rank_array = {1249,11698,1039987,1043896,1048018};
int[] A_test_index_array = {2112377,662041,5336171,3642833,4250760};
int[] A_test_rank_array = {104,17523,123928,8288932,8388264};
int[] B_test_index_array = {41869,812306,5102857,18232239,26860214};
int[] B_test_rank_array = {33422937,10244,59149,33135281,99};
int[] C_test_index_array = {44172927,72999161,74326391,129606274,21736814};
int[] C_test_rank_array = {61147,882988,266290,133997595,133525895};
// Used in randlc: Java does not support static local variables,
// so these are kept as fields of the class.
int KS = 0;
double R23, R46, T23, T46;
Is() {
start = new double[64];
elapsed = new double[64];
switch(CLASS) {
case 'S' :
TOTAL_KEYS_LOG_2 = 16;
MAX_KEY_LOG_2 = 11;
NUM_BUCKETS_LOG_2 = 9;
break;
case 'W' :
TOTAL_KEYS_LOG_2 = 20;
MAX_KEY_LOG_2 = 16;
NUM_BUCKETS_LOG_2 = 10;
break;
case 'A' :
TOTAL_KEYS_LOG_2 = 23;
MAX_KEY_LOG_2 = 19;
NUM_BUCKETS_LOG_2 = 10;
break;
case 'B' :
TOTAL_KEYS_LOG_2 = 25;
MAX_KEY_LOG_2 = 21;
NUM_BUCKETS_LOG_2 = 10;
break;
case 'C' :
TOTAL_KEYS_LOG_2 = 27;
MAX_KEY_LOG_2 = 23;
NUM_BUCKETS_LOG_2 = 10;
break;
}
TOTAL_KEYS = (1 << TOTAL_KEYS_LOG_2);
MAX_KEY = (1 << MAX_KEY_LOG_2);
NUM_BUCKETS = (1 << NUM_BUCKETS_LOG_2);
NUM_KEYS = (TOTAL_KEYS/NUM_PROCS);
if(NUM_PROCS < 256) {
SIZE_OF_BUFFERS = 3*NUM_KEYS/2;
} else {
SIZE_OF_BUFFERS = 6*NUM_KEYS;
}
System.out.println("Size of buffers = " + SIZE_OF_BUFFERS);
key_array = new int[SIZE_OF_BUFFERS];
key_buff1 = new int[SIZE_OF_BUFFERS];
key_buff2 = new int[SIZE_OF_BUFFERS];
bucket_size = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
bucket_size_totals = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
bucket_ptrs = new int[NUM_BUCKETS];
process_bucket_distrib_ptr1 = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
process_bucket_distrib_ptr2 = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
send_count = new int[MAX_PROCS];
recv_count = new int[MAX_PROCS];
send_displ = new int[MAX_PROCS];
recv_displ = new int[MAX_PROCS];
test_index_array = new int[TEST_ARRAY_SIZE];
test_rank_array = new int[TEST_ARRAY_SIZE];
}
void c_print_results( String name,
char tclass,
int n1,
int n2,
int n3,
int niter,
int nprocs_compiled,
int nprocs_total,
double t,
double mops,
String optype,
int passed_verification,
String npbversion,
String compiletime,
String mpicc,
String clink,
String cmpi_lib,
String cmpi_inc,
String cflags,
String clinkflags)
{
String evalue = "1000";
System.out.println("\n\n" + name + " Benchmark Completed");
System.out.println(" Class = " + tclass);
if(n2 == 0 && n3 == 0)
System.out.println("Size = " + n1);
else
System.out.println("Size = ");
}
void timer_clear(int n) {
elapsed[n] = 0.0;
}
void timer_start(int n) {
start[n] = MPI.Wtime();
}
void timer_stop(int n) {
double t, now;
now = MPI.Wtime();
t = now - start[n];
elapsed[n] += t;
}
double timer_read(int n) {
return(elapsed[n]);
}
/**********************************************************************
***************** R A N D L C ******************
***************** ******************
***************** portable random number generator ******************
*********************************************************************/
double randlc(Object X, double A)
{
double T1, T2, T3, T4;
double A1;
double A2;
double X1;
double X2;
double Z;
int i, j;
double[] lX = (double[])X;
if(KS == 0) {
R23 = 1.0;
R46 = 1.0;
T23 = 1.0;
T46 = 1.0;
for(i = 1; i <= 23; i++) {
R23 = 0.50 * R23;
T23 = 2.0 * T23;
}
for(i = 1; i <= 46; i++) {
R46 = 0.50 * R46;
T46 = 2.0 * T46;
}
KS = 1;
}
/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N */
T1 = R23 * A;
j = (int)T1;
A1 = j;
A2 = A - T23 * A1;
/* Break X into two parts such that X = 2^23 * X1 + X2, compute
* Z = A* X2 + A2 * X1 (mod 2^23) and then
* X = 2^23 * Z + A2 * X2 (mod 2^46) */
T1 = R23 * lX[0];
j = (int)T1;
X1 = j;
X2 = lX[0] - T23 * X1;
T1 = A1 * X2 + A2 * X1;
j = (int)(R23 * T1);
T2 = j;
Z = T1 - T23 * T2;
T3 = T23 * Z + A2 * X2;
j = (int)(R46 * T3);
T4 = j;
lX[0] = T3 - T46 * T4;
return (R46 * lX[0]);
}
double find_my_seed(long kn, long np, long nn, double s, double a)
{
long i;
double t3, an;
long mq, nq, kk, ik;
double[] t1 = new double[1];
double[] t2 = new double[1];
nq = nn / np;
for(mq = 0; nq > 1; mq++, nq/=2);
t1[0] = a;
for(i = 1; i <= mq; i++) {
t2[0] = randlc(t1, t1[0]);
}
an = t1[0];
kk = kn;
t1[0] = s;
t2[0] = an;
for(i = 1; i <= 100; i++) {
ik = kk/2;
if(2 * ik != kk)
t3 = randlc(t1, t2[0]);
if(ik == 0)
break;
t3 = randlc(t2, t2[0]);
kk = ik;
}
return t1[0];
}
void create_seq(double seed, double a) {
double x;
int i, k;
double seeda[] = new double[1];
seeda[0] = seed;
k = MAX_KEY / 4;
for(i = 0; i < NUM_KEYS; i++) {
x = randlc(seeda, a);
x += randlc(seeda, a);
x += randlc(seeda, a);
x += randlc(seeda, a);
key_array[i] = (int)(k*x);
}
}
void full_verify() {
int i, j, k;
int [] ka = new int[1];
Request req = null;
for(i = 0; i < total_local_keys; i++) {
//CHANGE: key_array[--key_buff_ptr_global[key_buff2[i]] -
key_array[--(key_buff1[key_buff2[i] - min_key_val]) -
total_lesser_keys] = key_buff2[i];
}
if(my_rank > 0) {
req = MPI.COMM_WORLD.Irecv(ka, 0, 1,
MPI.INT, my_rank -1,
1000);
}
if(my_rank < comm_size-1) {
MPI.COMM_WORLD.Send(key_array, total_local_keys-1,
1, MPI.INT, my_rank+1, 1000);
}
if(my_rank > 0) {
//MPI.COMM_WORLD.Recv(ka, 0, 1, MPI.INT, my_rank-1, 1000);
req.Wait();
}
k = ka[0];
// Confirm that the neighbor's greatest key value
// is not greater than my least key value
j = 0;
if(my_rank > 0)
if(k > key_array[0])
j++;
// Confirm keys correctly sorted : count incorrectly sorted keys, if any
for(i = 1; i < total_local_keys; i++)
if(key_array[i - 1] > key_array[i])
j++;
if(j != 0) {
//System.out.println("Processor " + my_rank + ": Full_verify: number of key out of sort:" + j);
} else {
passed_verification++;
}
}
//********************************************************************
//**************** RANK **********************
//********************************************************************
void rank(int iteration) {
int i, j, k, m;
int shift = MAX_KEY_LOG_2 - NUM_BUCKETS_LOG_2;
int key;
int bucket_sum_accumulator;
int local_bucket_sum_accumulator;
int max_key_val;
//CHANGE: int key_buff_ptr;
//long rankTimer = System.currentTimeMillis();
// Iteration alteration of keys
if(my_rank == 0) {
key_array[iteration] = iteration;
key_array[iteration + MAX_ITERATIONS] = MAX_KEY - iteration;
}
// Initialize
for(i = 0; i < NUM_BUCKETS+TEST_ARRAY_SIZE; i++) {
bucket_size[i] = 0;
bucket_size_totals[i] = 0;
process_bucket_distrib_ptr1[i] = 0;
process_bucket_distrib_ptr2[i] = 0;
}
// Determine where the partial verify test keys are, load into
// top of array bucket_size
for(i = 0; i < TEST_ARRAY_SIZE; i++) {
if((test_index_array[i]/NUM_KEYS) == my_rank) {
bucket_size[NUM_BUCKETS+i] =
key_array[test_index_array[i] % NUM_KEYS];
}
}
// Determine the number of keys in each bucket
for(i = 0; i < NUM_KEYS; i++)
bucket_size[key_array[i] >> shift]++;
// Accumulative bucket sizes are the bucket pointers
bucket_ptrs[0] = 0;
for(i = 1; i < NUM_BUCKETS; i++)
bucket_ptrs[i] = bucket_ptrs[i - 1] + bucket_size[i - 1];
// Sort into appropriate bucket
for(i = 0; i < NUM_KEYS; i++) {
key = key_array[i];
key_buff1[bucket_ptrs[key >> shift]++] = key;
}
timer_stop(2);
timer_start(3);
//rankTimer = System.currentTimeMillis();
MPI.COMM_WORLD.Allreduce(bucket_size, 0,
bucket_size_totals, 0,
NUM_BUCKETS+TEST_ARRAY_SIZE,
MPI.INT,
MPI.SUM);
timer_stop(3);
timer_start(2);
bucket_sum_accumulator = 0;
local_bucket_sum_accumulator = 0;
send_displ[0] = 0;
process_bucket_distrib_ptr1[0] = 0;
for(i = 0, j = 0; i < NUM_BUCKETS; i++) {
bucket_sum_accumulator += bucket_size_totals[i];
local_bucket_sum_accumulator += bucket_size[i];
if(bucket_sum_accumulator >= (j+1)*NUM_KEYS) {
send_count[j] = local_bucket_sum_accumulator;
if(j != 0) {
send_displ[j] = send_displ[j-1] + send_count[j-1];
process_bucket_distrib_ptr1[j] =
process_bucket_distrib_ptr2[j - 1] + 1;
}
process_bucket_distrib_ptr2[j++] = i;
local_bucket_sum_accumulator = 0;
}
}
timer_stop(2);
timer_start(3);
// This is the redistribution section: first find out how many keys
// each processor will send to every other processor:
MPI.COMM_WORLD.Alltoall(send_count, 0, 1, MPI.INT,
recv_count, 0, 1, MPI.INT);
// Determine the receive array displacements for the buckets
recv_displ[0] = 0;
for(i = 1; i < comm_size; i++)
recv_displ[i] = recv_displ[i - 1] + recv_count[i - 1];
MPI.COMM_WORLD.Alltoallv(key_buff1, 0, send_count, send_displ, MPI.INT,
key_buff2, 0, recv_count, recv_displ, MPI.INT);
timer_stop(3);
timer_start(2);
// The starting and ending bucket numbers on each processor are
// multiplied by the interval size of the buckets to obtain the
// smallest possible min and greatest possible max value of any
// key on each processor
min_key_val = process_bucket_distrib_ptr1[my_rank] << shift;
max_key_val = ((process_bucket_distrib_ptr2[my_rank] + 1) << shift)-1;
// Clear the work array
for(i = 0; i < max_key_val - min_key_val + 1; i++)
key_buff1[i] = 0;
// Determine the total number of keys on all other
// processors holding keys of lesser value
m = 0;
for(k=0; k < my_rank; k++)
for(i = process_bucket_distrib_ptr1[k];
i <= process_bucket_distrib_ptr2[k];
i++)
m += bucket_size_totals[i]; // m has total # of lesser keys
// Determine total number of keys on this processor
j = 0;
for( i = process_bucket_distrib_ptr1[my_rank];
i <= process_bucket_distrib_ptr2[my_rank];
i++) {
j += bucket_size_totals[i];
}
// Ranking of all keys occurs in this section :
// shift it backwards so no subtractions are necessary in loop
//CHANGE :key_buff_ptr = key_buff1 - min_key_val;
// In this section, the keys themselves are used as their
// own indexes to determine how many of each there are: their
// individual population
for(i = 0; i < j; i++)
//CHANGE: key_buff_ptr[key_buff2[i]]++;
key_buff1[key_buff2[i] - min_key_val]++;
// To obtain ranks of each key, successively add the individual key
// population, not forgetting to add m, the total of lesser keys,
// to the first key population
//CHANGE: key_buff_ptr[min_key_val] += m;
//SO :key_buff1[min_key_val - min_key_val] += m;
key_buff1[0] += m;
for(i = min_key_val; i < max_key_val; i++) {
//CHANGE:key_buff_ptr[i + 1] += key_buff_ptr[i];
key_buff1[i + 1 - min_key_val] += key_buff1[i - min_key_val];
}
// This is the partial verify test section
// Observe that test_rank_array vals are
// shifted differently for different cases
for(i = 0; i < TEST_ARRAY_SIZE; i++) {
k = bucket_size_totals[i + NUM_BUCKETS]; // keys were hidden here
if(min_key_val <= k && k <= max_key_val)
switch( CLASS ) {
case 'S' :
if( i <= 2) {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+iteration) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+iteration) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
} else {
// CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
}
break;
case 'W' :
if ( i < 2) {
//CHANGE: if(key_buff_ptr[k - 1] !=
if(key_buff1[k - 1 - min_key_val] !=
test_rank_array[i]+(iteration-2)) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
} else {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
}
break;
case 'A' :
if( i <= 2) {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+(iteration-1)) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+(iteration-1)) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
} else {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-(iteration-1)) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-(iteration-1)) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
}
break;
case 'B' :
if( i == 1 || i == 2 || i == 4) {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+iteration) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+iteration) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
} else {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
}
break;
case 'C' :
if( i <= 2) {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+iteration) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+iteration) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
} else {
//CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
System.out.println("Failed partial verification:" +
"iteration " + iteration + ", processor " +
my_rank + ", test key " + i);
} else
passed_verification++;
}
break;
}
}
// Make copies of rank info for use by full_verify: these variables
// in rank are local; making them global slows down the code, probably
// since they cannot be made register by compiler
if(iteration == MAX_ITERATIONS) {
//CHANGE: key_buff_ptr_global = key_buff1[-min_val_key];
total_local_keys = j;
total_lesser_keys = m;
}
}
// **********************************************************
// ************ MAIN *************
// **********************************************************
void doTest(String[] args) {
int i, iteration;
int[] itemp= new int[1];
double[] timecounter = new double[1];
double[] maxtime = new double[1];
int[] pass_v_array = new int[1];
//////////////////// Display variable values ////////////////////////
/*System.out.println("Class = " + CLASS + ", NumProcs = " + NUM_PROCS);
System.out.println("Total Keys Log = " + TOTAL_KEYS_LOG_2);
System.out.println("Max Key Log = " + MAX_KEY_LOG_2);
System.out.println("Num buckets Log = " + NUM_BUCKETS_LOG_2);
System.out.println("Total Keys = " + TOTAL_KEYS);
System.out.println("Max Key = " + MAX_KEY);
System.out.println("Num buckets = " + NUM_BUCKETS);
System.out.println("Num keys = " + NUM_KEYS);*/
/////////////////////////////////////////////////////////////////////
double a = 1220703125.00;
double[] ai = new double[1];
ai[0] = a;
double t2 = randlc(ai, a);
MPI.Init(args);
comm_size = MPI.COMM_WORLD.Size();
my_rank = MPI.COMM_WORLD.Rank();
for(i = 0; i < TEST_ARRAY_SIZE; i++) {
switch( CLASS ) {
case 'S' :
test_index_array[i] = S_test_index_array[i];
test_rank_array[i] = S_test_rank_array[i];
break;
case 'A' :
test_index_array[i] = A_test_index_array[i];
test_rank_array[i] = A_test_rank_array[i];
break;
case 'W' :
test_index_array[i] = W_test_index_array[i];
test_rank_array[i] = W_test_rank_array[i];
break;
case 'B' :
test_index_array[i] = B_test_index_array[i];
test_rank_array[i] = B_test_rank_array[i];
break;
case 'C' :
test_index_array[i] = C_test_index_array[i];
test_rank_array[i] = C_test_rank_array[i];
break;
}
}
/* Check that actual and compiled number of processors agree */
if(comm_size != NUM_PROCS) {
if(my_rank == 0)
System.out.println("\nERROR: not enough processes");
MPI.Finalize();
System.exit(1);
}
/* Check to see whether total number of processes is within bounds. */
if(comm_size > MAX_PROCS) {
if(my_rank == 0)
System.out.println("\nERROR: number of processes exceeds maximum\n");
MPI.Finalize();
System.exit(1);
}
if(my_rank == 0) {
System.out.println("NAS Parallel Benchmarks 3.1 -- IS Benchmark");
System.out.println("Size : " + TOTAL_KEYS + " (class " + CLASS + ")");
System.out.println("Iteration: " + MAX_ITERATIONS);
System.out.println("Number of processes : " + comm_size);
}
/* Generate random number sequence and subsequent keys on all procs */
create_seq(find_my_seed(my_rank,
comm_size,
4*TOTAL_KEYS,
314159265.00,
1220703125.00),
1220703125.00);
//System.out.println("Starting... rank 1");
rank(1);
//System.out.println("Finishing... rank 1");
// Start verification counter
passed_verification = 0;
if(my_rank == 0 && CLASS != 'S') System.out.println("\n iteration");
//Initialize timer
timer_clear(0);
//Initialize separate communication, computation timing
for(i = 1; i <= 3; i++) timer_clear(i);
timer_start(0);
timer_start(1);
timer_start(2);
// This is the main iteration
for(iteration=1; iteration<= MAX_ITERATIONS; iteration++) {
if(my_rank == 0 && CLASS != 'S') System.out.println("\t" + iteration);
rank(iteration);
}
timer_stop(2);
timer_stop(1);
timer_stop(0);
timecounter[0] = timer_read(0);
MPI.COMM_WORLD.Reduce( timecounter, 0, maxtime, 0,
1, MPI.DOUBLE, MPI.MAX, 0);
// TIMING ENABLED
double[] tmin;
double[] tsum;
double[] tmax;
tmin = new double[1];
tsum = new double[1];
tmax = new double[1];
if(my_rank == 0) {
System.out.println("\ntimer 1/2/3 = total/computation/communication time");
System.out.println("\t\tmin\t\tavg\t\tmax");
}
for(i = 1; i <= 3; i++) {
timecounter[0] = timer_read(i);
MPI.COMM_WORLD.Reduce(timecounter, 0, tmin, 0, 1, MPI.DOUBLE, MPI.MIN, 0);
MPI.COMM_WORLD.Reduce(timecounter, 0, tsum, 0, 1, MPI.DOUBLE, MPI.SUM, 0);
MPI.COMM_WORLD.Reduce(timecounter, 0, tmax, 0, 1, MPI.DOUBLE, MPI.MAX, 0);
if(my_rank == 0) {
System.out.println("timer" + i + "\t\t" + (tmin[0]/(double)1000) +
"\t\t" + (tsum[0]/(double)(comm_size *1000)) +
"\t\t" + (tmax[0]/(double)1000));
}
}
// END TIMING ENABLED
full_verify();
itemp[0] = passed_verification;
//pass_v_array[0] = passed_verification;
MPI.COMM_WORLD.Reduce( itemp, 0, pass_v_array, 0,
1, MPI.INT, MPI.SUM, 0);
passed_verification = pass_v_array[0];
// The Final printout
if(my_rank == 0) {
if(passed_verification != 5*MAX_ITERATIONS + comm_size)
passed_verification = 0;
System.out.println("\n");
System.out.println("IS Benchmark Completed\n");
System.out.println("Class = " + CLASS);
System.out.println("Size = " + TOTAL_KEYS);
System.out.println("Iteration = " + MAX_ITERATIONS);
System.out.println("Time in seconds = " + (maxtime[0]/1000));
System.out.println("Total processes = " + comm_size);
double mopTotal = ((double)(MAX_ITERATIONS*TOTAL_KEYS))/(maxtime[0]/1000)/1000000.0;
System.out.println("Mop/s total = " + mopTotal);
System.out.println("Mop/s/process = " + (mopTotal/comm_size));
if(passed_verification != 0) {
System.out.println("Verification = SUCCESSFUL");
} else {
System.out.println("Verification = UNSUCCESSFUL");
}
}
MPI.Finalize();
}
public static void main(String[] args) {
Is isNPB = new Is();
isNPB.doTest(args);
}
}
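The redistribution step in rank() is the heart of the benchmark: Alltoall exchanges the per-destination counts, and Alltoallv then moves variable-sized blocks described by (counts, displacements) pairs. The following minimal sketch isolates that two-call pattern, assuming the same p2pmpi signatures as used above; here every rank sends exactly one int to each peer, so all counts are 1 and the displacements are simple prefix sums (the class name AlltoallvDemo is our own choice):
import p2pmpi.mpi.*;
public class AlltoallvDemo {
public static void main(String[] args) {
MPI.Init(args);
int rank = MPI.COMM_WORLD.Rank();
int size = MPI.COMM_WORLD.Size();
int[] sendbuf = new int[size];
int[] recvbuf = new int[size];
int[] sendcount = new int[size];
int[] recvcount = new int[size];
int[] sdispl = new int[size];
int[] rdispl = new int[size];
for(int i = 0; i < size; i++) {
sendbuf[i] = rank * 100 + i; // one int destined for each rank
sendcount[i] = 1;
}
// exchange the counts, as in rank() above
MPI.COMM_WORLD.Alltoall(sendcount, 0, 1, MPI.INT, recvcount, 0, 1, MPI.INT);
// displacements are prefix sums of the counts
for(int i = 1; i < size; i++) {
sdispl[i] = sdispl[i-1] + sendcount[i-1];
rdispl[i] = rdispl[i-1] + recvcount[i-1];
}
// move the variable-sized blocks; rank r receives i*100 + r from each rank i
MPI.COMM_WORLD.Alltoallv(sendbuf, 0, sendcount, sdispl, MPI.INT, recvbuf, 0, recvcount, rdispl, MPI.INT);
System.out.println("Rank " + rank + " received " + recvbuf[0] + " ... " + recvbuf[size-1]);
MPI.Finalize();
}
}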
