Here is a first simple program to compute Pi, extracted from the book "Using MPI" by W. Gropp, E. Lusk, and A. Skjellum, p. 25. It makes use of the broadcast (Bcast) and reduce (Reduce) collective operations.
import p2pmpi.mpi.*;

public class Pi {
    public static void main(String[] args) {
        int rank, size, i;
        double PI25DT = 3.141592653589793238462643;
        double h, sum, x;

        MPI.Init(args);
        double startTime = MPI.Wtime();
        size = MPI.COMM_WORLD.Size();
        rank = MPI.COMM_WORLD.Rank();

        int[] n = new int[1];
        double[] mypi = new double[1];
        double[] pi = new double[1];

        if(rank == 0) {
            n[0] = 1000000;   // number of intervals
        }
        MPI.COMM_WORLD.Bcast(n, 0, 1, MPI.INT, 0);

        h = 1.0 / (double)n[0];
        sum = 0.0;
        for(i = rank + 1; i <= n[0]; i += size) {
            x = h * ((double)i - 0.5);
            sum += (4.0 / (1.0 + x*x));
        }
        mypi[0] = h * sum;

        MPI.COMM_WORLD.Reduce(mypi, 0, pi, 0, 1, MPI.DOUBLE, MPI.SUM, 0);

        if(rank == 0) {
            System.out.println("Pi is approximately " + pi[0]);
            System.out.println("Error is " + (pi[0] - PI25DT));
            double stopTime = MPI.Wtime();
            System.out.println("Time usage = " + (stopTime - startTime) + " ms");
        }
        MPI.Finalize();
    }
}
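To see what the parallel version computes, it helps to compare it with a sequential midpoint-rule integration of 4/(1+x^2) over [0,1]. The stand-alone class below is not part of the original example (the name SeqPi is ours); it is a minimal sketch of the same computation without MPI.

public class SeqPi {
    public static void main(String[] args) {
        int n = 1000000;               // same number of intervals as the MPI version
        double h = 1.0 / (double)n;    // width of one rectangle
        double sum = 0.0;
        for(int i = 1; i <= n; i++) {
            double x = h * ((double)i - 0.5);   // midpoint of interval i
            sum += 4.0 / (1.0 + x*x);           // integrand of the integral of 4/(1+x^2) on [0,1]
        }
        double pi = h * sum;
        System.out.println("Pi is approximately " + pi);
    }
}

In the MPI program, the loop for(i = rank + 1; i <= n[0]; i += size) simply hands every size-th rectangle of this sum to a different rank, and the Reduce with MPI.SUM gathers the partial sums on rank 0.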
This example, a master/worker parallel matrix multiplication, demonstrates the use of the point-to-point Send and Recv operations.
import p2pmpi.mpi.*;

public class MatrixPar {
    public static void main(String[] args) {
        int N = 1500;
        int MASTER = 0;
        int FROM_MASTER = 1;
        int FROM_WORKER = 2;
        int numtasks,       /* number of tasks in partition */
            taskid,         /* a task identifier */
            numworkers,     /* number of worker tasks */
            source,         /* task id of message source */
            dest,           /* task id of message destination */
            nbytes,         /* number of bytes in message */
            mtype,          /* message type */
            intsize,        /* size of an integer in bytes */
            dbsize,         /* size of a double float in bytes */
            averow, extra,  /* used to determine rows sent to each worker */
            i, j, k,        /* misc */
            count;
        int[] a = new int[N*N];          /* matrix A to be multiplied */
        int[] b = new int[N*N];          /* matrix B to be multiplied */
        int[] c = new int[N*N];          /* result matrix C */
        int[] offset = new int[1];
        int[] rows = new int[1];         /* rows of matrix A sent to each worker */
        long[] computeTime = new long[1];
        long[] maxComputeTime = new long[1];

        MPI.Init(args);
        taskid = MPI.COMM_WORLD.Rank();
        numtasks = MPI.COMM_WORLD.Size();
        numworkers = numtasks - 1;

        /* *************** Master Task ****************** */
        if(taskid == MASTER) {
            // Init matrix A, B
            for(i = 0; i < N; i++) {
                for(j = 0; j < N; j++) {
                    a[(i*N)+j] = 1;
                    b[(i*N)+j] = 2;
                }
            }

            // Send matrix data to worker tasks
            long start = System.currentTimeMillis();
            averow = N / numworkers;
            extra = N % numworkers;
            offset[0] = 0;
            mtype = FROM_MASTER;
            long startsend = System.currentTimeMillis();
            for(dest = 1; dest <= numworkers; dest++) {
                if(dest <= extra) {
                    rows[0] = averow + 1;
                } else {
                    rows[0] = averow;
                }
                MPI.COMM_WORLD.Send(offset, 0, 1, MPI.INT, dest, mtype);
                MPI.COMM_WORLD.Send(rows, 0, 1, MPI.INT, dest, mtype);
                count = rows[0] * N;
                MPI.COMM_WORLD.Send(a, (offset[0]*N), count, MPI.INT, dest, mtype);
                count = N*N;
                MPI.COMM_WORLD.Send(b, 0, count, MPI.INT, dest, mtype);
                offset[0] = offset[0] + rows[0];
            }
            long stopsend = System.currentTimeMillis();

            // Wait for results from all worker tasks
            computeTime[0] = 0;
            mtype = FROM_WORKER;
            for(i = 1; i <= numworkers; i++) {
                source = i;
                MPI.COMM_WORLD.Recv(computeTime, 0, 1, MPI.LONG, source, mtype);
                System.out.println("Rank " + i + " uses " + computeTime[0] + " for computing");
                MPI.COMM_WORLD.Recv(offset, 0, 1, MPI.INT, source, mtype);
                MPI.COMM_WORLD.Recv(rows, 0, 1, MPI.INT, source, mtype);
                count = rows[0] * N;
                MPI.COMM_WORLD.Recv(c, offset[0]*N, count, MPI.INT, source, mtype);
            }
            long stop = System.currentTimeMillis();
            //System.out.println("Result of matrix c[0] = " + c[0] + ", c[1000*1000] = " + c[100*100]);
            System.out.println("Time Usage = " + (stop - start));
            System.out.println("Sending Time Usage = " + (stopsend - startsend));
        }

        /* *************************** worker task *********************************** */
        if(taskid > MASTER) {
            mtype = FROM_MASTER;
            source = MASTER;
            MPI.COMM_WORLD.Recv(offset, 0, 1, MPI.INT, source, mtype);
            MPI.COMM_WORLD.Recv(rows, 0, 1, MPI.INT, source, mtype);
            count = rows[0] * N;
            MPI.COMM_WORLD.Recv(a, 0, count, MPI.INT, source, mtype);
            count = N * N;
            MPI.COMM_WORLD.Recv(b, 0, count, MPI.INT, source, mtype);

            long startCompute = System.currentTimeMillis();
            for(i = 0; i < rows[0]; i++) {
                for(k = 0; k < N; k++) {
                    c[(i*N)+k] = 0;
                    for(j = 0; j < N; j++) {
                        c[(i*N)+k] = c[(i*N)+k] + a[(i*N)+j] * b[(j*N)+k];
                    }
                }
            }
            long stopCompute = System.currentTimeMillis();
            computeTime[0] = (stopCompute - startCompute);

            mtype = FROM_WORKER;
            MPI.COMM_WORLD.Send(computeTime, 0, 1, MPI.LONG, MASTER, mtype);
            MPI.COMM_WORLD.Send(offset, 0, 1, MPI.INT, MASTER, mtype);
            MPI.COMM_WORLD.Send(rows, 0, 1, MPI.INT, MASTER, mtype);
            MPI.COMM_WORLD.Send(c, 0, rows[0]*N, MPI.INT, MASTER, mtype);
        }

        MPI.COMM_WORLD.Reduce(computeTime, 0, maxComputeTime, 0, 1, MPI.LONG, MPI.MAX, 0);
        if(taskid == 0) {
            System.out.println("Max compute time/machine = " + maxComputeTime[0]);
        }
        MPI.Finalize();
    }
}
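One detail worth spelling out is how the master splits the N rows of A when N is not a multiple of numworkers: the first extra workers receive averow + 1 rows and the remaining ones receive averow rows, with offset advancing accordingly. The small stand-alone class below is a sketch of ours (not part of the original program); it only prints the offset/rows pairs the master's send loop would produce.

public class RowDistribution {
    public static void main(String[] args) {
        int N = 1500;          // number of rows of A, as in MatrixPar
        int numworkers = 4;    // example worker count (numtasks - 1)
        int averow = N / numworkers;
        int extra  = N % numworkers;
        int offset = 0;
        for(int dest = 1; dest <= numworkers; dest++) {
            int rows = (dest <= extra) ? averow + 1 : averow;
            System.out.println("worker " + dest + ": offset=" + offset + " rows=" + rows);
            offset += rows;    // the next worker starts where this one ends
        }
        // offset == N here, so every row is assigned exactly once
    }
}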
This program is an implementation of the algorithm called LCR, which solves the problem of distributed leader election on a ring. It uses Send, Irecv, and Wait, together with the equivalents of MPI_ANY_TAG and MPI_ANY_SOURCE.
/**
 * Election algorithm on a ring, "LCR" after the names of Le Lann, Chang, and Roberts.
 *
 * The communication is unidirectional. The size of the ring is not known.
 * Only the leader has to output that it is the leader.
 * The algorithm compares the UIDs of the nodes; the process with the largest UID
 * is the one output as leader.
 * Each process sends its identifier around the ring. When a process receives
 * an incoming identifier, it compares that identifier to its own.
 * If the incoming identifier is greater than its own, it keeps passing the identifier on;
 * if it is less than its own, it discards it;
 * if it is equal to its own, the process declares itself leader.
 *
 * The leader declares itself leader after n rounds (n = size of the ring).
 **/
import p2pmpi.mpi.*;
import java.util.Random;

public class ElectionLCR {
    public static void main(String[] args) {
        int rank, size, i, nbround;
        int TAGUID = 1;
        int TAGSTATE = 2;

        MPI.Init(args);
        double startTime;
        Request req = null;
        Status status = null;
        size = MPI.COMM_WORLD.Size();
        rank = MPI.COMM_WORLD.Rank();

        int mynum;
        int[] r = new int[1];
        int[] s = new int[1];
        int e;

        for (e = 0; e < 200; e++) {
            System.out.println("\nElection number : " + e);
            System.out.println("=======================================================");
            // generates a uid (assumes it is unique)
            mynum = MPI.Rand(1000);
            s[0] = mynum;
            System.out.println("[rank " + rank + "] generates uid=" + mynum + ". Let us start.");
            nbround = 1;
            startTime = MPI.Wtime();
            MPI.COMM_WORLD.Send(s, 0, 1, MPI.INT, (rank+1)%size, TAGUID);

            // loop receiving messages from the left neighbour on the ring
            while (true) {
                req = MPI.COMM_WORLD.Irecv(r, 0, 1, MPI.INT, (rank == 0 ? size-1 : rank-1), MPI.ANY_TAG);
                status = req.Wait();

                // ----- Election phase -------
                if (status.tag == TAGUID) {
                    if (r[0] > s[0]) {
                        MPI.COMM_WORLD.Send(r, 0, 1, MPI.INT, (rank+1)%size, TAGUID);
                    } else {
                        if (r[0] == s[0]) {
                            System.out.println("[rank " + rank + "] After " + nbround + " rounds, I know I am the (unique) leader with " + s[0]);
                            // I am the unique leader: initiate now another round to broadcast a halting state
                            MPI.COMM_WORLD.Send(r, 0, 1, MPI.INT, (rank+1)%size, TAGSTATE);
                            // ok, the message will eventually come back. Consume the message and stop after this.
                            MPI.COMM_WORLD.Recv(r, 0, 1, MPI.INT, (rank == 0 ? size-1 : rank-1), TAGSTATE);
                            break;
                        }
                        // else ( r < s ) do nothing
                    }
                }

                // ---- Halting phase -------
                if (status.tag == TAGSTATE) {
                    System.out.println("[rank " + rank + "] I just got informed " + r[0] + " is elected.");
                    MPI.COMM_WORLD.Send(r, 0, 1, MPI.INT, (rank+1)%size, TAGSTATE);
                    break;
                }
                nbround++;
            }
            double stopTime = MPI.Wtime();
            System.out.println("Time usage = " + (stopTime - startTime) + " ms");
            System.out.println("Number of iterations: " + nbround);
        }
        MPI.Finalize();
    }
}
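Why the process with the largest UID is the one that eventually sees its own UID come back can be checked without MPI. The class below is our own illustration with made-up UIDs: it simulates the ring synchronously, one round per step, applying exactly the LCR rule (forward a larger identifier, discard a smaller one, declare victory on receiving your own).

public class LcrSimulation {
    public static void main(String[] args) {
        int[] uid = {42, 7, 99, 13, 58};              // one UID per ring position (rank)
        int n = uid.length;
        int[] sending = uid.clone();                  // initially everybody sends its own UID
        for(int round = 1; round <= n; round++) {
            int[] nextSending = new int[n];
            java.util.Arrays.fill(nextSending, -1);   // -1 means "nothing to forward"
            for(int i = 0; i < n; i++) {
                int received = sending[(i + n - 1) % n];   // message from the left neighbour
                if(received == -1) continue;               // neighbour had nothing to send
                if(received > uid[i]) {
                    nextSending[i] = received;             // pass the larger identifier on
                } else if(received == uid[i]) {
                    System.out.println("after " + round + " rounds, rank " + i + " is the leader (uid " + uid[i] + ")");
                    return;
                }
                // received < uid[i]: discard it
            }
            sending = nextSending;
        }
    }
}

With these five distinct UIDs the leader is announced after 5 rounds, matching the "n rounds for a ring of size n" claim in the comment above.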
This program is a Java translation of the C implementation of IS (Integer Sort) from the NAS Parallel Benchmarks (NPB 3.2). It uses the Alltoall, Alltoallv, Allreduce, and Reduce collective operations.
import p2pmpi.mpi.*;

public class Is {
    double[] start;
    double[] elapsed;

    static final char CLASS = 'A';
    static final int NUM_PROCS = 4;
    static final int MAX_PROCS = 512;
    static final int MAX_ITERATIONS = 10;
    static final int TEST_ARRAY_SIZE = 5;

    int TOTAL_KEYS_LOG_2;
    int MAX_KEY_LOG_2;
    int NUM_BUCKETS_LOG_2;
    int TOTAL_KEYS;
    int MAX_KEY;
    int NUM_BUCKETS;
    int NUM_KEYS;
    int SIZE_OF_BUFFERS;

    int my_rank, comm_size;

    // Some Global Info
    //int[] key_buff_ptr_global;
    int total_local_keys, total_lesser_keys;
    int passed_verification;

    // These are the three main arrays
    // See SIZE_OF_BUFFERS def above
    int[] key_array;
    int[] key_buff1;
    int[] key_buff2;
    int[] bucket_size;
    int[] bucket_size_totals;
    int[] bucket_ptrs;
    int[] process_bucket_distrib_ptr1;
    int[] process_bucket_distrib_ptr2;
    int[] send_count;
    int[] recv_count;
    int[] send_displ;
    int[] recv_displ;
    int min_key_val;

    // Partial verif info
    int[] test_index_array;
    int[] test_rank_array;
    int[] S_test_index_array = {48427, 17148, 23627, 62548, 4431};
    int[] S_test_rank_array = {0, 18, 346, 64917, 65463};
    int[] W_test_index_array = {357773, 934767, 875723, 898999, 404505};
    int[] W_test_rank_array = {1249, 11698, 1039987, 1043896, 1048018};
    int[] A_test_index_array = {2112377, 662041, 5336171, 3642833, 4250760};
    int[] A_test_rank_array = {104, 17523, 123928, 8288932, 8388264};
    int[] B_test_index_array = {41869, 812306, 5102857, 18232239, 26860214};
    int[] B_test_rank_array = {33422937, 10244, 59149, 33135281, 99};
    int[] C_test_index_array = {44172927, 72999161, 74326391, 129606274, 21736814};
    int[] C_test_rank_array = {61147, 882988, 266290, 133997595, 133525895};

    // Used in randlc:
    // because Java does not support static local variables,
    // these are made global
    int KS = 0;
    double R23, R46, T23, T46;

    Is() {
        start = new double[64];
        elapsed = new double[64];
        switch(CLASS) {
            case 'S' :
                TOTAL_KEYS_LOG_2 = 16;
                MAX_KEY_LOG_2 = 11;
                NUM_BUCKETS_LOG_2 = 9;
                break;
            case 'W' :
                TOTAL_KEYS_LOG_2 = 20;
                MAX_KEY_LOG_2 = 16;
                NUM_BUCKETS_LOG_2 = 10;
                break;
            case 'A' :
                TOTAL_KEYS_LOG_2 = 23;
                MAX_KEY_LOG_2 = 19;
                NUM_BUCKETS_LOG_2 = 10;
                break;
            case 'B' :
                TOTAL_KEYS_LOG_2 = 25;
                MAX_KEY_LOG_2 = 21;
                NUM_BUCKETS_LOG_2 = 10;
                break;
            case 'C' :
                TOTAL_KEYS_LOG_2 = 27;
                MAX_KEY_LOG_2 = 23;
                NUM_BUCKETS_LOG_2 = 10;
                break;
        }
        TOTAL_KEYS = (1 << TOTAL_KEYS_LOG_2);
        MAX_KEY = (1 << MAX_KEY_LOG_2);
        NUM_BUCKETS = (1 << NUM_BUCKETS_LOG_2);
        NUM_KEYS = (TOTAL_KEYS/NUM_PROCS);
        if(NUM_PROCS < 256) {
            SIZE_OF_BUFFERS = 3*NUM_KEYS/2;
        } else {
            SIZE_OF_BUFFERS = 6*NUM_KEYS;
        }
        System.out.println("Size of buffers = " + SIZE_OF_BUFFERS);

        key_array = new int[SIZE_OF_BUFFERS];
        key_buff1 = new int[SIZE_OF_BUFFERS];
        key_buff2 = new int[SIZE_OF_BUFFERS];
        bucket_size = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
        bucket_size_totals = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
        bucket_ptrs = new int[NUM_BUCKETS];
        process_bucket_distrib_ptr1 = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
        process_bucket_distrib_ptr2 = new int[NUM_BUCKETS+TEST_ARRAY_SIZE];
        send_count = new int[MAX_PROCS];
        recv_count = new int[MAX_PROCS];
        send_displ = new int[MAX_PROCS];
        recv_displ = new int[MAX_PROCS];
        test_index_array = new int[TEST_ARRAY_SIZE];
        test_rank_array = new int[TEST_ARRAY_SIZE];
    }

    void c_print_results(String name, char tclass, int n1, int n2, int n3, int niter,
                         int nprocs_compiled, int nprocs_total, double t, double mops,
                         String optype, int passed_verification, String npbversion,
                         String compiletime, String mpicc, String clink, String cmpi_lib,
                         String cmpi_inc, String cflags, String clinkflags) {
        String evalue = "1000";
        System.out.println("\n\n" + name + " Benchmark Completed");
        System.out.println(" Class = " + tclass);
        if(n2 == 0 && n3 == 0)
            System.out.println("Size = " + n1);
        else
            System.out.println("Size = ");
    }

    void timer_clear(int n) {
        elapsed[n] = 0.0;
    }

    void timer_start(int n) {
        start[n] = MPI.Wtime();
    }

    void timer_stop(int n) {
        double t, now;
        now = MPI.Wtime();
        t = now - start[n];
        elapsed[n] += t;
    }

    double timer_read(int n) {
        return(elapsed[n]);
    }

    /**********************************************************************
     *****************           R A N D L C             *****************
     *****************                                    *****************
     *****************  portable random number generator  *****************
     **********************************************************************/
    double randlc(Object X, double A) {
        double T1, T2, T3, T4;
        double A1;
        double A2;
        double X1;
        double X2;
        double Z;
        int i, j;
        double[] lX = (double[])X;

        if(KS == 0) {
            R23 = 1.0;
            R46 = 1.0;
            T23 = 1.0;
            T46 = 1.0;
            for(i = 1; i <= 23; i++) {
                R23 = 0.50 * R23;
                T23 = 2.0 * T23;
            }
            for(i = 1; i <= 46; i++) {
                R46 = 0.50 * R46;
                T46 = 2.0 * T46;
            }
            KS = 1;
        }

        /* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N */
        T1 = R23 * A;
        j = (int)T1;
        A1 = j;
        A2 = A - T23 * A1;

        /* Break X into two parts such that X = 2^23 * X1 + X2, compute
         * Z = A1 * X2 + A2 * X1 (mod 2^23) and then
         * X = 2^23 * Z + A2 * X2 (mod 2^46) */
        T1 = R23 * lX[0];
        j = (int)T1;
        X1 = j;
        X2 = lX[0] - T23 * X1;
        T1 = A1 * X2 + A2 * X1;
        j = (int)(R23 * T1);
        T2 = j;
        Z = T1 - T23 * T2;
        T3 = T23 * Z + A2 * X2;
        j = (int)(R46 * T3);
        T4 = j;
        lX[0] = T3 - T46 * T4;
        return (R46 * lX[0]);
    }

    double find_my_seed(long kn, long np, long nn, double s, double a) {
        long i;
        double t3, an;
        long mq, nq, kk, ik;
        double[] t1 = new double[1];
        double[] t2 = new double[1];

        nq = nn / np;
        for(mq = 0; nq > 1; mq++, nq/=2);
        t1[0] = a;
        for(i = 1; i <= mq; i++) {
            t2[0] = randlc(t1, t1[0]);
        }
        an = t1[0];
        kk = kn;
        t1[0] = s;
        t2[0] = an;
        for(i = 1; i <= 100; i++) {
            ik = kk/2;
            if(2 * ik != kk)
                t3 = randlc(t1, t2[0]);
            if(ik == 0)
                break;
            t3 = randlc(t2, t2[0]);
            kk = ik;
        }
        return t1[0];
    }

    void create_seq(double seed, double a) {
        double x;
        int i, k;
        double seeda[] = new double[1];
        seeda[0] = seed;
        k = MAX_KEY / 4;
        for(i = 0; i < NUM_KEYS; i++) {
            x = randlc(seeda, a);
            x += randlc(seeda, a);
            x += randlc(seeda, a);
            x += randlc(seeda, a);
            key_array[i] = (int)(k*x);
        }
    }

    void full_verify() {
        int i, j, k;
        int[] ka = new int[1];
        Request req = null;

        for(i = 0; i < total_local_keys; i++) {
            //CHANGE: key_array[--key_buff_ptr_global[key_buff2[i]] -
            key_array[--(key_buff1[key_buff2[i] - min_key_val]) - total_lesser_keys] = key_buff2[i];
        }
        if(my_rank > 0) {
            req = MPI.COMM_WORLD.Irecv(ka, 0, 1, MPI.INT, my_rank - 1, 1000);
        }
        if(my_rank < comm_size-1) {
            MPI.COMM_WORLD.Send(key_array, total_local_keys-1, 1, MPI.INT, my_rank+1, 1000);
        }
        if(my_rank > 0) {
            //MPI.COMM_WORLD.Recv(ka, 0, 1, MPI.INT, my_rank-1, 1000);
            req.Wait();
        }
        k = ka[0];

        // Confirm that neighbour's greatest key value
        // is not greater than my least key value
        j = 0;
        if(my_rank > 0)
            if(k > key_array[0])
                j++;

        // Confirm keys correctly sorted: count incorrectly sorted keys, if any
        for(i = 1; i < total_local_keys; i++)
            if(key_array[i - 1] > key_array[i])
                j++;

        if(j != 0) {
            //System.out.println("Processor " + my_rank + ": Full_verify: number of keys out of sort:" + j);
        } else {
            passed_verification++;
        }
    }

    //********************************************************************
    //****************             RANK                ******************
    //********************************************************************
    void rank(int iteration) {
        int i, j, k, m;
        int shift = MAX_KEY_LOG_2 - NUM_BUCKETS_LOG_2;
        int key;
        int bucket_sum_accumulator;
        int local_bucket_sum_accumulator;
        int max_key_val;
        //CHANGE: int key_buff_ptr;
        //long rankTimer = System.currentTimeMillis();

        // Iteration alteration of keys
        if(my_rank == 0) {
            key_array[iteration] = iteration;
            key_array[iteration + MAX_ITERATIONS] = MAX_KEY - iteration;
        }

        // Initialize
        for(i = 0; i < NUM_BUCKETS+TEST_ARRAY_SIZE; i++) {
            bucket_size[i] = 0;
            bucket_size_totals[i] = 0;
            process_bucket_distrib_ptr1[i] = 0;
            process_bucket_distrib_ptr2[i] = 0;
        }

        // Determine where the partial verify test keys are, load into
        // top of array bucket_size
        for(i = 0; i < TEST_ARRAY_SIZE; i++) {
            if((test_index_array[i]/NUM_KEYS) == my_rank) {
                bucket_size[NUM_BUCKETS+i] = key_array[test_index_array[i] % NUM_KEYS];
            }
        }

        // Determine the number of keys in each bucket
        for(i = 0; i < NUM_KEYS; i++)
            bucket_size[key_array[i] >> shift]++;

        // Accumulative bucket sizes are the bucket pointers
        bucket_ptrs[0] = 0;
        for(i = 1; i < NUM_BUCKETS; i++)
            bucket_ptrs[i] = bucket_ptrs[i - 1] + bucket_size[i - 1];

        // Sort into appropriate bucket
        for(i = 0; i < NUM_KEYS; i++) {
            key = key_array[i];
            key_buff1[bucket_ptrs[key >> shift]++] = key;
        }

        timer_stop(2);
        timer_start(3);
        //rankTimer = System.currentTimeMillis();
        MPI.COMM_WORLD.Allreduce(bucket_size, 0, bucket_size_totals, 0, NUM_BUCKETS+TEST_ARRAY_SIZE, MPI.INT, MPI.SUM);
        timer_stop(3);
        timer_start(2);

        bucket_sum_accumulator = 0;
        local_bucket_sum_accumulator = 0;
        send_displ[0] = 0;
        process_bucket_distrib_ptr1[0] = 0;
        for(i = 0, j = 0; i < NUM_BUCKETS; i++) {
            bucket_sum_accumulator += bucket_size_totals[i];
            local_bucket_sum_accumulator += bucket_size[i];
            if(bucket_sum_accumulator >= (j+1)*NUM_KEYS) {
                send_count[j] = local_bucket_sum_accumulator;
                if(j != 0) {
                    send_displ[j] = send_displ[j-1] + send_count[j-1];
                    process_bucket_distrib_ptr1[j] = process_bucket_distrib_ptr2[j - 1] + 1;
                }
                process_bucket_distrib_ptr2[j++] = i;
                local_bucket_sum_accumulator = 0;
            }
        }

        timer_stop(2);
        timer_start(3);
        // This is the redistribution section: first find out how many keys
        // each processor will send to every other processor:
        MPI.COMM_WORLD.Alltoall(send_count, 0, 1, MPI.INT, recv_count, 0, 1, MPI.INT);

        // Determine the receive array displacements for the buckets
        recv_displ[0] = 0;
        for(i = 1; i < comm_size; i++)
            recv_displ[i] = recv_displ[i - 1] + recv_count[i - 1];

        MPI.COMM_WORLD.Alltoallv(key_buff1, 0, send_count, send_displ, MPI.INT,
                                 key_buff2, 0, recv_count, recv_displ, MPI.INT);
        timer_stop(3);
        timer_start(2);

        // The starting and ending bucket numbers on each processor are
        // multiplied by the interval size of the buckets to obtain the
        // smallest possible min and greatest possible max value of any
        // key on each processor
        min_key_val = process_bucket_distrib_ptr1[my_rank] << shift;
        max_key_val = ((process_bucket_distrib_ptr2[my_rank] + 1) << shift) - 1;

        // Clear the work array
        for(i = 0; i < max_key_val - min_key_val + 1; i++)
            key_buff1[i] = 0;

        // Determine the total number of keys on all other
        // processors holding keys of lesser value
        m = 0;
        for(k = 0; k < my_rank; k++)
            for(i = process_bucket_distrib_ptr1[k]; i <= process_bucket_distrib_ptr2[k]; i++)
                m += bucket_size_totals[i];    // m has total # of lesser keys

        // Determine total number of keys on this processor
        j = 0;
        for(i = process_bucket_distrib_ptr1[my_rank]; i <= process_bucket_distrib_ptr2[my_rank]; i++) {
            j += bucket_size_totals[i];
        }

        // Ranking of all keys occurs in this section:
        // shift it backwards so no subtractions are necessary in loop
        //CHANGE: key_buff_ptr = key_buff1 - min_key_val;

        // In this section, the keys themselves are used as their
        // own indexes to determine how many of each there are: their
        // individual population
        for(i = 0; i < j; i++)
            //CHANGE: key_buff_ptr[key_buff2[i]]++;
            key_buff1[key_buff2[i] - min_key_val]++;

        // To obtain ranks of each key, successively add the individual key
        // population, not forgetting to add m, the total of lesser keys,
        // to the first key population
        //CHANGE: key_buff_ptr[min_key_val] += m;
        //SO     : key_buff1[min_key_val - min_key_val] += m;
        key_buff1[0] += m;
        for(i = min_key_val; i < max_key_val; i++) {
            //CHANGE: key_buff_ptr[i + 1] += key_buff_ptr[i];
            key_buff1[i + 1 - min_key_val] += key_buff1[i - min_key_val];
        }

        // This is the partial verify test section
        // Observe that test_rank_array vals are
        // shifted differently for different cases
        for(i = 0; i < TEST_ARRAY_SIZE; i++) {
            k = bucket_size_totals[i + NUM_BUCKETS];    // keys were hidden here
            if(min_key_val <= k && k <= max_key_val)
                switch( CLASS ) {
                    case 'S' :
                        if( i <= 2) {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+iteration) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+iteration) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        } else {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        }
                        break;
                    case 'W' :
                        if( i < 2) {
                            //CHANGE: if(key_buff_ptr[k - 1] !=
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+(iteration-2)) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        } else {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        }
                        break;
                    case 'A' :
                        if( i <= 2) {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+(iteration-1)) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+(iteration-1)) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        } else {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-(iteration-1)) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-(iteration-1)) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        }
                        break;
                    case 'B' :
                        if( i == 1 || i == 2 || i == 4) {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+iteration) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+iteration) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        } else {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        }
                        break;
                    case 'C' :
                        if( i <= 2) {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]+iteration) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]+iteration) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        } else {
                            //CHANGE: if(key_buff_ptr[k - 1] != test_rank_array[i]-iteration) {
                            if(key_buff1[k - 1 - min_key_val] != test_rank_array[i]-iteration) {
                                System.out.println("Failed partial verification:" + "iteration " + iteration + ", processor " + my_rank + ", test key " + i);
                            } else
                                passed_verification++;
                        }
                        break;
                }
        }

        // Make copies of rank info for use by full_verify: these variables
        // in rank are local; making them global slows down the code, probably
        // since they cannot be made register by compiler
        if(iteration == MAX_ITERATIONS) {
            //CHANGE: key_buff_ptr_global = key_buff1[-min_val_key];
            total_local_keys = j;
            total_lesser_keys = m;
        }
    }

    // **********************************************************
    // ************              MAIN               ************
    // **********************************************************
    void doTest(String[] args) {
        int i, iteration;
        int[] itemp = new int[1];
        double[] timecounter = new double[1];
        double[] maxtime = new double[1];
        int[] pass_v_array = new int[1];

        //////////////////// Display variable values ////////////////////////
        /*System.out.println("Class = " + CLASS + ", NumProcs = " + NUM_PROCS);
        System.out.println("Total Keys Log = " + TOTAL_KEYS_LOG_2);
        System.out.println("Max Key Log = " + MAX_KEY_LOG_2);
        System.out.println("Num buckets Log = " + NUM_BUCKETS_LOG_2);
        System.out.println("Total Keys = " + TOTAL_KEYS);
        System.out.println("Max Key = " + MAX_KEY);
        System.out.println("Num buckets = " + NUM_BUCKETS);
        System.out.println("Num keys = " + NUM_KEYS);*/
        /////////////////////////////////////////////////////////////////////

        double a = 1220703125.00;
        double[] ai = new double[1];
        ai[0] = a;
        double t2 = randlc(ai, a);

        MPI.Init(args);
        comm_size = MPI.COMM_WORLD.Size();
        my_rank = MPI.COMM_WORLD.Rank();

        for(i = 0; i < TEST_ARRAY_SIZE; i++) {
            switch( CLASS ) {
                case 'S' :
                    test_index_array[i] = S_test_index_array[i];
                    test_rank_array[i] = S_test_rank_array[i];
                    break;
                case 'A' :
                    test_index_array[i] = A_test_index_array[i];
                    test_rank_array[i] = A_test_rank_array[i];
                    break;
                case 'W' :
                    test_index_array[i] = W_test_index_array[i];
                    test_rank_array[i] = W_test_rank_array[i];
                    break;
                case 'B' :
                    test_index_array[i] = B_test_index_array[i];
                    test_rank_array[i] = B_test_rank_array[i];
                    break;
                case 'C' :
                    test_index_array[i] = C_test_index_array[i];
                    test_rank_array[i] = C_test_rank_array[i];
                    break;
            }
        }

        /* Check that actual and compiled number of processors agree */
        if(comm_size != NUM_PROCS) {
            if(my_rank == 0)
                System.out.println("\nERROR: not enough processes");
            MPI.Finalize();
            System.exit(1);
        }

        /* Check to see whether total number of processes is within bounds. */
        if(comm_size > MAX_PROCS) {
            if(my_rank == 0)
                System.out.println("\nERROR: number of processes exceeds maximum\n");
            MPI.Finalize();
            System.exit(1);
        }

        if(my_rank == 0) {
            System.out.println("NAS Parallel Benchmarks 3.1 -- IS Benchmark");
            System.out.println("Size : " + TOTAL_KEYS + " (class " + CLASS + ")");
            System.out.println("Iteration: " + MAX_ITERATIONS);
            System.out.println("Number of processes : " + comm_size);
        }

        /* Generate random number sequence and subsequent keys on all procs */
        create_seq(find_my_seed(my_rank, comm_size, 4*TOTAL_KEYS, 314159265.00, 1220703125.00),
                   1220703125.00);

        //System.out.println("Starting... rank 1");
        rank(1);
        //System.out.println("Finishing... rank 1");

        // Start verification counter
        passed_verification = 0;

        if(my_rank == 0 && CLASS != 'S')
            System.out.println("\n iteration");

        // Initialize timer
        timer_clear(0);
        // Initialize separate communication, computation timing
        for(i = 1; i <= 3; i++)
            timer_clear(i);

        timer_start(0);
        timer_start(1);
        timer_start(2);

        // This is the main iteration
        for(iteration = 1; iteration <= MAX_ITERATIONS; iteration++) {
            if(my_rank == 0 && CLASS != 'S')
                System.out.println("\t" + iteration);
            rank(iteration);
        }

        timer_stop(2);
        timer_stop(1);
        timer_stop(0);

        timecounter[0] = timer_read(0);
        MPI.COMM_WORLD.Reduce(timecounter, 0, maxtime, 0, 1, MPI.DOUBLE, MPI.MAX, 0);

        // TIMING ENABLED
        double[] tmin;
        double[] tsum;
        double[] tmax;
        tmin = new double[1];
        tsum = new double[1];
        tmax = new double[1];
        if(my_rank == 0) {
            System.out.println("\ntimer 1/2/3 = total/computation/communication time");
            System.out.println("\t\tmin\t\tavg\t\tmax");
        }
        for(i = 1; i <= 3; i++) {
            timecounter[0] = timer_read(i);
            MPI.COMM_WORLD.Reduce(timecounter, 0, tmin, 0, 1, MPI.DOUBLE, MPI.MIN, 0);
            MPI.COMM_WORLD.Reduce(timecounter, 0, tsum, 0, 1, MPI.DOUBLE, MPI.SUM, 0);
            MPI.COMM_WORLD.Reduce(timecounter, 0, tmax, 0, 1, MPI.DOUBLE, MPI.MAX, 0);
            if(my_rank == 0) {
                System.out.println("timer" + i + "\t\t" + (tmin[0]/(double)1000)
                        + "\t\t" + (tsum[0]/(double)(comm_size*1000))
                        + "\t\t" + (tmax[0]/(double)1000));
            }
        }
        // END TIMING ENABLED

        full_verify();

        itemp[0] = passed_verification;
        //pass_v_array[0] = passed_verification;
        MPI.COMM_WORLD.Reduce(itemp, 0, pass_v_array, 0, 1, MPI.INT, MPI.SUM, 0);
        passed_verification = pass_v_array[0];

        // The final printout
        if(my_rank == 0) {
            if(passed_verification != 5*MAX_ITERATIONS + comm_size)
                passed_verification = 0;
            System.out.println("\n");
            System.out.println("IS Benchmark Completed\n");
            System.out.println("Class = " + CLASS);
            System.out.println("Size = " + TOTAL_KEYS);
            System.out.println("Iteration = " + MAX_ITERATIONS);
            System.out.println("Time in seconds = " + (maxtime[0]/1000));
            System.out.println("Total processes = " + comm_size);
            double mopTotal = ((double)(MAX_ITERATIONS*TOTAL_KEYS))/(maxtime[0]/1000)/1000000.0;
            System.out.println("Mop/s total = " + mopTotal);
            System.out.println("Mop/s/process = " + (mopTotal/comm_size));
            if(passed_verification != 0) {
                System.out.println("Verification = SUCCESSFUL");
            } else {
                System.out.println("Verification = UNSUCCESSFUL");
            }
        }
        MPI.Finalize();
    }

    public static void main(String[] args) {
        Is isNPB = new Is();
        isNPB.doTest(args);
    }
}