#include "ompi_partest.h"
#include "partest_opts.h"

#include <common/bgp_personality.h>
#include <common/bgp_personality_inlines.h>

/* Phase-separation barriers: each helper wraps an MPI_Barrier on the
   given communicator so that the timing of one benchmark phase (start,
   malloc, open, write, read, close, unlink) cannot bleed into the next. */
int barrier_after_start(MPI_Comm comm)
int barrier_after_malloc(MPI_Comm comm)
int barrier_after_open(MPI_Comm comm)
int barrier_after_write(MPI_Comm comm)
int barrier_after_read(MPI_Comm comm)
int barrier_after_close(MPI_Comm comm)
int barrier_before_unlink(MPI_Comm comm)
int barrier_after_unlink(MPI_Comm comm)
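/* collective_print_gather(): each OpenMP thread deposits one MAXCHARLEN
   message in a shared buffer; rank 0 then gathers the messages of all
   tasks and threads over comm and prints them in order. */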
static char *__collective_print_pointer;

int collective_print_gather(char *cbuffer, MPI_Comm comm)
int num_threads = omp_get_num_threads();
int rank, size, p;
char *lbuffer = NULL;
__collective_print_pointer = (char *) malloc(MAXCHARLEN * num_threads);

memcpy(__collective_print_pointer + (MAXCHARLEN * omp_get_thread_num()), cbuffer, MAXCHARLEN);

MPI_Comm_size(comm, &size);
MPI_Comm_rank(comm, &rank);
if (rank == 0) lbuffer = (char *) malloc(MAXCHARLEN * num_threads * size);

MPI_Gather(__collective_print_pointer, MAXCHARLEN * num_threads, MPI_CHAR,
           lbuffer, MAXCHARLEN * num_threads, MPI_CHAR, 0, comm);
if (rank == 0) {
  for (p = 0; p < (size * num_threads); p++) {
    fprintf(stderr, "%s", lbuffer + p * MAXCHARLEN);
  }
}
if (rank == 0) free(lbuffer);

free(__collective_print_pointer);
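/* reduce_omp(): thread-level reduction that precedes the MPI reduction.
   Every thread publishes its value in a per-thread slot of
   __thread_sync_pointer; the slots are then combined into *out with
   MPI_SUM or MPI_MAX semantics, according to the element type in dtype. */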
static void *__thread_sync_pointer;

void reduce_omp(void *syncdata, void *out, MPI_Op op, int dtype)
int thread_num = omp_get_thread_num();
int num_threads = omp_get_num_threads();
int i;
switch (dtype) {
case _PARTEST_SION_INT32:
  __thread_sync_pointer = malloc(sizeof(sion_int32) * num_threads);
  break;
case _PARTEST_SION_INT64:
  __thread_sync_pointer = malloc(sizeof(sion_int64) * num_threads);
  break;
case _PARTEST_DOUBLE:
  __thread_sync_pointer = malloc(sizeof(double) * num_threads);
  break;
default:
  __thread_sync_pointer = malloc(sizeof(sion_int64) * num_threads);
  break;
}
switch (dtype) {
case _PARTEST_SION_INT32:
  ((sion_int32 *) __thread_sync_pointer)[thread_num] = *(sion_int32 *) syncdata;
  break;
case _PARTEST_SION_INT64:
  ((sion_int64 *) __thread_sync_pointer)[thread_num] = *(sion_int64 *) syncdata;
  break;
case _PARTEST_DOUBLE:
  ((double *) __thread_sync_pointer)[thread_num] = *(double *) syncdata;
  break;
default:
  ((sion_int64 *) __thread_sync_pointer)[thread_num] = *(sion_int64 *) syncdata;
  break;
}
switch (dtype) {
case _PARTEST_SION_INT32:
  if (op == MPI_SUM) {
    *((sion_int32 *) out) = 0;
    for (i = 0; i < num_threads; i++) {
      *((sion_int32 *) out) += ((sion_int32 *) __thread_sync_pointer)[i];
    }
  }
  else if (op == MPI_MAX) {
    *((sion_int32 *) out) = ((sion_int32 *) __thread_sync_pointer)[0];
    for (i = 1; i < num_threads; i++) {
      if (((sion_int32 *) __thread_sync_pointer)[i] > *((sion_int32 *) out)) *((sion_int32 *) out) = ((sion_int32 *) __thread_sync_pointer)[i];
    }
  }
  break;
case _PARTEST_SION_INT64:
  if (op == MPI_SUM) {
    *((sion_int64 *) out) = 0;
    for (i = 0; i < num_threads; i++) {
      *((sion_int64 *) out) += ((sion_int64 *) __thread_sync_pointer)[i];
    }
  }
  else if (op == MPI_MAX) {
    *((sion_int64 *) out) = ((sion_int64 *) __thread_sync_pointer)[0];
    for (i = 1; i < num_threads; i++) {
      if (((sion_int64 *) __thread_sync_pointer)[i] > *((sion_int64 *) out)) *((sion_int64 *) out) = ((sion_int64 *) __thread_sync_pointer)[i];
    }
  }
  break;
case _PARTEST_DOUBLE:
  if (op == MPI_SUM) {
    *((double *) out) = 0;
    for (i = 0; i < num_threads; i++) {
      *((double *) out) += ((double *) __thread_sync_pointer)[i];
    }
  }
  else if (op == MPI_MAX) {
    *((double *) out) = ((double *) __thread_sync_pointer)[0];
    for (i = 1; i < num_threads; i++) {
      if (((double *) __thread_sync_pointer)[i] > *((double *) out)) *((double *) out) = ((double *) __thread_sync_pointer)[i];
    }
  }
  break;
default:
  if (op == MPI_SUM) {
    *((sion_int64 *) out) = 0;
    for (i = 0; i < num_threads; i++) {
      *((sion_int64 *) out) += ((sion_int64 *) __thread_sync_pointer)[i];
    }
  }
  else if (op == MPI_MAX) {
    *((sion_int64 *) out) = ((sion_int64 *) __thread_sync_pointer)[0];
    for (i = 1; i < num_threads; i++) {
      if (((sion_int64 *) __thread_sync_pointer)[i] > *((sion_int64 *) out)) *((sion_int64 *) out) = ((sion_int64 *) __thread_sync_pointer)[i];
    }
  }
  break;
}
free(__thread_sync_pointer);
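/* split_communicator() derives the benchmark communicators (work, local,
   workread) from MPI_COMM_WORLD, using platform-specific topology
   information (e.g. Blue Gene psets) and the requested number of files. */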
int split_communicator(_test_communicators *communicators, int bluegene, int bluegene_np, int numfiles, int read_task_offset, int verbose);
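/* main(): initialize MPI, parse and distribute the options, split the
   communicators, report the configuration, and run the selected test
   variant from within an OpenMP parallel region. */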
int main(int argc, char **argv)
int rank, size, rc = 0;
time_t t;
char *localbuffer;
_test_options options;
_test_communicators communicators;
sion_int64 commwork_size64 = 1;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
DPRINTFTS(rank, "after MPI_Init");

init_options(&options);
parse_options_std(argc, argv, &options);
rc = parse_options_long(argc, argv, &options);
MPI_Bcast(&rc, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (rc == 0) {
  /* option parsing failed on rank 0: abort all tasks */
  MPI_Abort(MPI_COMM_WORLD, 1);
}
distribute_options_mpi(&options);

communicators.all = MPI_COMM_WORLD;
MPI_Comm_size(MPI_COMM_WORLD, &communicators.all_size);
MPI_Comm_rank(MPI_COMM_WORLD, &communicators.all_rank);
split_communicator(&communicators, options.bluegene, options.bluegene_np, options.numfiles, options.read_task_offset, options.verbose);

sion_int64 num_threads = (sion_int64) omp_get_max_threads();
MPI_Allreduce(&num_threads, &commwork_size64, 1, SION_MPI_INT64, MPI_SUM, communicators.work);
if (options.globalsize > 0) {
  options.totalsize = (sion_int64) options.globalsize / commwork_size64;
}
else {
  options.globalsize = options.totalsize * commwork_size64;
}
if ((options.totalsize > options.bufsize) || (options.read_task_offset > 0) || (options.do_write == 0)) {
  options.suppress_checksum = 1;
}

if (options.fsblksize < 0) options.fsblksize = -1;
if ((communicators.work_size > 0) && (communicators.work_rank == 0)) {
  time(&t);
  fprintf(stderr, "------------------------------------------------------------------------------------------\n");
  fprintf(stderr, "SION parallel file I/O benchmark 'ompi_partest': start at %s", ctime(&t));
  fprintf(stderr, "partest: number of MPI tasks that will use the file: running on %d tasks\n", size);
  fprintf(stderr, "------------------------------------------------------------------------------------------\n");
#ifndef CHECKSUM
  fprintf(stderr, "partest parameter: CHECKSUM DISABLED!\n\n");
#else
  if (options.suppress_checksum) {
    fprintf(stderr, "partest parameter: CHECKSUM not possible, DISABLED!\n\n");
  }
#endif
  fprintf(stderr, "partest parameter: (-f) datafile = %s\n", options.filename);
  fprintf(stderr, "partest parameter: (-n) number of files = %d\n", options.numfiles);
  fprintf(stderr, "partest parameter: (-F) random factor = %13.4f\n", options.factor);
  fprintf(stderr, "partest parameter: (-X) remove files after test = %d\n", options.unlink_files);
  fprintf(stderr, "partest parameter: (-b/-B) local buffer size / sion task = %15lld bytes %10.3f MB\n", options.bufsize, options.bufsize / (1.0 MB));
  fprintf(stderr, "partest parameter: (-g/-G) global total data size = %15lld bytes %10.3f GB\n", options.globalsize, options.globalsize / (1024.0 MB));
  fprintf(stderr, "partest parameter: (-s/-S) total data size / sion task = %15lld bytes %10.3f MB\n", options.totalsize, options.totalsize / (1.0 MB));
  fprintf(stderr, "partest parameter: (-r/-R) sion chunk size = %15lld bytes %10.3f MB\n", options.chunksize, options.chunksize / (1.0 MB));
  fprintf(stderr, "partest parameter: (-Q) fs block size = %15d bytes %10.3f MB\n", options.fsblksize, options.fsblksize / (1.0 MB));
  if (options.type == 0)
    fprintf(stderr, "partest parameter: (-T) test type = %d (sion OMPI, collective read)\n", options.type);
  if (options.type == 1)
    fprintf(stderr, "partest parameter: (-T) test type = %d (sion OMPI, independent read)\n", options.type);
  if (options.type == 2)
    fprintf(stderr, "partest parameter: (-T) test type = %d (sion OMP, collective read)\n", options.type);
  if (options.type == 3)
    fprintf(stderr, "partest parameter: (-T) test type = %d (sion OMP, independent read)\n", options.type);
  fprintf(stderr, "partest parameter: (-j) serialize_blocknum = %d\n", options.serialize_blocknum);
  fprintf(stderr, "partest parameter: (-Z) read task offset = %d\n", options.read_task_offset);
  fprintf(stderr, "partest parameter: (-o) start offset bytes = %d\n", options.startoffset);
  fprintf(stderr, "partest parameter: (-v) verbose = %d\n", options.verbose);
  fprintf(stderr, "partest parameter: (-d) debug = %d\n", options.debug);
  fprintf(stderr, "partest parameter: (-D) Debug = %d\n", options.Debug);
  fprintf(stderr, "partest parameter: (-M) collective write = %d\n", options.collectivewrite);
  fprintf(stderr, "partest parameter: (-m) collective read = %d\n", options.collectiveread);
  fprintf(stderr, "partest parameter: (-P) Blue Gene, I/O nodes = %d\n", options.bluegene);
  fprintf(stderr, "partest parameter: ( ) Blue Gene: tasks/IO-node = %d\n", options.bluegene_np);
  fprintf(stderr, "partest parameter: (-w) MPI-IO, IBM, Large Block IO = %d\n", options.mpiio_lb);
  fprintf(stderr, "partest parameter: (-W) MPI-IO, IBM, IO bufsize = %d KB\n", options.mpiio_bs);
  fprintf(stderr, "partest parameter: (-x) MPI-IO, IBM, sparse access = %d\n", options.mpiio_sa);
  fprintf(stderr, "partest parameter: ( ) OpenMP number of threads = %d\n", omp_get_max_threads());
  fprintf(stderr, "partest parameter: ( ) commwork_size64 = %lld\n", commwork_size64);
  fprintf(stderr, "partest parameter: ( ) suppress_checksum = %d\n", options.suppress_checksum);
  fprintf(stderr, "partest parameter: ( ) do_write = %d\n", options.do_write);
  fprintf(stderr, "partest parameter: ( ) do_read = %d\n", options.do_read);
  fprintf(stderr, "partest parameter: ( ) use_posix = %d\n", options.use_posix);
}
barrier_after_start(MPI_COMM_WORLD);
if ((communicators.work_size > 0) && (communicators.work_rank == 0)) {
  fprintf(stderr, "partest parameter: ( ) comm(all) = %d of %d\n", communicators.all_rank, communicators.all_size);
  fprintf(stderr, "partest parameter: ( ) comm(work) = %d of %d\n", communicators.work_rank, communicators.work_size);
  fprintf(stderr, "partest parameter: ( ) comm(local) = %d of %d\n", communicators.local_rank, communicators.local_size);
  fprintf(stderr, "------------------------------------------------------------------------------------------\n");
}
#pragma omp parallel private(localbuffer, t)
{
  barrier_after_start(MPI_COMM_WORLD);
char l_filename[MAXCHARLEN];
strcpy(l_filename, options.filename);

DPRINTFTS(rank, "after pstart");
localbuffer = (char *) malloc(options.bufsize);
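/* Note: local_options and local_communicators are assumed to be
   thread-private copies of options and communicators, created at the
   start of the parallel region. */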
srand(time(NULL) * local_communicators.work_rank * omp_get_thread_num());

memset(localbuffer, 'a' + rank % 26, local_options.bufsize);
barrier_after_malloc(MPI_COMM_WORLD);
DPRINTFTS(rank, "after malloc");
if (local_options.factor > 0.0) {
  if ((local_options.collectivewrite) || (local_options.collectiveread)) {
    if (local_options.bufsize < local_options.totalsize * (1 + local_options.factor)) {
      fprintf(stderr, "partest: ERROR: possible deadlock if collective read/write is used with a random factor and the buffer is too small; aborting\n");
      MPI_Abort(MPI_COMM_WORLD, 0);
    }
  }
  local_options.totalsize += ((sion_int64) (local_options.factor * (sion_int64) local_options.totalsize * (sion_int64) rand() / (sion_int64) RAND_MAX));
  local_options.chunksize += ((sion_int64) (local_options.factor * (sion_int64) local_options.totalsize * (sion_int64) rand() / (sion_int64) RAND_MAX));
  fprintf(stderr, "partest parameter: ( ) new totalsize[%4d,t%4d] = %lld\n", local_communicators.work_rank, omp_get_thread_num(), local_options.totalsize);
}
DPRINTFTS(rank, "before scall2");
if ((local_communicators.work_size > 0) && (local_communicators.work_rank == 0)) {
  fprintf(stderr, "partest parameter: ( ) new totalsize = %lld\n", local_options.totalsize);
}
barrier_after_malloc(MPI_COMM_WORLD);
if (local_options.type == 0) {
  local_options.collectiveopenforread = 1;
  test_paropen_multi_ompi(l_filename, localbuffer, &local_communicators, &local_options);
}
else if (local_options.type == 1) {
  local_options.collectiveopenforread = 0;
  test_paropen_multi_ompi(l_filename, localbuffer, &local_communicators, &local_options);
}
else if (local_options.type == 2) {
  local_options.collectiveopenforread = 1;
  test_paropen_omp(l_filename, localbuffer, &local_communicators, &local_options);
}
else if (local_options.type == 3) {
  local_options.collectiveopenforread = 0;
  test_paropen_omp(l_filename, localbuffer, &local_communicators, &local_options);
}
DPRINTFTS(rank, "before MPI_Finalize");
barrier_after_malloc(MPI_COMM_WORLD);
if ((local_communicators.work_size > 0) && (local_communicators.work_rank == 0)) {
  time(&t);
  fprintf(stderr, "SION parallel file I/O benchmark 'ompi_partest': end at %s\n", ctime(&t));
}
int split_communicator(_test_communicators *communicators, int bluegene, int bluegene_np, int numfiles, int read_task_offset, int verbose)
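/* -2 marks a communicator as "not yet assigned"; the generic fallback
   further below only fills in communicators still carrying this mark. */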
communicators->work_size = communicators->work_rank = -2;
communicators->local_size = communicators->local_rank = -2;

_BGP_Personality_t personality;
MPI_Comm commSame, commDiff, commTemp;
int sizeSame, sizeDiff;
int rankSame, rankDiff;
unsigned procid, x, y, z, t;
char cbuffer[MAXCHARLEN];

Kernel_GetPersonality(&personality, sizeof(personality));
BGP_Personality_getLocationString(&personality, location);
procid = Kernel_PhysicalProcessorID();
MPIX_rank2torus(communicators->all_rank, &x, &y, &z, &t);
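/* Blue Gene/P: commSame groups tasks that share an I/O node (pset),
   commDiff contains at most one task per pset, so the rank within
   commDiff enumerates the I/O nodes. */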
MPIX_Pset_diff_comm_create(&commDiff);
MPI_Comm_size(commDiff, &sizeDiff);
MPI_Comm_rank(commDiff, &rankDiff);
communicators->ionode_number = rankDiff;

MPIX_Pset_same_comm_create(&commSame);
MPI_Comm_size(commSame, &sizeSame);
MPI_Comm_rank(commSame, &rankSame);
if (bluegene_np == 0) {
  bluegene_np = sizeSame;
}
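/* The work communicator gets the first bluegene_np tasks of each pset
   (color rankSame < bluegene_np); the remaining tasks stay idle. */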
MPI_Comm_split(communicators->all, (rankSame < bluegene_np), communicators->all_rank, &communicators->work);
MPI_Comm_size(communicators->work, &communicators->work_size);
MPI_Comm_rank(communicators->work, &communicators->work_rank);
if (rankSame >= bluegene_np) {
  communicators->work_size = communicators->work_rank = -1;
  communicators->local_size = communicators->local_rank = -1;
}
if (numfiles == 1) {
  communicators->local = communicators->work;
}
else if (numfiles < 0) {
  MPI_Comm_split(commSame, (rankSame < bluegene_np), communicators->all_rank, &communicators->local);
}
else {
  communicators->local = commDiff;
}
MPI_Comm_size(communicators->local, &communicators->local_size);
MPI_Comm_rank(communicators->local, &communicators->local_rank);
if (rankSame < bluegene_np) {
  if (numfiles == -1) communicators->file_number = rankDiff;
  else communicators->file_number = rankSame;
}
else {
  communicators->file_number = -1;
}
cbuffer[0] = '\0';              /* start with an empty message */
if (rankSame < bluegene_np) {
  sprintf(cbuffer, "BGP[%05d] diff_comm: %4d of %4d same_comm: %5d of %5d file_comm: %5d of %5d %s phys_xyzt(%d,%d,%d,%d)\n",
          communicators->all_rank, rankDiff + 1, sizeDiff, rankSame + 1, sizeSame, communicators->local_rank + 1, communicators->local_size,
          location, x, y, z, t);
}
collective_print_gather(cbuffer, communicators->work);
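/* Blue Gene/L: same pset-based splitting as above, based on the BGL
   personality and the PMI pset communicators. */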
MPI_Comm commSame, commDiff, commTemp;
int sizeSame, sizeDiff;
int rankSame, rankDiff;
char location[BGLPERSONALITY_MAX_LOCATION];
unsigned procid, x, y, z, t;
BGLPersonality personality;
char cbuffer[MAXCHARLEN];
rts_get_personality(&personality, sizeof(personality));
BGLPersonality_getLocationString(&personality, location);
x = BGLPersonality_xCoord(&personality);
y = BGLPersonality_yCoord(&personality);
z = BGLPersonality_zCoord(&personality);
t = rts_get_processor_id();
PMI_Pset_diff_comm_create(&commDiff);
MPI_Comm_size(commDiff, &sizeDiff);
MPI_Comm_rank(commDiff, &rankDiff);
communicators->ionode_number = rankDiff;
PMI_Pset_same_comm_create(&commSame);
MPI_Comm_size(commSame, &sizeSame);
MPI_Comm_rank(commSame, &rankSame);

if (bluegene_np == 0) {
  bluegene_np = sizeSame;
}
MPI_Comm_split(communicators->all, (rankSame < bluegene_np), communicators->all_rank, &communicators->work);
MPI_Comm_size(communicators->work, &communicators->work_size);
MPI_Comm_rank(communicators->work, &communicators->work_rank);
if (numfiles == 1) {
  communicators->local = communicators->work;
}
else {
  MPI_Comm_split(commSame, (rankSame < bluegene_np), communicators->all_rank, &communicators->local);
}
MPI_Comm_size(communicators->local, &communicators->local_size);
MPI_Comm_rank(communicators->local, &communicators->local_rank);
if (rankSame < bluegene_np) {
  communicators->file_number = rankDiff;
}
else {
  communicators->file_number = -1;
}
cbuffer[0] = '\0';              /* start with an empty message */
if (rankSame < bluegene_np) {
  sprintf(cbuffer, "BGL[%05d] diff_comm: %4d of %4d same_comm: %5d of %5d file_comm: %5d of %5d %s phys_xyzt(%d,%d,%d,%d)\n",
          communicators->all_rank, rankDiff + 1, sizeDiff, rankSame + 1, sizeSame,
          communicators->local_rank + 1, communicators->local_size, location, x, y, z, t);
}
collective_print_gather(cbuffer, communicators->work);
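/* Generic fallback for all other platforms: any communicator still
   marked -2 is derived here from MPI_COMM_WORLD alone. */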
if (communicators->work_size == -2) {
  communicators->work = communicators->all;
  MPI_Comm_size(communicators->work, &communicators->work_size);
  MPI_Comm_rank(communicators->work, &communicators->work_rank);
}
if (communicators->local_size == -2) {
  communicators->local = communicators->work;
  numfiles = communicators->work_size / 2;
  proc_per_file = communicators->work_size / numfiles;
  /* tasks beyond the last full group share the last file */
  if (communicators->work_rank >= (numfiles * proc_per_file)) {
    communicators->file_number = numfiles - 1;
  }
  else {
    communicators->file_number = communicators->work_rank / proc_per_file;
  }
  MPI_Comm_split(communicators->work, communicators->file_number, communicators->all_rank, &communicators->local);
  MPI_Comm_size(communicators->local, &communicators->local_size);
  MPI_Comm_rank(communicators->local, &communicators->local_rank);
  communicators->ionode_number = communicators->file_number;
}
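/* Per-platform diagnostics: every task formats a one-line summary of
   its placement in the communicators, gathered and printed by rank 0
   via collective_print_gather(). */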
gethostname(location, 256);
char cbuffer[MAXCHARLEN];
sprintf(cbuffer, "LINUX[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %s\n",
        communicators->all_rank, communicators->all_rank, communicators->all_size,
        communicators->work_rank, communicators->work_size, communicators->local_rank, communicators->local_size, location);
collective_print_gather(cbuffer, communicators->all);
gethostname(location, 256);
int sizeSame = 0, sizeDiff = 0;
int rankSame, rankDiff;
char cbuffer[MAXCHARLEN];
sprintf(cbuffer, "AIX[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %s\n",
        communicators->all_rank, communicators->all_rank, communicators->all_size,
        communicators->work_rank, communicators->work_size, communicators->local_rank, communicators->local_size, location);
collective_print_gather(cbuffer, communicators->all);
gethostname(location, 256);
int sizeSame = 0, sizeDiff = 0;
int rankSame, rankDiff;
char cbuffer[MAXCHARLEN];
sprintf(cbuffer, "XT[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %s\n",
        communicators->all_rank, communicators->all_rank, communicators->all_size,
        communicators->work_rank, communicators->work_size, communicators->local_rank, communicators->local_size, location);
collective_print_gather(cbuffer, communicators->all);
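/* The workread communicator shifts the task order by read_task_offset
   so that in the read phase each task reads data written by a different
   task, which defeats client-side caching of the just-written data. */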
if (communicators->work_size != -1) {
  newtasknr = (communicators->work_rank + read_task_offset) % communicators->work_size;
  MPI_Comm_split(communicators->work, 0, newtasknr, &communicators->workread);
  MPI_Comm_size(communicators->workread, &communicators->workread_size);
  MPI_Comm_rank(communicators->workread, &communicators->workread_rank);
}
else {
  /* tasks outside the work communicator join neither the write nor the read phase */
  communicators->workread_size = communicators->workread_rank = -1;
  communicators->local_size = communicators->local_rank = -1;
}