SIONlib  1.6.2
Scalable I/O library for parallel access to task-local files
partest_split_comm.c
/****************************************************************************
** SIONLIB http://www.fz-juelich.de/jsc/sionlib **
*****************************************************************************
** Copyright (c) 2008-2016 **
** Forschungszentrum Juelich, Juelich Supercomputing Centre **
** **
** See the file COPYRIGHT in the package base directory for details **
****************************************************************************/
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 #include <unistd.h>
14 #include <mpi.h>
15 #include <time.h>
16 #include <math.h>
17 
18 #include "partest_split_comm.h"
19 #include "partest_util.h"
20 
21 #ifdef _SION_BGQ
22 #include <firmware/include/personality.h>
23 #include <spi/include/kernel/process.h>
24 #include <spi/include/kernel/location.h>
25 #ifdef __GNUC__
26 #include <hwi/include/bqc/A2_inlines.h> /* for GetTimebase() */
27 #endif
28 #include <hwi/include/common/uci.h>
29 #include <mpix.h>
30 #endif
31 
32 #ifdef _SION_BGP
33 #include <common/bgp_personality.h>
34 #include <common/bgp_personality_inlines.h>
35 #endif
36 
37 #ifdef _SION_FX
38 #include <mpi-ext.h>
39 #endif
40 
41 #ifdef _SION_XT
42 #include <unistd.h>
43 #include <pmi.h>
44 #include <rca_lib.h>
45 #endif
46 
47 #ifdef _SION_AIX
48 #include <unistd.h>
49 #endif
50 
51 #ifdef _SION_LINUX
52 #include <unistd.h>
53 #endif
54 
55 
56 int split_communicator(_test_communicators * communicators, int bluegene, int bluegene_np, int bluegene_sort, int numfiles, int read_task_offset, int verbose)
57 {
58  int proc_per_file;
59 
60  communicators->work_size = communicators->work_rank = -2;
61  communicators->local_size = communicators->local_rank = -2;
62 
63 
64 
65 #ifdef _SION_BGP
66  if (bluegene) { /* order MPI-tasks by I/O-node */
67  _BGP_Personality_t personality;
68  MPI_Comm commSame, commDiff, commTemp;
69  int sizeSame, sizeDiff;
70  int rankSame, rankDiff;
71  char location[128];
72  unsigned procid, x, y, z, t;
73  char cbuffer[MAXCHARLEN];
74 
75  /* get location information */
76  Kernel_GetPersonality(&personality, sizeof(personality));
77  BGP_Personality_getLocationString(&personality, location);
78  procid = Kernel_PhysicalProcessorID();
79  MPIX_rank2torus(communicators->all_rank, &x, &y, &z, &t);
80 
81  /* task of communicator working with different I/O-nodes */
82  MPIX_Pset_diff_comm_create(&commDiff);
83  MPI_Comm_size(commDiff, &sizeDiff);
84  MPI_Comm_rank(commDiff, &rankDiff);
85  communicators->ionode_number = rankDiff;
86 
87  /* communicator consists of all task working with the same I/O-node */
88  MPIX_Pset_same_comm_create(&commSame);
89  MPI_Comm_size(commSame, &sizeSame);
90  MPI_Comm_rank(commSame, &rankSame);
91 
92  /* if -p not specified all proc will write! */
93  if (bluegene_np == 0) {
94  bluegene_np = sizeSame;
95  }
96 
97  /* Get a communicator with all writing tasks => new global communicator */
98  MPI_Comm_split(communicators->all, (rankSame < bluegene_np), communicators->all_rank, &communicators->work);
99  MPI_Comm_size(communicators->work, &communicators->work_size);
100  MPI_Comm_rank(communicators->work, &communicators->work_rank);
101  if (rankSame >= bluegene_np) {
102  /* not working task */
103  communicators->work_size = communicators->work_rank = -1;
104  communicators->local_size = communicators->local_rank = -1;
105  }
106 
107  /* If only one file will be used => dont split further */
108  /* if numfile > 1 sion will generate correct local communicator */
109  if (numfiles >= 1) {
110  communicators->local = communicators->work;
111  }
112  else if(numfiles<0) {
113  if(numfiles==-1) {
114  /* Split the common communicator for each IO node to get a local comm with only the writing tasks for this IO Node */
115  MPI_Comm_split(commSame, (rankSame < bluegene_np), communicators->all_rank, &communicators->local);
116  } else {
117  /* local communicator contains only one task per IO-node */
118  /* bluegene_np has to be 512 */
119  communicators->local=commDiff;
120  }
121  }
122  MPI_Comm_size(communicators->local, &communicators->local_size);
123  MPI_Comm_rank(communicators->local, &communicators->local_rank);
124 
125  /* determine filenumber */
126  if (numfiles < 1) {
127  /* one file per I/O-node */
128  if(numfiles==-1) communicators->file_number = rankDiff;
129  else communicators->file_number = rankSame;
130  }
131  else {
132  communicators->file_number = -1;
133  }
134 
135  /* print log message about location, ... */
136  sprintf(cbuffer, "");
137  if (rankSame < bluegene_np) {
138  if (verbose) {
139  sprintf(cbuffer, "BGP[%05d] diff_comm: %4d of %4d same_comm: %5d of %5d file_comm: %5d of %5d %s phys_xyzt(%d,%d,%d,%d)\n",
140  communicators->all_rank, rankDiff + 1, sizeDiff, rankSame + 1, sizeSame, communicators->local_rank + 1, communicators->local_size,
141  location, x, y, z, t);
142  }
143  }
144  collective_print_gather(cbuffer, communicators->work);
145 
146  }
147 #endif
148 
149 #ifdef _SION_BGQ
150  if (bluegene) { /* order MPI-tasks by I/O-node */
151  Personality_t personality;
152  MPI_Comm commSame, commDiff, commTemp;
153  MPIX_Hardware_t hw;
154  int sizeSame, sizeDiff;
155  int rankSame, rankDiff;
156  int baserank;
157  int factor, bridgeid,core, hwthread,procid;
158  int dist_to_bridge, isonbridge;
159  char cbuffer[MAXCHARLEN];
160  char location[64];
161  BG_UniversalComponentIdentifier uci;
162  unsigned int row, col, mp, nb, cc;
163  double starttime;
164  int key, color, baseid;
165 
166  if(communicators->all_rank==0) {
167  starttime=MPI_Wtime();
168  printf("partest_split_comm[%d]: starting at Wt=%10.3fs\n",communicators->all_rank,starttime);
169  }
170 
171  /* get location information */
172  Kernel_GetPersonality(&personality, sizeof(Personality_t));
173  MPIX_Hardware(&hw);
174  uci = personality.Kernel_Config.UCI;
175  bg_decodeComputeCardOnNodeBoardUCI(uci, &row, &col, &mp, &nb, &cc);
176 
177  procid = Kernel_ProcessorID(); /* 0-63 */
178  core = Kernel_ProcessorCoreID(); /* 0-15 */
179  hwthread = Kernel_ProcessorThreadID(); /* 0-3 */
180 
181  sprintf(location, "R%x%x-M%d-N%02x-J%02x <%d,%d,%d,%d,%d> p%02dc%02dt%1d", row, col, mp, nb, cc,
182  personality.Network_Config.Acoord, personality.Network_Config.Bcoord,
183  personality.Network_Config.Ccoord, personality.Network_Config.Dcoord,
184  personality.Network_Config.Ecoord,
185  procid,core,hwthread);
186 
187  if (
188  ( personality.Network_Config.Acoord==personality.Network_Config.cnBridge_A ) &&
189  ( personality.Network_Config.Bcoord==personality.Network_Config.cnBridge_B ) &&
190  ( personality.Network_Config.Ccoord==personality.Network_Config.cnBridge_C ) &&
191  ( personality.Network_Config.Dcoord==personality.Network_Config.cnBridge_D ) &&
192  ( personality.Network_Config.Ecoord==personality.Network_Config.cnBridge_E )
193  )
194  {
195  isonbridge=1;
196  } else {
197  isonbridge=0;
198  }
199 
200  dist_to_bridge=MPIX_IO_distance();
201 
202  /* following could be replaced by MPIX_IO_link_id() */
203  factor=1;
204  bridgeid = personality.Network_Config.cnBridge_E;
205  factor *= personality.Network_Config.Enodes;
206  bridgeid += personality.Network_Config.cnBridge_D*factor;
207  factor *= personality.Network_Config.Dnodes;
208  bridgeid += personality.Network_Config.cnBridge_C*factor;
209  factor *= personality.Network_Config.Cnodes;
210  bridgeid += personality.Network_Config.cnBridge_B*factor;
211  factor *= personality.Network_Config.Bnodes;
212  bridgeid += personality.Network_Config.cnBridge_A*factor;
213 
214 
215  if(bluegene==2) {
216  /* per ION */
217  /* communicator consists of all task working with the same I/O-bridge */
218  if(bluegene_sort==0) {
219  key = MPIX_IO_distance();
220  } else {
221  key = communicators->all_rank;
222  }
223 
224  MPI_Comm_split(communicators->all, bridgeid, key, &commSame);
225  MPI_Comm_size(commSame, &sizeSame);
226  MPI_Comm_rank(commSame, &rankSame);
227  baseid=bridgeid;
228 
229  /* communicator consists of all task working with the different I/O-nodes */
230  MPI_Comm_split(communicators->all, rankSame, bridgeid, &commDiff);
231  MPI_Comm_size(commDiff, &sizeDiff);
232  MPI_Comm_rank(commDiff, &rankDiff);
233 
234  communicators->ionode_number = rankDiff;
235  } else {
236  /* similar to MPIX_Pset_same_comm_create but with ION-id */
237  MPI_Comm commSameIOB, commDiffIOB;
238  int sizeSameIOB, sizeDiffIOB;
239  int rankSameIOB, rankDiffIOB;
240  int ionid;
241 
242  /* only needed for ionid distribution */
243  key = communicators->all_rank;
244  MPI_Comm_split(communicators->all, bridgeid, key, &commSameIOB);
245  MPI_Comm_size(commSameIOB, &sizeSameIOB);
246  MPI_Comm_rank(commSameIOB, &rankSameIOB);
247 
248  if(communicators->all_rank==0) {
249  printf("partest_split_comm[%d]: after split commSameIOB deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
250  }
251 
252  if(rankSameIOB==0) {
253  /* ionid=MPIX_IO_node_id() & 0xFFFF; */
254  ionid=MPIX_IO_node_id();
255  }
256  MPI_Bcast(&ionid, 1, MPI_INT, 0, commSameIOB);
257 
258  if(communicators->all_rank==0) {
259  printf("partest_split_comm[%d]: after get ionid deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
260  }
261 
262  color = (int) ionid;
263  if(bluegene_sort==0) {
264  key = MPIX_IO_distance();
265  } else {
266  key = communicators->all_rank;
267  }
268  int rc=-1;
269  rc=MPI_Comm_split(communicators->all,color,key,&commSame);
270  MPI_Comm_size(commSame, &sizeSame);
271  MPI_Comm_rank(commSame, &rankSame);
272  baseid=ionid;
273 
274  if(communicators->all_rank==0) {
275  printf("partest_split_comm[%d]: after split commSame deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
276  }
277 
278  /* distribute global rank of first task in samecomm */
279  if(rankSame==0) baserank=communicators->all_rank;
280  MPI_Bcast(&baserank, 1, MPI_INT, 0, commSame);
281 
282  if(communicators->all_rank==0) {
283  printf("partest_split_comm[%d]: after bcast baserank deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
284  }
285 
286  /* similar to MPIX_Pset_diff_comm_create but with ION-id, but without: ... *hw.ppn+hw.coreID */
287  color = rankSame;
288  key = baserank; /* rank of task 0 of samecomm in MPI_COMM_WORLD */
289  MPI_Comm_split(communicators->all,color,key,&commDiff);
290  MPI_Comm_size(commDiff, &sizeDiff);
291  MPI_Comm_rank(commDiff, &rankDiff);
292 
293  if(communicators->all_rank==0) {
294  printf("partest_split_comm[%d]: after split commDiff deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
295  }
296 
297  }
298 
299  /* printf("WF: %02d of %02d here rSame=%2d rDiff=%2d bg_np=%2d --> factor=%2d, bridgeid=%2d %s -> bridge(%d,%d,%d,%d,%d)\n",communicators->all_rank,communicators->all_size, rankSame, rankDiff, bluegene_np, factor, bridgeid,location, */
300  /* personality.Network_Config.cnBridge_A, personality.Network_Config.cnBridge_B, personality.Network_Config.cnBridge_C, personality.Network_Config.cnBridge_D, personality.Network_Config.cnBridge_E); */
301 
302  /* if -p not specified all proc will write! */
303  if (bluegene_np == 0) {
304  bluegene_np = sizeSame;
305  }
306 
307  /* Get a communicator with all writing tasks => new global communicator */
308  /* TODO: better to MPI_UNDEFINED when rankSame >= bluegene_np */
309  MPI_Comm_split(communicators->all, (rankSame < bluegene_np), communicators->all_rank, &communicators->work);
310  MPI_Comm_size(communicators->work, &communicators->work_size);
311  MPI_Comm_rank(communicators->work, &communicators->work_rank);
312  if (rankSame >= bluegene_np) {
313  /* not working task */
314  communicators->work_size = communicators->work_rank = -1;
315  communicators->local_size = communicators->local_rank = -1;
316  }
317  if(communicators->all_rank==0) {
318  printf("partest_split_comm[%d]: after split work deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
319  }
320 
321  /* If only one file will be used => dont split further */
322  /* if numfile > 1 sion will generate correct local communicator */
323  if (numfiles >= 1) {
324  communicators->local = communicators->work;
325  }
326  else if(numfiles<0) {
327  if(numfiles==-1) {
328 #ifdef SPLITCASCADE
329  /* Split the common communicator for each IO node to get a local comm with only the writing tasks for this IO Node */
330  MPI_Comm_split(commSame, (rankSame < bluegene_np), rankSame, &communicators->local);
331 #else
332  /* Split the common communicator for each IO node to get a local comm with only the writing tasks for this IO Node */
333  color=(rankSame < bluegene_np)?baseid:MPI_UNDEFINED;
334  MPI_Comm_split(communicators->all, color, rankSame, &communicators->local);
335 #endif
336  if (rankSame < bluegene_np) {
337  MPI_Comm_size(communicators->local, &communicators->local_size);
338  MPI_Comm_rank(communicators->local, &communicators->local_rank);
339  }
340 
341  if(communicators->all_rank==0) {
342  printf("partest_split_comm[%d]: after split local deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
343  }
344 
345  } else {
346  /* local communicator contains only one task per IO-node */
347  /* bluegene_np has to be 512 */
348  communicators->local=commDiff;
349  MPI_Comm_size(communicators->local, &communicators->local_size);
350  MPI_Comm_rank(communicators->local, &communicators->local_rank);
351  }
352  }
353 
354  /* determine filenumber */
355  if (numfiles < 1) {
356  /* one file per I/O-node */
357  if(numfiles==-1) communicators->file_number = rankDiff;
358  else communicators->file_number = rankSame;
359  }
360  else {
361  communicators->file_number = -1;
362  }
363 
364  if(communicators->all_rank==0) {
365  printf("partest_split_comm[%d]: before print verbose deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
366  }
367  /* print log message about location, ... */
368  if (verbose) {
369  sprintf(cbuffer, "");
370  if (rankSame < bluegene_np) {
371  sprintf(cbuffer, "BGQ[%05d] diff_comm: %4d of %4d same_comm: %5d of %5d file_comm: %5d of %5d %s bridge=%d, dist=%d\n",
372  communicators->all_rank, rankDiff + 1, sizeDiff, rankSame + 1, sizeSame, communicators->local_rank + 1,
373  communicators->local_size, location,isonbridge,dist_to_bridge);
374  }
375  collective_print_gather(cbuffer, communicators->work);
376  }
377  if(communicators->all_rank==0) {
378  printf("partest_split_comm[%d]: after print verbose/end deltaWt=%10.3fs\n",communicators->all_rank,MPI_Wtime()-starttime);
379  }
380 
381  } /* if (bluegene) */
382 #endif
383 
384 #ifdef _SION_BGL
385  if (bluegene) { /* order MPI-tasks by I/O-node */
386  _BGP_Personality_t personality;
387  MPI_Comm commSame, commDiff, commTemp;
388  int sizeSame, sizeDiff;
389  int rankSame, rankDiff;
390  char location[BGLPERSONALITY_MAX_LOCATION];
391  unsigned procid, x, y, z, t;
392  BGLPersonality personality;
393  char cbuffer[MAXCHARLEN];
394 
395  /* get location information */
396  rts_get_personality(&personality, sizeof(personality));
397  BGLPersonality_getLocationString(&personality, location);
398  x = BGLPersonality_xCoord(&personality),
399  y = BGLPersonality_yCoord(&personality), z = BGLPersonality_zCoord(&personality), t = rts_get_processor_id()
400 
401  /* task of communicator working with different I/O-nodes */
402  PMI_Pset_diff_comm_create(&commDiff);
403  MPI_Comm_size(commDiff, &sizeDiff);
404  MPI_Comm_rank(commDiff, &rankDiff);
405  *ionode_number = rankDiff;
406 
407  /* communicator consists of all task working with the same I/O-node */
408  PMI_Pset_same_comm_create(&commSame);
409  MPI_Comm_size(commSame, &sizeSame);
410  MPI_Comm_rank(commSame, &rankSame);
411 
412  /* if -p not specified all proc will write! */
413  if (bluegene_np == 0) {
414  bluegene_np = sizeSame;
415  }
416 
417  /* Get a communicator with all writing tasks => new global communicator */
418  MPI_Comm_split(communicators->all, (rankSame < bluegene_np), communicators->all_rank, &communicators->work);
419  MPI_Comm_size(communicators->work, &communicators->work_size);
420  MPI_Comm_rank(communicators->work, &communicators->work_rank);
421 
422 
423  /* If only one file will be used => dont split further */
424  if (numfiles == 1) {
425  communicators->local = communicators->work;
426  }
427  else {
428  /* Split the common communicator for each IO node to get a local comm with only the writing tasks for this IO Node */
429  MPI_Comm_split(commSame, (rankSame < bluegene_np), communicators->all_rank, &communicators->local);
430  }
431  MPI_Comm_size(communicators->local, &communicators->local_size);
432  MPI_Comm_rank(communicators->local, &communicators->local_rank);
433 
434  /* determine filenumber */
435  if (numfiles < 1) {
436  /* one file per I/O-node */
437  communicators->file_number = rankDiff;
438  }
439  else {
440  communicators->file_number = -1;
441  }
442 
443  /* print log message about location, ... */
444  sprintf(cbuffer, "");
445  if (rankSame < bluegene_np) {
446  if (verbose) {
447  sprintf(cbuffer, "BGL[%05d] diff_comm: %4d of %4d same_comm: %5d of %5d file_comm: %5d of %5d %s phys_xyzt(%d,%d,%d,%d)\n",
448  communicators->all_rank, rankDiff + 1, sizeDiff, rankSame + 1, sizeSame,
449  communicators->local_rank + 1, communicators->local_size, location, x, y, z, t);
450  }
451  }
452  collective_print_gather(cbuffer, communicators->work);
453 
454  }
455 #endif
456 
457 #ifdef _SION_AIX
458  /* no communicator adjustment */
459 #endif
460 
461 #ifdef _SION_DARWIN
462  if (verbose) {
463  char location[256];
464  gethostname(location, 256);
465  char cbuffer[MAXCHARLEN];
466  sprintf(cbuffer, "DARWIN[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %s\n",
467  communicators->all_rank, communicators->all_rank, communicators->all_size,
468  communicators->work_rank, communicators->work_size, communicators->local_rank, communicators->local_size, location);
469  collective_print_gather(cbuffer, communicators->all);
470  }
471 
472 #endif
473 
474 #if defined( _SION_LINUX) && (!defined(_SION_FX))
475  if (verbose) {
476  char location[256];
477  gethostname(location, 256);
478  char cbuffer[MAXCHARLEN];
479  sprintf(cbuffer, "LINUX[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %s\n",
480  communicators->all_rank, communicators->all_rank, communicators->all_size,
481  communicators->work_rank, communicators->work_size, communicators->local_rank, communicators->local_size, location);
482  collective_print_gather(cbuffer, communicators->all);
483  }
484 
485 #endif
486 
487 #ifdef _SION_AIX
488  if (verbose) {
489  char location[256];
490  gethostname(location, 256);
491  int sizeSame = 0, sizeDiff = 0;
492  int rankSame, rankDiff;
493  char cbuffer[MAXCHARLEN];
494  sprintf(cbuffer, "AIX[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %s\n",
495  communicators->all_rank, communicators->all_rank, communicators->all_size,
496  communicators->work_rank, communicators->work_size, communicators->local_rank, communicators->local_size, location);
497  collective_print_gather(cbuffer, communicators->all);
498  }
499 
500 #endif
501 
502 #ifdef _SION_FX
503  if (bluegene) { /* order MPI-tasks by I/O-node */
504  int rank, x, y, z, a, b, c, rc;
505  char location[256];
506  char cbuffer[MAXCHARLEN];
507  int ionodeid;
508  MPI_Comm commSame, commDiff;
509  int sizeSame, sizeDiff;
510  int rankSame, rankDiff;
511 
512  gethostname(location, 256);
513  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
514  rc=FJMPI_Topology_sys_rank2xyzabc(rank, &x, &y, &z, &a, &b, &c);
515 
516  ionodeid=x * 65536 + y;
517 
518  if(bluegene>0) {
519  /* per ION */
520  /* communicator consists of all task working with the same I/O-node */
521  MPI_Comm_split(communicators->all, ionodeid, communicators->all_rank, &commSame);
522  MPI_Comm_size(commSame, &sizeSame);
523  MPI_Comm_rank(commSame, &rankSame);
524 
525  /* communicator consists of all task working with the different I/O-nodes */
526  MPI_Comm_split(communicators->all, rankSame, ionodeid, &commDiff);
527  MPI_Comm_size(commDiff, &sizeDiff);
528  MPI_Comm_rank(commDiff, &rankDiff);
529 
530  communicators->ionode_number = rankDiff;
531 
532  } else {
533  bluegene_np = sizeSame;
534  }
535 
536  /* Get a communicator with all writing tasks => new global communicator */
537  MPI_Comm_split(communicators->all, (rankSame < bluegene_np), communicators->all_rank, &communicators->work);
538  MPI_Comm_size(communicators->work, &communicators->work_size);
539  MPI_Comm_rank(communicators->work, &communicators->work_rank);
540  if (rankSame >= bluegene_np) {
541  /* not working task */
542  communicators->work_size = communicators->work_rank = -1;
543  communicators->local_size = communicators->local_rank = -1;
544  }
545 
546  /* If only one file will be used => dont split further */
547  /* if numfile > 1 sion will generate correct local communicator */
548  if (numfiles >= 1) {
549  communicators->local = communicators->work;
550  }
551  else if(numfiles<0) {
552  if(numfiles==-1) {
553  /* Split the common communicator for each IO node to get a local comm with only the writing tasks for this IO Node */
554  MPI_Comm_split(commSame, (rankSame < bluegene_np), communicators->all_rank, &communicators->local);
555  } else {
556  /* local communicator contains only one task per IO-node */
557  /* bluegene_np has to be 512 */
558  communicators->local=commDiff;
559  }
560  }
561  MPI_Comm_size(communicators->local, &communicators->local_size);
562  MPI_Comm_rank(communicators->local, &communicators->local_rank);
563 
564  /* determine filenumber */
565  if (numfiles < 1) {
566  /* one file per I/O-node */
567  if(numfiles==-1) communicators->file_number = rankDiff;
568  else communicators->file_number = rankSame;
569  }
570  else {
571  communicators->file_number = -1;
572  }
573 
574  /* print log message about location, ... */
575  sprintf(cbuffer, "");
576  if (rankSame < bluegene_np) {
577  sprintf(cbuffer, "FX[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %dx%dx%d %dx%dx%d %s ioid=%d ion=%d rc=%d numfiles=%d\n",
578 
579  communicators->all_rank, rankDiff, sizeDiff, rankSame, sizeSame,
580  communicators->local_rank, communicators->local_size,
581  x,y,z,a,b,c, location, ionodeid, communicators->ionode_number, rc, numfiles);
582  }
583  collective_print_gather(cbuffer, communicators->all);
584 
585  }
586 #endif
587 
588 
589 #ifdef _SION_XT
590  if (verbose) {
591  char location[256];
592  gethostname(location, 256);
593  int sizeSame = 0, sizeDiff = 0;
594  int rankSame, rankDiff;
595  char cbuffer[MAXCHARLEN];
596  int rc,nid;
597  rca_mesh_coord_t xyz;
598 
599  rc=PMI_Get_nid(communicators->all_rank,&nid);
600 
601  rca_get_meshcoord(nid, &xyz);
602 
603  sprintf(cbuffer, "XT[%03d] diff_comm: %4d of %4d same_comm: %4d of %4d file_comm: %4d of %4d %s nid=%d phys_xyz(%d,%d,%d)\n",
604  communicators->all_rank, communicators->all_rank, communicators->all_size,
605  communicators->work_rank, communicators->work_size, communicators->local_rank, communicators->local_size, location,
606  nid, xyz.mesh_x,xyz.mesh_y,xyz.mesh_z);
607  collective_print_gather(cbuffer, communicators->all);
608  }
609 #endif
610 
611 
612  /* initial set of communicators not changed? */
613  if (communicators->work_size == -2) {
614  /* all task do work */
615  communicators->work = communicators->all;
616  MPI_Comm_size(communicators->work, &communicators->work_size);
617  MPI_Comm_rank(communicators->work, &communicators->work_rank);
618  }
619  /* local communicators */
620  if (communicators->local_size == -2) {
621  if (numfiles == 1) {
622  communicators->local = communicators->work;
623  }
624  /* set a default distribution on files, will be computed again by sion_open */
625  if (numfiles < 1) {
626  numfiles = communicators->work_size / 2;
627  if (numfiles == 0)
628  numfiles = 1;
629  }
630  proc_per_file = communicators->work_size / numfiles;
631 
632  /* remaining tasks are write/read to/from the last file */
633  if (communicators->work_rank >= (numfiles * proc_per_file)) {
634  communicators->file_number = numfiles - 1;
635  }
636  else {
637  communicators->file_number = communicators->work_rank / proc_per_file;
638  }
639 
640  MPI_Comm_split(communicators->work, communicators->file_number, communicators->all_rank, &communicators->local);
641 
642  MPI_Comm_size(communicators->local, &communicators->local_size);
643  MPI_Comm_rank(communicators->local, &communicators->local_rank);
644 
645  communicators->ionode_number = communicators->file_number;
646 
647  }
648 
649  /* shift working tasks */
650  if (communicators->work_size != -1) {
651  /* only if task in communicator work */
652  int newtasknr;
653  newtasknr=(communicators->work_rank+read_task_offset)%communicators->work_size;
654  MPI_Comm_split(communicators->work, 0, newtasknr, &communicators->workread);
655 
656  MPI_Comm_size(communicators->workread, &communicators->workread_size);
657  MPI_Comm_rank(communicators->workread, &communicators->workread_rank);
658  /* printf("WF: %d %d %% %d-> %d (%d %d)\n",
659  communicators->work_rank,read_task_offset,
660  communicators->work_size,newtasknr,
661  communicators->workread_rank,communicators->workread_size);*/
662 
663  } else {
664  /* this rtask will not be used for reading */
665  communicators->workread_size = communicators->workread_rank = -1;
666  communicators->local_size = communicators->local_rank = -1;
667  }
668 
669  return(1);
670 }
671