SIONlib  2.0.0-rc.1
Scalable I/O library for parallel access to task-local files
sion_internal_startptr.c
1 /****************************************************************************
2 ** SIONLIB http://www.fz-juelich.de/jsc/sionlib **
3 *****************************************************************************
4 ** Copyright (c) 2008-2018 **
5 ** Forschungszentrum Juelich, Juelich Supercomputing Centre **
6 ** **
7 ** See the file COPYRIGHT in the package base directory for details **
8 ****************************************************************************/
9 
10 #define _XOPEN_SOURCE 700
11 
12 #include <stdint.h>
13 #include <string.h>
14 
15 #include "sion_common.h"
16 #include "sion_const.h"
17 #include "sion_debug.h"
18 #include "sion_error_handler.h"
19 #include "sion_filedesc.h"
20 #include "sion_internal_collstat.h"
21 #include "sion_internal_startptr.h"
22 #include "sion_metadata.h"
23 
24 /* INTERNAL */
25 
26 #define DFUNCTION "_sion_calculate_set_alignment"
27 int64_t _sion_calculate_set_alignment(_sion_filedesc *sion_filedesc, int t)
28 {
29  int64_t lsize = SION_SIZE_NOT_VALID;
30 
31  /* alignment: fsv 3, WRITE: -> align lsize */
32  /* : fsv 3, READ: -> align lsize */
33  /* : fsv 4, WRITE: -> align lsize+chunksize */
34  /* : fsv 4, READ: -> lsize=chunksize */
35 
36  if (sion_filedesc->fileversion <= 3) {
37  lsize = (sion_filedesc->all_chunksizes[t] % sion_filedesc->fsblksize == 0)
38  ? sion_filedesc->all_chunksizes[t]
39  : ((sion_filedesc->all_chunksizes[t] / sion_filedesc->fsblksize) + 1) * sion_filedesc->fsblksize;
40  DPRINTFP((2048, DFUNCTION, _SION_DEFAULT_RANK, "old file version (<=3) task=%d lsize=%ld\n", t, (long)lsize));
41  } else {
42  if (sion_filedesc->mode == SION_FILEMODE_WRITE) {
43  lsize = (sion_filedesc->all_chunksizes[t] % sion_filedesc->fsblksize == 0)
44  ? sion_filedesc->all_chunksizes[t]
45  : ((sion_filedesc->all_chunksizes[t] / sion_filedesc->fsblksize) + 1) * sion_filedesc->fsblksize;
46  /* expand chunksize for alignment */
47  sion_filedesc->all_chunksizes[t] = lsize;
48  DPRINTFP(
49  (2048, DFUNCTION, _SION_DEFAULT_RANK, "new file version (>3) task=%d lsize=%ld, WRITE chunksize set\n", t, (long)lsize));
50  } else {
51  lsize = sion_filedesc->all_chunksizes[t];
52  DPRINTFP((2048, DFUNCTION, _SION_DEFAULT_RANK, "new file version (>3) task=%d lsize=%ld, READ no align\n", t, (long)lsize));
53  }
54  }
55 
56  return lsize;
57 }
58 #undef DFUNCTION
59 
60 #define DFUNCTION "_sion_get_size_metadatablock1"
61 int _sion_get_size_metadatablock1(_sion_filedesc *sion_filedesc, int32_t mapping_size)
62 {
63  int rc = 0;
64  int firstsize;
65 
66  // calculate size of first block for meta-information
67  // length of SION_ID + endianness + fsblksize + ntasks + nfiles + filenumber + 3 * versioninfo
68  firstsize = strlen(SION_ID) + 8 * sizeof(int32_t) + 2 * sion_filedesc->ntasks * sizeof(int64_t) // globalranks + chunksizes
69  + 2 * sizeof(int64_t); // 2 x 64 bit flags
70  if (sion_filedesc->fileversion <= 5) {
71  firstsize += SION_FILENAME_LENGTH; // prefix
72  } else {
73  firstsize += sion_filedesc->ntasks * sizeof(int64_t) // sizes
74  + (mapping_size * 2 + 1) * sizeof(int32_t); // mapping information
75  }
76  rc = firstsize;
77 
78  return rc;
79 }
80 #undef DFUNCTION
81 
82 #define DFUNCTION "_sion_calculate_startpointers"
83 int _sion_calculate_startpointers(_sion_filedesc *sion_filedesc, int32_t mapping_size)
84 {
85  int rc = SION_SUCCESS;
86  int i, firstsize;
87  int64_t lsize;
88 
89  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "enter ntasks=%d fsblksize=%d chunksizes[0]=%lld\n", sion_filedesc->ntasks,
90  sion_filedesc->fsblksize, sion_filedesc->all_chunksizes[0]));
91 
92  firstsize = _sion_get_size_metadatablock1(sion_filedesc, mapping_size);
93 
94 #ifdef WFLARGEMETABLOCK
95  if (firstsize < 4 * 1024 * 1024) {
96  firstsize = 4 * 1024 * 1024;
97  }
98 #endif
99 
100  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "firstsize=%d\n", firstsize));
101 
102  sion_filedesc->all_startpointers[0] = (firstsize % sion_filedesc->fsblksize == 0)
103  ? firstsize
104  : ((firstsize / sion_filedesc->fsblksize) + 1) * sion_filedesc->fsblksize;
105 
106  i = 0;
107  DPRINTFP((2048, DFUNCTION, _SION_DEFAULT_RANK, " startpointers[%2d]=%10lld (%10.4fMB) chunksizes[%2d]=%8lld\n", i,
108  sion_filedesc->all_startpointers[i], sion_filedesc->all_startpointers[i] / 1024.0 / 1024.0, i,
109  sion_filedesc->all_chunksizes[i]));
110  sion_filedesc->globalskip = 0;
111  for (i = 1; i < sion_filedesc->ntasks; i++) {
112  lsize = _sion_calculate_set_alignment(sion_filedesc, i - 1);
113 
114  sion_filedesc->globalskip += lsize;
115  sion_filedesc->all_startpointers[i] = sion_filedesc->all_startpointers[i - 1] + lsize;
116 
117  DPRINTFP((2048, DFUNCTION, _SION_DEFAULT_RANK,
118  " startpointers[%2d]=%10lld (%10.4fMB) chunksizes[%2d]=%8lld chunksizes[%2d]=%8lld\n", i,
119  sion_filedesc->all_startpointers[i], sion_filedesc->all_startpointers[i] / 1024.0 / 1024.0, i,
120  sion_filedesc->all_chunksizes[i], i - 1, sion_filedesc->all_chunksizes[i - 1]));
121  }
122 
123  lsize = _sion_calculate_set_alignment(sion_filedesc, sion_filedesc->ntasks - 1);
124  sion_filedesc->globalskip += lsize;
125 
126  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "leave globalskip is %lld\n", sion_filedesc->globalskip));
127  return rc;
128 }
129 #undef DFUNCTION
130 
131 #define DFUNCTION "_sion_calculate_num_collector"
132 int _sion_calculate_num_collector(_sion_filedesc *sion_filedesc, _sion_collstat *collstat, int *num_collectors)
133 {
134  int rc = SION_SUCCESS;
135  int max_num_collectors;
136 
137  /* max. number: one fsblksize per collector */
138  max_num_collectors = (int)(collstat->gsize / sion_filedesc->fsblksize);
139  if (collstat->gsize % sion_filedesc->fsblksize > 0) {
140  max_num_collectors++;
141  }
142  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "max_num_collectors=%d\n", max_num_collectors));
143 
144  if (sion_filedesc->collsize > 0) {
145  /* user defined collsize (number of tasks per collector) */
146  *num_collectors = (int)(sion_filedesc->ntasks / sion_filedesc->collsize);
147  if (sion_filedesc->ntasks % sion_filedesc->collsize > 0) {
148  (*num_collectors)++;
149  }
150 
151  /* limit the user specification */
152  if (*num_collectors > max_num_collectors) {
153  *num_collectors = max_num_collectors;
154  }
155 
156  DPRINTFP((32, DFUNCTION, _SION_DEFAULT_RANK, "user given collsize %d -> set num_collectors to %d\n", sion_filedesc->collsize,
157  *num_collectors));
158  } else {
159  /* determine number of collectors by heuristic */
160  *num_collectors = max_num_collectors;
161 
162  /* limit the user specification */
163  if (*num_collectors > sion_filedesc->ntasks) {
164  *num_collectors = sion_filedesc->ntasks;
165  }
166 
167  /* some heuristics */
168  if ((sion_filedesc->ntasks >= 512) && (*num_collectors > 32)) {
169  *num_collectors = 32;
170  } else if ((sion_filedesc->ntasks >= 256) && (*num_collectors > 16)) {
171  *num_collectors = 16;
172  } else if ((sion_filedesc->ntasks >= 128) && (*num_collectors > 8)) {
173  *num_collectors = 8;
174  } else if ((sion_filedesc->ntasks >= 64) && (*num_collectors > 8)) {
175  *num_collectors = 8;
176  } else if ((sion_filedesc->ntasks >= 32) && (*num_collectors > 8)) {
177  *num_collectors = 8;
178  } else if ((sion_filedesc->ntasks >= 16) && (*num_collectors > 4)) {
179  *num_collectors = 4;
180  }
181 
182  /* TODO: check I/O infrastructure characteristics for better values */
183 
184  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "SIONlib heuristic collsize=%d num_collectors=%d\n", sion_filedesc->collsize,
185  *num_collectors));
186  }
187 
188  /* data per collector */
189  collstat->avg_data_per_collector =
190  ((int64_t)(collstat->gsize / *num_collectors) / sion_filedesc->fsblksize) * sion_filedesc->fsblksize;
191 
192  if ((collstat->gsize / *num_collectors) % sion_filedesc->fsblksize > 0) {
193  collstat->avg_data_per_collector += sion_filedesc->fsblksize;
194  }
195  DPRINTFP((32, DFUNCTION, _SION_DEFAULT_RANK, "avg_data_per_collectors=%ld\n", (long)collstat->avg_data_per_collector));
196 
197  return rc;
198 }
199 #undef DFUNCTION
200 
201 #define DFUNCTION "_sion_calculate_startpointers_collective"
202 int _sion_calculate_startpointers_collective(_sion_filedesc *sion_filedesc, int32_t mapping_size)
203 {
204  int rc = SION_SUCCESS;
205  int i, firstsize, num_collectors, numsender, lastcoll, s;
206 
207  int64_t currentsize, aligned_size, startpointer;
208  _sion_collstat *collstat;
209 
210  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "enter ntasks=%d fsblksize=%d chunksizes[0]=%lld\n", sion_filedesc->ntasks,
211  sion_filedesc->fsblksize, sion_filedesc->all_chunksizes[0]));
212 
213  if (sion_filedesc->fileversion <= 3) {
214  return _sion_errorprint(SION_NOT_SUCCESS, _SION_ERROR_RETURN,
215  "_sion_calculate_startpointers_collective: files with old sionlib file format (<3) can not be read by collective calls, "
216  "please use standard read calls, aborting ...\n");
217  }
218 
219  /* statistics */
220  collstat = _sion_create_and_init_collstat(sion_filedesc);
221 
222  /* calculate size of first block for meta-information */
223  collstat->firstsize = firstsize = _sion_get_size_metadatablock1(sion_filedesc, mapping_size);
224  _sion_calculate_num_collector(sion_filedesc, collstat, &num_collectors);
225  collstat->req_num_collectors = num_collectors;
226 
227  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "firstsize=%d collsize=%d num_collectors=%d\n", firstsize, sion_filedesc->collsize,
228  num_collectors));
229 
230  numsender = 1;
231  lastcoll = 0;
232  currentsize = sion_filedesc->all_chunksizes[0];
233 
234  /* 1ST STEP: calculate mapping to collectors */
235  for (i = 1; i < sion_filedesc->ntasks; i++) {
236  if ((currentsize + sion_filedesc->all_chunksizes[i] <= collstat->avg_data_per_collector)
237  || (sion_filedesc->all_coll_capability[i] == SION_CAPABILITY_ONLY_SENDER)) {
238  /* task will add to last collector */
239  currentsize += sion_filedesc->all_chunksizes[i];
240  numsender++;
241  } else { /* new collector needed */
242 
243  /* adjust last senders */
244  for (s = lastcoll; s < i; s++) {
245  sion_filedesc->all_coll_collector[s] = lastcoll;
246  sion_filedesc->all_coll_collsize[s] = numsender;
247  }
248 
249  /* expand last one of group to align next one */
250  if (sion_filedesc->mode == SION_FILEMODE_WRITE) {
251  aligned_size = ((int64_t)currentsize / sion_filedesc->fsblksize) * sion_filedesc->fsblksize;
252  if (currentsize % sion_filedesc->fsblksize > 0) {
253  aligned_size += sion_filedesc->fsblksize;
254  }
255 
256  DPRINTFP((128, DFUNCTION, _SION_DEFAULT_RANK, " align chunksizes[%2d]=%8lld + %lld\n", i - 1,
257  sion_filedesc->all_chunksizes[i - 1], aligned_size - currentsize));
258 
259  sion_filedesc->all_chunksizes[i - 1] += aligned_size - currentsize;
260  }
261 
262  /* init data for next collector */
263  numsender = 1;
264  lastcoll = i;
265  currentsize = sion_filedesc->all_chunksizes[i];
266  }
267  }
268 
269  /* align last sender */
270  if (sion_filedesc->mode == SION_FILEMODE_WRITE) {
271  aligned_size = ((int64_t)currentsize / sion_filedesc->fsblksize) * sion_filedesc->fsblksize;
272  if (currentsize % sion_filedesc->fsblksize > 0) {
273  aligned_size += sion_filedesc->fsblksize;
274  }
275  DPRINTFP((128, DFUNCTION, _SION_DEFAULT_RANK, " align chunksizes[%2d]=%8lld + %lld\n", sion_filedesc->ntasks - 1,
276  sion_filedesc->all_chunksizes[sion_filedesc->ntasks - 1], aligned_size - currentsize));
277  sion_filedesc->all_chunksizes[sion_filedesc->ntasks - 1] += aligned_size - currentsize;
278  }
279 
280  /* adjust sender of last collector */
281  for (s = lastcoll; s < sion_filedesc->ntasks; s++) {
282  sion_filedesc->all_coll_collector[s] = lastcoll;
283  sion_filedesc->all_coll_collsize[s] = numsender;
284  }
285 
286  /* 2ND STEP: calculate startpointers */
287 
288  /* align first, not necessary, only for debugging */
289  /* startpointer=firstsize; */
290  startpointer = (firstsize % sion_filedesc->fsblksize == 0)
291  ? firstsize
292  : ((firstsize / sion_filedesc->fsblksize) + 1) * sion_filedesc->fsblksize;
293  sion_filedesc->globalskip = 0;
294  /* calculate mapping to collectors */
295  for (i = 0; i < sion_filedesc->ntasks; i++) {
296  sion_filedesc->all_startpointers[i] = startpointer;
297  startpointer += sion_filedesc->all_chunksizes[i];
298  sion_filedesc->globalskip += sion_filedesc->all_chunksizes[i];
299  }
300 
301  /* statistics */
302  if (sion_filedesc->colldebug >= 1) {
303  _sion_update_collstat(collstat, sion_filedesc);
304  _sion_print_collstat(collstat, sion_filedesc);
305  }
306 
307  _sion_debugprint_collstat(collstat, sion_filedesc);
308 
309  _sion_destroy_collstat(collstat);
310 
311  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "leave globalskip is %lld\n", sion_filedesc->globalskip));
312  return rc;
313 }
314 #undef DFUNCTION
315 
316 #define DFUNCTION "_sion_calculate_startpointers_collective_merge"
317 int _sion_calculate_startpointers_collective_merge(_sion_filedesc *sion_filedesc, int32_t mapping_size)
318 {
319  int rc = SION_SUCCESS;
320  int i, firstsize, num_collectors, numsender, lastcoll, s;
321 
322  int64_t currentsize, newsize, aligned_size, startpointer;
323  _sion_collstat *collstat;
324 
325  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "enter ntasks=%d fsblksize=%d chunksizes[0]=%lld\n", sion_filedesc->ntasks,
326  sion_filedesc->fsblksize, sion_filedesc->all_chunksizes[0]));
327 
328  if (sion_filedesc->fileversion <= 3) {
329  return _sion_errorprint(SION_NOT_SUCCESS, _SION_ERROR_RETURN,
330  "_sion_calculate_startpointers_collective: files with old sionlib file format (<3) can not be read by collective calls, "
331  "please use standard read calls, aborting ...\n");
332  }
333 
334  /* statistics */
335  collstat = _sion_create_and_init_collstat(sion_filedesc);
336 
337  /* calculate size of first block for meta-information */
338  collstat->firstsize = firstsize = _sion_get_size_metadatablock1(sion_filedesc, mapping_size);
339  _sion_calculate_num_collector(sion_filedesc, collstat, &num_collectors);
340  collstat->req_num_collectors = num_collectors;
341 
342  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "firstsize=%d collsize=%d num_collectors=%d\n", firstsize, sion_filedesc->collsize,
343  num_collectors));
344 
345  numsender = 1;
346  lastcoll = 0;
347  currentsize = sion_filedesc->all_chunksizes[0];
348 
349  DPRINTFP((128, DFUNCTION, _SION_DEFAULT_RANK, " currentsize=%lld chunksizes[%2d]=%8lld\n", currentsize, 0,
350  sion_filedesc->all_chunksizes[0]));
351 
352  /* 1ST STEP: calculate mapping to collectors */
353  for (i = 1; i < sion_filedesc->ntasks; i++) {
354  if ((currentsize + sion_filedesc->all_chunksizes[i] <= collstat->avg_data_per_collector)
355  || (sion_filedesc->all_coll_capability[i] == SION_CAPABILITY_ONLY_SENDER)) {
356  /* task will add to last collector */
357  currentsize += sion_filedesc->all_chunksizes[i];
358  numsender++;
359  DPRINTFP((128, DFUNCTION, _SION_DEFAULT_RANK, " currentsize=%lld chunksizes[%2d]=%8lld\n", currentsize, i,
360  sion_filedesc->all_chunksizes[i]));
361  } else { /* new collector needed */
362 
363  /* increase chunksize of last collector */
364  if (sion_filedesc->mode == SION_FILEMODE_WRITE) {
365  newsize = currentsize;
366 
367  /* expand chunksize to align next one */
368  aligned_size = ((int64_t)newsize / sion_filedesc->fsblksize) * sion_filedesc->fsblksize;
369  if (newsize % sion_filedesc->fsblksize > 0) {
370  aligned_size += sion_filedesc->fsblksize;
371  }
372 
373  DPRINTFP((128, DFUNCTION, _SION_DEFAULT_RANK, " resize chunksizes[%2d]=%8lld + %lld\n", lastcoll,
374  sion_filedesc->all_chunksizes[lastcoll], aligned_size - sion_filedesc->all_chunksizes[lastcoll]));
375  sion_filedesc->all_chunksizes[lastcoll] = aligned_size;
376 
377  for (s = lastcoll + 1; s < i; s++) {
378  /* leave chunksize of sender on aligned original size to
379  allow storing data in intermeadiate non-collective
380  flushes */
381  _sion_calculate_set_alignment(sion_filedesc, s);
382 
383  /* adjust chunksize of the sender to one file system block as spare space */
384  /* sion_filedesc->all_chunksizes[s]=sion_filedesc->fsblksize; */
385  }
386  }
387 
388  /* adjust last senders */
389  for (s = lastcoll; s < i; s++) {
390  sion_filedesc->all_coll_collector[s] = lastcoll;
391  sion_filedesc->all_coll_collsize[s] = numsender;
392  }
393 
394  /* init data for next collector */
395  numsender = 1;
396  lastcoll = i;
397  currentsize = sion_filedesc->all_chunksizes[i];
398  }
399  }
400 
401  /* align last sender */
402  if (sion_filedesc->mode == SION_FILEMODE_WRITE) {
403  newsize = currentsize;
404 
405  /* expand chunksize to align next one */
406  aligned_size = ((int64_t)newsize / sion_filedesc->fsblksize) * sion_filedesc->fsblksize;
407  if (newsize % sion_filedesc->fsblksize > 0) {
408  aligned_size += sion_filedesc->fsblksize;
409  }
410 
411  DPRINTFP((128, DFUNCTION, _SION_DEFAULT_RANK, " resize chunksizes[%2d]=%8lld + %lld\n", lastcoll,
412  sion_filedesc->all_chunksizes[lastcoll], aligned_size - sion_filedesc->all_chunksizes[lastcoll]));
413  sion_filedesc->all_chunksizes[lastcoll] = aligned_size;
414 
415  /* adjust chunksize of the sender to one file system block as spare space */
416  for (s = lastcoll + 1; s < i; s++) {
417  /* leave chunksize of sender on aligned original size to
418  allow storing data in intermeadiate non-collective
419  flushes */
420  _sion_calculate_set_alignment(sion_filedesc, s);
421 
422  /* adjust chunksize of the sender to one file system block as spare space */
423  /* sion_filedesc->all_chunksizes[s]=sion_filedesc->fsblksize; */
424  }
425  }
426 
427  /* adjust sender of last collector */
428  for (s = lastcoll; s < sion_filedesc->ntasks; s++) {
429  sion_filedesc->all_coll_collector[s] = lastcoll;
430  sion_filedesc->all_coll_collsize[s] = numsender;
431  }
432 
433  /* 2ND STEP: calculate startpointers */
434 
435  /* align first, not necessary, only for debugging */
436  /* startpointer=firstsize; */
437  startpointer = (firstsize % sion_filedesc->fsblksize == 0)
438  ? firstsize
439  : ((firstsize / sion_filedesc->fsblksize) + 1) * sion_filedesc->fsblksize;
440  sion_filedesc->globalskip = 0;
441  /* calculate mapping to collectors */
442  for (i = 0; i < sion_filedesc->ntasks; i++) {
443  sion_filedesc->all_startpointers[i] = startpointer;
444  startpointer += sion_filedesc->all_chunksizes[i];
445  sion_filedesc->globalskip += sion_filedesc->all_chunksizes[i];
446  }
447 
448  /* statistics */
449  if (sion_filedesc->colldebug >= 1) {
450  _sion_update_collstat(collstat, sion_filedesc);
451  _sion_print_collstat(collstat, sion_filedesc);
452  }
453 
454  _sion_debugprint_collstat(collstat, sion_filedesc);
455 
456  _sion_destroy_collstat(collstat);
457 
458  DPRINTFP((2, DFUNCTION, _SION_DEFAULT_RANK, "leave globalskip is %lld\n", sion_filedesc->globalskip));
459  return rc;
460 }
461 #undef DFUNCTION