SIONlib  1.7.4
Scalable I/O library for parallel access to task-local files
sionsplit.c
1 /****************************************************************************
2 ** SIONLIB http://www.fz-juelich.de/jsc/sionlib **
3 *****************************************************************************
4 ** Copyright (c) 2008-2019 **
5 ** Forschungszentrum Juelich, Juelich Supercomputing Centre **
6 ** **
7 ** See the file COPYRIGHT in the package base directory for details **
8 ****************************************************************************/
9 #define _XOPEN_SOURCE 700
10 
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <ctype.h>
15 
16 #include "sion.h"
17 #include "sion_debug.h"
18 #include "sion_error_handler.h"
19 #include "sion_file.h"
20 #include "sion_internal.h"
21 #include "sion_fd.h"
22 #include "sion_filedesc.h"
23 
24 #define FILENAME_LENGTH 1024
25 
26 static void usage(char *name);
27 
28 int main(int argc, char **argv)
29 {
30  _sion_fileptr *outfp;
31  char infilename[FILENAME_LENGTH];
32  char outfilename[FILENAME_LENGTH];
33  char prefix[FILENAME_LENGTH];
34  char fnmask[FILENAME_LENGTH];
35  char fnkmask[FILENAME_LENGTH];
36 
37  int i, rank, blknum;
38  char *localbuffer;
39  sion_int64 chunksize = 0;
40  sion_int64 left, bread, bsumread, bwrote;
41 
42  /* options */
43  int digits = 5;
44  int verbose = 0;
45  int useglobalranks = 0;
46 
47  /* for file infomation */
48  int sid, ntasks, nfiles, maxblocks;
49  sion_int32 fsblksize;
50  sion_int64 globalskip;
51  sion_int64 start_of_varheader;
52  sion_int64 *sion_localsizes;
53  sion_int64 *sion_globalranks;
54  sion_int64 *sion_blockcount;
55  sion_int64 *sion_blocksizes;
56  sion_int64 *sionsplit_sum_bytes_per_task;
57  sion_int64 sionsplit_sum_bytes;
58  sion_int64 sionsplit_filesize;
59 
60  _sion_filedesc *sion_filedesc;
61  uint64_t key;
62 
63 
64  /* parse command line */
65  i = 1;
66  if (argc < 3)
67  usage(argv[0]);
68 
69  while (i < argc) {
70  if (argv[i][0] == '-') {
71  switch (argv[i][1]) {
72  case 'd':
73  digits = atoi(argv[++i]);
74  break;
75  case 'g':
76  useglobalranks=1;
77  break;
78  case 'v':
79  verbose++;
80  break;
81  case 'V':
82  fprintf(stderr, "SIONlib utility %s (Version %d.%dp%d, fileformat version %d)\n", argv[0],
83  SION_MAIN_VERSION,SION_SUB_VERSION,
84  SION_VERSION_PATCHLEVEL,SION_FILEFORMAT_VERSION);
85  exit(1);
86  case 'h':
87  usage(argv[0]);
88  break;
89  default:
90  usage(argv[0]);
91  }
92  }
93  i++;
94  }
95 
96  strcpy(infilename, argv[argc - 2]);
97  strcpy(prefix, argv[argc - 1]);
98 
99  printf("sionsplit: filename: %-30s\n", infilename);
100  printf("sionsplit: prefix: %-30s\n", prefix);
101 
102  sid = sion_open(infilename, "rb,posix", &ntasks, &nfiles, NULL, &fsblksize, NULL, NULL);
103 
104  /* get the sion file structure */
105  if ((_sion_vcdtype(sid) != SION_FILEDESCRIPTOR) || !(sion_filedesc = _sion_vcdtovcon(sid))) {
106  return(_sion_errorprint(SION_NOT_SUCCESS,_SION_ERROR_RETURN,"invalid sion_filedesc, aborting %d ...\n", sid));
107  }
108 
109  printf("sionsplit: sid: %d\n", sid);
110  printf("sionsplit: filename: %-30s\n", infilename);
111  printf("sionsplit: number of tasks: %d\n", ntasks);
112  printf("sionsplit: number of files: %d\n", nfiles);
113  printf("sionsplit: number of digits: %d\n", digits);
114  printf("sionsplit: use global ranks: %d\n", useglobalranks);
115  printf("sionsplit: keyval: %d (%s)\n", (int) sion_filedesc->keyvalmode, sion_keyval_type_to_str(sion_filedesc->keyvalmode));
116  printf("sionsplit: current endianness: %s\n", (sion_get_endianness())? "big" : "little");
117  printf("sionsplit: file endianness: %s\n", (sion_get_file_endianness(sid)) ? "big" : "little");
118  printf("sionsplit: fsblksize: %lu bytes (%6.2f MB)\n", (unsigned long) fsblksize, fsblksize / 1024.0 / 1024.0);
119 
120  sion_get_locations(sid, &ntasks, &maxblocks, &globalskip, &start_of_varheader, &sion_localsizes, &sion_globalranks, &sion_blockcount,
121  &sion_blocksizes);
122 
123  printf("sionsplit: max number of chunks: %d\n", maxblocks);
124 
125  /* analysis */
126  sionsplit_sum_bytes_per_task = (sion_int64 *) malloc(ntasks * sizeof(sion_int64));
127  for (rank = 0; rank < ntasks; rank++)
128  sionsplit_sum_bytes_per_task[rank] = 0;
129  for (rank = 0; rank < ntasks; rank++) {
130  for (blknum = 0; blknum < maxblocks; blknum++) {
131  sionsplit_sum_bytes_per_task[rank] += sion_blocksizes[ntasks * blknum + rank];
132  }
133  }
134 
135  sionsplit_sum_bytes = 0;
136  for (rank = 0; rank < ntasks; rank++)
137  sionsplit_sum_bytes += sionsplit_sum_bytes_per_task[rank];
138 
139  printf("sionsplit: datasize in file (aggr.): %lld bytes (%6.2f MB)\n", sionsplit_sum_bytes, sionsplit_sum_bytes / 1024.0 / 1024.0);
140 
141  sionsplit_filesize = start_of_varheader + (maxblocks + 1) * rank * sizeof(sion_int64);
142 
143  printf("sionsplit: start_of_varheader: %lld bytes (%6.2f MB)\n", start_of_varheader, start_of_varheader / 1024.0 / 1024.0);
144  printf("sionsplit: size of file: %lld bytes (%6.2f MB)\n", sionsplit_filesize, sionsplit_filesize / 1024.0 / 1024.0);
145 
146  if (sionsplit_filesize > 0) {
147  printf("sionsplit: file usage: %8.6f%%\n", (double) sionsplit_sum_bytes / (double) sionsplit_filesize * 100.0);
148  }
149  chunksize = 0;
150 
151  sprintf(fnmask, "%s%%0%dd", prefix, digits);
152  printf("sionsplit: filename mask: %-s\n", fnmask);
153 
154  sprintf(fnkmask, "%s%%0%dd_%%012ld", prefix, digits);
155  printf("sionsplit: filename key mask: %-s\n", fnkmask);
156 
157  if(sion_filedesc->keyvalmode==SION_KEYVAL_NONE) {
158  /* standard copy with read/write */
159 
160  for (rank = 0; rank < ntasks; rank++) {
161  if (chunksize<sion_localsizes[rank]) chunksize=sion_localsizes[rank];
162  if (chunksize<sion_blocksizes[rank]) chunksize=sion_blocksizes[rank];
163  }
164  printf("sionsplit: max chunksize: %lld\n", chunksize);
165  localbuffer = (char *) malloc(chunksize * sizeof(char));
166  if (localbuffer == NULL) {
167  fprintf(stderr, "cannot allocate localbuffer of size %lld , aborting ...\n", chunksize * sizeof(char));
168  free(sionsplit_sum_bytes_per_task);
169  return (1);
170  }
171 
172 
173  for (rank = 0; rank < ntasks; rank++) {
174  if(useglobalranks) {
175  sprintf(outfilename, fnmask, (int) sion_globalranks[rank]);
176  } else {
177  sprintf(outfilename, fnmask, rank);
178  }
179 
180 
181  printf("sionsplit: generating file: %-s\n", outfilename);
183  if (outfp == NULL) {
184  fprintf(stderr, "cannot open outfile %s , aborting ...\n", outfilename);
185  free(sionsplit_sum_bytes_per_task);
186  free(localbuffer);
187  return (1);
188  }
189 
190  for (blknum = 0; blknum < sion_blockcount[rank]; blknum++) {
191 
192  /* seek position of block */
193  sion_seek(sid, rank, blknum, 0);
194  DPRINTFP((1, "sionsplit", 0, "after sion_seek sid=%d rank=%d blknum=%d fileposition=%lld\n", sid, rank, blknum, sion_get_position(sid)));
195 
196  /* read data from block */
197  left = sion_blocksizes[ntasks * blknum + rank];
198  bsumread = 0;
199  while (left > 0) {
200  DPRINTFP((8, "sionsplit", 0, "will read %lld bytes localbuffer+%lld\n", left, bsumread));
201  bread = sion_fread(localbuffer + bsumread, 1, left, sid);
202  left -= bread;
203  bsumread += bread;
204  printf("sionsplit: %lld read left=%lld \n", bread, left);
205  }
206 
207  /* write data to outfile */
208  left = sion_blocksizes[ntasks * blknum + rank];
209  bwrote = _sion_file_write(localbuffer, left, outfp);
210  printf("sionsplit: %lld wrote of left=%lld \n", bwrote, left);
211 
212  }
213 
214  _sion_file_close(outfp);
215 
216  }
217 
218  free(localbuffer);
219 
220  } else {
221 
222  /* key-value copy with read/write */
223 
224  printf("sionsplit: buffer size: %d\n", fsblksize);
225  localbuffer = (char *) malloc(fsblksize * sizeof(char));
226  if (localbuffer == NULL) {
227  fprintf(stderr, "cannot allocate localbuffer of size %lld , aborting ...\n", chunksize * sizeof(char));
228  free(sionsplit_sum_bytes_per_task);
229  return (1);
230  }
231 
232  for (rank = 0; rank < ntasks; rank++) {
233 
234  if (verbose)
235  printf("siondefrag: ->rank: %d\n", rank);
236  if (!verbose)
237  if (rank % 16 == 0) {
238  printf("[%d]", rank);
239  fflush(stdout);
240  }
241 
242  /* search rank in input file */
244 
245  sion_key_full_scan(sid);
246 
247  /* reset iterator over keys */
249 
250  /* loop over key-value blocks */
251  while(sion_key_list_iterator_next(sid,&key)==SION_SUCCESS) {
252 
253  if(useglobalranks) {
254  sprintf(outfilename, fnkmask, (int) sion_globalranks[rank],(long) key);
255  } else {
256  sprintf(outfilename, fnkmask, rank,(long) key);
257  }
258 
259  printf("sionsplit: generating file: '%-s'\n", outfilename);
261  if (outfp == NULL) {
262  fprintf(stderr, "cannot open outfile %s , aborting ...\n", outfilename);
263  free(localbuffer);
264  free(sionsplit_sum_bytes_per_task);
265  return (1);
266  }
267 
268  while( (bread=sion_fread_key(localbuffer,key,1,fsblksize,sid))>0 ) {
269  if(bread>0) {
270  bwrote = _sion_file_write(localbuffer, bread, outfp);
271  }
272  if (verbose)
273  printf("sionsplit: extracting now data of key[%12ld] (%lld bytes) (%lld bytes written)\n",
274  (long) key, bread, bwrote);
275  }
276 
277  _sion_file_close(outfp);
278 
279  }
280 
281  }
282  free(localbuffer);
283  }
284 
285  free(sionsplit_sum_bytes_per_task);
286  sion_close(sid);
287 
288  return (0);
289 }
290 
/*
 * Print the command line help of sionsplit to stderr and terminate the
 * program with exit status 1.
 *
 * name  program name shown in the usage line and in the example
 */
void usage(char *name)
{
  /* fixed help text, kept as adjacent literals so each output line stays visible */
  static const char description[] =
    "Split SIONlib file <sionfn> into separate files. For each task a file is\n"
    "created with the name <prefix><digits>\n\n";
  static const char example_result[] =
    " creates files data/file_00000, data/file_00001, ...\n\n";
  static const char option_list[] =
    "Options:\n"
    " [-v] verbose mode\n"
    " [-g] use global rank for numbering files\n"
    " [-d <num>] number of digits for filename generation (default 5)\n"
    " [-V] show version of SIONlib\n"
    " [-h] show this help\n";

  fprintf(stderr, "Usage: %s options <sionfn> <prefix>\n\n", name);
  fputs(description, stderr);

  fprintf(stderr, "Example: %s data.sion data/file_\n", name);
  fputs(example_result, stderr);

  fputs(option_list, stderr);

  exit(1);
}
sion_int64 _sion_file_write(const void *data, sion_int64 bytes, _sion_fileptr *sion_fileptr)
Write data to file.
Definition: sion_file.c:141
int sion_key_list_iterator_next(int sid, uint64_t *keyptr)
Forward to next key.
_sion_fileptr * _sion_file_open(const char *fname, unsigned int flags, unsigned int addflags)
Create and open a new file for writing.
Definition: sion_file.c:42
Sion File Descriptor Structure.
Definition: sion_filedesc.h:79
int sion_get_endianness(void)
Return endianness.
Definition: sion_tools.c:32
sion_int64 sion_get_position(int sid)
Function that returns the current file position.
Definition: sion_common.c:930
#define SION_FILE_FLAG_WRITE
Definition: sion_file.h:26
int sion_get_file_endianness(int sid)
Returns endianness of data in file sid.
Definition: sion_common.c:253
#define SION_CURRENT_BLK
Definition: sion_const.h:66
int sion_close(int sid)
Close a sion file.
Definition: sion_serial.c:106
int _sion_vcdtype(int sid)
Definition: sion_fd.c:58
int sion_get_locations(int sid, int *ntasks, int *maxchunks, sion_int64 *globalskip, sion_int64 *start_of_varheader, sion_int64 **sion_chunksizes, sion_int64 **sion_globalranks, sion_int64 **sion_blockcount, sion_int64 **sion_blocksizes)
Returns pointers to internal fields.
Definition: sion_common.c:84
#define SION_KEYVAL_NONE
Definition: sion_const.h:79
char * sion_keyval_type_to_str(int type)
Returns key value mode as string.
void * _sion_vcdtovcon(int sid)
Definition: sion_fd.c:53
#define SION_FILE_FLAG_ANSI
Definition: sion_file.h:22
#define SION_FILE_FLAG_CREATE
Definition: sion_file.h:25
int sion_seek(int sid, int rank, int currentblocknr, sion_int64 posinblk)
Function to set the file pointer to a new position.
Definition: sion_common.c:698
#define SION_CURRENT_POS
Definition: sion_const.h:69
int _sion_file_close(_sion_fileptr *sion_fileptr)
Closes file and destroys fileptr structure.
Definition: sion_file.c:109
int sion_key_list_iterator_reset(int sid)
Resets key iterator.
#define SION_FILEDESCRIPTOR
Definition: sion_fd.h:17
size_t sion_fread(void *data, size_t size, size_t nitems, int sid)
Read data from sion file.
Definition: sion_common.c:609
int sion_key_full_scan(int sid)
Performs a full scan of all meta data in current file.
size_t sion_fread_key(void *data, uint64_t key, size_t size, size_t nitems, int sid)
Read data for key.
int sion_open(char *fname, const char *file_mode, int *ntasks, int *nfiles, sion_int64 **chunksizes, sion_int32 *fsblksize, int **globalranks, FILE **fileptr)
Open a sion file in serial mode.
Definition: sion_serial.c:54