SIONlib  1.6.2
Scalable I/O library for parallel access to task-local files
sionsplit.c
1 /****************************************************************************
2 ** SIONLIB http://www.fz-juelich.de/jsc/sionlib **
3 *****************************************************************************
4 ** Copyright (c) 2008-2016 **
5 ** Forschungszentrum Juelich, Juelich Supercomputing Centre **
6 ** **
7 ** See the file COPYRIGHT in the package base directory for details **
8 ****************************************************************************/
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 
14 #include "sion.h"
15 #include "sion_debug.h"
16 #include "sion_file.h"
17 #include "sion_internal.h"
18 #include "sion_fd.h"
19 #include "sion_filedesc.h"
20 
21 #define FILENAME_LENGTH 1024
22 
23 static void usage(char *name);
24 
25 int main(int argc, char **argv)
26 {
27  _sion_fileptr *outfp;
28  char infilename[FILENAME_LENGTH];
29  char outfilename[FILENAME_LENGTH];
30  char prefix[FILENAME_LENGTH];
31  char fnmask[FILENAME_LENGTH];
32  char fnkmask[FILENAME_LENGTH];
33 
34  int i, rank, blknum;
35  char *localbuffer;
36  sion_int64 chunksize = 0;
37  sion_int64 left, bread, bsumread, bwrote;
38 
39  /* options */
40  int digits = 5;
41  int verbose = 0;
42  int useglobalranks = 0;
43 
44  /* for file infomation */
45  int sid, ntasks, nfiles, maxblocks;
46  sion_int32 fsblksize;
47  sion_int64 globalskip;
48  sion_int64 start_of_varheader;
49  sion_int64 *sion_localsizes;
50  sion_int64 *sion_globalranks;
51  sion_int64 *sion_blockcount;
52  sion_int64 *sion_blocksizes;
53  sion_int64 *sionsplit_sum_bytes_per_task;
54  sion_int64 sionsplit_sum_bytes;
55  sion_int64 sionsplit_filesize;
56 
57  _sion_filedesc *sion_filedesc;
58  uint64_t key;
59 
60 
61  /* parse command line */
62  i = 1;
63  if (argc < 3)
64  usage(argv[0]);
65 
66  while (i < argc) {
67  if (argv[i][0] == '-') {
68  switch (argv[i][1]) {
69  case 'd':
70  digits = atoi(argv[++i]);
71  break;
72  case 'g':
73  useglobalranks=1;
74  break;
75  case 'v':
76  verbose++;
77  break;
78  case 'V':
79  fprintf(stderr, "SIONlib utility %s (Version %d.%dp%d, fileformat version %d)\n", argv[0],
80  SION_MAIN_VERSION,SION_SUB_VERSION,
81  SION_VERSION_PATCHLEVEL,SION_FILEFORMAT_VERSION);
82  exit(1);
83  case 'h':
84  usage(argv[0]);
85  break;
86  default:
87  usage(argv[0]);
88  }
89  }
90  i++;
91  }
92 
93  strcpy(infilename, argv[argc - 2]);
94  strcpy(prefix, argv[argc - 1]);
95 
96  printf("sionsplit: filename: %-30s\n", infilename);
97  printf("sionsplit: prefix: %-30s\n", prefix);
98 
99  sid = sion_open(infilename, "rb,posix", &ntasks, &nfiles, NULL, &fsblksize, NULL, NULL);
100 
101  /* get the sion file structure */
102  if ((_sion_vcdtype(sid) != SION_FILEDESCRIPTOR) || !(sion_filedesc = _sion_vcdtovcon(sid))) {
103  return(_sion_errorprint(SION_NOT_SUCCESS,_SION_ERROR_RETURN,"invalid sion_filedesc, aborting %d ...\n", sid));
104  }
105 
106  printf("sionsplit: sid: %d\n", sid);
107  printf("sionsplit: filename: %-30s\n", infilename);
108  printf("sionsplit: number of tasks: %d\n", ntasks);
109  printf("sionsplit: number of files: %d\n", nfiles);
110  printf("sionsplit: number of digits: %d\n", digits);
111  printf("sionsplit: use global ranks: %d\n", useglobalranks);
112  printf("sionsplit: keyval: %d (%s)\n", (int) sion_filedesc->keyvalmode, sion_keyval_type_to_str(sion_filedesc->keyvalmode));
113  printf("sionsplit: current endianness: %s\n", (sion_get_endianness())? "big" : "little");
114  printf("sionsplit: file endianness: %s\n", (sion_get_file_endianness(sid)) ? "big" : "little");
115  printf("sionsplit: fsblksize: %lu bytes (%6.2f MB)\n", (unsigned long) fsblksize, fsblksize / 1024.0 / 1024.0);
116 
117  sion_get_locations(sid, &ntasks, &maxblocks, &globalskip, &start_of_varheader, &sion_localsizes, &sion_globalranks, &sion_blockcount,
118  &sion_blocksizes);
119 
120  printf("sionsplit: max number of chunks: %d\n", maxblocks);
121 
122  /* analysis */
123  sionsplit_sum_bytes_per_task = (sion_int64 *) malloc(ntasks * sizeof(sion_int64));
124  for (rank = 0; rank < ntasks; rank++)
125  sionsplit_sum_bytes_per_task[rank] = 0;
126  for (rank = 0; rank < ntasks; rank++) {
127  for (blknum = 0; blknum < maxblocks; blknum++) {
128  sionsplit_sum_bytes_per_task[rank] += sion_blocksizes[ntasks * blknum + rank];
129  }
130  }
131 
132  sionsplit_sum_bytes = 0;
133  for (rank = 0; rank < ntasks; rank++)
134  sionsplit_sum_bytes += sionsplit_sum_bytes_per_task[rank];
135 
136  printf("sionsplit: datasize in file (aggr.): %lld bytes (%6.2f MB)\n", sionsplit_sum_bytes, sionsplit_sum_bytes / 1024.0 / 1024.0);
137 
138  sionsplit_filesize = start_of_varheader + (maxblocks + 1) * rank * sizeof(sion_int64);
139 
140  printf("sionsplit: start_of_varheader: %lld bytes (%6.2f MB)\n", start_of_varheader, start_of_varheader / 1024.0 / 1024.0);
141  printf("sionsplit: size of file: %lld bytes (%6.2f MB)\n", sionsplit_filesize, sionsplit_filesize / 1024.0 / 1024.0);
142 
143  if (sionsplit_filesize > 0) {
144  printf("sionsplit: file usage: %8.6f%%\n", (double) sionsplit_sum_bytes / (double) sionsplit_filesize * 100.0);
145  }
146  chunksize = 0;
147 
148  sprintf(fnmask, "%s%%0%dd", prefix, digits);
149  printf("sionsplit: filename mask: %-s\n", fnmask);
150 
151  sprintf(fnkmask, "%s%%0%dd_%%012ld", prefix, digits);
152  printf("sionsplit: filename key mask: %-s\n", fnkmask);
153 
154  if(sion_filedesc->keyvalmode==SION_KEYVAL_NONE) {
155  /* standard copy with read/write */
156 
157  for (rank = 0; rank < ntasks; rank++) {
158  if (chunksize<sion_localsizes[rank]) chunksize=sion_localsizes[rank];
159  if (chunksize<sion_blocksizes[rank]) chunksize=sion_blocksizes[rank];
160  }
161  printf("sionsplit: max chunksize: %lld\n", chunksize);
162  localbuffer = (char *) malloc(chunksize * sizeof(char));
163  if (localbuffer == NULL) {
164  fprintf(stderr, "cannot allocate localbuffer of size %lld , aborting ...\n", chunksize * sizeof(char));
165  return (1);
166  }
167 
168 
169  for (rank = 0; rank < ntasks; rank++) {
170  if(useglobalranks) {
171  sprintf(outfilename, fnmask, (int) sion_globalranks[rank]);
172  } else {
173  sprintf(outfilename, fnmask, rank);
174  }
175 
176 
177  printf("sionsplit: generating file: %-s\n", outfilename);
179  if (outfp == NULL) {
180  fprintf(stderr, "cannot open outfile %s , aborting ...\n", outfilename);
181  return (1);
182  }
183 
184  for (blknum = 0; blknum < sion_blockcount[rank]; blknum++) {
185 
186  /* seek position of block */
187  sion_seek(sid, rank, blknum, 0);
188  DPRINTFP((1, "sionsplit", 0, "after sion_seek sid=%d rank=%d blknum=%d fileposition=%lld\n", sid, rank, blknum, sion_get_position(sid)));
189 
190  /* read data from block */
191  left = sion_blocksizes[ntasks * blknum + rank];
192  bsumread = 0;
193  while (left > 0) {
194  DPRINTFP((8, "sionsplit", 0, "will read %lld bytes localbuffer+%lld\n", left, bsumread));
195  bread = sion_fread(localbuffer + bsumread, 1, left, sid);
196  left -= bread;
197  bsumread += bread;
198  printf("sionsplit: %lld read left=%lld \n", bread, left);
199  }
200 
201  /* write data to outfile */
202  left = sion_blocksizes[ntasks * blknum + rank];
203  bwrote = _sion_file_write(localbuffer, left, outfp);
204  printf("sionsplit: %lld wrote of left=%lld \n", bwrote, left);
205 
206  }
207 
208  _sion_file_close(outfp);
209 
210  }
211 
212  free(localbuffer);
213 
214  } else {
215 
216  /* key-value copy with read/write */
217 
218  printf("sionsplit: buffer size: %d\n", fsblksize);
219  localbuffer = (char *) malloc(fsblksize * sizeof(char));
220  if (localbuffer == NULL) {
221  fprintf(stderr, "cannot allocate localbuffer of size %lld , aborting ...\n", chunksize * sizeof(char));
222  return (1);
223  }
224 
225  for (rank = 0; rank < ntasks; rank++) {
226 
227  if (verbose)
228  printf("siondefrag: ->rank: %d\n", rank);
229  if (!verbose)
230  if (rank % 16 == 0) {
231  printf("[%d]", rank);
232  fflush(stdout);
233  }
234 
235  /* search rank in input file */
237 
238  sion_key_full_scan(sid);
239 
240  /* reset iterator over keys */
242 
243  /* loop over key-value blocks */
244  while(sion_key_list_iterator_next(sid,&key)==SION_SUCCESS) {
245 
246  if(useglobalranks) {
247  sprintf(outfilename, fnkmask, (int) sion_globalranks[rank],(long) key);
248  } else {
249  sprintf(outfilename, fnkmask, rank,(long) key);
250  }
251 
252  printf("sionsplit: generating file: '%-s'\n", outfilename);
254  if (outfp == NULL) {
255  fprintf(stderr, "cannot open outfile %s , aborting ...\n", outfilename);
256  return (1);
257  }
258 
259  while( (bread=sion_fread_key(localbuffer,key,1,fsblksize,sid))>0 ) {
260  if(bread>0) {
261  bwrote = _sion_file_write(localbuffer, bread, outfp);
262  }
263  if (verbose)
264  printf("sionsplit: extracting now data of key[%12ld] (%lld bytes) (%lld bytes written)\n",
265  (long) key, bread, bwrote);
266  }
267 
268  _sion_file_close(outfp);
269 
270  }
271 
272  }
273 
274  }
275 
276  free(sionsplit_sum_bytes_per_task);
277  sion_close(sid);
278 
279  return (0);
280 }
281 
/*
 * Write the command line help to stderr and terminate the program with
 * exit status 1.
 *
 * name: program name inserted into the "Usage:" and "Example:" lines
 */
void usage(char *name)
{
  /* one call with concatenated literals; the text matches the previous
     line-by-line output exactly */
  fprintf(stderr,
          "Usage: %s options <sionfn> <prefix>\n\n"
          "Split SIONlib file <sionfn> into separate files. For each task a file is\n"
          "created with the name <prefix><digits>\n\n"
          "Example: %s data.sion data/file_\n"
          " creates files data/file_00000, data/file_00001, ...\n\n"
          "Options:\n"
          " [-v] verbose mode\n"
          " [-g] use global rank for numbering files\n"
          " [-d <num>] number of digits for filename generation (default 5)\n"
          " [-V] show version of SIONlib\n"
          " [-h] show this help\n",
          name, name);
  exit(1);
}
sion_int64 _sion_file_write(const void *data, sion_int64 bytes, _sion_fileptr *sion_fileptr)
Write data to file.
Definition: sion_file.c:147
int sion_key_list_iterator_next(int sid, uint64_t *keyptr)
Forward to next key.
_sion_fileptr * _sion_file_open(const char *fname, unsigned int flags, unsigned int addflags)
Create and open a new file for writing.
Definition: sion_file.c:40
Sion File Descriptor Structure.
Definition: sion_filedesc.h:77
int sion_get_endianness(void)
Return endianness.
Definition: sion_tools.c:29
sion_int64 sion_get_position(int sid)
Function that returns the current file position.
Definition: sion_common.c:853
#define SION_FILE_FLAG_WRITE
Definition: sion_file.h:23
int sion_get_file_endianness(int sid)
Returns endianness of data in file sid.
Definition: sion_common.c:247
#define SION_CURRENT_BLK
Definition: sion_const.h:67
int sion_close(int sid)
Close a sion file.
Definition: sion_serial.c:113
int _sion_vcdtype(int sid)
Definition: sion_fd.c:56
int sion_get_locations(int sid, int *ntasks, int *maxchunks, sion_int64 *globalskip, sion_int64 *start_of_varheader, sion_int64 **sion_chunksizes, sion_int64 **sion_globalranks, sion_int64 **sion_blockcount, sion_int64 **sion_blocksizes)
Returns pointers to internal fields.
Definition: sion_common.c:81
#define SION_KEYVAL_NONE
Definition: sion_const.h:80
char * sion_keyval_type_to_str(int type)
Returns key value mode as string.
void * _sion_vcdtovcon(int sid)
Definition: sion_fd.c:51
#define SION_FILE_FLAG_ANSI
Definition: sion_file.h:19
#define SION_FILE_FLAG_CREATE
Definition: sion_file.h:22
int sion_seek(int sid, int rank, int currentblocknr, sion_int64 posinblk)
Function to set the file pointer to a new position.
Definition: sion_common.c:621
#define SION_CURRENT_POS
Definition: sion_const.h:70
int _sion_file_close(_sion_fileptr *sion_fileptr)
Closes file and destroys fileptr structure.
Definition: sion_file.c:117
int _sion_errorprint(int rc, int level, const char *format,...)
Internal SION error.
int sion_key_list_iterator_reset(int sid)
Resets key iterator.
#define SION_FILEDESCRIPTOR
Definition: sion_fd.h:17
size_t sion_fread(void *data, size_t size, size_t nitems, int sid)
Read data from sion file.
Definition: sion_common.c:553
int sion_key_full_scan(int sid)
Performs a full scan of all meta data in current file.
size_t sion_fread_key(void *data, uint64_t key, size_t size, size_t nitems, int sid)
Read data for key.
int sion_open(char *fname, const char *file_mode, int *ntasks, int *nfiles, sion_int64 **chunksizes, sion_int32 *fsblksize, int **globalranks, FILE **fileptr)
Open a sion file in serial mode.
Definition: sion_serial.c:61