SIONlib  1.7.1
Scalable I/O library for parallel access to task-local files
sionsplit.c
1 /****************************************************************************
2 ** SIONLIB http://www.fz-juelich.de/jsc/sionlib **
3 *****************************************************************************
4 ** Copyright (c) 2008-2016 **
5 ** Forschungszentrum Juelich, Juelich Supercomputing Centre **
6 ** **
7 ** See the file COPYRIGHT in the package base directory for details **
8 ****************************************************************************/
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <ctype.h>
13 
14 #include "sion.h"
15 #include "sion_debug.h"
16 #include "sion_error_handler.h"
17 #include "sion_file.h"
18 #include "sion_internal.h"
19 #include "sion_fd.h"
20 #include "sion_filedesc.h"
21 
22 #define FILENAME_LENGTH 1024
23 
24 static void usage(char *name);
25 
26 int main(int argc, char **argv)
27 {
28  _sion_fileptr *outfp;
29  char infilename[FILENAME_LENGTH];
30  char outfilename[FILENAME_LENGTH];
31  char prefix[FILENAME_LENGTH];
32  char fnmask[FILENAME_LENGTH];
33  char fnkmask[FILENAME_LENGTH];
34 
35  int i, rank, blknum;
36  char *localbuffer;
37  sion_int64 chunksize = 0;
38  sion_int64 left, bread, bsumread, bwrote;
39 
40  /* options */
41  int digits = 5;
42  int verbose = 0;
43  int useglobalranks = 0;
44 
45  /* for file infomation */
46  int sid, ntasks, nfiles, maxblocks;
47  sion_int32 fsblksize;
48  sion_int64 globalskip;
49  sion_int64 start_of_varheader;
50  sion_int64 *sion_localsizes;
51  sion_int64 *sion_globalranks;
52  sion_int64 *sion_blockcount;
53  sion_int64 *sion_blocksizes;
54  sion_int64 *sionsplit_sum_bytes_per_task;
55  sion_int64 sionsplit_sum_bytes;
56  sion_int64 sionsplit_filesize;
57 
58  _sion_filedesc *sion_filedesc;
59  uint64_t key;
60 
61 
62  /* parse command line */
63  i = 1;
64  if (argc < 3)
65  usage(argv[0]);
66 
67  while (i < argc) {
68  if (argv[i][0] == '-') {
69  switch (argv[i][1]) {
70  case 'd':
71  digits = atoi(argv[++i]);
72  break;
73  case 'g':
74  useglobalranks=1;
75  break;
76  case 'v':
77  verbose++;
78  break;
79  case 'V':
80  fprintf(stderr, "SIONlib utility %s (Version %d.%dp%d, fileformat version %d)\n", argv[0],
81  SION_MAIN_VERSION,SION_SUB_VERSION,
82  SION_VERSION_PATCHLEVEL,SION_FILEFORMAT_VERSION);
83  exit(1);
84  case 'h':
85  usage(argv[0]);
86  break;
87  default:
88  usage(argv[0]);
89  }
90  }
91  i++;
92  }
93 
94  strcpy(infilename, argv[argc - 2]);
95  strcpy(prefix, argv[argc - 1]);
96 
97  printf("sionsplit: filename: %-30s\n", infilename);
98  printf("sionsplit: prefix: %-30s\n", prefix);
99 
100  sid = sion_open(infilename, "rb,posix", &ntasks, &nfiles, NULL, &fsblksize, NULL, NULL);
101 
102  /* get the sion file structure */
103  if ((_sion_vcdtype(sid) != SION_FILEDESCRIPTOR) || !(sion_filedesc = _sion_vcdtovcon(sid))) {
104  return(_sion_errorprint(SION_NOT_SUCCESS,_SION_ERROR_RETURN,"invalid sion_filedesc, aborting %d ...\n", sid));
105  }
106 
107  printf("sionsplit: sid: %d\n", sid);
108  printf("sionsplit: filename: %-30s\n", infilename);
109  printf("sionsplit: number of tasks: %d\n", ntasks);
110  printf("sionsplit: number of files: %d\n", nfiles);
111  printf("sionsplit: number of digits: %d\n", digits);
112  printf("sionsplit: use global ranks: %d\n", useglobalranks);
113  printf("sionsplit: keyval: %d (%s)\n", (int) sion_filedesc->keyvalmode, sion_keyval_type_to_str(sion_filedesc->keyvalmode));
114  printf("sionsplit: current endianness: %s\n", (sion_get_endianness())? "big" : "little");
115  printf("sionsplit: file endianness: %s\n", (sion_get_file_endianness(sid)) ? "big" : "little");
116  printf("sionsplit: fsblksize: %lu bytes (%6.2f MB)\n", (unsigned long) fsblksize, fsblksize / 1024.0 / 1024.0);
117 
118  sion_get_locations(sid, &ntasks, &maxblocks, &globalskip, &start_of_varheader, &sion_localsizes, &sion_globalranks, &sion_blockcount,
119  &sion_blocksizes);
120 
121  printf("sionsplit: max number of chunks: %d\n", maxblocks);
122 
123  /* analysis */
124  sionsplit_sum_bytes_per_task = (sion_int64 *) malloc(ntasks * sizeof(sion_int64));
125  for (rank = 0; rank < ntasks; rank++)
126  sionsplit_sum_bytes_per_task[rank] = 0;
127  for (rank = 0; rank < ntasks; rank++) {
128  for (blknum = 0; blknum < maxblocks; blknum++) {
129  sionsplit_sum_bytes_per_task[rank] += sion_blocksizes[ntasks * blknum + rank];
130  }
131  }
132 
133  sionsplit_sum_bytes = 0;
134  for (rank = 0; rank < ntasks; rank++)
135  sionsplit_sum_bytes += sionsplit_sum_bytes_per_task[rank];
136 
137  printf("sionsplit: datasize in file (aggr.): %lld bytes (%6.2f MB)\n", sionsplit_sum_bytes, sionsplit_sum_bytes / 1024.0 / 1024.0);
138 
139  sionsplit_filesize = start_of_varheader + (maxblocks + 1) * rank * sizeof(sion_int64);
140 
141  printf("sionsplit: start_of_varheader: %lld bytes (%6.2f MB)\n", start_of_varheader, start_of_varheader / 1024.0 / 1024.0);
142  printf("sionsplit: size of file: %lld bytes (%6.2f MB)\n", sionsplit_filesize, sionsplit_filesize / 1024.0 / 1024.0);
143 
144  if (sionsplit_filesize > 0) {
145  printf("sionsplit: file usage: %8.6f%%\n", (double) sionsplit_sum_bytes / (double) sionsplit_filesize * 100.0);
146  }
147  chunksize = 0;
148 
149  sprintf(fnmask, "%s%%0%dd", prefix, digits);
150  printf("sionsplit: filename mask: %-s\n", fnmask);
151 
152  sprintf(fnkmask, "%s%%0%dd_%%012ld", prefix, digits);
153  printf("sionsplit: filename key mask: %-s\n", fnkmask);
154 
155  if(sion_filedesc->keyvalmode==SION_KEYVAL_NONE) {
156  /* standard copy with read/write */
157 
158  for (rank = 0; rank < ntasks; rank++) {
159  if (chunksize<sion_localsizes[rank]) chunksize=sion_localsizes[rank];
160  if (chunksize<sion_blocksizes[rank]) chunksize=sion_blocksizes[rank];
161  }
162  printf("sionsplit: max chunksize: %lld\n", chunksize);
163  localbuffer = (char *) malloc(chunksize * sizeof(char));
164  if (localbuffer == NULL) {
165  fprintf(stderr, "cannot allocate localbuffer of size %lld , aborting ...\n", chunksize * sizeof(char));
166  free(sionsplit_sum_bytes_per_task);
167  return (1);
168  }
169 
170 
171  for (rank = 0; rank < ntasks; rank++) {
172  if(useglobalranks) {
173  sprintf(outfilename, fnmask, (int) sion_globalranks[rank]);
174  } else {
175  sprintf(outfilename, fnmask, rank);
176  }
177 
178 
179  printf("sionsplit: generating file: %-s\n", outfilename);
181  if (outfp == NULL) {
182  fprintf(stderr, "cannot open outfile %s , aborting ...\n", outfilename);
183  free(sionsplit_sum_bytes_per_task);
184  free(localbuffer);
185  return (1);
186  }
187 
188  for (blknum = 0; blknum < sion_blockcount[rank]; blknum++) {
189 
190  /* seek position of block */
191  sion_seek(sid, rank, blknum, 0);
192  DPRINTFP((1, "sionsplit", 0, "after sion_seek sid=%d rank=%d blknum=%d fileposition=%lld\n", sid, rank, blknum, sion_get_position(sid)));
193 
194  /* read data from block */
195  left = sion_blocksizes[ntasks * blknum + rank];
196  bsumread = 0;
197  while (left > 0) {
198  DPRINTFP((8, "sionsplit", 0, "will read %lld bytes localbuffer+%lld\n", left, bsumread));
199  bread = sion_fread(localbuffer + bsumread, 1, left, sid);
200  left -= bread;
201  bsumread += bread;
202  printf("sionsplit: %lld read left=%lld \n", bread, left);
203  }
204 
205  /* write data to outfile */
206  left = sion_blocksizes[ntasks * blknum + rank];
207  bwrote = _sion_file_write(localbuffer, left, outfp);
208  printf("sionsplit: %lld wrote of left=%lld \n", bwrote, left);
209 
210  }
211 
212  _sion_file_close(outfp);
213 
214  }
215 
216  free(localbuffer);
217 
218  } else {
219 
220  /* key-value copy with read/write */
221 
222  printf("sionsplit: buffer size: %d\n", fsblksize);
223  localbuffer = (char *) malloc(fsblksize * sizeof(char));
224  if (localbuffer == NULL) {
225  fprintf(stderr, "cannot allocate localbuffer of size %lld , aborting ...\n", chunksize * sizeof(char));
226  free(sionsplit_sum_bytes_per_task);
227  return (1);
228  }
229 
230  for (rank = 0; rank < ntasks; rank++) {
231 
232  if (verbose)
233  printf("siondefrag: ->rank: %d\n", rank);
234  if (!verbose)
235  if (rank % 16 == 0) {
236  printf("[%d]", rank);
237  fflush(stdout);
238  }
239 
240  /* search rank in input file */
242 
243  sion_key_full_scan(sid);
244 
245  /* reset iterator over keys */
247 
248  /* loop over key-value blocks */
249  while(sion_key_list_iterator_next(sid,&key)==SION_SUCCESS) {
250 
251  if(useglobalranks) {
252  sprintf(outfilename, fnkmask, (int) sion_globalranks[rank],(long) key);
253  } else {
254  sprintf(outfilename, fnkmask, rank,(long) key);
255  }
256 
257  printf("sionsplit: generating file: '%-s'\n", outfilename);
259  if (outfp == NULL) {
260  fprintf(stderr, "cannot open outfile %s , aborting ...\n", outfilename);
261  free(localbuffer);
262  free(sionsplit_sum_bytes_per_task);
263  return (1);
264  }
265 
266  while( (bread=sion_fread_key(localbuffer,key,1,fsblksize,sid))>0 ) {
267  if(bread>0) {
268  bwrote = _sion_file_write(localbuffer, bread, outfp);
269  }
270  if (verbose)
271  printf("sionsplit: extracting now data of key[%12ld] (%lld bytes) (%lld bytes written)\n",
272  (long) key, bread, bwrote);
273  }
274 
275  _sion_file_close(outfp);
276 
277  }
278 
279  }
280  free(localbuffer);
281  }
282 
283  free(sionsplit_sum_bytes_per_task);
284  sion_close(sid);
285 
286  return (0);
287 }
288 
/*
 * Write the command line help of the tool to stderr and terminate the
 * program with exit status 1. This function does not return.
 *
 * name: program name, inserted into the usage line and the example line
 */
void usage(char *name)
{
  /* option table, one entry per output line */
  static const char *const option_help[] = {
    "Options:\n",
    " [-v] verbose mode\n",
    " [-g] use global rank for numbering files\n",
    " [-d <num>] number of digits for filename generation (default 5)\n",
    " [-V] show version of SIONlib\n",
    " [-h] show this help\n"
  };
  const size_t option_lines = sizeof(option_help) / sizeof(option_help[0]);
  size_t line;

  fprintf(stderr, "Usage: %s options <sionfn> <prefix>\n\n", name);

  fputs("Split SIONlib file <sionfn> into separate files. For each task a file is\n", stderr);
  fputs("created with the name <prefix><digits>\n\n", stderr);

  fprintf(stderr, "Example: %s data.sion data/file_\n", name);
  fputs(" creates files data/file_00000, data/file_00001, ...\n\n", stderr);

  for (line = 0; line < option_lines; line++) {
    fputs(option_help[line], stderr);
  }

  exit(1);
}
sion_int64 _sion_file_write(const void *data, sion_int64 bytes, _sion_fileptr *sion_fileptr)
Write data to file.
Definition: sion_file.c:148
int sion_key_list_iterator_next(int sid, uint64_t *keyptr)
Forward to next key.
_sion_fileptr * _sion_file_open(const char *fname, unsigned int flags, unsigned int addflags)
Create and open a new file for writing.
Definition: sion_file.c:41
Sion File Descriptor Structure.
Definition: sion_filedesc.h:77
int sion_get_endianness(void)
Return endianness.
Definition: sion_tools.c:30
sion_int64 sion_get_position(int sid)
Function that returns the current file position.
Definition: sion_common.c:891
#define SION_FILE_FLAG_WRITE
Definition: sion_file.h:23
int sion_get_file_endianness(int sid)
Returns endianness of data in file sid.
Definition: sion_common.c:255
#define SION_CURRENT_BLK
Definition: sion_const.h:67
int sion_close(int sid)
Close a sion file.
Definition: sion_serial.c:113
int _sion_vcdtype(int sid)
Definition: sion_fd.c:56
int sion_get_locations(int sid, int *ntasks, int *maxchunks, sion_int64 *globalskip, sion_int64 *start_of_varheader, sion_int64 **sion_chunksizes, sion_int64 **sion_globalranks, sion_int64 **sion_blockcount, sion_int64 **sion_blocksizes)
Returns pointers to internal fields.
Definition: sion_common.c:86
#define SION_KEYVAL_NONE
Definition: sion_const.h:80
char * sion_keyval_type_to_str(int type)
Returns key value mode as string.
void * _sion_vcdtovcon(int sid)
Definition: sion_fd.c:51
#define SION_FILE_FLAG_ANSI
Definition: sion_file.h:19
#define SION_FILE_FLAG_CREATE
Definition: sion_file.h:22
int sion_seek(int sid, int rank, int currentblocknr, sion_int64 posinblk)
Function to set the file pointer to a new position.
Definition: sion_common.c:659
#define SION_CURRENT_POS
Definition: sion_const.h:70
int _sion_file_close(_sion_fileptr *sion_fileptr)
Closes file and destroys the fileptr structure.
Definition: sion_file.c:118
int sion_key_list_iterator_reset(int sid)
Resets key iterator.
#define SION_FILEDESCRIPTOR
Definition: sion_fd.h:17
size_t sion_fread(void *data, size_t size, size_t nitems, int sid)
Read data from sion file.
Definition: sion_common.c:591
int sion_key_full_scan(int sid)
Performs a full scan of all meta data in current file.
size_t sion_fread_key(void *data, uint64_t key, size_t size, size_t nitems, int sid)
Read data for key.
int sion_open(char *fname, const char *file_mode, int *ntasks, int *nfiles, sion_int64 **chunksizes, sion_int32 *fsblksize, int **globalranks, FILE **fileptr)
Open a sion file in serial mode.
Definition: sion_serial.c:61