Merge from vendor branch LESS:
[dragonfly.git] / sbin / jscan / jfile.c
CommitLineData
ce5e5ac4
MD
1/*
2 * Copyright (c) 2004,2005 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
a9cb3889 34 * $DragonFly: src/sbin/jscan/jfile.c,v 1.10 2005/09/07 07:20:23 dillon Exp $
ce5e5ac4
MD
35 */
36
37#include "jscan.h"
c2b2044a
MD
38#include <dirent.h>
39
a9cb3889
MD
/*
 * Forward declarations for the file-local helpers defined later in
 * this file.
 */
static void jalign(struct jfile *jf, enum jdirection direction);
static int jreadbuf(struct jfile *jf, enum jdirection direction,
                    void *buf, int bytes);
ce5e5ac4
MD
43
44/*
c2b2044a
MD
45 * Open a file descriptor for journal record access.
46 *
47 * NOTE: only seekable descriptors are supported for backwards scans.
ce5e5ac4
MD
48 */
49struct jfile *
a9cb3889 50jopen_fd(int fd)
9c118cb2 51{
9c118cb2
MD
52 struct jfile *jf;
53
c2b2044a
MD
54 jf = malloc(sizeof(struct jfile));
55 bzero(jf, sizeof(struct jfile));
56 jf->jf_fd = fd;
bb406b71 57 jf->jf_write_fd = -1;
c2b2044a 58 jf->jf_open_flags = O_RDONLY;
a9cb3889 59 jf->jf_pos = 0;
9c118cb2
MD
60 return(jf);
61}
62
c2b2044a
MD
63/*
64 * Open a prefix set. <prefix>.nnnnnnnnn files or a <prefix>.transid file
65 * must exist to succeed. No file descriptor is actually opened but
66 * the sequence number is initialized to the beginning or end of the set.
67 */
9c118cb2 68struct jfile *
a9cb3889 69jopen_prefix(const char *prefix, int rw)
ce5e5ac4
MD
70{
71 struct jfile *jf;
bb406b71 72 struct jdata *jd;
c2b2044a
MD
73 unsigned int seq_beg = -1;
74 unsigned int seq_end = -1;
75 unsigned int seq;
76 struct stat st;
77 const char *dirname;
78 struct dirent *den;
79 DIR *dir;
80 char *basename;
81 char *data;
82 char *ptr;
83 int hastransid;
84 int baselen;
85 int fd;
ce5e5ac4 86
c2b2044a
MD
87 dirname = data = strdup(prefix);
88 if ((basename = strrchr(dirname, '/')) != NULL) {
89 *basename++ = 0;
90 } else {
91 basename = data;
92 dirname = "./";
93 }
94 baselen = strlen(basename);
95 if ((dir = opendir(dirname)) != NULL) {
96 while ((den = readdir(dir)) != NULL) {
97 if (strncmp(den->d_name, basename, baselen) == 0 &&
98 den->d_name[baselen] == '.'
99 ) {
cbeb73b9 100 seq = strtoul(den->d_name + baselen + 1, &ptr, 16);
bb406b71 101 if (*ptr == 0 && seq != ULONG_MAX) {
c2b2044a
MD
102 if (seq_beg == (unsigned int)-1 || seq_beg > seq)
103 seq_beg = seq;
104 if (seq_end == (unsigned int)-1 || seq_end < seq)
105 seq_end = seq;
106 }
107 }
108 }
109 closedir(dir);
110 }
111 free(data);
112
113 hastransid = 0;
114 asprintf(&data, "%s.transid", prefix);
115 if (stat(data, &st) == 0)
116 hastransid = 1;
117 free(data);
118
119 if (seq_beg != (unsigned int)-1 || hastransid) {
120 if (seq_beg == (unsigned int)-1) {
121 seq_beg = 0;
122 seq_end = 0;
123 if (rw) {
124 asprintf(&data, "%s.%08x", prefix, 0);
125 if ((fd = open(data, O_RDWR|O_CREAT, 0666)) >= 0)
126 close(fd);
127 free(data);
128 }
129 }
130 jf = malloc(sizeof(struct jfile));
131 bzero(jf, sizeof(struct jfile));
132 jf->jf_fd = -1;
bb406b71 133 jf->jf_write_fd = -1;
c2b2044a 134 jf->jf_prefix = strdup(prefix);
a9cb3889 135 jf->jf_seq = seq_beg;
c2b2044a
MD
136 jf->jf_seq_beg = seq_beg;
137 jf->jf_seq_end = seq_end;
c2b2044a 138 jf->jf_open_flags = rw ? (O_RDWR|O_CREAT) : O_RDONLY;
cbeb73b9
MD
139 if (verbose_opt)
140 fprintf(stderr, "Open prefix set %08x-%08x\n", seq_beg, seq_end);
a9cb3889 141 if ((jd = jread(jf, NULL, JD_BACKWARDS)) != NULL) {
bb406b71
MD
142 jf->jf_last_transid = jd->jd_transid;
143 jfree(jf, jd);
144 }
c2b2044a
MD
145 } else {
146 jf = NULL;
ce5e5ac4
MD
147 }
148 return(jf);
149}
150
151/*
c2b2044a 152 * Get a prefix set ready for append.
ce5e5ac4 153 */
c2b2044a
MD
154int
155jrecord_init(const char *prefix)
ce5e5ac4 156{
c2b2044a
MD
157 struct jfile *jf;
158 struct stat st;
159 char *data;
160 int hasseqspace;
161 int fd;
ce5e5ac4 162
c2b2044a
MD
163 /*
164 * Determine whether we already have a prefix set or whether we need
165 * to create one.
166 */
a9cb3889 167 jf = jopen_prefix(prefix, 0);
c2b2044a
MD
168 hasseqspace = 0;
169 if (jf) {
170 if (jf->jf_seq_beg != (unsigned int)-1)
171 hasseqspace = 1;
172 jclose(jf);
ce5e5ac4 173 }
c2b2044a
MD
174 asprintf(&data, "%s.transid", prefix);
175
176 /*
177 * If the sequence exists the transid file must ALREADY exist for us
178 * to be able to safely 'append' to the space. Locked-down sequence
179 * spaces do not have a transid file.
180 */
181 if (hasseqspace) {
182 fd = open(data, O_RDWR, 0666);
183 } else {
184 fd = open(data, O_RDWR|O_CREAT, 0666);
185 }
186 free(data);
187 if (fd < 0)
188 return(-1);
189 if (fstat(fd, &st) == 0 && st.st_size == 0)
190 write(fd, "0000000000000000\n", 17); /* starting transid in hex */
191 close(fd);
192 return(0);
ce5e5ac4
MD
193}
194
195/*
c2b2044a 196 * Close a previously opened journal, clean up any side allocations.
ce5e5ac4
MD
197 */
198void
c2b2044a 199jclose(struct jfile *jf)
ce5e5ac4 200{
bb406b71
MD
201 if (jf->jf_fd >= 0) {
202 close(jf->jf_fd);
203 jf->jf_fd = -1;
204 }
205 if (jf->jf_write_fd >= 0) {
206 close(jf->jf_write_fd);
207 jf->jf_write_fd = -1;
208 }
c2b2044a 209 free(jf);
ce5e5ac4
MD
210}
211
212/*
a9cb3889
MD
213 * Locate the next (or previous) raw record given a jfile, current record,
214 * and direction. If the current record is NULL then the first or last
215 * record for the current sequence number is returned.
c3172c11
MD
216 *
217 * PAD RECORD SPECIAL CASE. Pad records can be 16 bytes long, which means
218 * that that rawrecend overlaps the transid field of the rawrecbeg. Because
219 * the transid is garbage, we must skip and cannot return pad records.
ce5e5ac4 220 */
a9cb3889
MD
221struct jdata *
222jread(struct jfile *jf, struct jdata *jd, enum jdirection direction)
ce5e5ac4 223{
c2b2044a
MD
224 struct journal_rawrecbeg head;
225 struct journal_rawrecbeg *headp;
226 struct journal_rawrecend tail;
227 struct journal_rawrecend *tailp;
cbeb73b9 228 struct stat st;
c2b2044a
MD
229 char *filename;
230 int allocsize;
231 int recsize;
232 int search;
cbeb73b9 233 int error;
a2158374 234 int n;
c2b2044a 235
a9cb3889
MD
236 if (jd) {
237 /*
238 * Handle the next/previous record case. If running in the forwards
239 * direction we position the file just after jd. If running in the
240 * backwards direction we position the file at the base of jd so
241 * the backwards read gets the previous record.
242 *
243 * In prefix mode we have to get the right descriptor open and
244 * position the file, since the fall through code resets to the
245 * beginning or end if it has to open a descriptor.
246 */
247 assert(direction != JD_SEQFIRST && direction != JD_SEQLAST);
248 if (jf->jf_prefix) {
249 if (jf->jf_fd >= 0 && jf->jf_seq != jd->jd_seq) {
250 close(jf->jf_fd);
251 jf->jf_fd = -1;
252 }
253 jf->jf_seq = jd->jd_seq;
254 if (jf->jf_fd < 0) {
255 asprintf(&filename, "%s.%08x", jf->jf_prefix, jf->jf_seq);
256 jf->jf_fd = open(filename, O_RDONLY);
257 if (verbose_opt > 1)
258 fprintf(stderr, "Open %s fd %d\n", filename, jf->jf_fd);
259 free(filename);
260 }
261 }
262 if ((jmodes & JMODEF_INPUT_PIPE) == 0) {
c2b2044a 263 if (direction == JD_FORWARDS) {
a9cb3889 264 jf->jf_pos = jd->jd_pos + jd->jd_size;
c2b2044a 265 lseek(jf->jf_fd, jf->jf_pos, 0);
a9cb3889
MD
266 } else {
267 jf->jf_pos = jd->jd_pos;
268 /* lseek(jf->jf_fd, jf->jf_pos, 0); not needed */
c2b2044a 269 }
a9cb3889
MD
270 } else {
271 assert(direction == JD_FORWARDS && jf->jf_prefix == NULL);
272 assert(jf->jf_pos == jd->jd_pos + jd->jd_size);
273 }
274 jfree(jf, jd);
275 } else {
276 /*
277 * Handle the first/last record case. In the prefix case we only
278 * need to set jf_seq and close the file handle and fall through.
279 * The SEQ modes maintain the current jf_seq (kinda a hack).
280 */
281 if (jf->jf_prefix) {
282 if (jf->jf_fd >= 0) {
283 close(jf->jf_fd);
284 jf->jf_fd = -1;
285 }
286 switch(direction) {
287 case JD_FORWARDS:
288 jf->jf_seq = jf->jf_seq_beg;
289 break;
290 case JD_BACKWARDS:
291 jf->jf_seq = jf->jf_seq_end;
292 break;
293 case JD_SEQFIRST:
294 direction = JD_FORWARDS;
295 break;
296 case JD_SEQLAST:
297 direction = JD_BACKWARDS;
298 break;
299 }
300 } else if ((jmodes & JMODEF_INPUT_PIPE) == 0) {
301 switch(direction) {
302 case JD_SEQFIRST:
303 direction = JD_FORWARDS;
304 /* fall through */
305 case JD_FORWARDS:
306 jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_SET);
307 break;
308 case JD_SEQLAST:
309 direction = JD_BACKWARDS;
310 /* fall through */
311 case JD_BACKWARDS:
312 jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_END);
313 break;
314 }
315 } else {
316 if (direction == JD_SEQFIRST)
317 direction = JD_FORWARDS;
318 assert(jf->jf_pos == 0 && direction == JD_FORWARDS);
a2158374 319 }
c2b2044a
MD
320 }
321
322top:
323 /*
a9cb3889
MD
324 * If we are doing a prefix scan and the descriptor is not open,
325 * open the file based on jf_seq and position it to the beginning
326 * or end based on the direction. This is how we iterate through
327 * the prefix set.
c2b2044a 328 */
a9cb3889 329 if (jf->jf_fd < 0) {
c2b2044a 330 asprintf(&filename, "%s.%08x", jf->jf_prefix, jf->jf_seq);
a9cb3889 331 jf->jf_fd = open(filename, O_RDONLY);
cbeb73b9
MD
332 if (verbose_opt > 1)
333 fprintf(stderr, "Open %s fd %d\n", filename, jf->jf_fd);
c2b2044a 334 free(filename);
a9cb3889
MD
335 if (direction == JD_FORWARDS)
336 jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_SET);
337 else
338 jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_END);
c2b2044a
MD
339 }
340
341 /*
342 * Get the current offset and make sure it is 16-byte aligned. If it
343 * isn't, align it and enter search mode.
344 */
345 if (jf->jf_pos & 15) {
346 jf_warn(jf, "realigning bad offset and entering search mode");
a9cb3889 347 jalign(jf, direction);
c2b2044a
MD
348 search = 1;
349 } else {
350 search = 0;
351 }
352
cbeb73b9 353 error = 0;
a9cb3889 354 if (direction == JD_FORWARDS) {
c2b2044a
MD
355 /*
356 * Scan the journal forwards. Note that the file pointer might not
357 * be seekable.
358 */
a9cb3889 359 while ((error = jreadbuf(jf, direction, &head, sizeof(head))) == sizeof(head)) {
c2b2044a
MD
360 if (head.begmagic != JREC_BEGMAGIC) {
361 if (search == 0)
362 jf_warn(jf, "bad beginmagic, searching for new record");
363 search = 1;
a9cb3889 364 jalign(jf, direction);
c2b2044a
MD
365 continue;
366 }
367
368 /*
369 * The actual record is 16-byte aligned. head.recsize contains
370 * the unaligned record size.
371 */
372 recsize = (head.recsize + 15) & ~15;
373 if (recsize < JREC_MINRECSIZE || recsize > JREC_MAXRECSIZE) {
374 if (search == 0)
375 jf_warn(jf, "bad recordsize: %d\n", recsize);
376 search = 1;
a9cb3889 377 jalign(jf, direction);
c2b2044a
MD
378 continue;
379 }
380 allocsize = offsetof(struct jdata, jd_data[recsize]);
381 allocsize = (allocsize + 255) & ~255;
382 jd = malloc(allocsize);
383 bzero(jd, offsetof(struct jdata, jd_data[0]));
384 bcopy(&head, jd->jd_data, sizeof(head));
a9cb3889 385 n = jreadbuf(jf, direction, jd->jd_data + sizeof(head),
c2b2044a
MD
386 recsize - sizeof(head));
387 if (n != (int)(recsize - sizeof(head))) {
388 if (search == 0)
389 jf_warn(jf, "Incomplete stream record\n");
390 search = 1;
a9cb3889 391 jalign(jf, direction);
c2b2044a
MD
392 free(jd);
393 continue;
394 }
395
396 tailp = (void *)(jd->jd_data + recsize - sizeof(*tailp));
397 if (tailp->endmagic != JREC_ENDMAGIC) {
398 if (search == 0)
399 jf_warn(jf, "bad endmagic, searching for new record");
400 search = 1;
a9cb3889 401 jalign(jf, direction);
c2b2044a
MD
402 free(jd);
403 continue;
404 }
405
c3172c11
MD
406 /*
407 * Skip pad records.
408 */
409 if (head.streamid == JREC_STREAMID_PAD) {
410 free(jd);
411 continue;
412 }
413
c2b2044a
MD
414 /*
415 * note: recsize is aligned (the actual record size),
416 * head.recsize is unaligned (the actual payload size).
417 */
418 jd->jd_transid = head.transid;
419 jd->jd_alloc = allocsize;
420 jd->jd_size = recsize;
a9cb3889
MD
421 jd->jd_seq = jf->jf_seq;
422 jd->jd_pos = jf->jf_pos - recsize;
c2b2044a 423 jd->jd_refs = 1;
a9cb3889 424 return(jd);
ce5e5ac4
MD
425 }
426 } else {
c2b2044a
MD
427 /*
428 * Scan the journal backwards. Note that jread()'s reverse-seek and
429 * read. The data read will be forward ordered, however.
430 */
a9cb3889 431 while ((error = jreadbuf(jf, direction, &tail, sizeof(tail))) == sizeof(tail)) {
c2b2044a
MD
432 if (tail.endmagic != JREC_ENDMAGIC) {
433 if (search == 0)
434 jf_warn(jf, "bad endmagic, searching for new record");
435 search = 1;
a9cb3889 436 jalign(jf, direction);
c2b2044a
MD
437 continue;
438 }
439
440 /*
441 * The actual record is 16-byte aligned. head.recsize contains
442 * the unaligned record size.
443 */
444 recsize = (tail.recsize + 15) & ~15;
445 if (recsize < JREC_MINRECSIZE || recsize > JREC_MAXRECSIZE) {
446 if (search == 0)
447 jf_warn(jf, "bad recordsize: %d\n", recsize);
448 search = 1;
a9cb3889 449 jalign(jf, direction);
c2b2044a
MD
450 continue;
451 }
452 allocsize = offsetof(struct jdata, jd_data[recsize]);
453 allocsize = (allocsize + 255) & ~255;
454 jd = malloc(allocsize);
455 bzero(jd, offsetof(struct jdata, jd_data[0]));
456 bcopy(&tail, jd->jd_data + recsize - sizeof(tail), sizeof(tail));
a9cb3889 457 n = jreadbuf(jf, direction, jd->jd_data, recsize - sizeof(tail));
c2b2044a
MD
458 if (n != (int)(recsize - sizeof(tail))) {
459 if (search == 0)
460 jf_warn(jf, "Incomplete stream record\n");
461 search = 1;
a9cb3889 462 jalign(jf, direction);
c2b2044a
MD
463 free(jd);
464 continue;
465 }
466
467 headp = (void *)jd->jd_data;
468 if (headp->begmagic != JREC_BEGMAGIC) {
469 if (search == 0)
470 jf_warn(jf, "bad begmagic, searching for new record");
471 search = 1;
a9cb3889 472 jalign(jf, direction);
c2b2044a
MD
473 free(jd);
474 continue;
475 }
476
c3172c11
MD
477 /*
478 * Skip pad records.
479 */
480 if (head.streamid == JREC_STREAMID_PAD) {
481 free(jd);
482 continue;
483 }
484
c2b2044a
MD
485 /*
486 * note: recsize is aligned (the actual record size),
487 * head.recsize is unaligned (the actual payload size).
488 */
489 jd->jd_transid = headp->transid;
490 jd->jd_alloc = allocsize;
491 jd->jd_size = recsize;
a9cb3889
MD
492 jd->jd_seq = jf->jf_seq;
493 jd->jd_pos = jf->jf_pos;
c2b2044a 494 jd->jd_refs = 1;
a9cb3889 495 return(jd);
c2b2044a
MD
496 }
497 }
498
499 /*
500 * If reading in prefix mode and there is no more data, close the
501 * current descriptor, adjust the sequence number, and loop.
cbeb73b9
MD
502 *
503 * If we hit the end of the sequence space and were asked to loop,
504 * check for the next sequence number and adjust jf_seq_end. Leave
505 * the current descriptor open so we do not loose track of its seek
506 * position, and also to catch a race where another jscan may have
507 * written more data to the current sequence number before rolling
508 * the next sequence number.
c2b2044a 509 */
cbeb73b9 510 if (error == 0 && jf->jf_prefix) {
a9cb3889 511 if (direction == JD_FORWARDS) {
c2b2044a
MD
512 if (jf->jf_seq < jf->jf_seq_end) {
513 ++jf->jf_seq;
cbeb73b9
MD
514 if (verbose_opt)
515 fprintf(stderr, "jread: roll to seq %08x\n", jf->jf_seq);
516 if (jf->jf_fd >= 0) {
517 close(jf->jf_fd);
518 jf->jf_fd = -1;
519 }
520 goto top;
521 }
522 if (jmodes & JMODEF_LOOP_FOREVER) {
523 asprintf(&filename, "%s.%08x", jf->jf_prefix, jf->jf_seq + 1);
524 if (stat(filename, &st) == 0) {
525 ++jf->jf_seq_end;
526 if (verbose_opt)
527 fprintf(stderr, "jread: roll seq_end to %08x\n",
528 jf->jf_seq_end);
529 } else {
530 sleep(5);
531 }
c2b2044a
MD
532 goto top;
533 }
ce5e5ac4 534 } else {
c2b2044a
MD
535 if (jf->jf_seq > jf->jf_seq_beg) {
536 --jf->jf_seq;
cbeb73b9
MD
537 if (verbose_opt)
538 fprintf(stderr, "jread: roll to seq %08x\n", jf->jf_seq);
539 if (jf->jf_fd >= 0) {
540 close(jf->jf_fd);
541 jf->jf_fd = -1;
542 }
c2b2044a
MD
543 goto top;
544 }
ce5e5ac4
MD
545 }
546 }
c2b2044a 547
cbeb73b9
MD
548 /*
549 * If we hit EOF and were asked to loop forever on the input, leave
550 * the current descriptor open, sleep, and loop.
551 *
552 * We have already handled the prefix case. This feature only works
553 * when doing forward scans and the input is not a pipe.
554 */
a9cb3889
MD
555 if (error == 0 && jf->jf_prefix == NULL &&
556 (jmodes & JMODEF_LOOP_FOREVER) &&
557 !(jmodes & JMODEF_INPUT_PIPE) &&
558 direction == JD_FORWARDS
cbeb73b9
MD
559 ) {
560 sleep(5);
561 goto top;
562 }
563
c2b2044a
MD
564 /*
565 * Otherwise there are no more records and we are done.
566 */
a9cb3889 567 return(NULL);
ce5e5ac4
MD
568}
569
c2b2044a
MD
570/*
571 * Write a record out. If this is a prefix set and the file would
572 * exceed record_size, we rotate into a new sequence number.
573 */
bb406b71 574void
c2b2044a 575jwrite(struct jfile *jf, struct jdata *jd)
5adba82e 576{
bb406b71
MD
577 struct stat st;
578 char *path;
5adba82e
MD
579 int n;
580
bb406b71
MD
581 assert(jf->jf_prefix);
582
583again:
584 /*
585 * Open/create a new file in the prefix set
586 */
587 if (jf->jf_write_fd < 0) {
588 asprintf(&path, "%s.%08x", jf->jf_prefix, jf->jf_seq_end);
589 jf->jf_write_fd = open(path, O_RDWR|O_CREAT, 0666);
590 if (jf->jf_write_fd < 0 || fstat(jf->jf_write_fd, &st) != 0) {
591 fprintf(stderr, "Unable to open/create %s\n", path);
592 exit(1);
593 }
594 jf->jf_write_pos = st.st_size;
595 lseek(jf->jf_write_fd, jf->jf_write_pos, 0);
596 free(path);
597 }
598
599 /*
600 * Each file must contain at least one raw record, even if it exceeds
601 * the user-requested record-size. Apart from that, we cycle to the next
602 * file when its size would exceed the user-specified
603 */
604 if (jf->jf_write_pos > 0 &&
605 jf->jf_write_pos + jd->jd_size > prefix_file_size
606 ) {
607 close(jf->jf_write_fd);
608 jf->jf_write_fd = -1;
609 ++jf->jf_seq_end;
610 goto again;
611 }
612
613 /*
614 * Terminate if a failure occurs (for now).
615 */
616 n = write(jf->jf_write_fd, jd->jd_data, jd->jd_size);
617 if (n != jd->jd_size) {
618 ftruncate(jf->jf_write_fd, jf->jf_write_pos);
619 fprintf(stderr, "jwrite: failed %s\n", strerror(errno));
620 exit(1);
621 }
622 jf->jf_write_pos += n;
623 jf->jf_last_transid = jd->jd_transid;
5adba82e
MD
624}
625
c2b2044a 626/*
a9cb3889
MD
627 * Attempt to locate and return the record specified by the transid. The
628 * returned record may be inexact.
c2b2044a 629 *
a9cb3889
MD
630 * If scanning forwards this function guarentees that no record prior
631 * to the returned record is >= transid.
632 *
633 * If scanning backwards this function guarentees that no record after
634 * the returned record is <= transid.
c2b2044a 635 */
a9cb3889 636struct jdata *
c2b2044a 637jseek(struct jfile *jf, int64_t transid, enum jdirection direction)
ce5e5ac4 638{
a9cb3889
MD
639 unsigned int seq;
640 struct jdata *jd = NULL;
c2b2044a
MD
641
642 /*
a9cb3889 643 * If the input is a pipe we can't seek.
c2b2044a 644 */
a9cb3889
MD
645 if (jmodes & JMODEF_INPUT_PIPE) {
646 assert(direction == JD_FORWARDS);
647 return (jread(jf, NULL, direction));
648 }
649
c2b2044a 650 if (jf->jf_prefix) {
a9cb3889
MD
651 /*
652 * If we have a prefix set search the sequence space backwards until
653 * we find the file most likely to contain the transaction id.
654 */
cbeb73b9
MD
655 if (verbose_opt > 2) {
656 fprintf(stderr, "jseek prefix set %s %08x-%08x\n", jf->jf_prefix,
657 jf->jf_seq_beg, jf->jf_seq_end);
658 }
a9cb3889 659 jd = NULL;
cbeb73b9 660 for (seq = jf->jf_seq_end; seq != jf->jf_seq_beg - 1; --seq) {
cbeb73b9
MD
661 if (verbose_opt > 2)
662 fprintf(stderr, "try seq %08x\n", seq);
a9cb3889
MD
663 jf->jf_seq = seq;
664 if ((jd = jread(jf, NULL, JD_SEQFIRST)) != NULL) {
665 if (jd->jd_transid == transid)
666 return(jd);
667 if (jd->jd_transid < transid) {
668 jfree(jf, jd);
c2b2044a
MD
669 break;
670 }
a9cb3889 671 jfree(jf, jd);
c2b2044a
MD
672 }
673 }
a9cb3889
MD
674
675 /*
676 * if transid is less the first file in the sequence space we
677 * return NULL if scanning backwards, indicating no records are
678 * available, or the first record in the sequence space if we
679 * are scanning forwards.
680 */
cbeb73b9 681 if (seq == jf->jf_seq_beg - 1) {
a9cb3889
MD
682 if (direction == JD_BACKWARDS)
683 return(NULL);
684 else
685 return(jread(jf, NULL, JD_FORWARDS));
cbeb73b9
MD
686 }
687 if (verbose_opt > 1)
688 fprintf(stderr, "jseek input prefix set to seq %08x\n", seq);
c2b2044a
MD
689 }
690
691 /*
a9cb3889
MD
692 * Position us to the end of the current record, then scan backwards
693 * looking for the requested transid.
c2b2044a 694 */
a9cb3889
MD
695 jd = jread(jf, NULL, JD_SEQLAST);
696 while (jd != NULL) {
697 if (jd->jd_transid <= transid) {
698 if (jd->jd_transid < transid) {
699 if (direction == JD_FORWARDS)
700 jd =jread(jf, jd, JD_FORWARDS);
c2b2044a 701 }
a9cb3889
MD
702 if (verbose_opt > 1) {
703 fprintf(stderr, "jseek returning seq %08x offset 0x%08llx\n",
704 jd->jd_seq, jd->jd_pos);
705 }
706 return(jd);
c2b2044a 707 }
a9cb3889 708 jd = jread(jf, jd, JD_BACKWARDS);
c2b2044a 709 }
a9cb3889
MD
710
711 /*
712 * We scanned the whole file with no luck, all the transid's are
713 * greater then the requested transid. If the intended read
714 * direction is backwards there are no records and we return NULL.
715 * If it is forwards we return the first record.
716 */
717 if (direction == JD_BACKWARDS)
718 return(NULL);
719 else
720 return(jread(jf, NULL, JD_FORWARDS));
c2b2044a
MD
721}
722
723/*
724 * Data returned by jread() is persistent until released.
725 */
726struct jdata *
727jref(struct jdata *jd)
728{
729 ++jd->jd_refs;
730 return(jd);
ce5e5ac4
MD
731}
732
733void
bb406b71 734jfree(struct jfile *jf __unused, struct jdata *jd)
ce5e5ac4 735{
bb406b71 736 if (--jd->jd_refs == 0)
c2b2044a 737 free(jd);
c2b2044a
MD
738}
739
740/*
741 * Align us to the next 16 byte boundary. If scanning forwards we align
742 * forwards if not already aligned. If scanning backwards we align
743 * backwards if not already aligned. We only have to synchronize the
744 * seek position with the file seek position for forward scans.
745 */
746static void
a9cb3889 747jalign(struct jfile *jf, enum jdirection direction)
c2b2044a
MD
748{
749 char dummy[16];
750 int bytes;
751
752 if ((int)jf->jf_pos & 15) {
a9cb3889 753 if (direction == JD_FORWARDS) {
c2b2044a 754 bytes = 16 - ((int)jf->jf_pos & 15);
a9cb3889 755 jreadbuf(jf, direction, dummy, bytes);
c2b2044a
MD
756 } else {
757 jf->jf_pos = jf->jf_pos & ~(off_t)15;
758 }
759 }
760}
761
762/*
763 * Read the next raw journal record forwards or backwards and return a
764 * pointer to it. Note that the file pointer's actual seek position does
765 * not match jf_pos in the reverse direction case.
766 */
767static int
a9cb3889 768jreadbuf(struct jfile *jf, enum jdirection direction, void *buf, int bytes)
c2b2044a
MD
769{
770 int ttl = 0;
771 int n;
772
773 if (jf->jf_fd < 0)
774 return(0);
775
a9cb3889 776 if (direction == JD_FORWARDS) {
c2b2044a
MD
777 while (ttl != bytes) {
778 n = read(jf->jf_fd, (char *)buf + ttl, bytes - ttl);
cbeb73b9
MD
779 if (n <= 0) {
780 if (n < 0 && ttl == 0)
781 ttl = -errno;
c2b2044a 782 break;
cbeb73b9 783 }
c2b2044a 784 ttl += n;
c3172c11 785 jf->jf_pos += n;
c2b2044a
MD
786 }
787 } else {
788 if (jf->jf_pos >= bytes) {
789 jf->jf_pos -= bytes;
790 lseek(jf->jf_fd, jf->jf_pos, 0);
791 while (ttl != bytes) {
792 n = read(jf->jf_fd, (char *)buf + ttl, bytes - ttl);
cbeb73b9
MD
793 if (n <= 0) {
794 if (n < 0 && ttl == 0)
795 ttl = -errno;
c2b2044a 796 break;
cbeb73b9 797 }
c2b2044a
MD
798 ttl += n;
799 }
800 }
801 }
802 return(ttl);
ce5e5ac4
MD
803}
804