2 * Copyright (c) 2004,2005 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sbin/jscan/jfile.c,v 1.6 2005/09/06 06:42:44 dillon Exp $
40 static void jalign(struct jfile *jf);
41 static int jreadbuf(struct jfile *jf, void *buf, int bytes);
44 * Open a file descriptor for journal record access.
46 * NOTE: only seekable descriptors are supported for backwards scans.
/*
 * jopen_fd: wrap an already-open descriptor in a freshly allocated,
 * zeroed struct jfile.  The handle is marked read-only and remembers the
 * requested scan direction.  For a JD_BACKWARDS scan jf_pos starts at the
 * end-of-file offset reported by lseek().
 *
 * NOTE(review): parts of this function are not visible in this listing;
 * jf_fd is presumably assigned from fd on one of the elided lines.
 */
49 jopen_fd(int fd, enum jdirection direction)
/* Allocate and clear the handle; the malloc() result is not checked here. */
53 jf = malloc(sizeof(struct jfile));
54 bzero(jf, sizeof(struct jfile));
/* Record the open mode as read-only. */
56 jf->jf_open_flags = O_RDONLY;
/*
 * Backwards scans begin at end-of-file.  The lseek() result is stored
 * as-is, so a failing lseek() leaves -1 in jf_pos.
 */
57 if (direction == JD_BACKWARDS) {
58 jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_END);
60 jf->jf_direction = direction;
65 * Open a prefix set. <prefix>.nnnnnnnnn files or a <prefix>.transid file
66 * must exist to succeed. No file descriptor is actually opened but
67 * the sequence number is initialized to the beginning or end of the set.
/*
 * jopen_prefix: scan the directory part of prefix for files named
 * <basename>.<number>, track the lowest and highest number found, and
 * check whether <prefix>.transid exists.  If either a numbered file or
 * the transid file is present, a zeroed struct jfile is allocated and
 * filled in with the prefix, the sequence range, the starting sequence
 * (end of range for JD_BACKWARDS, start of range otherwise), the
 * direction and the open flags derived from rw.
 *
 * (unsigned int)-1 in seq_beg/seq_end means no numbered file was found.
 *
 * NOTE(review): the numeric suffix is parsed with strtoul() in base 10,
 * while the file names generated in this file use the %08x (hex) format.
 * Suffixes containing a-f would be rejected by the *ptr == 0 test and
 * multi-digit suffixes would be interpreted differently from how they
 * were written -- confirm which base is intended.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing.
 */
70 jopen_prefix(const char *prefix, enum jdirection direction, int rw)
/* Sentinel values: no sequence number seen yet. */
73 unsigned int seq_beg = -1;
74 unsigned int seq_end = -1;
/* Work on a private copy of prefix so it can be split at the last slash. */
87 dirname = data = strdup(prefix);
88 if ((basename = strrchr(dirname, '/')) != NULL) {
94 baselen = strlen(basename);
95 if ((dir = opendir(dirname)) != NULL) {
96 while ((den = readdir(dir)) != NULL) {
/* Only entries of the form <basename>.<suffix> are candidates. */
97 if (strncmp(den->d_name, basename, baselen) == 0 &&
98 den->d_name[baselen] == '.'
/* The whole suffix must parse as a number and be non-zero. */
100 seq = strtoul(den->d_name + baselen + 1, &ptr, 10);
101 if (*ptr == 0 && seq > 0) {
/* Track the smallest and largest sequence numbers encountered. */
102 if (seq_beg == (unsigned int)-1 || seq_beg > seq)
104 if (seq_end == (unsigned int)-1 || seq_end < seq)
/* Probe for the <prefix>.transid file. */
114 asprintf(&data, "%s.transid", prefix);
115 if (stat(data, &st) == 0)
/* A prefix set exists if there is at least one numbered file or a transid file. */
119 if (seq_beg != (unsigned int)-1 || hastransid) {
/* Transid file only: sequence file number 0 is opened with O_CREAT here. */
120 if (seq_beg == (unsigned int)-1) {
124 asprintf(&data, "%s.%08x", prefix, 0);
125 if ((fd = open(data, O_RDWR|O_CREAT, 0666)) >= 0)
/* Allocate and clear the handle; the malloc() result is not checked here. */
130 jf = malloc(sizeof(struct jfile));
131 bzero(jf, sizeof(struct jfile));
133 jf->jf_prefix = strdup(prefix);
134 jf->jf_seq_beg = seq_beg;
135 jf->jf_seq_end = seq_end;
/* Start at the newest file when scanning backwards, else at the oldest. */
137 if (direction == JD_BACKWARDS) {
138 jf->jf_seq = jf->jf_seq_end;
140 jf->jf_seq = jf->jf_seq_beg;
142 jf->jf_direction = direction;
/* A non-zero rw requests read/write access with creation, otherwise read-only. */
143 jf->jf_open_flags = rw ? (O_RDWR|O_CREAT) : O_RDONLY;
151 * Get a prefix set ready for append.
/*
 * jrecord_init: prepare <prefix>.transid for appending.  The prefix set
 * is probed through jopen_prefix(); a jf_seq_beg other than
 * (unsigned int)-1 means numbered sequence files already exist.  The
 * transid file is then opened either without O_CREAT or with O_CREAT
 * (the condition selecting between the two opens is on lines not shown
 * in this listing).  An empty transid file is seeded with sixteen hex
 * zeros followed by a newline (17 bytes).
 *
 * NOTE(review): the write() result is not examined, so a short or failed
 * write of the initial transid goes unnoticed.
 */
154 jrecord_init(const char *prefix)
163 * Determine whether we already have a prefix set or whether we need
/* Probe only: direction 0 and rw 0 are passed to jopen_prefix(). */
166 jf = jopen_prefix(prefix, 0, 0);
169 if (jf->jf_seq_beg != (unsigned int)-1)
173 asprintf(&data, "%s.transid", prefix);
176 * If the sequence exists the transid file must ALREADY exist for us
177 * to be able to safely 'append' to the space. Locked-down sequence
178 * spaces do not have a transid file.
181 fd = open(data, O_RDWR, 0666);
183 fd = open(data, O_RDWR|O_CREAT, 0666);
188 if (fstat(fd, &st) == 0 && st.st_size == 0)
189 write(fd, "0000000000000000\n", 17); /* starting transid in hex */
195 * Close a previously opened journal, clean up any side allocations.
198 jclose(struct jfile *jf)
206 * Locate the next (or previous) complete virtual stream transaction given a
207 * file descriptor and direction. Keep track of partial stream records as
210 * Note that a transaction might represent a huge I/O operation, resulting
211 * in an overall node structure that spans gigabytes, but individual
212 * subrecord leaf nodes are limited in size and we depend on this to simplify
213 * the handling of leaf records.
215 * A transaction may cover several raw records. The jstream collection for
216 * a transaction is only returned when the entire transaction has been
217 * successfully scanned. Due to the interleaving of transactions the ordering
218 * of returned JS's may be different (not exactly reversed) when scanning a
219 * journal backwards versus forwards. Since parallel operations are
220 * theoretically non-conflicting, this should not present a problem.
/*
 * jread: read one raw journal record in the requested direction and
 * link it onto jf->jf_data.
 *
 * Visible steps:
 *  - If the direction differs from jf_direction and a descriptor is
 *    open, a switch to JD_FORWARDS re-seeks the descriptor to jf_pos;
 *    the new direction is then stored.
 *  - In prefix mode with no descriptor (jf_fd == -1), the file
 *    <prefix>.<seq formatted with %08x> is opened O_RDONLY; for a
 *    non-forward scan jf_pos is set to the end-of-file offset.
 *  - A jf_pos that is not a multiple of 16 triggers a warning and
 *    realignment.
 *  - Forwards: the record header is read first and begmagic must equal
 *    JREC_BEGMAGIC.  The record size is head.recsize rounded up to a
 *    multiple of 16 and must lie in [JREC_MINRECSIZE, JREC_MAXRECSIZE].
 *    A struct jdata large enough for the record (allocation rounded up
 *    to a multiple of 256) is allocated, the header is copied in, the
 *    rest of the record is read, and the trailer endmagic must equal
 *    JREC_ENDMAGIC.
 *  - Backwards: the mirror image.  The trailer is read first and copied
 *    to the end of the buffer, the preceding bytes are read, and the
 *    header begmagic is validated.
 *  - On success jd_transid, jd_alloc and jd_size are filled in and the
 *    record is pushed onto the front of the jf->jf_data list.
 *  - When a file is exhausted in prefix mode, jf_seq is compared with
 *    jf_seq_end (forwards) or jf_seq_beg (backwards) to decide whether
 *    another sequence file remains.
 *
 * Callers in this file treat a return value of 0 as a record having
 * been read.
 *
 * NOTE(review): both malloc() calls are followed directly by bzero()
 * with no NULL check.  Many lines of this function (error paths, loop
 * exits, return statements) are not visible in this listing.
 */
223 jread(struct jfile *jf, struct jdata **jdp, enum jdirection direction)
225 struct journal_rawrecbeg head;
226 struct journal_rawrecbeg *headp;
227 struct journal_rawrecend tail;
228 struct journal_rawrecend *tailp;
237 * If changing direction on an open descriptor we have to fixup jf_pos.
238 * When reading backwards the actual file seek position does not match
241 * If you read forwards then read backwords then
242 * read forwards, you will get the same record.
244 if (jf->jf_direction != direction) {
245 if (jf->jf_fd >= 0) {
246 if (direction == JD_FORWARDS) {
/* whence 0 is used here as the numeric form of SEEK_SET. */
247 lseek(jf->jf_fd, jf->jf_pos, 0);
250 jf->jf_direction = direction;
255 * If reading in prefix mode and we have no descriptor, open
256 * a new descriptor based on the current sequence number. If
257 * this fails we will fall all the way through to the end which will
258 * setup the next sequence number and loop.
260 if (jf->jf_fd == -1 && jf->jf_prefix) {
261 asprintf(&filename, "%s.%08x", jf->jf_prefix, jf->jf_seq);
262 if ((jf->jf_fd = open(filename, O_RDONLY)) >= 0) {
263 if (jf->jf_direction == JD_FORWARDS)
266 jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_END);
269 fprintf(stderr, "Open %s fd %d\n", filename, jf->jf_fd);
274 * Get the current offset and make sure it is 16-byte aligned. If it
275 * isn't, align it and enter search mode.
277 if (jf->jf_pos & 15) {
278 jf_warn(jf, "realigning bad offset and entering search mode");
285 if (jf->jf_direction == JD_FORWARDS) {
287 * Scan the journal forwards. Note that the file pointer might not
/* Keep going for as long as a complete header can be read. */
290 while (jreadbuf(jf, &head, sizeof(head)) == sizeof(head)) {
291 if (head.begmagic != JREC_BEGMAGIC) {
293 jf_warn(jf, "bad beginmagic, searching for new record");
300 * The actual record is 16-byte aligned. head.recsize contains
301 * the unaligned record size.
303 recsize = (head.recsize + 15) & ~15;
304 if (recsize < JREC_MINRECSIZE || recsize > JREC_MAXRECSIZE) {
306 jf_warn(jf, "bad recordsize: %d\n", recsize);
/* Size the jdata to hold recsize payload bytes, rounded up to 256. */
311 allocsize = offsetof(struct jdata, jd_data[recsize]);
312 allocsize = (allocsize + 255) & ~255;
313 jd = malloc(allocsize);
/* Only the jdata header is cleared; the payload is filled by the reads. */
314 bzero(jd, offsetof(struct jdata, jd_data[0]));
315 bcopy(&head, jd->jd_data, sizeof(head));
316 n = jreadbuf(jf, jd->jd_data + sizeof(head),
317 recsize - sizeof(head));
318 if (n != (int)(recsize - sizeof(head))) {
320 jf_warn(jf, "Incomplete stream record\n");
/* The trailer occupies the last sizeof(*tailp) bytes of the aligned record. */
327 tailp = (void *)(jd->jd_data + recsize - sizeof(*tailp));
328 if (tailp->endmagic != JREC_ENDMAGIC) {
330 jf_warn(jf, "bad endmagic, searching for new record");
338 * note: recsize is aligned (the actual record size),
339 * head.recsize is unaligned (the actual payload size).
341 jd->jd_transid = head.transid;
342 jd->jd_alloc = allocsize;
343 jd->jd_size = recsize;
/* Push the new record onto the front of the per-file list. */
345 jd->jd_next = jf->jf_data;
352 * Scan the journal backwards. Note that jread()'s reverse-seek and
353 * read. The data read will be forward ordered, however.
355 while (jreadbuf(jf, &tail, sizeof(tail)) == sizeof(tail)) {
356 if (tail.endmagic != JREC_ENDMAGIC) {
358 jf_warn(jf, "bad endmagic, searching for new record");
365 * The actual record is 16-byte aligned. head.recsize contains
366 * the unaligned record size.
368 recsize = (tail.recsize + 15) & ~15;
369 if (recsize < JREC_MINRECSIZE || recsize > JREC_MAXRECSIZE) {
371 jf_warn(jf, "bad recordsize: %d\n", recsize);
376 allocsize = offsetof(struct jdata, jd_data[recsize]);
377 allocsize = (allocsize + 255) & ~255;
378 jd = malloc(allocsize);
379 bzero(jd, offsetof(struct jdata, jd_data[0]));
380 bcopy(&tail, jd->jd_data + recsize - sizeof(tail), sizeof(tail));
381 n = jreadbuf(jf, jd->jd_data, recsize - sizeof(tail));
382 if (n != (int)(recsize - sizeof(tail))) {
384 jf_warn(jf, "Incomplete stream record\n");
391 headp = (void *)jd->jd_data;
392 if (headp->begmagic != JREC_BEGMAGIC) {
394 jf_warn(jf, "bad begmagic, searching for new record");
402 * note: recsize is aligned (the actual record size),
403 * head.recsize is unaligned (the actual payload size).
405 jd->jd_transid = headp->transid;
406 jd->jd_alloc = allocsize;
407 jd->jd_size = recsize;
409 jd->jd_next = jf->jf_data;
417 * If reading in prefix mode and there is no more data, close the
418 * current descriptor, adjust the sequence number, and loop.
423 if (jf->jf_direction == JD_FORWARDS) {
424 if (jf->jf_seq < jf->jf_seq_end) {
429 if (jf->jf_seq > jf->jf_seq_beg) {
437 * Otherwise there are no more records and we are done.
444 * Write a record out. If this is a prefix set and the file would
445 * exceed record_size, we rotate into a new sequence number.
/*
 * jwrite: write the raw record held in jd (jd_size bytes starting at
 * jd_data) to the descriptor jf_fd with a single write() call.
 *
 * NOTE(review): how the byte count n is checked and how file rotation
 * is performed are on lines not visible in this listing.
 */
448 jwrite(struct jfile *jf, struct jdata *jd)
452 n = write(jf->jf_fd, jd->jd_data, jd->jd_size);
457 * Reset the direction and seek us to the beginning or end
458 * of the current file. In prefix mode we might as well
459 * just let jread() do it since it might have to do it
/*
 * jreset: reposition jf for a scan in the given direction.  When a
 * descriptor is open, jf_pos is reloaded from lseek() at either the
 * start of the file (whence 0) or the end of the file (SEEK_END), and
 * the new direction is stored in jf_direction.
 *
 * NOTE(review): the handling of the seq argument and the conditions
 * choosing between the two lseek() calls are on lines not visible in
 * this listing.
 */
463 jreset(struct jfile *jf, unsigned int seq, enum jdirection direction)
466 if (jf->jf_fd >= 0) {
/* whence 0 is used here as the numeric form of SEEK_SET. */
474 jf->jf_pos = lseek(jf->jf_fd, 0L, 0);
476 jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_END);
479 jf->jf_direction = direction;
483 * Position the file such that the next jread() in the specified
484 * direction will read the record for the specified transaction id.
485 * If the transaction id does not exist the jseek will position the
486 * file at the next higher (if reading forwards) or lower (if reading
487 * backwards) transaction id.
489 * jseek is not required to be exact. It is allowed to position the
490 * file at any point <= the transid (forwards) or >= the transid
491 * (backwards). However, the more off jseek is, the more scanning
492 * the code will have to do to position itself properly.
/*
 * jseek: position jf so that the next jread() in the given direction
 * returns the record for transid, or a nearby one.
 *
 * Phase 1 walks the sequence numbers from jf_seq_end down towards
 * jf_seq_beg.  For each one the file is reset for a forward scan and
 * its first record is read; that record's transaction id is compared
 * with the target.
 *
 * Phase 2 resets the chosen file for a backwards scan and reads records
 * from the end, remembering each transaction id in transid_end.  The
 * final jread(JD_FORWARDS) calls adjust the position by one record,
 * as explained in the comment inside the function.
 *
 * NOTE(review): if seq is an unsigned type and jf_seq_beg is 0, the
 * loop condition seq >= jf->jf_seq_beg can never become false and
 * --seq wraps around; termination then depends entirely on the early
 * exits inside the loop.  The declaration of seq and those exits are on
 * lines not visible in this listing -- confirm.
 */
495 jseek(struct jfile *jf, int64_t transid, enum jdirection direction)
503 * If we have a prefix set search the sequence space backwards until
504 * we find the file most likely to contain the transaction id.
507 for (seq = jf->jf_seq_end; seq >= jf->jf_seq_beg; --seq) {
/* Look at the first record of this sequence file. */
508 jreset(jf, seq, JD_FORWARDS);
509 if (jread(jf, &jd, JD_FORWARDS) == 0) {
510 transid_beg = jd->jd_transid;
/* Exact hit on the first record: rewind so the caller re-reads it. */
512 if (transid_beg == transid) {
513 jreset(jf, seq, JD_FORWARDS);
/* This file starts below the target id. */
516 if (transid_beg < transid)
523 * Position us within the current file.
525 jreset(jf, seq, JD_BACKWARDS);
526 while (jread(jf, &jd, JD_BACKWARDS) == 0) {
527 transid_end = jd->jd_transid;
531 * If we are at the sequence number the next forward read
532 * will re-read the record since we were going backwards. If
533 * the caller wants to go backwards we have to go forwards one
534 * record so the caller gets the transid record when it does
535 * its first backwards read. Confused yet?
537 * If we are at a smaller sequence number we need to read forwards
538 * by one so the next forwards read gets the first record > transid,
539 * or the next backwards read gets the first record < transid.
541 if (transid_end == transid) {
542 if (direction == JD_BACKWARDS) {
543 if (jread(jf, &jd, JD_FORWARDS) == 0)
548 if (transid_end < transid) {
549 if (jread(jf, &jd, JD_FORWARDS) == 0)
556 * Data returned by jread() is persistent until released.
559 jref(struct jdata *jd)
/*
 * jfree: drop one reference on jd.  When the count reaches zero, the
 * jf->jf_data list is walked through the jd_next links until the link
 * that points at jd is found.  The assert fires if the end of the list
 * is reached first, i.e. if jd is not on this list.
 *
 * NOTE(review): the unlink and release of jd are presumably on lines
 * not visible in this listing.
 */
566 jfree(struct jfile *jf, struct jdata *jd)
570 if (--jd->jd_refs == 0){
/* jdp ends up addressing the pointer that refers to jd. */
571 for (jdp = &jf->jf_data; *jdp != jd; jdp = &(*jdp)->jd_next) {
572 assert(*jdp != NULL);
580 * Align us to the next 16 byte boundary. If scanning forwards we align
581 * forwards if not already aligned. If scanning backwards we align
582 * backwards if not already aligned. We only have to synchronize the
583 * seek position with the file seek position for forward scans.
/*
 * jalign: bring jf_pos onto a 16-byte boundary when it is not already
 * on one.  For a forward scan the padding bytes up to the next boundary
 * are consumed through jreadbuf() into a scratch buffer and the returned
 * count is added to jf_pos.  Otherwise jf_pos is rounded down by
 * clearing its low four bits.
 *
 * NOTE(review): jf_pos is advanced here by the jreadbuf() return value;
 * confirm that jreadbuf() does not also advance jf_pos itself, and that
 * it cannot return a negative count, since either would leave jf_pos
 * wrong.
 */
586 jalign(struct jfile *jf)
591 if ((int)jf->jf_pos & 15) {
592 if (jf->jf_direction == JD_FORWARDS) {
/* Number of bytes between the current offset and the next boundary. */
593 bytes = 16 - ((int)jf->jf_pos & 15);
594 jf->jf_pos += jreadbuf(jf, dummy, bytes);
/* Round down to the previous 16-byte boundary. */
596 jf->jf_pos = jf->jf_pos & ~(off_t)15;
602 * Read the next raw journal record forwards or backwards and return a
603 * pointer to it. Note that the file pointer's actual seek position does
604 * not match jf_pos in the reverse direction case.
607 jreadbuf(struct jfile *jf, void *buf, int bytes)
615 if (jf->jf_direction == JD_FORWARDS) {
616 while (ttl != bytes) {
617 n = read(jf->jf_fd, (char *)buf + ttl, bytes - ttl);
623 if (jf->jf_pos >= bytes) {
625 lseek(jf->jf_fd, jf->jf_pos, 0);
626 while (ttl != bytes) {
627 n = read(jf->jf_fd, (char *)buf + ttl, bytes - ttl);