2 * Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2002 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: journal.c,v 1.77.2.1.10.13 2005/11/03 23:08:41 marka Exp $ */
27 #include <isc/stdio.h>
28 #include <isc/string.h>
31 #include <dns/compress.h>
33 #include <dns/dbiterator.h>
35 #include <dns/fixedname.h>
36 #include <dns/journal.h>
38 #include <dns/rdataset.h>
39 #include <dns/rdatasetiter.h>
40 #include <dns/result.h>
44 * When true, accept IXFR difference sequences where the
45 * SOA serial number does not change (BIND 8 sends such
48 static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */
50 /**************************************************************************/
52 * Miscellaneous utilities.
55 #define JOURNAL_COMMON_LOGARGS \
56 dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL
58 #define JOURNAL_DEBUG_LOGARGS(n) \
59 JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)
62 * It would be nonsensical (or at least obtuse) to use FAIL() with an
63 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
64 * from complaining about "end-of-loop code not reached".
67 do { result = (code); \
68 if (result != ISC_R_SUCCESS) goto failure; \
73 if (result != ISC_R_SUCCESS) goto failure; \
76 static isc_result_t index_to_disk(dns_journal_t *);
78 static inline isc_uint32_t
79 decode_uint32(unsigned char *p) {
80 return ((p[0] << 24) +
87 encode_uint32(isc_uint32_t val, unsigned char *p) {
88 p[0] = (isc_uint8_t)(val >> 24);
89 p[1] = (isc_uint8_t)(val >> 16);
90 p[2] = (isc_uint8_t)(val >> 8);
91 p[3] = (isc_uint8_t)(val >> 0);
95 dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx,
96 dns_diffop_t op, dns_difftuple_t **tp)
100 dns_rdataset_t rdataset;
101 dns_rdata_t rdata = DNS_RDATA_INIT;
102 dns_name_t *zonename;
104 zonename = dns_db_origin(db);
107 result = dns_db_findnode(db, zonename, ISC_FALSE, &node);
108 if (result != ISC_R_SUCCESS)
111 dns_rdataset_init(&rdataset);
112 result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0,
113 (isc_stdtime_t)0, &rdataset, NULL);
114 if (result != ISC_R_SUCCESS)
117 result = dns_rdataset_first(&rdataset);
118 if (result != ISC_R_SUCCESS)
121 dns_rdataset_current(&rdataset, &rdata);
123 result = dns_difftuple_create(mctx, op, zonename, rdataset.ttl,
126 dns_rdataset_disassociate(&rdataset);
127 dns_db_detachnode(db, &node);
128 return (ISC_R_SUCCESS);
131 dns_db_detachnode(db, &node);
133 UNEXPECTED_ERROR(__FILE__, __LINE__, "missing SOA");
137 /**************************************************************************/
143 * A journal file consists of
145 * - A fixed-size header of type journal_rawheader_t.
147 * - The index. This is an unordered array of index entries
148 * of type journal_rawpos_t giving the locations
149 * of some arbitrary subset of the journal's addressable
150 * transactions. The index entries are used as hints to
151 * speed up the process of locating a transaction with a given
152 * serial number. Unused index entries have an "offset"
153 * field of zero. The size of the index can vary between
154 * journal files, but does not change during the lifetime
155 * of a file. The size can be zero.
157 * - The journal data. This consists of one or more transactions.
158 * Each transaction begins with a transaction header of type
159 * journal_rawxhdr_t. The transaction header is followed by a
160 * sequence of RRs, similar in structure to an IXFR difference
161 * sequence (RFC1995). That is, the pre-transaction SOA,
162 * zero or more other deleted RRs, the post-transaction SOA,
163 * and zero or more other added RRs. Unlike in IXFR, each RR
164 * is prefixed with a 32-bit length.
166 * The journal data part grows as new transactions are
167 * appended to the file. Only those transactions
168 * whose serial number is current-(2^31-1) to current
169 * are considered "addressable" and may be pointed
170 * to from the header or index. They may be preceded
171 * by old transactions that are no longer addressable,
172 * and they may be followed by transactions that were
173 * appended to the journal but never committed by updating
174 * the "end" position in the header. The latter will
175 * be overwritten when new transactions are added.
179 * On-disk representation of a "pointer" to a journal entry.
180 * These are used in the journal header to locate the beginning
181 * and end of the journal, and in the journal index to locate
182 * other transactions.
185 unsigned char serial[4]; /* SOA serial before update. */
187 * XXXRTH Should offset be 8 bytes?
188 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
189 * XXXAG ... but we will not be able to seek >2G anyway on many
190 * platforms as long as we are using fseek() rather
193 unsigned char offset[4]; /* Offset from beginning of file. */
197 * The on-disk representation of the journal header.
198 * All numbers are stored in big-endian order.
202 * The header is of a fixed size, with some spare room for future
205 #define JOURNAL_HEADER_SIZE 64 /* Bytes. */
209 /* File format version ID. */
210 unsigned char format[16];
211 /* Position of the first addressable transaction */
212 journal_rawpos_t begin;
213 /* Position of the next (yet nonexistent) transaction. */
214 journal_rawpos_t end;
215 /* Number of index entries following the header. */
216 unsigned char index_size[4];
218 /* Pad the header to a fixed size. */
219 unsigned char pad[JOURNAL_HEADER_SIZE];
220 } journal_rawheader_t;
223 * The on-disk representation of the transaction header.
224 * There is one of these at the beginning of each transaction.
227 unsigned char size[4]; /* In bytes, excluding header. */
228 unsigned char serial0[4]; /* SOA serial before update. */
229 unsigned char serial1[4]; /* SOA serial after update. */
233 * The on-disk representation of the RR header.
234 * There is one of these at the beginning of each RR.
237 unsigned char size[4]; /* In bytes, excluding header. */
238 } journal_rawrrhdr_t;
241 * The in-core representation of the journal header.
248 #define POS_VALID(pos) ((pos).offset != 0)
249 #define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0)
252 unsigned char format[16];
255 isc_uint32_t index_size;
259 * The in-core representation of the transaction header.
264 isc_uint32_t serial0;
265 isc_uint32_t serial1;
269 * The in-core representation of the RR header.
277 * Initial contents to store in the header of a newly created
280 * The header starts with the magic string ";BIND LOG V9\n"
281 * to identify the file as a BIND 9 journal file. An ASCII
282 * identification string is used rather than a binary magic
283 * number to be consistent with BIND 8 (BIND 8 journal files
284 * are ASCII text files).
287 static journal_header_t
288 initial_journal_header = { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0 };
290 #define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)
293 JOURNAL_STATE_INVALID,
296 JOURNAL_STATE_TRANSACTION
300 unsigned int magic; /* JOUR */
301 isc_mem_t *mctx; /* Memory context */
302 journal_state_t state;
303 const char *filename; /* Journal file name */
304 FILE * fp; /* File handle */
305 isc_offset_t offset; /* Current file offset */
306 journal_header_t header; /* In-core journal header */
307 unsigned char *rawindex; /* In-core buffer for journal
308 index in on-disk format */
309 journal_pos_t *index; /* In-core journal index */
311 /* Current transaction state (when writing). */
313 unsigned int n_soa; /* Number of SOAs seen */
314 journal_pos_t pos[2]; /* Begin/end position */
317 /* Iteration state (when reading). */
319 /* These define the part of the journal we iterate over. */
320 journal_pos_t bpos; /* Position before first, */
321 journal_pos_t epos; /* and after last
323 /* The rest is iterator state. */
324 isc_uint32_t current_serial; /* Current SOA serial */
325 isc_buffer_t source; /* Data from disk */
326 isc_buffer_t target; /* Data from _fromwire check */
327 dns_decompress_t dctx; /* Dummy decompression ctx */
328 dns_name_t name; /* Current domain name */
329 dns_rdata_t rdata; /* Current rdata */
330 isc_uint32_t ttl; /* Current TTL */
331 unsigned int xsize; /* Size of transaction data */
332 unsigned int xpos; /* Current position in it */
333 isc_result_t result; /* Result of last call */
337 #define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R')
338 #define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
341 journal_pos_decode(journal_rawpos_t *raw, journal_pos_t *cooked) {
342 cooked->serial = decode_uint32(raw->serial);
343 cooked->offset = decode_uint32(raw->offset);
347 journal_pos_encode(journal_rawpos_t *raw, journal_pos_t *cooked) {
348 encode_uint32(cooked->serial, raw->serial);
349 encode_uint32(cooked->offset, raw->offset);
353 journal_header_decode(journal_rawheader_t *raw, journal_header_t *cooked) {
354 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
355 memcpy(cooked->format, raw->h.format, sizeof(cooked->format));
356 journal_pos_decode(&raw->h.begin, &cooked->begin);
357 journal_pos_decode(&raw->h.end, &cooked->end);
358 cooked->index_size = decode_uint32(raw->h.index_size);
362 journal_header_encode(journal_header_t *cooked, journal_rawheader_t *raw) {
363 INSIST(sizeof(cooked->format) == sizeof(raw->h.format));
364 memset(raw->pad, 0, sizeof(raw->pad));
365 memcpy(raw->h.format, cooked->format, sizeof(raw->h.format));
366 journal_pos_encode(&raw->h.begin, &cooked->begin);
367 journal_pos_encode(&raw->h.end, &cooked->end);
368 encode_uint32(cooked->index_size, raw->h.index_size);
372 * Journal file I/O subroutines, with error checking and reporting.
375 journal_seek(dns_journal_t *j, isc_uint32_t offset) {
377 result = isc_stdio_seek(j->fp, (long)offset, SEEK_SET);
378 if (result != ISC_R_SUCCESS) {
379 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
380 "%s: seek: %s", j->filename,
381 isc_result_totext(result));
382 return (ISC_R_UNEXPECTED);
385 return (ISC_R_SUCCESS);
389 journal_read(dns_journal_t *j, void *mem, size_t nbytes) {
392 result = isc_stdio_read(mem, 1, nbytes, j->fp, NULL);
393 if (result != ISC_R_SUCCESS) {
394 if (result == ISC_R_EOF)
395 return (ISC_R_NOMORE);
396 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
398 j->filename, isc_result_totext(result));
399 return (ISC_R_UNEXPECTED);
402 return (ISC_R_SUCCESS);
406 journal_write(dns_journal_t *j, void *mem, size_t nbytes) {
409 result = isc_stdio_write(mem, 1, nbytes, j->fp, NULL);
410 if (result != ISC_R_SUCCESS) {
411 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
413 j->filename, isc_result_totext(result));
414 return (ISC_R_UNEXPECTED);
417 return (ISC_R_SUCCESS);
421 journal_fsync(dns_journal_t *j) {
423 result = isc_stdio_flush(j->fp);
424 if (result != ISC_R_SUCCESS) {
425 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
427 j->filename, isc_result_totext(result));
428 return (ISC_R_UNEXPECTED);
430 result = isc_stdio_sync(j->fp);
431 if (result != ISC_R_SUCCESS) {
432 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
434 j->filename, isc_result_totext(result));
435 return (ISC_R_UNEXPECTED);
437 return (ISC_R_SUCCESS);
441 * Read/write a transaction header at the current file position.
445 journal_read_xhdr(dns_journal_t *j, journal_xhdr_t *xhdr) {
446 journal_rawxhdr_t raw;
448 result = journal_read(j, &raw, sizeof(raw));
449 if (result != ISC_R_SUCCESS)
451 xhdr->size = decode_uint32(raw.size);
452 xhdr->serial0 = decode_uint32(raw.serial0);
453 xhdr->serial1 = decode_uint32(raw.serial1);
454 return (ISC_R_SUCCESS);
458 journal_write_xhdr(dns_journal_t *j, isc_uint32_t size,
459 isc_uint32_t serial0, isc_uint32_t serial1)
461 journal_rawxhdr_t raw;
462 encode_uint32(size, raw.size);
463 encode_uint32(serial0, raw.serial0);
464 encode_uint32(serial1, raw.serial1);
465 return (journal_write(j, &raw, sizeof(raw)));
470 * Read an RR header at the current file position.
474 journal_read_rrhdr(dns_journal_t *j, journal_rrhdr_t *rrhdr) {
475 journal_rawrrhdr_t raw;
477 result = journal_read(j, &raw, sizeof(raw));
478 if (result != ISC_R_SUCCESS)
480 rrhdr->size = decode_uint32(raw.size);
481 return (ISC_R_SUCCESS);
485 journal_file_create(isc_mem_t *mctx, const char *filename) {
488 journal_header_t header;
489 journal_rawheader_t rawheader;
490 int index_size = 56; /* XXX configurable */
492 void *mem; /* Memory for temporary index image. */
494 INSIST(sizeof(journal_rawheader_t) == JOURNAL_HEADER_SIZE);
496 result = isc_stdio_open(filename, "wb", &fp);
497 if (result != ISC_R_SUCCESS) {
498 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
500 filename, isc_result_totext(result));
501 return (ISC_R_UNEXPECTED);
504 header = initial_journal_header;
505 header.index_size = index_size;
506 journal_header_encode(&header, &rawheader);
508 size = sizeof(journal_rawheader_t) +
509 index_size * sizeof(journal_rawpos_t);
511 mem = isc_mem_get(mctx, size);
513 (void)isc_stdio_close(fp);
514 (void)isc_file_remove(filename);
515 return (ISC_R_NOMEMORY);
517 memset(mem, 0, size);
518 memcpy(mem, &rawheader, sizeof(rawheader));
520 result = isc_stdio_write(mem, 1, (size_t) size, fp, NULL);
521 if (result != ISC_R_SUCCESS) {
522 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
524 filename, isc_result_totext(result));
525 (void)isc_stdio_close(fp);
526 (void)isc_file_remove(filename);
527 isc_mem_put(mctx, mem, size);
528 return (ISC_R_UNEXPECTED);
530 isc_mem_put(mctx, mem, size);
532 result = isc_stdio_close(fp);
533 if (result != ISC_R_SUCCESS) {
534 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
536 filename, isc_result_totext(result));
537 (void)isc_file_remove(filename);
538 return (ISC_R_UNEXPECTED);
541 return (ISC_R_SUCCESS);
545 journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write,
546 isc_boolean_t create, dns_journal_t **journalp) {
549 journal_rawheader_t rawheader;
552 INSIST(journalp != NULL && *journalp == NULL);
553 j = isc_mem_get(mctx, sizeof(*j));
555 return (ISC_R_NOMEMORY);
558 j->state = JOURNAL_STATE_INVALID;
560 j->filename = filename;
564 result = isc_stdio_open(j->filename, write ? "rb+" : "rb", &fp);
566 if (result == ISC_R_FILENOTFOUND) {
568 isc_log_write(JOURNAL_COMMON_LOGARGS,
570 "journal file %s does not exist, "
573 CHECK(journal_file_create(mctx, filename));
577 result = isc_stdio_open(j->filename, "rb+", &fp);
579 FAIL(ISC_R_NOTFOUND);
582 if (result != ISC_R_SUCCESS) {
583 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
585 j->filename, isc_result_totext(result));
586 FAIL(ISC_R_UNEXPECTED);
592 * Set magic early so that seek/read can succeed.
594 j->magic = DNS_JOURNAL_MAGIC;
596 CHECK(journal_seek(j, 0));
597 CHECK(journal_read(j, &rawheader, sizeof(rawheader)));
599 if (memcmp(rawheader.h.format, initial_journal_header.format,
600 sizeof(initial_journal_header.format)) != 0) {
601 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
602 "%s: journal format not recognized",
604 FAIL(ISC_R_UNEXPECTED);
606 journal_header_decode(&rawheader, &j->header);
609 * If there is an index, read the raw index into a dynamically
610 * allocated buffer and then convert it into a cooked index.
612 if (j->header.index_size != 0) {
614 unsigned int rawbytes;
617 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
618 j->rawindex = isc_mem_get(mctx, rawbytes);
619 if (j->rawindex == NULL)
620 FAIL(ISC_R_NOMEMORY);
622 CHECK(journal_read(j, j->rawindex, rawbytes));
624 j->index = isc_mem_get(mctx, j->header.index_size *
625 sizeof(journal_pos_t));
626 if (j->index == NULL)
627 FAIL(ISC_R_NOMEMORY);
630 for (i = 0; i < j->header.index_size; i++) {
631 j->index[i].serial = decode_uint32(p);
633 j->index[i].offset = decode_uint32(p);
636 INSIST(p == j->rawindex + rawbytes);
638 j->offset = -1; /* Invalid, must seek explicitly. */
641 * Initialize the iterator.
643 dns_name_init(&j->it.name, NULL);
644 dns_rdata_init(&j->it.rdata);
647 * Set up empty initial buffers for unchecked and checked
648 * wire format RR data. They will be reallocated
651 isc_buffer_init(&j->it.source, NULL, 0);
652 isc_buffer_init(&j->it.target, NULL, 0);
653 dns_decompress_init(&j->it.dctx, -1, DNS_DECOMPRESS_NONE);
656 write ? JOURNAL_STATE_WRITE : JOURNAL_STATE_READ;
659 return (ISC_R_SUCCESS);
663 if (j->index != NULL) {
664 isc_mem_put(j->mctx, j->index, j->header.index_size *
665 sizeof(journal_pos_t)); /* size must match the isc_mem_get() that allocated j->index */
669 (void)isc_stdio_close(j->fp);
670 isc_mem_put(j->mctx, j, sizeof(*j));
675 dns_journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write,
676 dns_journal_t **journalp) {
677 return (journal_open(mctx, filename, write, write, journalp));
681 * A comparison function defining the sorting order for
682 * entries in the IXFR-style journal file.
684 * The IXFR format requires that deletions are sorted before
685 * additions, and within either one, SOA records are sorted
688 * Also sort the non-SOA records by type as a courtesy to the
689 * server receiving the IXFR - it may help reduce the amount of
690 * rdataset merging it has to do.
693 ixfr_order(const void *av, const void *bv) {
694 dns_difftuple_t const * const *ap = av;
695 dns_difftuple_t const * const *bp = bv;
696 dns_difftuple_t const *a = *ap;
697 dns_difftuple_t const *b = *bp;
700 r = (b->op == DNS_DIFFOP_DEL) - (a->op == DNS_DIFFOP_DEL);
704 r = (b->rdata.type == dns_rdatatype_soa) -
705 (a->rdata.type == dns_rdatatype_soa);
709 r = (a->rdata.type - b->rdata.type);
714 * Advance '*pos' to the next journal transaction.
717 * *pos refers to a valid journal transaction.
720 * When ISC_R_SUCCESS is returned,
721 * *pos refers to the next journal transaction.
726 * ISC_R_NOMORE *pos pointed at the last transaction
727 * Other results due to file errors are possible.
730 journal_next(dns_journal_t *j, journal_pos_t *pos) {
733 REQUIRE(DNS_JOURNAL_VALID(j));
735 result = journal_seek(j, pos->offset);
736 if (result != ISC_R_SUCCESS)
739 if (pos->serial == j->header.end.serial)
740 return (ISC_R_NOMORE);
742 * Read the header of the current transaction.
743 * This will return ISC_R_NOMORE if we are at EOF.
745 result = journal_read_xhdr(j, &xhdr);
746 if (result != ISC_R_SUCCESS)
750 * Check serial number consistency.
752 if (xhdr.serial0 != pos->serial) {
753 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
754 "%s: journal file corrupt: "
755 "expected serial %u, got %u",
756 j->filename, pos->serial, xhdr.serial0);
757 return (ISC_R_UNEXPECTED);
761 * Check for offset wraparound.
763 if ((isc_offset_t)(pos->offset + sizeof(journal_rawxhdr_t) + xhdr.size)
765 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
766 "%s: offset too large", j->filename);
767 return (ISC_R_UNEXPECTED);
770 pos->offset += sizeof(journal_rawxhdr_t) + xhdr.size;
771 pos->serial = xhdr.serial1;
772 return (ISC_R_SUCCESS);
776 * If the index of the journal 'j' contains an entry "better"
777 * than '*best_guess', replace '*best_guess' with it.
779 * "Better" means having a serial number closer to 'serial'
780 * but not greater than 'serial'.
783 index_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *best_guess) {
785 if (j->index == NULL)
787 for (i = 0; i < j->header.index_size; i++) {
788 if (POS_VALID(j->index[i]) &&
789 DNS_SERIAL_GE(serial, j->index[i].serial) &&
790 DNS_SERIAL_GT(j->index[i].serial, best_guess->serial))
791 *best_guess = j->index[i];
796 * Add a new index entry. If there is no room, make room by removing
797 * the odd-numbered entries and compacting the others into the first
798 * half of the index. This decimates old index entries exponentially
799 * over time, so that the index always contains a much larger fraction
800 * of recent serial numbers than of old ones. This is deliberate -
801 * most index searches are for outgoing IXFR, and IXFR tends to request
802 * recent versions more often than old ones.
805 index_add(dns_journal_t *j, journal_pos_t *pos) {
807 if (j->index == NULL)
810 * Search for a vacant position.
812 for (i = 0; i < j->header.index_size; i++) {
813 if (! POS_VALID(j->index[i]))
816 if (i == j->header.index_size) {
819 * Found no vacant position. Make some room.
821 for (i = 0; i < j->header.index_size; i += 2) {
822 j->index[k++] = j->index[i];
824 i = k; /* 'i' identifies the first vacant position. */
825 while (k < j->header.index_size) {
826 POS_INVALIDATE(j->index[k]);
830 INSIST(i < j->header.index_size);
831 INSIST(! POS_VALID(j->index[i]));
834 * Store the new index entry.
840 * Invalidate any existing index entries that could become
841 * ambiguous when a new transaction with number 'serial' is added.
844 index_invalidate(dns_journal_t *j, isc_uint32_t serial) {
846 if (j->index == NULL)
848 for (i = 0; i < j->header.index_size; i++) {
849 if (! DNS_SERIAL_GT(serial, j->index[i].serial))
850 POS_INVALIDATE(j->index[i]);
855 * Try to find a transaction with initial serial number 'serial'
856 * in the journal 'j'.
858 * If found, store its position at '*pos' and return ISC_R_SUCCESS.
860 * If 'serial' is current (= the ending serial number of the
861 * last transaction in the journal), set '*pos' to
862 * the position immediately following the last transaction and
863 * return ISC_R_SUCCESS.
865 * If 'serial' is within the range of addressable serial numbers
866 * covered by the journal but that particular serial number is missing
867 * (from the journal, not just from the index), return ISC_R_NOTFOUND.
869 * If 'serial' is outside the range of addressable serial numbers
870 * covered by the journal, return ISC_R_RANGE.
874 journal_find(dns_journal_t *j, isc_uint32_t serial, journal_pos_t *pos) {
876 journal_pos_t current_pos;
877 REQUIRE(DNS_JOURNAL_VALID(j));
879 if (DNS_SERIAL_GT(j->header.begin.serial, serial))
880 return (ISC_R_RANGE);
881 if (DNS_SERIAL_GT(serial, j->header.end.serial))
882 return (ISC_R_RANGE);
883 if (serial == j->header.end.serial) {
884 *pos = j->header.end;
885 return (ISC_R_SUCCESS);
888 current_pos = j->header.begin;
889 index_find(j, serial, ¤t_pos);
891 while (current_pos.serial != serial) {
892 if (DNS_SERIAL_GT(current_pos.serial, serial))
893 return (ISC_R_NOTFOUND);
894 result = journal_next(j, ¤t_pos);
895 if (result != ISC_R_SUCCESS)
899 return (ISC_R_SUCCESS);
903 dns_journal_begin_transaction(dns_journal_t *j) {
906 journal_rawxhdr_t hdr;
908 REQUIRE(DNS_JOURNAL_VALID(j));
909 REQUIRE(j->state == JOURNAL_STATE_WRITE);
912 * Find the file offset where the new transaction should
913 * be written, and seek there.
915 if (JOURNAL_EMPTY(&j->header)) {
916 offset = sizeof(journal_rawheader_t) +
917 j->header.index_size * sizeof(journal_rawpos_t);
919 offset = j->header.end.offset;
921 j->x.pos[0].offset = offset;
922 j->x.pos[1].offset = offset; /* Initial value, will be incremented. */
925 CHECK(journal_seek(j, offset));
928 * Write a dummy transaction header of all zeroes to reserve
929 * space. It will be filled in when the transaction is
932 memset(&hdr, 0, sizeof(hdr));
933 CHECK(journal_write(j, &hdr, sizeof(hdr)));
934 j->x.pos[1].offset = j->offset;
936 j->state = JOURNAL_STATE_TRANSACTION;
937 result = ISC_R_SUCCESS;
943 dns_journal_writediff(dns_journal_t *j, dns_diff_t *diff) {
951 REQUIRE(DNS_DIFF_VALID(diff));
952 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
954 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
955 (void)dns_diff_print(diff, NULL);
958 * Pass 1: determine the buffer size needed, and
959 * keep track of SOA serial numbers.
962 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
963 t = ISC_LIST_NEXT(t, link))
965 if (t->rdata.type == dns_rdatatype_soa) {
967 j->x.pos[j->x.n_soa].serial =
968 dns_soa_getserial(&t->rdata);
971 size += sizeof(journal_rawrrhdr_t);
972 size += t->name.length; /* XXX should have access macro? */
974 size += t->rdata.length;
977 mem = isc_mem_get(j->mctx, size);
979 return (ISC_R_NOMEMORY);
981 isc_buffer_init(&buffer, mem, size);
984 * Pass 2. Write RRs to buffer.
986 for (t = ISC_LIST_HEAD(diff->tuples); t != NULL;
987 t = ISC_LIST_NEXT(t, link))
990 * Write the RR header.
992 isc_buffer_putuint32(&buffer, t->name.length + 10 +
995 * Write the owner name, RR header, and RR data.
997 isc_buffer_putmem(&buffer, t->name.ndata, t->name.length);
998 isc_buffer_putuint16(&buffer, t->rdata.type);
999 isc_buffer_putuint16(&buffer, t->rdata.rdclass);
1000 isc_buffer_putuint32(&buffer, t->ttl);
1001 INSIST(t->rdata.length < 65536);
1002 isc_buffer_putuint16(&buffer, (isc_uint16_t)t->rdata.length);
1003 INSIST(isc_buffer_availablelength(&buffer) >= t->rdata.length);
1004 isc_buffer_putmem(&buffer, t->rdata.data, t->rdata.length);
1007 isc_buffer_usedregion(&buffer, &used);
1008 INSIST(used.length == size);
1010 j->x.pos[1].offset += used.length;
1013 * Write the buffer contents to the journal file.
1015 CHECK(journal_write(j, used.base, used.length));
1017 result = ISC_R_SUCCESS;
1021 isc_mem_put(j->mctx, mem, size);
1027 dns_journal_commit(dns_journal_t *j) {
1028 isc_result_t result;
1029 journal_rawheader_t rawheader;
1031 REQUIRE(DNS_JOURNAL_VALID(j));
1032 REQUIRE(j->state == JOURNAL_STATE_TRANSACTION);
1035 * Perform some basic consistency checks.
1037 if (j->x.n_soa != 2) {
1038 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1039 "%s: malformed transaction: %d SOAs",
1040 j->filename, j->x.n_soa);
1041 return (ISC_R_UNEXPECTED);
1043 if (! (DNS_SERIAL_GT(j->x.pos[1].serial, j->x.pos[0].serial) ||
1045 j->x.pos[1].serial == j->x.pos[0].serial)))
1047 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1048 "%s: malformed transaction: serial number "
1049 "would decrease", j->filename);
1050 return (ISC_R_UNEXPECTED);
1052 if (! JOURNAL_EMPTY(&j->header)) {
1053 if (j->x.pos[0].serial != j->header.end.serial) {
1054 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1055 "malformed transaction: "
1056 "%s last serial %u != "
1057 "transaction first serial %u",
1059 j->header.end.serial,
1060 j->x.pos[0].serial);
1061 return (ISC_R_UNEXPECTED);
1066 * Some old journal entries may become non-addressable
1067 * when we increment the current serial number. Purge them
1068 * by stepping header.begin forward to the first addressable
1069 * transaction. Also purge them from the index.
1071 if (! JOURNAL_EMPTY(&j->header)) {
1072 while (! DNS_SERIAL_GT(j->x.pos[1].serial,
1073 j->header.begin.serial)) {
1074 CHECK(journal_next(j, &j->header.begin));
1076 index_invalidate(j, j->x.pos[1].serial);
1079 if (DNS_SERIAL_GT(last_dumped_serial, j->x.pos[1].serial)) {
1085 * Commit the transaction data to stable storage.
1087 CHECK(journal_fsync(j));
1090 * Update the transaction header.
1092 CHECK(journal_seek(j, j->x.pos[0].offset));
1093 CHECK(journal_write_xhdr(j, (j->x.pos[1].offset - j->x.pos[0].offset) -
1094 sizeof(journal_rawxhdr_t),
1095 j->x.pos[0].serial, j->x.pos[1].serial));
1098 * Update the journal header.
1100 if (JOURNAL_EMPTY(&j->header)) {
1101 j->header.begin = j->x.pos[0];
1103 j->header.end = j->x.pos[1];
1104 journal_header_encode(&j->header, &rawheader);
1105 CHECK(journal_seek(j, 0));
1106 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
1111 index_add(j, &j->x.pos[0]);
1114 * Convert the index into on-disk format and write
1117 CHECK(index_to_disk(j));
1120 * Commit the header to stable storage.
1122 CHECK(journal_fsync(j));
1125 * We no longer have a transaction open.
1127 j->state = JOURNAL_STATE_WRITE;
1129 result = ISC_R_SUCCESS;
1136 dns_journal_write_transaction(dns_journal_t *j, dns_diff_t *diff) {
1137 isc_result_t result;
1138 CHECK(dns_diff_sort(diff, ixfr_order));
1139 CHECK(dns_journal_begin_transaction(j));
1140 CHECK(dns_journal_writediff(j, diff));
1141 CHECK(dns_journal_commit(j));
1142 result = ISC_R_SUCCESS;
1148 dns_journal_destroy(dns_journal_t **journalp) {
1149 dns_journal_t *j = *journalp;
1150 REQUIRE(DNS_JOURNAL_VALID(j));
1152 j->it.result = ISC_R_FAILURE;
1153 dns_name_invalidate(&j->it.name);
1154 dns_decompress_invalidate(&j->it.dctx);
1155 if (j->rawindex != NULL)
1156 isc_mem_put(j->mctx, j->rawindex, j->header.index_size *
1157 sizeof(journal_rawpos_t));
1158 if (j->index != NULL)
1159 isc_mem_put(j->mctx, j->index, j->header.index_size *
1160 sizeof(journal_pos_t));
1161 if (j->it.target.base != NULL)
1162 isc_mem_put(j->mctx, j->it.target.base, j->it.target.length);
1163 if (j->it.source.base != NULL)
1164 isc_mem_put(j->mctx, j->it.source.base, j->it.source.length);
1167 (void)isc_stdio_close(j->fp);
1169 isc_mem_put(j->mctx, j, sizeof(*j));
1174 * Roll the open journal 'j' into the database 'db'.
1175 * A new database version will be created.
1178 /* XXX Share code with incoming IXFR? */
1181 roll_forward(dns_journal_t *j, dns_db_t *db) {
1182 isc_buffer_t source; /* Transaction data from disk */
1183 isc_buffer_t target; /* Ditto after _fromwire check */
1184 isc_uint32_t db_serial; /* Database SOA serial */
1185 isc_uint32_t end_serial; /* Last journal SOA serial */
1186 isc_result_t result;
1187 dns_dbversion_t *ver = NULL;
1190 unsigned int n_soa = 0;
1191 unsigned int n_put = 0;
1193 REQUIRE(DNS_JOURNAL_VALID(j));
1194 REQUIRE(DNS_DB_VALID(db));
1196 dns_diff_init(j->mctx, &diff);
1199 * Set up empty initial buffers for unchecked and checked
1200 * wire format transaction data. They will be reallocated
1203 isc_buffer_init(&source, NULL, 0);
1204 isc_buffer_init(&target, NULL, 0);
1207 * Create the new database version.
1209 CHECK(dns_db_newversion(db, &ver));
1212 * Get the current database SOA serial number.
1214 CHECK(dns_db_getsoaserial(db, ver, &db_serial));
1217 * Locate a journal entry for the current database serial.
1219 CHECK(journal_find(j, db_serial, &pos));
1221 * XXX do more drastic things, like marking zone stale,
1225 * XXXRTH The zone code should probably mark the zone as bad and
1226 * scream loudly into the log if this is a dynamic update
1227 * log replay that failed.
1230 end_serial = dns_journal_last_serial(j);
1231 if (db_serial == end_serial)
1232 CHECK(DNS_R_UPTODATE);
1234 CHECK(dns_journal_iter_init(j, db_serial, end_serial));
1236 for (result = dns_journal_first_rr(j);
1237 result == ISC_R_SUCCESS;
1238 result = dns_journal_next_rr(j))
1243 dns_difftuple_t *tuple = NULL;
1247 dns_journal_current_rr(j, &name, &ttl, &rdata);
1249 if (rdata->type == dns_rdatatype_soa) {
1252 db_serial = j->it.current_serial;
1258 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1259 "%s: journal file corrupt: missing "
1260 "initial SOA", j->filename);
1261 FAIL(ISC_R_UNEXPECTED);
1263 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ?
1264 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
1265 name, ttl, rdata, &tuple));
1266 dns_diff_append(&diff, &tuple);
1268 if (++n_put > 100) {
1269 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1270 "%s: applying diff to database (%u)",
1271 j->filename, db_serial);
1272 (void)dns_diff_print(&diff, NULL);
1273 CHECK(dns_diff_apply(&diff, db, ver));
1274 dns_diff_clear(&diff);
1278 if (result == ISC_R_NOMORE)
1279 result = ISC_R_SUCCESS;
1283 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1284 "%s: applying final diff to database (%u)",
1285 j->filename, db_serial);
1286 (void)dns_diff_print(&diff, NULL);
1287 CHECK(dns_diff_apply(&diff, db, ver));
1288 dns_diff_clear(&diff);
1293 dns_db_closeversion(db, &ver, result == ISC_R_SUCCESS ?
1294 ISC_TRUE : ISC_FALSE);
1296 if (source.base != NULL)
1297 isc_mem_put(j->mctx, source.base, source.length);
1298 if (target.base != NULL)
1299 isc_mem_put(j->mctx, target.base, target.length);
1301 dns_diff_clear(&diff);
1307 dns_journal_rollforward(isc_mem_t *mctx, dns_db_t *db, const char *filename) {
1309 isc_result_t result;
1311 REQUIRE(DNS_DB_VALID(db));
1312 REQUIRE(filename != NULL);
1315 result = dns_journal_open(mctx, filename, ISC_FALSE, &j);
1316 if (result == ISC_R_NOTFOUND) {
1317 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1318 "no journal file, but that's OK");
1319 return (DNS_R_NOJOURNAL);
1321 if (result != ISC_R_SUCCESS)
1323 if (JOURNAL_EMPTY(&j->header))
1324 result = DNS_R_UPTODATE;
1326 result = roll_forward(j, db);
1328 dns_journal_destroy(&j);
1334 dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) {
1336 isc_buffer_t source; /* Transaction data from disk */
1337 isc_buffer_t target; /* Ditto after _fromwire check */
1338 isc_uint32_t start_serial; /* Database SOA serial */
1339 isc_uint32_t end_serial; /* Last journal SOA serial */
1340 isc_result_t result;
1342 unsigned int n_soa = 0;
1343 unsigned int n_put = 0;
1345 REQUIRE(filename != NULL);
1348 result = dns_journal_open(mctx, filename, ISC_FALSE, &j);
1349 if (result == ISC_R_NOTFOUND) {
1350 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
1351 return (DNS_R_NOJOURNAL);
1354 if (result != ISC_R_SUCCESS) {
1355 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1356 "journal open failure: %s: %s",
1357 isc_result_totext(result), j->filename);
1361 dns_diff_init(j->mctx, &diff);
1364 * Set up empty initial buffers for uncheched and checked
1365 * wire format transaction data. They will be reallocated
1368 isc_buffer_init(&source, NULL, 0);
1369 isc_buffer_init(&target, NULL, 0);
1371 start_serial = dns_journal_first_serial(j);
1372 end_serial = dns_journal_last_serial(j);
1374 CHECK(dns_journal_iter_init(j, start_serial, end_serial));
1376 for (result = dns_journal_first_rr(j);
1377 result == ISC_R_SUCCESS;
1378 result = dns_journal_next_rr(j))
1383 dns_difftuple_t *tuple = NULL;
1387 dns_journal_current_rr(j, &name, &ttl, &rdata);
1389 if (rdata->type == dns_rdatatype_soa)
1395 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1396 "%s: journal file corrupt: missing "
1397 "initial SOA", j->filename);
1398 FAIL(ISC_R_UNEXPECTED);
1400 CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ?
1401 DNS_DIFFOP_DEL : DNS_DIFFOP_ADD,
1402 name, ttl, rdata, &tuple));
1403 dns_diff_append(&diff, &tuple);
1405 if (++n_put > 100) {
1406 result = dns_diff_print(&diff, file);
1407 dns_diff_clear(&diff);
1409 if (result != ISC_R_SUCCESS)
1413 if (result == ISC_R_NOMORE)
1414 result = ISC_R_SUCCESS;
1418 result = dns_diff_print(&diff, file);
1419 dns_diff_clear(&diff);
1424 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1425 "%s: cannot print: journal file corrupt", j->filename);
1428 if (source.base != NULL)
1429 isc_mem_put(j->mctx, source.base, source.length);
1430 if (target.base != NULL)
1431 isc_mem_put(j->mctx, target.base, target.length);
1433 dns_diff_clear(&diff);
1434 dns_journal_destroy(&j);
1439 /**************************************************************************/
1441 * Miscellaneous accessors.
1443 isc_uint32_t dns_journal_first_serial(dns_journal_t *j) {
1444 return (j->header.begin.serial);
1447 isc_uint32_t dns_journal_last_serial(dns_journal_t *j) {
1448 return (j->header.end.serial);
1451 /**************************************************************************/
1453 * Iteration support.
1455 * When serving an outgoing IXFR, we transmit a part of the journal starting
1456 * at the serial number in the IXFR request and ending at the serial
1457 * number that is current when the IXFR request arrives. The ending
1458 * serial number is not necessarily at the end of the journal:
1459 * the journal may grow while the IXFR is in progress, but we stop
1460 * when we reach the serial number that was current when the IXFR started.
1463 static isc_result_t read_one_rr(dns_journal_t *j);
1466 * Make sure the buffer 'b' is has at least 'size' bytes
1467 * allocated, and clear it.
1470 * Either b->base is NULL, or it points to b->length bytes of memory
1471 * previously allocated by isc_mem_get().
1475 size_buffer(isc_mem_t *mctx, isc_buffer_t *b, unsigned size) {
1476 if (b->length < size) {
1477 void *mem = isc_mem_get(mctx, size);
1479 return (ISC_R_NOMEMORY);
1480 if (b->base != NULL)
1481 isc_mem_put(mctx, b->base, b->length);
1485 isc_buffer_clear(b);
1486 return (ISC_R_SUCCESS);
1490 dns_journal_iter_init(dns_journal_t *j,
1491 isc_uint32_t begin_serial, isc_uint32_t end_serial)
1493 isc_result_t result;
1495 CHECK(journal_find(j, begin_serial, &j->it.bpos));
1496 INSIST(j->it.bpos.serial == begin_serial);
1498 CHECK(journal_find(j, end_serial, &j->it.epos));
1499 INSIST(j->it.epos.serial == end_serial);
1501 result = ISC_R_SUCCESS;
1503 j->it.result = result;
1504 return (j->it.result);
1509 dns_journal_first_rr(dns_journal_t *j) {
1510 isc_result_t result;
1513 * Seek to the beginning of the first transaction we are
1516 CHECK(journal_seek(j, j->it.bpos.offset));
1517 j->it.current_serial = j->it.bpos.serial;
1519 j->it.xsize = 0; /* We have no transaction data yet... */
1520 j->it.xpos = 0; /* ...and haven't used any of it. */
1522 return (read_one_rr(j));
1529 read_one_rr(dns_journal_t *j) {
1530 isc_result_t result;
1532 dns_rdatatype_t rdtype;
1533 dns_rdataclass_t rdclass;
1536 journal_xhdr_t xhdr;
1537 journal_rrhdr_t rrhdr;
1539 INSIST(j->offset <= j->it.epos.offset);
1540 if (j->offset == j->it.epos.offset)
1541 return (ISC_R_NOMORE);
1542 if (j->it.xpos == j->it.xsize) {
1544 * We are at a transaction boundary.
1545 * Read another transaction header.
1547 CHECK(journal_read_xhdr(j, &xhdr));
1548 if (xhdr.size == 0) {
1549 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1550 "%s: journal corrupt: empty transaction",
1552 FAIL(ISC_R_UNEXPECTED);
1554 if (xhdr.serial0 != j->it.current_serial) {
1555 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1556 "%s: journal file corrupt: "
1557 "expected serial %u, got %u",
1559 j->it.current_serial, xhdr.serial0);
1560 FAIL(ISC_R_UNEXPECTED);
1562 j->it.xsize = xhdr.size;
1568 CHECK(journal_read_rrhdr(j, &rrhdr));
1570 * Perform a sanity check on the journal RR size.
1571 * The smallest possible RR has a 1-byte owner name
1572 * and a 10-byte header. The largest possible
1573 * RR has 65535 bytes of data, a header, and a maximum-
1574 * size owner name, well below 70 k total.
1576 if (rrhdr.size < 1+10 || rrhdr.size > 70000) {
1577 isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR,
1578 "%s: journal corrupt: impossible RR size "
1579 "(%d bytes)", j->filename, rrhdr.size);
1580 FAIL(ISC_R_UNEXPECTED);
1583 CHECK(size_buffer(j->mctx, &j->it.source, rrhdr.size));
1584 CHECK(journal_read(j, j->it.source.base, rrhdr.size));
1585 isc_buffer_add(&j->it.source, rrhdr.size);
1588 * The target buffer is made the same size
1589 * as the source buffer, with the assumption that when
1590 * no compression in present, the output of dns_*_fromwire()
1591 * is no larger than the input.
1593 CHECK(size_buffer(j->mctx, &j->it.target, rrhdr.size));
1596 * Parse the owner name. We don't know where it
1597 * ends yet, so we make the entire "remaining"
1598 * part of the buffer "active".
1600 isc_buffer_setactive(&j->it.source,
1601 j->it.source.used - j->it.source.current);
1602 CHECK(dns_name_fromwire(&j->it.name, &j->it.source,
1603 &j->it.dctx, 0, &j->it.target));
1606 * Check that the RR header is there, and parse it.
1608 if (isc_buffer_remaininglength(&j->it.source) < 10)
1609 FAIL(DNS_R_FORMERR);
1611 rdtype = isc_buffer_getuint16(&j->it.source);
1612 rdclass = isc_buffer_getuint16(&j->it.source);
1613 ttl = isc_buffer_getuint32(&j->it.source);
1614 rdlen = isc_buffer_getuint16(&j->it.source);
1619 isc_buffer_setactive(&j->it.source, rdlen);
1620 dns_rdata_reset(&j->it.rdata);
1621 CHECK(dns_rdata_fromwire(&j->it.rdata, rdclass,
1622 rdtype, &j->it.source, &j->it.dctx,
1626 j->it.xpos += sizeof(journal_rawrrhdr_t) + rrhdr.size;
1627 if (rdtype == dns_rdatatype_soa) {
1628 /* XXX could do additional consistency checks here */
1629 j->it.current_serial = dns_soa_getserial(&j->it.rdata);
1632 result = ISC_R_SUCCESS;
1635 j->it.result = result;
1640 dns_journal_next_rr(dns_journal_t *j) {
1641 j->it.result = read_one_rr(j);
1642 return (j->it.result);
1646 dns_journal_current_rr(dns_journal_t *j, dns_name_t **name, isc_uint32_t *ttl,
1647 dns_rdata_t **rdata)
1649 REQUIRE(j->it.result == ISC_R_SUCCESS);
1650 *name = &j->it.name;
1652 *rdata = &j->it.rdata;
1655 /**************************************************************************/
1657 * Generating diffs from databases
1661 * Construct a diff containing all the RRs at the current name of the
1662 * database iterator 'dbit' in database 'db', version 'ver'.
1663 * Set '*name' to the current name, and append the diff to 'diff'.
1664 * All new tuples will have the operation 'op'.
1666 * Requires: 'name' must have buffer large enough to hold the name.
1667 * Typically, a dns_fixedname_t would be used.
1670 get_name_diff(dns_db_t *db, dns_dbversion_t *ver, isc_stdtime_t now,
1671 dns_dbiterator_t *dbit, dns_name_t *name, dns_diffop_t op,
1674 isc_result_t result;
1675 dns_dbnode_t *node = NULL;
1676 dns_rdatasetiter_t *rdsiter = NULL;
1677 dns_difftuple_t *tuple = NULL;
1679 result = dns_dbiterator_current(dbit, &node, name);
1680 if (result != ISC_R_SUCCESS)
1683 result = dns_db_allrdatasets(db, node, ver, now, &rdsiter);
1684 if (result != ISC_R_SUCCESS)
1687 for (result = dns_rdatasetiter_first(rdsiter);
1688 result == ISC_R_SUCCESS;
1689 result = dns_rdatasetiter_next(rdsiter))
1691 dns_rdataset_t rdataset;
1693 dns_rdataset_init(&rdataset);
1694 dns_rdatasetiter_current(rdsiter, &rdataset);
1696 for (result = dns_rdataset_first(&rdataset);
1697 result == ISC_R_SUCCESS;
1698 result = dns_rdataset_next(&rdataset))
1700 dns_rdata_t rdata = DNS_RDATA_INIT;
1701 dns_rdataset_current(&rdataset, &rdata);
1702 result = dns_difftuple_create(diff->mctx, op, name,
1703 rdataset.ttl, &rdata,
1705 if (result != ISC_R_SUCCESS) {
1706 dns_rdataset_disassociate(&rdataset);
1707 goto cleanup_iterator;
1709 dns_diff_append(diff, &tuple);
1711 dns_rdataset_disassociate(&rdataset);
1712 if (result != ISC_R_NOMORE)
1713 goto cleanup_iterator;
1715 if (result != ISC_R_NOMORE)
1716 goto cleanup_iterator;
1718 result = ISC_R_SUCCESS;
1721 dns_rdatasetiter_destroy(&rdsiter);
1724 dns_db_detachnode(db, &node);
1730 * Comparison function for use by dns_diff_subtract when sorting
1731 * the diffs to be subtracted. The sort keys are the rdata type
1732 * and the rdata itself. The owner name is ignored, because
1733 * it is known to be the same for all tuples.
1736 rdata_order(const void *av, const void *bv) {
1737 dns_difftuple_t const * const *ap = av;
1738 dns_difftuple_t const * const *bp = bv;
1739 dns_difftuple_t const *a = *ap;
1740 dns_difftuple_t const *b = *bp;
1742 r = (b->rdata.type - a->rdata.type);
1745 r = dns_rdata_compare(&a->rdata, &b->rdata);
1750 dns_diff_subtract(dns_diff_t diff[2], dns_diff_t *r) {
1751 isc_result_t result;
1752 dns_difftuple_t *p[2];
1754 isc_boolean_t append;
1756 CHECK(dns_diff_sort(&diff[0], rdata_order));
1757 CHECK(dns_diff_sort(&diff[1], rdata_order));
1760 p[0] = ISC_LIST_HEAD(diff[0].tuples);
1761 p[1] = ISC_LIST_HEAD(diff[1].tuples);
1762 if (p[0] == NULL && p[1] == NULL)
1765 for (i = 0; i < 2; i++)
1766 if (p[!i] == NULL) {
1767 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
1768 ISC_LIST_APPEND(r->tuples, p[i], link);
1771 t = rdata_order(&p[0], &p[1]);
1773 ISC_LIST_UNLINK(diff[0].tuples, p[0], link);
1774 ISC_LIST_APPEND(r->tuples, p[0], link);
1778 ISC_LIST_UNLINK(diff[1].tuples, p[1], link);
1779 ISC_LIST_APPEND(r->tuples, p[1], link);
1784 * Identical RRs in both databases; skip them both
1785 * if the ttl differs.
1787 append = ISC_TF(p[0]->ttl != p[1]->ttl);
1788 for (i = 0; i < 2; i++) {
1789 ISC_LIST_UNLINK(diff[i].tuples, p[i], link);
1791 ISC_LIST_APPEND(r->tuples, p[i], link);
1793 dns_difftuple_free(&p[i]);
1798 result = ISC_R_SUCCESS;
1804 * Compare the databases 'dba' and 'dbb' and generate a journal
1805 * entry containing the changes to make 'dba' from 'dbb' (note
1806 * the order). This journal entry will consist of a single,
1807 * possibly very large transaction.
1811 dns_db_diff(isc_mem_t *mctx,
1812 dns_db_t *dba, dns_dbversion_t *dbvera,
1813 dns_db_t *dbb, dns_dbversion_t *dbverb,
1814 const char *journal_filename)
1817 dns_dbversion_t *ver[2];
1818 dns_dbiterator_t *dbit[2] = { NULL, NULL };
1819 isc_boolean_t have[2] = { ISC_FALSE, ISC_FALSE };
1820 dns_fixedname_t fixname[2];
1821 isc_result_t result, itresult[2];
1822 dns_diff_t diff[2], resultdiff;
1824 dns_journal_t *journal = NULL;
1826 db[0] = dba, db[1] = dbb;
1827 ver[0] = dbvera, ver[1] = dbverb;
1829 dns_diff_init(mctx, &diff[0]);
1830 dns_diff_init(mctx, &diff[1]);
1831 dns_diff_init(mctx, &resultdiff);
1833 dns_fixedname_init(&fixname[0]);
1834 dns_fixedname_init(&fixname[1]);
1836 result = dns_journal_open(mctx, journal_filename, ISC_TRUE, &journal);
1837 if (result != ISC_R_SUCCESS)
1840 result = dns_db_createiterator(db[0], ISC_FALSE, &dbit[0]);
1841 if (result != ISC_R_SUCCESS)
1842 goto cleanup_journal;
1843 result = dns_db_createiterator(db[1], ISC_FALSE, &dbit[1]);
1844 if (result != ISC_R_SUCCESS)
1845 goto cleanup_interator0;
1847 itresult[0] = dns_dbiterator_first(dbit[0]);
1848 itresult[1] = dns_dbiterator_first(dbit[1]);
1851 for (i = 0; i < 2; i++) {
1852 if (! have[i] && itresult[i] == ISC_R_SUCCESS) {
1853 CHECK(get_name_diff(db[i], ver[i], 0, dbit[i],
1854 dns_fixedname_name(&fixname[i]),
1859 itresult[i] = dns_dbiterator_next(dbit[i]);
1864 if (! have[0] && ! have[1]) {
1865 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
1866 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
1870 for (i = 0; i < 2; i++) {
1872 ISC_LIST_APPENDLIST(resultdiff.tuples,
1873 diff[i].tuples, link);
1874 INSIST(ISC_LIST_EMPTY(diff[i].tuples));
1875 have[i] = ISC_FALSE;
1880 t = dns_name_compare(dns_fixedname_name(&fixname[0]),
1881 dns_fixedname_name(&fixname[1]));
1883 ISC_LIST_APPENDLIST(resultdiff.tuples,
1884 diff[0].tuples, link);
1885 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
1886 have[0] = ISC_FALSE;
1890 ISC_LIST_APPENDLIST(resultdiff.tuples,
1891 diff[1].tuples, link);
1892 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
1893 have[1] = ISC_FALSE;
1897 CHECK(dns_diff_subtract(diff, &resultdiff));
1898 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
1899 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
1900 have[0] = have[1] = ISC_FALSE;
1903 if (itresult[0] != ISC_R_NOMORE)
1905 if (itresult[1] != ISC_R_NOMORE)
1908 if (ISC_LIST_EMPTY(resultdiff.tuples)) {
1909 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
1911 CHECK(dns_journal_write_transaction(journal, &resultdiff));
1913 INSIST(ISC_LIST_EMPTY(diff[0].tuples));
1914 INSIST(ISC_LIST_EMPTY(diff[1].tuples));
1917 dns_diff_clear(&resultdiff);
1918 dns_dbiterator_destroy(&dbit[1]);
1920 dns_dbiterator_destroy(&dbit[0]);
1922 dns_journal_destroy(&journal);
1927 dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial,
1928 isc_uint32_t target_size)
1931 journal_pos_t best_guess;
1932 journal_pos_t current_pos;
1933 dns_journal_t *j = NULL;
1934 journal_rawheader_t rawheader;
1935 unsigned int copy_length;
1938 unsigned int size = 0;
1939 isc_result_t result;
1940 unsigned int indexend;
1942 CHECK(journal_open(mctx, filename, ISC_TRUE, ISC_FALSE, &j));
1944 if (JOURNAL_EMPTY(&j->header)) {
1945 dns_journal_destroy(&j);
1946 return (ISC_R_SUCCESS);
1949 if (DNS_SERIAL_GT(j->header.begin.serial, serial) ||
1950 DNS_SERIAL_GT(serial, j->header.end.serial)) {
1951 dns_journal_destroy(&j);
1952 return (ISC_R_RANGE);
1956 * Cope with very small target sizes.
1958 indexend = sizeof(journal_rawheader_t) +
1959 j->header.index_size * sizeof(journal_rawpos_t);
1960 if (target_size < indexend * 2)
1961 target_size = target_size/2 + indexend;
1964 * See if there is any work to do.
1966 if ((isc_uint32_t) j->header.end.offset < target_size) {
1967 dns_journal_destroy(&j);
1968 return (ISC_R_SUCCESS);
1972 * Remove overhead so space test below can succeed.
1974 if (target_size >= indexend)
1975 target_size -= indexend;
1978 * Find if we can create enough free space.
1980 best_guess = j->header.begin;
1981 for (i = 0; i < j->header.index_size; i++) {
1982 if (POS_VALID(j->index[i]) &&
1983 DNS_SERIAL_GE(serial, j->index[i].serial) &&
1984 ((isc_uint32_t)(j->header.end.offset - j->index[i].offset)
1985 >= target_size / 2) &&
1986 j->index[i].offset > best_guess.offset)
1987 best_guess = j->index[i];
1990 current_pos = best_guess;
1991 while (current_pos.serial != serial) {
1992 CHECK(journal_next(j, ¤t_pos));
1993 if (current_pos.serial == j->header.end.serial)
1996 if (DNS_SERIAL_GE(serial, current_pos.serial) &&
1997 ((isc_uint32_t)(j->header.end.offset - current_pos.offset)
1998 >= (target_size / 2)) &&
1999 current_pos.offset > best_guess.offset)
2000 best_guess = current_pos;
2005 INSIST(best_guess.serial != j->header.end.serial);
2006 if (best_guess.serial != serial)
2007 CHECK(journal_next(j, &best_guess));
2010 * Enough space to proceed?
2012 if ((isc_uint32_t) (j->header.end.offset - best_guess.offset) >
2013 (isc_uint32_t) (best_guess.offset - indexend)) {
2014 dns_journal_destroy(&j);
2015 return (ISC_R_NOSPACE);
2018 copy_length = j->header.end.offset - best_guess.offset;
2021 * Invalidate entire index, will be rebuilt at end.
2023 for (i = 0; i < j->header.index_size; i++) {
2024 if (POS_VALID(j->index[i]))
2025 POS_INVALIDATE(j->index[i]);
2029 * Convert the index into on-disk format and write
2032 CHECK(index_to_disk(j));
2033 CHECK(journal_fsync(j));
2036 * Update the journal header.
2038 if (copy_length == 0) {
2039 j->header.begin.serial = 0;
2040 j->header.end.serial = 0;
2041 j->header.begin.offset = 0;
2042 j->header.end.offset = 0;
2044 j->header.begin = best_guess;
2046 journal_header_encode(&j->header, &rawheader);
2047 CHECK(journal_seek(j, 0));
2048 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
2049 CHECK(journal_fsync(j));
2051 if (copy_length != 0) {
2053 * Copy best_guess to end into space just freed.
2056 if (copy_length < size)
2058 buf = isc_mem_get(mctx, size);
2060 result = ISC_R_NOMEMORY;
2064 for (i = 0; i < copy_length; i += size) {
2065 len = (copy_length - i) > size ? size :
2067 CHECK(journal_seek(j, best_guess.offset + i));
2068 CHECK(journal_read(j, buf, len));
2069 CHECK(journal_seek(j, indexend + i));
2070 CHECK(journal_write(j, buf, len));
2073 CHECK(journal_fsync(j));
2076 * Compute new header.
2078 j->header.begin.offset = indexend;
2079 j->header.end.offset = indexend + copy_length;
2081 * Update the journal header.
2083 journal_header_encode(&j->header, &rawheader);
2084 CHECK(journal_seek(j, 0));
2085 CHECK(journal_write(j, &rawheader, sizeof(rawheader)));
2086 CHECK(journal_fsync(j));
2091 current_pos = j->header.begin;
2092 while (current_pos.serial != j->header.end.serial) {
2093 index_add(j, ¤t_pos);
2094 CHECK(journal_next(j, ¤t_pos));
2100 CHECK(index_to_disk(j));
2101 CHECK(journal_fsync(j));
2103 indexend = j->header.end.offset;
2105 dns_journal_destroy(&j);
2106 (void)isc_file_truncate(filename, (isc_offset_t)indexend);
2107 result = ISC_R_SUCCESS;
2111 isc_mem_put(mctx, buf, size);
2113 dns_journal_destroy(&j);
2118 index_to_disk(dns_journal_t *j) {
2119 isc_result_t result = ISC_R_SUCCESS;
2121 if (j->header.index_size != 0) {
2124 unsigned int rawbytes;
2126 rawbytes = j->header.index_size * sizeof(journal_rawpos_t);
2129 for (i = 0; i < j->header.index_size; i++) {
2130 encode_uint32(j->index[i].serial, p);
2132 encode_uint32(j->index[i].offset, p);
2135 INSIST(p == j->rawindex + rawbytes);
2137 CHECK(journal_seek(j, sizeof(journal_rawheader_t)));
2138 CHECK(journal_write(j, j->rawindex, rawbytes));