2 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * HAMMER redo - REDO record support for the UNDO/REDO FIFO.
38 * See also hammer_undo.c
44 * HAMMER version 4+ REDO support.
46 * REDO records are used to improve fsync() performance. Instead of having
47 * to go through a complete double-flush cycle involving at least two disk
48 * synchronizations the fsync need only flush UNDO/REDO FIFO buffers through
49 * the related REDO records, which is a single synchronization requiring
50 * no track seeking. If a recovery becomes necessary the recovery code
51 * will generate logical data writes based on the REDO records encountered.
52 * That is, the recovery code will UNDO any partial meta-data/data writes
53 * at the raw disk block level and then REDO the data writes at the logical level.
/*
 * hammer_generate_redo - lay down REDO records in the UNDO/REDO FIFO.
 *
 * Visible parameters:
 *   trans    - transaction; supplies the root volume and the timestamp
 *              stored in each record (redo_mtime).
 *   ip       - inode whose obj_id is stored as redo_objid.
 *   file_off - stored as redo_offset.
 *   flags    - stored as redo_flags.
 * The body additionally uses `base` (source of the payload copy) and
 * `len` (payload length), which are presumably the remaining parameters.
 *
 * NOTE(review): this listing skips source lines (the embedded numbers
 * jump, e.g. 58 -> 62), so the rest of the parameter list, the return
 * type, the declarations of hmp/error/bytes/n, the loop header and most
 * branch/brace lines are not visible here -- confirm against the full
 * file before relying on the control-flow notes below.
 */
57 hammer_generate_redo(hammer_transaction_t trans, hammer_inode_t ip,
58 hammer_off_t file_off, u_int32_t flags,
62 hammer_volume_t root_volume;
63 hammer_blockmap_t undomap;
64 hammer_buffer_t buffer = NULL;
65 hammer_fifo_redo_t redo;
66 hammer_fifo_tail_t tail;
67 hammer_off_t next_offset;
/*
 * The root volume comes from the transaction; undomap is the blockmap
 * entry for the UNDO zone.  (The assignment of hmp is on an omitted line.)
 */
77 root_volume = trans->rootvol;
78 undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
81 * No undo recursion when modifying the root volume
/*
 * Open a modification on the root volume (NULL transaction, no range)
 * and take hmp->undo_lock; both are undone at the end of the function
 * by hammer_modify_volume_done() and hammer_unlock().
 */
83 hammer_modify_volume(NULL, root_volume, NULL, 0);
84 hammer_lock_ex(&hmp->undo_lock);
86 /* undo had better not roll over (loose test) */
/* Requires at least len plus three HAMMER_BUFSIZE of FIFO space, else panic. */
87 if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
88 panic("hammer: insufficient undo FIFO space!");
91 * Loop until the undo for the entire range has been laid down.
95 * Fetch the layout offset in the UNDO FIFO, wrap it as
/* Once next_offset reaches alloc_offset, restart at offset 0 of the UNDO zone. */
98 if (undomap->next_offset == undomap->alloc_offset) {
99 undomap->next_offset =
100 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
102 next_offset = undomap->next_offset;
105 * This is a tail-chasing FIFO, when we hit the start of a new
106 * buffer we don't have to read it in.
/*
 * Buffer-aligned offset: obtain the buffer with hammer_bnew() and format
 * it with hammer_format_undo().  The hammer_bread() call below is
 * presumably the else-branch (the else line is omitted from this listing).
 * NOTE(review): `error` is filled in by both calls; its check is not
 * visible here.
 */
108 if ((next_offset & HAMMER_BUFMASK) == 0) {
109 redo = hammer_bnew(hmp, next_offset, &error, &buffer);
110 hammer_format_undo(redo, hmp->undo_seqno ^ 0x40000000);
112 redo = hammer_bread(hmp, next_offset, &error, &buffer);
/* Open a modification on the FIFO buffer; closed by hammer_modify_buffer_done(). */
116 hammer_modify_buffer(NULL, buffer, NULL, 0);
119 * Calculate how big a media structure fits up to the next
120 * alignment point and how large a data payload we can
123 * If n calculates to 0 or negative there is no room for
124 * anything but a PAD.
/*
 * bytes = distance from next_offset to the next HAMMER_UNDO_ALIGN
 * boundary.  The two sizeof lines that follow are the tail of an
 * expression whose first line is omitted; presumably n = bytes minus the
 * fixed redo header and tail sizes -- TODO confirm.
 */
126 bytes = HAMMER_UNDO_ALIGN -
127 ((int)next_offset & HAMMER_UNDO_MASK);
129 (int)sizeof(struct hammer_fifo_redo) -
130 (int)sizeof(struct hammer_fifo_tail);
133 * If available space is insufficient for any payload
134 * we have to lay down a PAD.
136 * The minimum PAD is 8 bytes and the head and tail will
137 * overlap each other in that case. PADs do not have
138 * sequence numbers or CRCs.
140 * A PAD may not start on a boundary. That is, every
141 * 512-byte block in the UNDO/REDO FIFO must begin with
142 * a record containing a sequence number.
/*
 * PAD path: the tail is placed at the end of the `bytes`-sized region.
 * When bytes == sizeof(*tail) the tail pointer equals the head pointer,
 * so the tail fields are skipped and only the head fields are written.
 * The PAD consumes FIFO space (next_offset advances) and is counted in
 * hammer_stats_redo.
 */
145 KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
146 KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
147 tail = (void *)((char *)redo + bytes - sizeof(*tail));
148 if ((void *)redo != (void *)tail) {
149 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
150 tail->tail_type = HAMMER_HEAD_TYPE_PAD;
151 tail->tail_size = bytes;
153 redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
154 redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
155 redo->head.hdr_size = bytes;
156 /* NO CRC OR SEQ NO */
157 undomap->next_offset += bytes;
158 hammer_modify_buffer_done(buffer);
159 hammer_stats_redo += bytes;
164 * Calculate the actual payload and recalculate the size
165 * of the media structure as necessary. If no data buffer
166 * is supplied there is no payload.
/*
 * The bodies of the payload-size branches are omitted from this listing.
 * After them, bytes is recomputed as n rounded up to the
 * HAMMER_HEAD_ALIGN_MASK alignment plus the redo header and tail sizes.
 */
170 } else if (n > len) {
173 bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
174 ~HAMMER_HEAD_ALIGN_MASK) +
175 (int)sizeof(struct hammer_fifo_redo) +
176 (int)sizeof(struct hammer_fifo_tail);
/* Optional trace, enabled by bit 0x0080 of hammer_debug_general. */
177 if (hammer_debug_general & 0x0080) {
178 kprintf("redo %016llx %d %d\n",
179 (long long)next_offset, bytes, n);
/*
 * Fill in the record head.  Each REDO record takes the current
 * hmp->undo_seqno and post-increments it.  hdr_crc is zeroed here and
 * assigned its real value after the tail has been written.
 */
182 redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
183 redo->head.hdr_type = HAMMER_HEAD_TYPE_REDO;
184 redo->head.hdr_size = bytes;
185 redo->head.hdr_seq = hmp->undo_seqno++;
186 redo->head.hdr_crc = 0;
/*
 * NOTE(review): ip can be NULL (hammer_generate_redo_sync() passes NULL),
 * so this dereference needs a guard; one presumably sits on the omitted
 * line just above -- confirm.
 */
188 redo->redo_objid = ip->obj_id;
189 redo->redo_mtime = trans->time;
190 redo->redo_offset = file_off;
191 redo->redo_flags = flags;
194 * Incremental payload. If no payload we throw the entire
195 * len into redo_data_bytes and will not loop.
/*
 * With a payload: record n bytes, copy them to the space immediately
 * after the redo structure (redo + 1) and advance base past them.
 * Without one: redo_data_bytes is set to len.  The branch lines selecting
 * between the two are omitted from this listing.
 */
198 redo->redo_data_bytes = n;
199 bcopy(base, redo + 1, n);
201 base = (char *)base + n;
204 redo->redo_data_bytes = len;
/* The tail occupies the last sizeof(*tail) bytes of the record. */
209 tail = (void *)((char *)redo + bytes - sizeof(*tail));
210 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
211 tail->tail_type = HAMMER_HEAD_TYPE_REDO;
212 tail->tail_size = bytes;
/*
 * hdr_crc = crc32 of the first HAMMER_FIFO_HEAD_CRCOFF bytes of the
 * record XOR crc32 of everything following the head (bytes minus the
 * head size).  Then consume the space and count it in hammer_stats_redo.
 */
214 KKASSERT(bytes >= sizeof(redo->head));
215 redo->head.hdr_crc = crc32(redo, HAMMER_FIFO_HEAD_CRCOFF) ^
216 crc32(&redo->head + 1, bytes - sizeof(redo->head));
217 undomap->next_offset += bytes;
218 hammer_stats_redo += bytes;
221 * Before we finish off the buffer we have to deal with any
222 * junk between the end of the media structure we just laid
223 * down and the UNDO alignment boundary. We do this by laying
224 * down a dummy PAD. Even though we will probably overwrite
225 * it almost immediately we have to do this so recovery runs
226 * can iterate the UNDO space without having to depend on
227 * the indices in the volume header.
229 * This dummy PAD will be overwritten on the next undo so
230 * we do not adjust undomap->next_offset.
/*
 * bytes == HAMMER_UNDO_ALIGN means next_offset already sits on an
 * alignment boundary, so no filler is needed.  Otherwise the dummy PAD
 * starts right after the tail just written (tail + 1) and is laid out
 * the same way as the PAD above, minus the next_offset/stat updates.
 */
232 bytes = HAMMER_UNDO_ALIGN -
233 ((int)undomap->next_offset & HAMMER_UNDO_MASK);
234 if (bytes != HAMMER_UNDO_ALIGN) {
235 KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
236 redo = (void *)(tail + 1);
237 tail = (void *)((char *)redo + bytes - sizeof(*tail));
238 if ((void *)redo != (void *)tail) {
239 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
240 tail->tail_type = HAMMER_HEAD_TYPE_PAD;
241 tail->tail_size = bytes;
243 redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
244 redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
245 redo->head.hdr_size = bytes;
246 /* NO CRC OR SEQ NO */
248 hammer_modify_buffer_done(buffer);
/*
 * Close the root volume modification, drop the undo lock and release
 * the buffer reference.  NOTE(review): buffer starts out NULL; whether
 * the release is guarded is not visible in this listing.
 */
250 hammer_modify_volume_done(root_volume);
251 hammer_unlock(&hmp->undo_lock);
254 hammer_rel_buffer(buffer, 0);
259 * Generate a REDO SYNC record. At least one such record must be generated
260 * in the nominal recovery span for the recovery code to be able to run
261 * REDOs outside of the span.
/*
 * hammer_generate_redo_sync - emit a single REDO record flagged
 * HAMMER_REDO_SYNC and mark the mount accordingly.
 *
 * The record is generated with no inode (NULL), file offset 0, and
 * NULL/0 for the last two arguments (presumably the payload base and
 * length, i.e. no payload -- confirm against hammer_generate_redo()'s
 * full prototype).  Afterwards HAMMER_MOUNT_REDO_SYNC is set in
 * trans->hmp->flags.
 *
 * NOTE(review): the return type, braces and any lines between the two
 * statements are omitted from this listing.
 */
264 hammer_generate_redo_sync(hammer_transaction_t trans)
267 hammer_generate_redo(trans, NULL, 0, HAMMER_REDO_SYNC, NULL, 0);
269 trans->hmp->flags |= HAMMER_MOUNT_REDO_SYNC;