HAMMER VFS - REDO implementation base code part 2/many
[dragonfly.git] / sys / vfs / hammer / hammer_redo.c
/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * HAMMER redo - REDO record support for the UNDO/REDO FIFO.
 *
 * See also hammer_undo.c
 */

41 #include "hammer.h"
42
/*
 * HAMMER version 4+ REDO support.
 *
 * REDO records are used to improve fsync() performance.  Instead of having
 * to go through a complete double-flush cycle involving at least two disk
 * synchronizations the fsync need only flush UNDO/REDO FIFO buffers through
 * the related REDO records, which is a single synchronization requiring
 * no track seeking.  If a recovery becomes necessary the recovery code
 * will generate logical data writes based on the REDO records encountered.
 * That is, the recovery code will UNDO any partial meta-data/data writes
 * at the raw disk block level and then REDO the data writes at the logical
 * level.
 */
56 int
57 hammer_generate_redo(hammer_transaction_t trans, hammer_inode_t ip,
58                      hammer_off_t file_off, u_int32_t flags,
59                      void *base, int len)
60 {
61         hammer_mount_t hmp;
62         hammer_volume_t root_volume;
63         hammer_blockmap_t undomap;
64         hammer_buffer_t buffer = NULL;
65         hammer_fifo_redo_t redo;
66         hammer_fifo_tail_t tail;
67         hammer_off_t next_offset;
68         int error;
69         int bytes;
70         int n;
71
72         /*
73          * Setup
74          */
75         hmp = trans->hmp;
76
77         root_volume = trans->rootvol;
78         undomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
79
80         /*
81          * No undo recursion when modifying the root volume
82          */
83         hammer_modify_volume(NULL, root_volume, NULL, 0);
84         hammer_lock_ex(&hmp->undo_lock);
85
86         /* undo had better not roll over (loose test) */
87         if (hammer_undo_space(trans) < len + HAMMER_BUFSIZE*3)
88                 panic("hammer: insufficient undo FIFO space!");
89
90         /*
91          * Loop until the undo for the entire range has been laid down.
92          */
93         while (len) {
94                 /*
95                  * Fetch the layout offset in the UNDO FIFO, wrap it as
96                  * necessary.
97                  */
98                 if (undomap->next_offset == undomap->alloc_offset) {
99                         undomap->next_offset =
100                                 HAMMER_ZONE_ENCODE(HAMMER_ZONE_UNDO_INDEX, 0);
101                 }
102                 next_offset = undomap->next_offset;
103
104                 /*
105                  * This is a tail-chasing FIFO, when we hit the start of a new
106                  * buffer we don't have to read it in.
107                  */
108                 if ((next_offset & HAMMER_BUFMASK) == 0) {
109                         redo = hammer_bnew(hmp, next_offset, &error, &buffer);
110                         hammer_format_undo(redo, hmp->undo_seqno ^ 0x40000000);
111                 } else {
112                         redo = hammer_bread(hmp, next_offset, &error, &buffer);
113                 }
114                 if (error)
115                         break;
116                 hammer_modify_buffer(NULL, buffer, NULL, 0);
117
118                 /*
119                  * Calculate how big a media structure fits up to the next
120                  * alignment point and how large a data payload we can
121                  * accomodate.
122                  *
123                  * If n calculates to 0 or negative there is no room for
124                  * anything but a PAD.
125                  */
126                 bytes = HAMMER_UNDO_ALIGN -
127                         ((int)next_offset & HAMMER_UNDO_MASK);
128                 n = bytes -
129                     (int)sizeof(struct hammer_fifo_redo) -
130                     (int)sizeof(struct hammer_fifo_tail);
131
132                 /*
133                  * If available space is insufficient for any payload
134                  * we have to lay down a PAD.
135                  *
136                  * The minimum PAD is 8 bytes and the head and tail will
137                  * overlap each other in that case.  PADs do not have
138                  * sequence numbers or CRCs.
139                  *
140                  * A PAD may not start on a boundary.  That is, every
141                  * 512-byte block in the UNDO/REDO FIFO must begin with
142                  * a record containing a sequence number.
143                  */
144                 if (n <= 0) {
145                         KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
146                         KKASSERT(((int)next_offset & HAMMER_UNDO_MASK) != 0);
147                         tail = (void *)((char *)redo + bytes - sizeof(*tail));
148                         if ((void *)redo != (void *)tail) {
149                                 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
150                                 tail->tail_type = HAMMER_HEAD_TYPE_PAD;
151                                 tail->tail_size = bytes;
152                         }
153                         redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
154                         redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
155                         redo->head.hdr_size = bytes;
156                         /* NO CRC OR SEQ NO */
157                         undomap->next_offset += bytes;
158                         hammer_modify_buffer_done(buffer);
159                         hammer_stats_redo += bytes;
160                         continue;
161                 }
162
163                 /*
164                  * Calculate the actual payload and recalculate the size
165                  * of the media structure as necessary.  If no data buffer
166                  * is supplied there is no payload.
167                  */
168                 if (base == NULL) {
169                         n = 0;
170                 } else if (n > len) {
171                         n = len;
172                 }
173                 bytes = ((n + HAMMER_HEAD_ALIGN_MASK) &
174                          ~HAMMER_HEAD_ALIGN_MASK) +
175                         (int)sizeof(struct hammer_fifo_redo) +
176                         (int)sizeof(struct hammer_fifo_tail);
177                 if (hammer_debug_general & 0x0080) {
178                         kprintf("redo %016llx %d %d\n",
179                                 (long long)next_offset, bytes, n);
180                 }
181
182                 redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
183                 redo->head.hdr_type = HAMMER_HEAD_TYPE_REDO;
184                 redo->head.hdr_size = bytes;
185                 redo->head.hdr_seq = hmp->undo_seqno++;
186                 redo->head.hdr_crc = 0;
187                 if (ip)
188                         redo->redo_objid = ip->obj_id;
189                 redo->redo_mtime = trans->time;
190                 redo->redo_offset = file_off;
191                 redo->redo_flags = flags;
192
193                 /*
194                  * Incremental payload.  If no payload we throw the entire
195                  * len into redo_data_bytes and will not loop.
196                  */
197                 if (base) {
198                         redo->redo_data_bytes = n;
199                         bcopy(base, redo + 1, n);
200                         len -= n;
201                         base = (char *)base + n;
202                         file_off += n;
203                 } else {
204                         redo->redo_data_bytes = len;
205                         file_off += len;
206                         len = 0;
207                 }
208
209                 tail = (void *)((char *)redo + bytes - sizeof(*tail));
210                 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
211                 tail->tail_type = HAMMER_HEAD_TYPE_REDO;
212                 tail->tail_size = bytes;
213
214                 KKASSERT(bytes >= sizeof(redo->head));
215                 redo->head.hdr_crc = crc32(redo, HAMMER_FIFO_HEAD_CRCOFF) ^
216                              crc32(&redo->head + 1, bytes - sizeof(redo->head));
217                 undomap->next_offset += bytes;
218                 hammer_stats_redo += bytes;
219
220                 /*
221                  * Before we finish off the buffer we have to deal with any
222                  * junk between the end of the media structure we just laid
223                  * down and the UNDO alignment boundary.  We do this by laying
224                  * down a dummy PAD.  Even though we will probably overwrite
225                  * it almost immediately we have to do this so recovery runs
226                  * can iterate the UNDO space without having to depend on
227                  * the indices in the volume header.
228                  *
229                  * This dummy PAD will be overwritten on the next undo so
230                  * we do not adjust undomap->next_offset.
231                  */
232                 bytes = HAMMER_UNDO_ALIGN -
233                         ((int)undomap->next_offset & HAMMER_UNDO_MASK);
234                 if (bytes != HAMMER_UNDO_ALIGN) {
235                         KKASSERT(bytes >= sizeof(struct hammer_fifo_tail));
236                         redo = (void *)(tail + 1);
237                         tail = (void *)((char *)redo + bytes - sizeof(*tail));
238                         if ((void *)redo != (void *)tail) {
239                                 tail->tail_signature = HAMMER_TAIL_SIGNATURE;
240                                 tail->tail_type = HAMMER_HEAD_TYPE_PAD;
241                                 tail->tail_size = bytes;
242                         }
243                         redo->head.hdr_signature = HAMMER_HEAD_SIGNATURE;
244                         redo->head.hdr_type = HAMMER_HEAD_TYPE_PAD;
245                         redo->head.hdr_size = bytes;
246                         /* NO CRC OR SEQ NO */
247                 }
248                 hammer_modify_buffer_done(buffer);
249         }
250         hammer_modify_volume_done(root_volume);
251         hammer_unlock(&hmp->undo_lock);
252
253         if (buffer)
254                 hammer_rel_buffer(buffer, 0);
255         return(error);
256 }
257
/*
 * Generate a REDO SYNC record.  At least one such record must be generated
 * in the nominal recovery span for the recovery code to be able to run
 * REDOs outside of the span.
 */
263 void
264 hammer_generate_redo_sync(hammer_transaction_t trans)
265 {
266 #if 0
267         hammer_generate_redo(trans, NULL, 0, HAMMER_REDO_SYNC, NULL, 0);
268 #endif
269         trans->hmp->flags |= HAMMER_MOUNT_REDO_SYNC;
270 }