2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.15 2008/05/13 20:46:55 dillon Exp $
37 * HAMMER dependency flusher thread
39 * Meta-data updates create buffer dependencies which are arranged as a
45 static void hammer_flusher_thread(void *arg); /* thread body handed to lwkt_create() */
46 static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp); /* drains hmp->lose_list */
47 static void hammer_flusher_flush(hammer_mount_t hmp); /* syncs inodes queued on hmp->flush_list */
48 static int hammer_must_finalize_undo(hammer_mount_t hmp); /* UNDO space check used by the flush loop */
49 static void hammer_flusher_finalize(hammer_transaction_t trans); /* flushes undo/data, volume header, meta-data */
/*
 * Value stored in hmp->flusher_signal by hammer_flusher_sync() and
 * hammer_flusher_destroy().  The flusher thread only takes its short
 * (hz / 10) timed sleep while the signal is non-zero and below this value.
 */
51 #define HAMMER_FLUSHER_IMMEDIATE 16
/*
 * Request a flush and wait for it to complete.
 *
 * Does nothing when hmp->flusher_td is NULL (no flusher thread).  Otherwise
 * the current hmp->flusher_next is sampled, the flusher is woken with an
 * IMMEDIATE signal if no signal was already pending, and the caller sleeps
 * on &hmp->flusher_done until flusher_done has caught up with the sampled
 * sequence number.
 *
 * Note that an already non-zero flusher_signal is left as-is; it is not
 * upgraded to HAMMER_FLUSHER_IMMEDIATE.
 */
54 hammer_flusher_sync(hammer_mount_t hmp)
58 if (hmp->flusher_td) {
59 seq = hmp->flusher_next;
60 if (hmp->flusher_signal == 0) {
61 hmp->flusher_signal = HAMMER_FLUSHER_IMMEDIATE;
62 wakeup(&hmp->flusher_signal);
/*
 * The difference is compared as a signed int rather than comparing the
 * two counters directly -- presumably so the test keeps working if the
 * sequence numbers wrap (NOTE(review): confirm the field types).
 */
64 while ((int)(seq - hmp->flusher_done) > 0)
65 tsleep(&hmp->flusher_done, 0, "hmrfls", 0);
/*
 * Request a flush without waiting for it.
 *
 * Post-increments hmp->flusher_signal and wakes the flusher thread only
 * when the previous value was zero.  Does nothing when hmp->flusher_td is
 * NULL (no flusher thread).
 */
70 hammer_flusher_async(hammer_mount_t hmp)
72 if (hmp->flusher_td) {
73 if (hmp->flusher_signal++ == 0)
74 wakeup(&hmp->flusher_signal);
/*
 * Initialize the flusher state for a mount and start the flusher thread.
 *
 * The signal is cleared and the sequence counters start at done = 0,
 * next = 1.  lwkt_create() is given hammer_flusher_thread() as the thread
 * function, hmp as its argument, and &hmp->flusher_td to receive the new
 * thread (the remaining lwkt_create() arguments are not visible in this
 * excerpt).
 */
79 hammer_flusher_create(hammer_mount_t hmp)
81 hmp->flusher_signal = 0;
83 hmp->flusher_done = 0;
84 hmp->flusher_next = 1;
85 lwkt_create(hammer_flusher_thread, hmp, &hmp->flusher_td, NULL,
/*
 * Shut down the flusher thread, if one exists.
 *
 * Sets hmp->flusher_exiting, then repeatedly posts an IMMEDIATE signal,
 * wakes the thread and sleeps on &hmp->flusher_exiting until the thread
 * has cleared hmp->flusher_td (hammer_flusher_thread() does so and issues
 * the matching wakeup just before it finishes).
 */
90 hammer_flusher_destroy(hammer_mount_t hmp)
92 if (hmp->flusher_td) {
93 hmp->flusher_exiting = 1;
94 while (hmp->flusher_td) {
95 hmp->flusher_signal = HAMMER_FLUSHER_IMMEDIATE;
96 wakeup(&hmp->flusher_signal);
97 tsleep(&hmp->flusher_exiting, 0, "hmrwex", 0);
/*
 * Body of the flusher thread.  arg is the hammer_mount_t that
 * hammer_flusher_create() passed to lwkt_create().
 *
 * Each pass: wait for hmp->flusher_lock to clear, latch flusher_next into
 * flusher_act, clean up loose I/Os, run hammer_flusher_flush(), clean up
 * loose I/Os again, then publish flusher_act as flusher_done and wake
 * anyone sleeping on &hmp->flusher_done (hammer_flusher_sync()).  The
 * thread then waits for the next signal or for work on hmp->flush_list.
 *
 * The exit test is flusher_exiting set with an empty flush list; on the
 * way out the thread clears hmp->flusher_td and wakes
 * hammer_flusher_destroy().  NOTE(review): the loop header and the
 * statement taken by the exit test are not visible in this excerpt.
 */
103 hammer_flusher_thread(void *arg)
105 hammer_mount_t hmp = arg;
108 while (hmp->flusher_lock)
109 tsleep(&hmp->flusher_lock, 0, "hmrhld", 0);
/* Latch the sequence number that this pass will report as done. */
110 hmp->flusher_act = hmp->flusher_next;
113 hammer_flusher_clean_loose_ios(hmp);
114 hammer_flusher_flush(hmp);
115 hammer_flusher_clean_loose_ios(hmp);
/* Publish completion of this pass and wake hammer_flusher_sync() waiters. */
116 hmp->flusher_done = hmp->flusher_act;
118 wakeup(&hmp->flusher_done);
123 if (hmp->flusher_exiting && TAILQ_EMPTY(&hmp->flush_list))
128 * This is a hack until we can dispose of frontend buffer
129 * cache buffers on the frontend.
131 if (hmp->flusher_signal &&
132 hmp->flusher_signal < HAMMER_FLUSHER_IMMEDIATE) {
/*
 * A pending signal below IMMEDIATE (e.g. the counts posted by
 * hammer_flusher_async()) is consumed one per pass, with a timed
 * sleep of hz / 10 ticks before the next pass.
 */
133 --hmp->flusher_signal;
134 tsleep(&hmp->flusher_signal, 0, "hmrqwk", hz / 10);
/* Sleep until signalled or until something is queued on the flush list. */
136 while (hmp->flusher_signal == 0 &&
137 TAILQ_EMPTY(&hmp->flush_list)) {
138 tsleep(&hmp->flusher_signal, 0, "hmrwwa", 0);
140 hmp->flusher_signal = 0;
/* Thread exit: clear the thread pointer and wake hammer_flusher_destroy(). */
143 hmp->flusher_td = NULL;
144 wakeup(&hmp->flusher_exiting);
/*
 * Drain hmp->lose_list.
 *
 * Every io found at the head of the list is unlinked from it, referenced
 * via hammer_ref() and then released with hammer_rel_buffer(buffer, 0).
 * NOTE(review): the assignment that sets `buffer` from `io` is not visible
 * in this excerpt.
 */
149 hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
151 hammer_buffer_t buffer;
155 * loose ends - buffers without bp's aren't tracked by the kernel
156 * and can build up, so clean them out. This can occur when an
157 * IO completes on a buffer with no references left.
159 while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
160 KKASSERT(io->mod_list == &hmp->lose_list);
/* Unlink from the lose list, then take a reference for the release below. */
161 TAILQ_REMOVE(io->mod_list, io, mod_entry);
163 hammer_ref(&io->lock);
165 hammer_rel_buffer(buffer, 0);
/*
 * Flush the inodes at the head of hmp->flush_list that belong to the
 * current flush group (hmp->flusher_act), then finalize.
 *
 * The work runs inside a transaction opened with
 * hammer_start_transaction_fls().  Each inode is removed from the list,
 * synced with hammer_sync_inode() (the result is stored in ip->error) and
 * completed with hammer_flush_inode_done().  hammer_flusher_finalize() is
 * additionally called inside the loop whenever hammer_must_finalize_undo()
 * returns non-zero, and once more after the loop.  The transaction's tid
 * is saved in hmp->flusher_tid before the transaction is closed.
 */
173 hammer_flusher_flush(hammer_mount_t hmp)
175 struct hammer_transaction trans;
176 hammer_blockmap_t rootmap;
/* Open the transaction used for the whole flush pass. */
179 hammer_start_transaction_fls(&trans, hmp);
180 rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
182 while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
184 * Stop when we hit a different flush group
186 if (ip->flush_group != hmp->flusher_act)
190 * Remove the inode from the flush list and inherit
191 * its reference, sync, and clean-up.
193 TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
/* The sync result is kept on the inode rather than returned to a caller. */
194 ip->error = hammer_sync_inode(ip);
195 hammer_flush_inode_done(ip);
198 * XXX this breaks atomicy
200 if (hammer_must_finalize_undo(hmp)) {
/* Calls Debugger() with a diagnostic message, then finalizes early. */
201 Debugger("Too many undos!!");
202 hammer_flusher_finalize(&trans);
205 hammer_flusher_finalize(&trans);
206 hmp->flusher_tid = trans.tid;
207 hammer_done_transaction(&trans);
211 * If the UNDO area gets over half full we have to flush it. We can't
212 * afford the UNDO area becoming completely full as that would break
213 * the crash recovery atomicity.
217 hammer_must_finalize_undo(hammer_mount_t hmp)
/*
 * Compares hammer_undo_space(hmp) against half of hammer_undo_max(hmp).
 * The caller in hammer_flusher_flush() treats a non-zero return as "finalize
 * now".  NOTE(review): the branch bodies and return statements are not
 * visible in this excerpt; hammer_undo_space() presumably reports the
 * remaining (free) UNDO space -- confirm against its definition.
 */
219 if (hammer_undo_space(hmp) < hammer_undo_max(hmp) / 2) {
228 * To finalize the flush we finish flushing all undo and data buffers
229 * still present, then we update the volume header and flush it,
230 * then we flush out the meta-data (that can now be undone).
232 * Note that as long as the undo fifo's start and end points do not
233 * match, we always must at least update the volume header.
235 * The sync_lock is used by other threads to issue modifying operations
236 * to HAMMER media without crossing a synchronization boundary or messing
237 * up the media synchronization operation. Specifically, it covers the
238 * pruning and reblocking ioctls, and allows the frontend strategy code
239 * to allocate media data space.
243 hammer_flusher_finalize(hammer_transaction_t trans)
/*
 * Finalize a flush for the mount referenced by trans->hmp, using
 * trans->rootvol as the root volume.  Steps, in the order they appear:
 *
 *  1. Take hmp->sync_lock exclusively.
 *  2. If the root volume is modified and the in-memory blockmap differs
 *     from the on-disk vol0_blockmap, set the CRC on each zone's blockmap
 *     entry and copy the in-memory blockmap into the volume header inside
 *     a hammer_modify_volume()/hammer_modify_volume_done() pair.
 *  3. Clear the undo history, then drain hmp->undo_list.
 *  4. Drain hmp->data_list.
 *  5. Sleep until hmp->io_running_count reaches zero.
 *  6. Update vol0_next_tid and the UNDO FIFO first_offset in the volume
 *     header if they changed, then record rootmap->next_offset in
 *     hmp->flusher_undo_start.
 *  7. If the root volume is modified, set its CRC and flush it.
 *  8. Sleep again until hmp->io_running_count reaches zero.
 *  9. Drain hmp->meta_list.
 * 10. Release hmp->sync_lock.
 *
 * Each drain loop asserts io->modify_refs == 0 and that the io is not a
 * volume structure, references the io and releases it with
 * hammer_rel_buffer(..., 1).  NOTE(review): the per-io flush call and the
 * `count` bookkeeping that feeds the hkprintf() calls are not visible in
 * this excerpt.
 */
245 hammer_mount_t hmp = trans->hmp;
246 hammer_volume_t root_volume = trans->rootvol;
247 hammer_blockmap_t rootmap;
248 const int bmsize = sizeof(root_volume->ondisk->vol0_blockmap);
253 hammer_lock_ex(&hmp->sync_lock);
254 rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
257 * Sync the blockmap to the root volume ondisk buffer and generate
258 * the appropriate undo record. We have to generate the UNDO even
259 * though we flush the volume header along with the UNDO fifo update
260 * because the meta-data (including the volume header) is flushed
261 * after the fifo update, not before, and may have to be undone.
263 * No UNDOs can be created after this point until we finish the
266 if (root_volume->io.modified &&
267 bcmp(hmp->blockmap, root_volume->ondisk->vol0_blockmap, bmsize)) {
268 hammer_modify_volume(trans, root_volume,
269 &root_volume->ondisk->vol0_blockmap,
271 for (i = 0; i < HAMMER_MAX_ZONES; ++i)
272 hammer_crc_set_blockmap(&hmp->blockmap[i]);
273 bcopy(hmp->blockmap, root_volume->ondisk->vol0_blockmap,
275 hammer_modify_volume_done(root_volume);
279 * Flush the undo bufs, clear the undo cache.
281 hammer_clear_undo_history(hmp);
284 while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
285 KKASSERT(io->modify_refs == 0);
286 hammer_ref(&io->lock);
287 KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
289 hammer_rel_buffer((hammer_buffer_t)io, 1);
293 hkprintf("X%d", count);
/* Same drain pattern, applied to the data buffer list. */
299 while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
300 KKASSERT(io->modify_refs == 0);
301 hammer_ref(&io->lock);
302 KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
304 hammer_rel_buffer((hammer_buffer_t)io, 1);
308 hkprintf("Y%d", count);
311 * Wait for I/O to complete
314 while (hmp->io_running_count)
315 tsleep(&hmp->io_running_count, 0, "hmrfl1", 0);
319 * Update the root volume's next_tid field. This field is updated
320 * without any related undo.
322 if (root_volume->ondisk->vol0_next_tid != hmp->next_tid) {
/* NULL transaction / NULL range: the modification is made without an undo record. */
323 hammer_modify_volume(NULL, root_volume, NULL, 0);
324 root_volume->ondisk->vol0_next_tid = hmp->next_tid;
325 hammer_modify_volume_done(root_volume);
329 * Update the UNDO FIFO's first_offset. Same deal.
331 if (rootmap->first_offset != hmp->flusher_undo_start) {
332 hammer_modify_volume(NULL, root_volume, NULL, 0);
333 rootmap->first_offset = hmp->flusher_undo_start;
334 root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX].first_offset = rootmap->first_offset;
335 hammer_crc_set_blockmap(&root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]);
336 hammer_modify_volume_done(root_volume);
/*
 * Record the UNDO FIFO's current next_offset; the first_offset update
 * above consumes this value on a later finalize.  trans->hmp is the
 * same mount as the local hmp.
 */
338 trans->hmp->flusher_undo_start = rootmap->next_offset;
341 * Flush the root volume header.
343 * If a crash occurs while the root volume header is being written
344 * we just have to hope that the undo range has been updated. It
345 * should be done in one I/O but XXX this won't be perfect.
347 if (root_volume->io.modified) {
348 hammer_crc_set_volume(root_volume->ondisk);
349 hammer_io_flush(&root_volume->io);
353 * Wait for I/O to complete
356 while (hmp->io_running_count)
357 tsleep(&hmp->io_running_count, 0, "hmrfl2", 0);
361 * Flush meta-data. The meta-data will be undone if we crash
362 * so we can safely flush it asynchronously.
365 while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
366 KKASSERT(io->modify_refs == 0);
367 hammer_ref(&io->lock);
368 KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
370 hammer_rel_buffer((hammer_buffer_t)io, 1);
/* Release the sync_lock taken at the top of the function. */
373 hammer_unlock(&hmp->sync_lock);
375 hkprintf("Z%d", count);