HAMMER 46/Many: Performance pass, media changes, bug fixes.
[dragonfly.git] / sys / vfs / hammer / hammer_flusher.c
/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.17 2008/05/18 01:48:50 dillon Exp $
 */
/*
 * HAMMER dependency flusher thread
 *
 * Meta-data updates create buffer dependencies which are arranged as a
 * hierarchy of lists.
 */

#include "hammer.h"

static void hammer_flusher_thread(void *arg);
static void hammer_flusher_clean_loose_ios(hammer_mount_t hmp);
static void hammer_flusher_flush(hammer_mount_t hmp);
static int hammer_must_finalize_undo(hammer_mount_t hmp);
static void hammer_flusher_finalize(hammer_transaction_t trans);

#define HAMMER_FLUSHER_IMMEDIATE	16

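/*
 * Request an immediate flush and wait for it to complete.  The current
 * value of flusher_next identifies the flush group our request will run
 * under; we sleep until flusher_done catches up to it.  The signed
 * difference keeps the sequence comparison safe across wraparound.
 */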
void
hammer_flusher_sync(hammer_mount_t hmp)
{
	int seq;

	if (hmp->flusher_td) {
		seq = hmp->flusher_next;
		if (hmp->flusher_signal == 0) {
			hmp->flusher_signal = HAMMER_FLUSHER_IMMEDIATE;
			wakeup(&hmp->flusher_signal);
		}
		while ((int)(seq - hmp->flusher_done) > 0)
			tsleep(&hmp->flusher_done, 0, "hmrfls", 0);
	}
}

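/*
 * Request an asynchronous flush.  The flusher is woken if it was idle;
 * the caller does not wait for completion.
 */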
void
hammer_flusher_async(hammer_mount_t hmp)
{
	if (hmp->flusher_td) {
		if (hmp->flusher_signal++ == 0)
			wakeup(&hmp->flusher_signal);
	}
}

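/*
 * Create the flusher thread at mount time.  Sequence numbers start with
 * flusher_next one ahead of flusher_act/flusher_done.
 */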
void
hammer_flusher_create(hammer_mount_t hmp)
{
	hmp->flusher_signal = 0;
	hmp->flusher_act = 0;
	hmp->flusher_done = 0;
	hmp->flusher_next = 1;
	lwkt_create(hammer_flusher_thread, hmp, &hmp->flusher_td, NULL,
		    0, -1, "hammer");
}

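/*
 * Destroy the flusher thread at unmount time.  Set the exit flag and
 * keep kicking the thread until it clears hmp->flusher_td on its way out.
 */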
void
hammer_flusher_destroy(hammer_mount_t hmp)
{
	if (hmp->flusher_td) {
		hmp->flusher_exiting = 1;
		while (hmp->flusher_td) {
			hmp->flusher_signal = HAMMER_FLUSHER_IMMEDIATE;
			wakeup(&hmp->flusher_signal);
			tsleep(&hmp->flusher_exiting, 0, "hmrwex", 0);
		}
	}
}

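/*
 * The flusher thread's main loop.  Each iteration flushes one flush
 * group, then the thread waits for more work or for an exit request.
 */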
static void
hammer_flusher_thread(void *arg)
{
	hammer_mount_t hmp = arg;

	for (;;) {
		while (hmp->flusher_lock)
			tsleep(&hmp->flusher_lock, 0, "hmrhld", 0);
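		/*
		 * Open the next flush group.  flusher_act is the group
		 * being flushed this iteration; flusher_next is advanced
		 * immediately so new work queues to the next group.
		 */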
		hmp->flusher_act = hmp->flusher_next;
		++hmp->flusher_next;
		hkprintf("F");
		hammer_flusher_clean_loose_ios(hmp);
		hammer_flusher_flush(hmp);
		hammer_flusher_clean_loose_ios(hmp);
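		/*
		 * Mark the group complete and wake anyone blocked in
		 * hammer_flusher_sync().
		 */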
		hmp->flusher_done = hmp->flusher_act;

		wakeup(&hmp->flusher_done);

		/*
		 * Wait for activity.
		 */
		if (hmp->flusher_exiting && TAILQ_EMPTY(&hmp->flush_list))
			break;
		hkprintf("E");

		/*
		 * This is a hack until we can dispose of frontend buffer
		 * cache buffers on the frontend.
		 */
		if (hmp->flusher_signal &&
		    hmp->flusher_signal < HAMMER_FLUSHER_IMMEDIATE) {
			--hmp->flusher_signal;
			tsleep(&hmp->flusher_signal, 0, "hmrqwk", hz / 10);
		} else {
			while (hmp->flusher_signal == 0 &&
			       TAILQ_EMPTY(&hmp->flush_list)) {
				tsleep(&hmp->flusher_signal, 0, "hmrwwa", 0);
			}
			hmp->flusher_signal = 0;
		}
	}
	hmp->flusher_td = NULL;
	wakeup(&hmp->flusher_exiting);
	lwkt_exit();
}

static void
hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
{
	hammer_buffer_t buffer;
	hammer_io_t io;

	/*
	 * Loose ends: buffers without bp's aren't tracked by the kernel
	 * and can build up, so clean them out.  This can occur when an
	 * I/O completes on a buffer with no references left.
	 */
	while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
		KKASSERT(io->mod_list == &hmp->lose_list);
		TAILQ_REMOVE(io->mod_list, io, mod_entry);
		io->mod_list = NULL;
		hammer_ref(&io->lock);
		buffer = (void *)io;
		hammer_rel_buffer(buffer, 0);
	}
}

/*
 * Flush the current flush group: sync every inode queued under the
 * active flush group sequence, then finalize the work to the media.
 */
static void
hammer_flusher_flush(hammer_mount_t hmp)
{
	struct hammer_transaction trans;
	hammer_blockmap_t rootmap;
	hammer_inode_t ip;

	hammer_start_transaction_fls(&trans, hmp);
	rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/*
	 * Flush all pending inodes
	 */
	while ((ip = TAILQ_FIRST(&hmp->flush_list)) != NULL) {
		/*
		 * Stop when we hit a different flush group
		 */
		if (ip->flush_group != hmp->flusher_act)
			break;

		/*
		 * Remove the inode from the flush list, inheriting its
		 * reference, then sync it and clean up.
		 */
		TAILQ_REMOVE(&hmp->flush_list, ip, flush_entry);
		ip->error = hammer_sync_inode(ip);
		hammer_flush_inode_done(ip);

		/*
		 * XXX this breaks atomicity
		 */
		if (hammer_must_finalize_undo(hmp)) {
			Debugger("Too many undos!!");
			hammer_flusher_finalize(&trans);
		}
	}
	hammer_flusher_finalize(&trans);
	hmp->flusher_tid = trans.tid;
	hammer_done_transaction(&trans);
}

/*
 * If the UNDO area gets over half full we have to flush it.  We can't
 * afford the UNDO area becoming completely full as that would break
 * the crash recovery atomicity.
 */
static
int
hammer_must_finalize_undo(hammer_mount_t hmp)
{
	if (hammer_undo_space(hmp) < hammer_undo_max(hmp) / 2) {
		hkprintf("*");
		return(1);
	} else {
		return(0);
	}
}

/*
 * To finalize the flush we finish flushing all undo and data buffers
 * still present, then we update the volume header and flush it,
 * then we flush out the meta-data (which can now be undone).
 *
 * Note that as long as the undo fifo's start and end points do not
 * match, we must always at least update the volume header.
 *
 * The sync_lock is used by other threads to issue modifying operations
 * to HAMMER media without crossing a synchronization boundary or messing
 * up the media synchronization operation.  Specifically, this covers
 * the pruning and reblocking ioctls, and allowing the frontend strategy
 * code to allocate media data space.
 */
static
void
hammer_flusher_finalize(hammer_transaction_t trans)
{
	hammer_mount_t hmp = trans->hmp;
	hammer_volume_t root_volume = trans->rootvol;
	hammer_blockmap_t rootmap;
	const int bmsize = sizeof(root_volume->ondisk->vol0_blockmap);
	hammer_io_t io;
	int count;
	int i;

	hammer_sync_lock_ex(trans);
	rootmap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

	/*
	 * Sync the blockmap to the root volume ondisk buffer and generate
	 * the appropriate undo record.  We have to generate the UNDO even
	 * though we flush the volume header along with the UNDO fifo update
	 * because the meta-data (including the volume header) is flushed
	 * after the fifo update, not before, and may have to be undone.
	 *
	 * No UNDOs can be created after this point until we finish the
	 * flush.
	 */
	if (root_volume->io.modified &&
	    bcmp(hmp->blockmap, root_volume->ondisk->vol0_blockmap, bmsize)) {
		hammer_modify_volume(trans, root_volume,
				     &root_volume->ondisk->vol0_blockmap,
				     bmsize);
		for (i = 0; i < HAMMER_MAX_ZONES; ++i)
			hammer_crc_set_blockmap(&hmp->blockmap[i]);
		bcopy(hmp->blockmap, root_volume->ondisk->vol0_blockmap,
		      bmsize);
		hammer_modify_volume_done(root_volume);
	}

059819e3 281 /*
c9b9e29d 282 * Flush the undo bufs, clear the undo cache.
059819e3 283 */
e8599db1
MD
284 hammer_clear_undo_history(hmp);
285
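	/*
	 * Each buffer is referenced across its hammer_io_flush() call so
	 * it cannot be reclaimed while the flush is in progress, then
	 * released afterwards.
	 */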
	count = 0;
	while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
		KKASSERT(io->modify_refs == 0);
		hammer_ref(&io->lock);
		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
		hammer_io_flush(io);
		hammer_rel_buffer((hammer_buffer_t)io, 0);
		++count;
	}
	if (count)
		hkprintf("X%d", count);

	/*
	 * Flush data bufs
	 */
	count = 0;
	while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
		KKASSERT(io->modify_refs == 0);
		hammer_ref(&io->lock);
		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
		hammer_io_flush(io);
		hammer_rel_buffer((hammer_buffer_t)io, 0);
		++count;
	}
	if (count)
		hkprintf("Y%d", count);

	/*
	 * Wait for I/O to complete
	 */
	crit_enter();
	while (hmp->io_running_count)
		tsleep(&hmp->io_running_count, 0, "hmrfl1", 0);
	crit_exit();

	/*
	 * Update the root volume's next_tid field.  This field is updated
	 * without any related undo.
	 */
	if (root_volume->ondisk->vol0_next_tid != hmp->next_tid) {
		hammer_modify_volume(NULL, root_volume, NULL, 0);
		root_volume->ondisk->vol0_next_tid = hmp->next_tid;
		hammer_modify_volume_done(root_volume);
	}

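	/*
	 * Debug aid: hammer_debug_recover_faults counts down across
	 * finalizations and drops into the debugger when it reaches
	 * zero, to help exercise crash recovery.
	 */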
	if (hammer_debug_recover_faults > 0) {
		if (--hammer_debug_recover_faults == 0) {
			Debugger("hammer_debug_recover_faults");
		}
	}

	/*
	 * Update the UNDO FIFO's first_offset.  Same deal.
	 */
	if (rootmap->first_offset != hmp->flusher_undo_start) {
		hammer_modify_volume(NULL, root_volume, NULL, 0);
		rootmap->first_offset = hmp->flusher_undo_start;
		root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX].first_offset = rootmap->first_offset;
		hammer_crc_set_blockmap(&root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX]);
		hammer_modify_volume_done(root_volume);
	}
	hmp->flusher_undo_start = rootmap->next_offset;

	/*
	 * Flush the root volume header.
	 *
	 * If a crash occurs while the root volume header is being written
	 * we just have to hope that the undo range has been updated.  It
	 * should be done in one I/O but XXX this won't be perfect.
	 */
	if (root_volume->io.modified) {
		hammer_crc_set_volume(root_volume->ondisk);
		hammer_io_flush(&root_volume->io);
	}

	/*
	 * Wait for I/O to complete
	 */
	crit_enter();
	while (hmp->io_running_count)
		tsleep(&hmp->io_running_count, 0, "hmrfl2", 0);
	crit_exit();

	/*
	 * Flush meta-data.  The meta-data will be undone if we crash
	 * so we can safely flush it asynchronously.
	 */
	count = 0;
	while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
		KKASSERT(io->modify_refs == 0);
		hammer_ref(&io->lock);
		KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
		hammer_io_flush(io);
		hammer_rel_buffer((hammer_buffer_t)io, 0);
		++count;
	}
	hammer_sync_unlock(trans);
	if (count)
		hkprintf("Z%d", count);
}