HAMMER 61G/Many: Stabilization of new flush_group code
[dragonfly.git] / sys / vfs / hammer / hammer_flusher.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.38 2008/07/13 01:12:41 dillon Exp $
35  */
36 /*
37  * HAMMER dependency flusher thread
38  *
39  * Meta-data updates create buffer dependencies which are arranged as a
40  * hierarchy of lists.
41  */
42
43 #include "hammer.h"
44
45 static void hammer_flusher_master_thread(void *arg);
46 static void hammer_flusher_slave_thread(void *arg);
47 static void hammer_flusher_flush(hammer_mount_t hmp);
48 static void hammer_flusher_flush_inode(hammer_inode_t ip,
49                                         hammer_transaction_t trans);
50
51 /*
52  * Support structures for the flusher threads.
53  */
54 struct hammer_flusher_info {
55         TAILQ_ENTRY(hammer_flusher_info) entry;
56         struct hammer_mount *hmp;
57         thread_t        td;
58         int             runstate;
59         int             count;
60         hammer_flush_group_t flg;
61         hammer_inode_t  work_array[HAMMER_FLUSH_GROUP_SIZE];
62 };
63
64 typedef struct hammer_flusher_info *hammer_flusher_info_t;
65
66 /*
67  * Sync all inodes pending on the flusher.
68  *
69  * All flush groups will be flushed.  This does not queue dirty inodes
70  * to the flush groups, it just flushes out what has already been queued!
71  */
72 void
73 hammer_flusher_sync(hammer_mount_t hmp)
74 {
75         int seq;
76
77         seq = hammer_flusher_async(hmp, NULL);
78         while ((int)(seq - hmp->flusher.done) > 0)
79                 tsleep(&hmp->flusher.done, 0, "hmrfls", 0);
80 }
81
82 /*
83  * Sync all inodes pending on the flusher - return immediately.
84  *
85  * All flush groups will be flushed.
86  */
87 int
88 hammer_flusher_async(hammer_mount_t hmp, hammer_flush_group_t close_flg)
89 {
90         hammer_flush_group_t flg;
91         int seq = hmp->flusher.next;
92
93         TAILQ_FOREACH(flg, &hmp->flush_group_list, flush_entry) {
94                 if (flg->running == 0)
95                         ++seq;
96                 flg->closed = 1;
97                 if (flg == close_flg)
98                         break;
99         }
100         if (hmp->flusher.td) {
101                 if (hmp->flusher.signal++ == 0)
102                         wakeup(&hmp->flusher.signal);
103         } else {
104                 seq = hmp->flusher.done;
105         }
106         return(seq);
107 }
108
109 int
110 hammer_flusher_async_one(hammer_mount_t hmp)
111 {
112         int seq;
113
114         if (hmp->flusher.td) {
115                 seq = hmp->flusher.next;
116                 if (hmp->flusher.signal++ == 0)
117                         wakeup(&hmp->flusher.signal);
118         } else {
119                 seq = hmp->flusher.done;
120         }
121         return(seq);
122 }
123
124 void
125 hammer_flusher_wait(hammer_mount_t hmp, int seq)
126 {
127         while ((int)(seq - hmp->flusher.done) > 0)
128                 tsleep(&hmp->flusher.done, 0, "hmrfls", 0);
129 }
130
131 void
132 hammer_flusher_create(hammer_mount_t hmp)
133 {
134         hammer_flusher_info_t info;
135         int i;
136
137         hmp->flusher.signal = 0;
138         hmp->flusher.act = 0;
139         hmp->flusher.done = 0;
140         hmp->flusher.next = 1;
141         hammer_ref(&hmp->flusher.finalize_lock);
142         TAILQ_INIT(&hmp->flusher.run_list);
143         TAILQ_INIT(&hmp->flusher.ready_list);
144
145         lwkt_create(hammer_flusher_master_thread, hmp,
146                     &hmp->flusher.td, NULL, 0, -1, "hammer-M");
147         for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) {
148                 info = kmalloc(sizeof(*info), M_HAMMER, M_WAITOK|M_ZERO);
149                 info->hmp = hmp;
150                 TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
151                 lwkt_create(hammer_flusher_slave_thread, info,
152                             &info->td, NULL, 0, -1, "hammer-S%d", i);
153         }
154 }
155
156 void
157 hammer_flusher_destroy(hammer_mount_t hmp)
158 {
159         hammer_flusher_info_t info;
160
161         /*
162          * Kill the master
163          */
164         hmp->flusher.exiting = 1;
165         while (hmp->flusher.td) {
166                 ++hmp->flusher.signal;
167                 wakeup(&hmp->flusher.signal);
168                 tsleep(&hmp->flusher.exiting, 0, "hmrwex", hz);
169         }
170
171         /*
172          * Kill the slaves
173          */
174         while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) != NULL) {
175                 KKASSERT(info->runstate == 0);
176                 TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
177                 info->runstate = -1;
178                 wakeup(&info->runstate);
179                 while (info->td)
180                         tsleep(&info->td, 0, "hmrwwc", 0);
181                 TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
182                 kfree(info, M_HAMMER);
183         }
184 }
185
186 /*
187  * The master flusher thread manages the flusher sequence id and
188  * synchronization with the slave work threads.
189  */
190 static void
191 hammer_flusher_master_thread(void *arg)
192 {
193         hammer_flush_group_t flg;
194         hammer_mount_t hmp;
195
196         hmp = arg;
197
198         for (;;) {
199                 /*
200                  * Do at least one flush cycle.  We may have to update the
201                  * UNDO FIFO even if no inodes are queued.
202                  */
203                 for (;;) {
204                         while (hmp->flusher.group_lock)
205                                 tsleep(&hmp->flusher.group_lock, 0, "hmrhld", 0);
206                         hmp->flusher.act = hmp->flusher.next;
207                         ++hmp->flusher.next;
208                         hammer_flusher_clean_loose_ios(hmp);
209                         hammer_flusher_flush(hmp);
210                         hmp->flusher.done = hmp->flusher.act;
211                         wakeup(&hmp->flusher.done);
212                         flg = TAILQ_FIRST(&hmp->flush_group_list);
213                         if (flg == NULL || flg->closed == 0)
214                                 break;
215                 }
216
217                 /*
218                  * Wait for activity.
219                  */
220                 if (hmp->flusher.exiting && TAILQ_EMPTY(&hmp->flush_group_list))
221                         break;
222                 while (hmp->flusher.signal == 0)
223                         tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0);
224                 hmp->flusher.signal = 0;
225         }
226
227         /*
228          * And we are done.
229          */
230         hmp->flusher.td = NULL;
231         wakeup(&hmp->flusher.exiting);
232         lwkt_exit();
233 }
234
235 /*
236  * Flush all inodes in the current flush group.
237  */
238 static void
239 hammer_flusher_flush(hammer_mount_t hmp)
240 {
241         hammer_flusher_info_t info;
242         hammer_flush_group_t flg;
243         hammer_reserve_t resv;
244         hammer_inode_t ip;
245         hammer_inode_t next_ip;
246         int slave_index;
247         int count;
248
249         /*
250          * Just in-case there's a flush race on mount
251          */
252         if (TAILQ_FIRST(&hmp->flusher.ready_list) == NULL)
253                 return;
254
255         /*
256          * We only do one flg but we may have to loop/retry.
257          */
258         count = 0;
259         while ((flg = TAILQ_FIRST(&hmp->flush_group_list)) != NULL) {
260                 ++count;
261                 if (hammer_debug_general & 0x0001) {
262                         kprintf("hammer_flush %d ttl=%d recs=%d\n",
263                                 hmp->flusher.act,
264                                 flg->total_count, flg->refs);
265                 }
266                 hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
267
268                 /*
269                  * If the previous flush cycle just about exhausted our
270                  * UNDO space we may have to do a dummy cycle to move the
271                  * first_offset up before actually digging into a new cycle,
272                  * or the new cycle will not have sufficient undo space.
273                  */
274                 if (hammer_flusher_undo_exhausted(&hmp->flusher.trans, 3))
275                         hammer_flusher_finalize(&hmp->flusher.trans, 0);
276
277                 /*
278                  * Iterate the inodes in the flg's flush_list and assign
279                  * them to slaves.
280                  */
281                 flg->running = 1;
282                 slave_index = 0;
283                 info = TAILQ_FIRST(&hmp->flusher.ready_list);
284                 next_ip = TAILQ_FIRST(&flg->flush_list);
285
286                 while ((ip = next_ip) != NULL) {
287                         next_ip = TAILQ_NEXT(ip, flush_entry);
288
289                         /*
290                          * Add ip to the slave's work array.  The slave is
291                          * not currently running.
292                          */
293                         info->work_array[info->count++] = ip;
294                         if (info->count != HAMMER_FLUSH_GROUP_SIZE)
295                                 continue;
296
297                         /*
298                          * Get the slave running
299                          */
300                         TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
301                         TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
302                         info->flg = flg;
303                         info->runstate = 1;
304                         wakeup(&info->runstate);
305
306                         /*
307                          * Get a new slave.  We may have to wait for one to
308                          * finish running.
309                          */
310                         while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) == NULL) {
311                                 tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);
312                         }
313                 }
314
315                 /*
316                  * Run the current slave if necessary
317                  */
318                 if (info->count) {
319                         TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
320                         TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
321                         info->flg = flg;
322                         info->runstate = 1;
323                         wakeup(&info->runstate);
324                 }
325
326                 /*
327                  * Wait for all slaves to finish running
328                  */
329                 while (TAILQ_FIRST(&hmp->flusher.run_list) != NULL)
330                         tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);
331
332                 /*
333                  * Do the final finalization, clean up
334                  */
335                 hammer_flusher_finalize(&hmp->flusher.trans, 1);
336                 hmp->flusher.tid = hmp->flusher.trans.tid;
337
338                 hammer_done_transaction(&hmp->flusher.trans);
339
340                 /*
341                  * Loop up on the same flg.  If the flg is done clean it up
342                  * and break out.  We only flush one flg.
343                  */
344                 if (TAILQ_FIRST(&flg->flush_list) == NULL) {
345                         KKASSERT(TAILQ_EMPTY(&flg->flush_list));
346                         KKASSERT(flg->refs == 0);
347                         TAILQ_REMOVE(&hmp->flush_group_list, flg, flush_entry);
348                         kfree(flg, M_HAMMER);
349                         break;
350                 }
351         }
352
353         /*
354          * We may have pure meta-data to flush, even if there were no
355          * flush groups.
356          */
357         if (count == 0 && hmp->locked_dirty_space) {
358                 hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
359                 hammer_flusher_finalize(&hmp->flusher.trans, 1);
360                 hammer_done_transaction(&hmp->flusher.trans);
361         }
362
363         /*
364          * Clean up any freed big-blocks (typically zone-2). 
365          * resv->flush_group is typically set several flush groups ahead
366          * of the free to ensure that the freed block is not reused until
367          * it can no longer be reused.
368          */
369         while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
370                 if (resv->flush_group != hmp->flusher.act)
371                         break;
372                 hammer_reserve_clrdelay(hmp, resv);
373         }
374 }
375
376
377 /*
378  * The slave flusher thread pulls work off the master flush_list until no
379  * work is left.
380  */
381 static void
382 hammer_flusher_slave_thread(void *arg)
383 {
384         hammer_flush_group_t flg;
385         hammer_flusher_info_t info;
386         hammer_mount_t hmp;
387         hammer_inode_t ip;
388         int i;
389
390         info = arg;
391         hmp = info->hmp;
392
393         for (;;) {
394                 while (info->runstate == 0)
395                         tsleep(&info->runstate, 0, "hmrssw", 0);
396                 if (info->runstate < 0)
397                         break;
398                 flg = info->flg;
399
400                 for (i = 0; i < info->count; ++i) {
401                         ip = info->work_array[i];
402                         hammer_flusher_flush_inode(ip, &hmp->flusher.trans);
403                 }
404                 info->count = 0;
405                 info->runstate = 0;
406                 TAILQ_REMOVE(&hmp->flusher.run_list, info, entry);
407                 TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
408                 wakeup(&hmp->flusher.ready_list);
409         }
410         info->td = NULL;
411         wakeup(&info->td);
412         lwkt_exit();
413 }
414
415 void
416 hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
417 {
418         hammer_buffer_t buffer;
419         hammer_io_t io;
420
421         /*
422          * loose ends - buffers without bp's aren't tracked by the kernel
423          * and can build up, so clean them out.  This can occur when an
424          * IO completes on a buffer with no references left.
425          */
426         if ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
427                 crit_enter();   /* biodone() race */
428                 while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
429                         KKASSERT(io->mod_list == &hmp->lose_list);
430                         TAILQ_REMOVE(&hmp->lose_list, io, mod_entry);
431                         io->mod_list = NULL;
432                         if (io->lock.refs == 0)
433                                 ++hammer_count_refedbufs;
434                         hammer_ref(&io->lock);
435                         buffer = (void *)io;
436                         hammer_rel_buffer(buffer, 0);
437                 }
438                 crit_exit();
439         }
440 }
441
442 /*
443  * Flush a single inode that is part of a flush group.
444  *
445  * NOTE!  The sync code can return EWOULDBLOCK if the flush operation
446  * would otherwise blow out the buffer cache.  hammer_flush_inode_done()
447  * will re-queue the inode for the next flush sequence and force the
448  * flusher to run again if this occurs.
449  */
450 static
451 void
452 hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
453 {
454         hammer_mount_t hmp = ip->hmp;
455         int error;
456
457         hammer_flusher_clean_loose_ios(hmp);
458         error = hammer_sync_inode(ip);
459         if (error != EWOULDBLOCK)
460                 ip->error = error;
461         hammer_flush_inode_done(ip);
462         while (hmp->flusher.finalize_want)
463                 tsleep(&hmp->flusher.finalize_want, 0, "hmrsxx", 0);
464         if (hammer_flusher_undo_exhausted(trans, 1)) {
465                 kprintf("HAMMER: Warning: UNDO area too small!\n");
466                 hammer_flusher_finalize(trans, 1);
467         } else if (hammer_flusher_meta_limit(trans->hmp)) {
468                 hammer_flusher_finalize(trans, 0);
469         }
470 }
471
472 /*
473  * Return non-zero if the UNDO area has less then (QUARTER / 4) of its
474  * space left.
475  *
476  * 1/4 - Emergency free undo space level.  Below this point the flusher
477  *       will finalize even if directory dependancies have not been resolved.
478  *
479  * 2/4 - Used by the pruning and reblocking code.  These functions may be
480  *       running in parallel with a flush and cannot be allowed to drop
481  *       available undo space to emergency levels.
482  *
483  * 3/4 - Used at the beginning of a flush to force-sync the volume header
484  *       to give the flush plenty of runway to work in.
485  */
486 int
487 hammer_flusher_undo_exhausted(hammer_transaction_t trans, int quarter)
488 {
489         if (hammer_undo_space(trans) <
490             hammer_undo_max(trans->hmp) * quarter / 4) {
491                 return(1);
492         } else {
493                 return(0);
494         }
495 }
496
497 /*
498  * Flush all pending UNDOs, wait for write completion, update the volume
499  * header with the new UNDO end position, and flush it.  Then
500  * asynchronously flush the meta-data.
501  *
502  * If this is the last finalization in a flush group we also synchronize
503  * our cached blockmap and set hmp->flusher_undo_start and our cached undo
504  * fifo first_offset so the next flush resets the FIFO pointers.
505  *
506  * If this is not final it is being called because too many dirty meta-data
507  * buffers have built up and must be flushed with UNDO synchronization to
508  * avoid a buffer cache deadlock.
509  */
510 void
511 hammer_flusher_finalize(hammer_transaction_t trans, int final)
512 {
513         hammer_volume_t root_volume;
514         hammer_blockmap_t cundomap, dundomap;
515         hammer_mount_t hmp;
516         hammer_io_t io;
517         int count;
518         int i;
519
520         hmp = trans->hmp;
521         root_volume = trans->rootvol;
522
523         /*
524          * Exclusively lock the flusher.  This guarantees that all dirty
525          * buffers will be idled (have a mod-count of 0).
526          */
527         ++hmp->flusher.finalize_want;
528         hammer_lock_ex(&hmp->flusher.finalize_lock);
529
530         /*
531          * If this isn't the final sync several threads may have hit the
532          * meta-limit at the same time and raced.  Only sync if we really
533          * have to, after acquiring the lock.
534          */
535         if (final == 0 && !hammer_flusher_meta_limit(hmp))
536                 goto done;
537
538         /*
539          * Flush data buffers.  This can occur asynchronously and at any
540          * time.  We must interlock against the frontend direct-data write
541          * but do not have to acquire the sync-lock yet.
542          */
543         count = 0;
544         while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
545                 if (io->lock.refs == 0)
546                         ++hammer_count_refedbufs;
547                 hammer_ref(&io->lock);
548                 hammer_io_write_interlock(io);
549                 KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
550                 hammer_io_flush(io);
551                 hammer_io_done_interlock(io);
552                 hammer_rel_buffer((hammer_buffer_t)io, 0);
553                 ++count;
554         }
555
556         /*
557          * The sync-lock is required for the remaining sequence.  This lock
558          * prevents meta-data from being modified.
559          */
560         hammer_sync_lock_ex(trans);
561
562         /*
563          * If we have been asked to finalize the volume header sync the
564          * cached blockmap to the on-disk blockmap.  Generate an UNDO
565          * record for the update.
566          */
567         if (final) {
568                 cundomap = &hmp->blockmap[0];
569                 dundomap = &root_volume->ondisk->vol0_blockmap[0];
570                 if (root_volume->io.modified) {
571                         hammer_modify_volume(trans, root_volume,
572                                              dundomap, sizeof(hmp->blockmap));
573                         for (i = 0; i < HAMMER_MAX_ZONES; ++i)
574                                 hammer_crc_set_blockmap(&cundomap[i]);
575                         bcopy(cundomap, dundomap, sizeof(hmp->blockmap));
576                         hammer_modify_volume_done(root_volume);
577                 }
578         }
579
580         /*
581          * Flush UNDOs
582          */
583         count = 0;
584         while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
585                 KKASSERT(io->modify_refs == 0);
586                 if (io->lock.refs == 0)
587                         ++hammer_count_refedbufs;
588                 hammer_ref(&io->lock);
589                 KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
590                 hammer_io_flush(io);
591                 hammer_rel_buffer((hammer_buffer_t)io, 0);
592                 ++count;
593         }
594
595         /*
596          * Wait for I/Os to complete
597          */
598         hammer_flusher_clean_loose_ios(hmp);
599         hammer_io_wait_all(hmp, "hmrfl1");
600
601         /*
602          * Update the on-disk volume header with new UNDO FIFO end position
603          * (do not generate new UNDO records for this change).  We have to
604          * do this for the UNDO FIFO whether (final) is set or not.
605          *
606          * Also update the on-disk next_tid field.  This does not require
607          * an UNDO.  However, because our TID is generated before we get
608          * the sync lock another sync may have beat us to the punch.
609          *
610          * This also has the side effect of updating first_offset based on
611          * a prior finalization when the first finalization of the next flush
612          * cycle occurs, removing any undo info from the prior finalization
613          * from consideration.
614          *
615          * The volume header will be flushed out synchronously.
616          */
617         dundomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
618         cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
619
620         if (dundomap->first_offset != cundomap->first_offset ||
621             dundomap->next_offset != cundomap->next_offset) {
622                 hammer_modify_volume(NULL, root_volume, NULL, 0);
623                 dundomap->first_offset = cundomap->first_offset;
624                 dundomap->next_offset = cundomap->next_offset;
625                 hammer_crc_set_blockmap(dundomap);
626                 hammer_modify_volume_done(root_volume);
627         }
628
629         if (root_volume->io.modified) {
630                 hammer_modify_volume(NULL, root_volume, NULL, 0);
631                 if (root_volume->ondisk->vol0_next_tid < trans->tid)
632                         root_volume->ondisk->vol0_next_tid = trans->tid;
633                 hammer_crc_set_volume(root_volume->ondisk);
634                 hammer_modify_volume_done(root_volume);
635                 hammer_io_flush(&root_volume->io);
636         }
637
638         /*
639          * Wait for I/Os to complete
640          */
641         hammer_flusher_clean_loose_ios(hmp);
642         hammer_io_wait_all(hmp, "hmrfl2");
643
644         /*
645          * Flush meta-data.  The meta-data will be undone if we crash
646          * so we can safely flush it asynchronously.
647          *
648          * Repeated catchups will wind up flushing this update's meta-data
649          * and the UNDO buffers for the next update simultaniously.  This
650          * is ok.
651          */
652         count = 0;
653         while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
654                 KKASSERT(io->modify_refs == 0);
655                 if (io->lock.refs == 0)
656                         ++hammer_count_refedbufs;
657                 hammer_ref(&io->lock);
658                 KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
659                 hammer_io_flush(io);
660                 hammer_rel_buffer((hammer_buffer_t)io, 0);
661                 ++count;
662         }
663
664         /*
665          * If this is the final finalization for the flush group set
666          * up for the next sequence by setting a new first_offset in
667          * our cached blockmap and clearing the undo history.
668          *
669          * Even though we have updated our cached first_offset, the on-disk
670          * first_offset still governs available-undo-space calculations.
671          */
672         if (final) {
673                 cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
674                 cundomap->first_offset = cundomap->next_offset;
675                 hammer_clear_undo_history(hmp);
676         }
677
678         hammer_sync_unlock(trans);
679
680 done:
681         hammer_unlock(&hmp->flusher.finalize_lock);
682         if (--hmp->flusher.finalize_want == 0)
683                 wakeup(&hmp->flusher.finalize_want);
684 }
685
686 /*
687  * Return non-zero if too many dirty meta-data buffers have built up.
688  *
689  * Since we cannot allow such buffers to flush until we have dealt with
690  * the UNDOs, we risk deadlocking the kernel's buffer cache.
691  */
692 int
693 hammer_flusher_meta_limit(hammer_mount_t hmp)
694 {
695         if (hmp->locked_dirty_space + hmp->io_running_space >
696             hammer_limit_dirtybufspace) {
697                 return(1);
698         }
699         return(0);
700 }
701
702 int
703 hammer_flusher_meta_halflimit(hammer_mount_t hmp)
704 {
705         if (hmp->locked_dirty_space + hmp->io_running_space >
706             hammer_limit_dirtybufspace / 2) {
707                 return(1);
708         }
709         return(0);
710 }
711