/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_flusher.c,v 1.42 2008/07/16 18:30:59 dillon Exp $
 */
/*
 * HAMMER dependency flusher thread
 *
 * Meta-data updates create buffer dependencies which are arranged as a
 * hierarchy of lists.
 */

#include "hammer.h"

static void hammer_flusher_master_thread(void *arg);
static void hammer_flusher_slave_thread(void *arg);
static void hammer_flusher_flush(hammer_mount_t hmp);
static void hammer_flusher_flush_inode(hammer_inode_t ip,
                                        hammer_transaction_t trans);

/*
 * Support structures for the flusher threads.
 */
struct hammer_flusher_info {
        TAILQ_ENTRY(hammer_flusher_info) entry;
        struct hammer_mount *hmp;
        thread_t        td;
        int             runstate;
        int             count;
        hammer_flush_group_t flg;
        hammer_inode_t  work_array[HAMMER_FLUSH_GROUP_SIZE];
};

typedef struct hammer_flusher_info *hammer_flusher_info_t;

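/*
 * The master thread hands each slave a batch of up to
 * HAMMER_FLUSH_GROUP_SIZE inodes via work_array/count, moving the slave
 * from the ready_list to the run_list while it is busy.
 */
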
/*
 * Sync all inodes pending on the flusher.
 *
 * All flush groups will be flushed.  This does not queue dirty inodes
 * to the flush groups, it just flushes out what has already been queued!
 */
void
hammer_flusher_sync(hammer_mount_t hmp)
{
        int seq;

        seq = hammer_flusher_async(hmp, NULL);
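        /*
         * Signed difference keeps the comparison correct even if the
         * flusher sequence counter wraps.
         */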
        while ((int)(seq - hmp->flusher.done) > 0)
                tsleep(&hmp->flusher.done, 0, "hmrfls", 0);
}

/*
 * Sync all inodes pending on the flusher - return immediately.
 *
 * All flush groups will be flushed.
 */
int
hammer_flusher_async(hammer_mount_t hmp, hammer_flush_group_t close_flg)
{
        hammer_flush_group_t flg;
        int seq = hmp->flusher.next;

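        /*
         * Close out every flush group up to and including close_flg (or
         * all of them if close_flg is NULL).  Each group which is not
         * already running will consume one flush cycle, which the
         * returned sequence number accounts for.
         */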
        TAILQ_FOREACH(flg, &hmp->flush_group_list, flush_entry) {
                if (flg->running == 0)
                        ++seq;
                flg->closed = 1;
                if (flg == close_flg)
                        break;
        }
        if (hmp->flusher.td) {
                if (hmp->flusher.signal++ == 0)
                        wakeup(&hmp->flusher.signal);
        } else {
                seq = hmp->flusher.done;
        }
        return(seq);
}

int
hammer_flusher_async_one(hammer_mount_t hmp)
{
        int seq;

        if (hmp->flusher.td) {
                seq = hmp->flusher.next;
                if (hmp->flusher.signal++ == 0)
                        wakeup(&hmp->flusher.signal);
        } else {
                seq = hmp->flusher.done;
        }
        return(seq);
}

void
hammer_flusher_wait(hammer_mount_t hmp, int seq)
{
        while ((int)(seq - hmp->flusher.done) > 0)
                tsleep(&hmp->flusher.done, 0, "hmrfls", 0);
}

void
hammer_flusher_create(hammer_mount_t hmp)
{
        hammer_flusher_info_t info;
        int i;

        hmp->flusher.signal = 0;
        hmp->flusher.act = 0;
        hmp->flusher.done = 0;
        hmp->flusher.next = 1;
        hammer_ref(&hmp->flusher.finalize_lock);
        TAILQ_INIT(&hmp->flusher.run_list);
        TAILQ_INIT(&hmp->flusher.ready_list);

        lwkt_create(hammer_flusher_master_thread, hmp,
                    &hmp->flusher.td, NULL, 0, -1, "hammer-M");
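
        /*
         * Create the pool of slave threads.  Each slave starts out idle
         * on the ready_list; the master hands out work by moving a slave
         * to the run_list and waking it up.
         */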
        for (i = 0; i < HAMMER_MAX_FLUSHERS; ++i) {
                info = kmalloc(sizeof(*info), M_HAMMER, M_WAITOK|M_ZERO);
                info->hmp = hmp;
                TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
                lwkt_create(hammer_flusher_slave_thread, info,
                            &info->td, NULL, 0, -1, "hammer-S%d", i);
        }
}

void
hammer_flusher_destroy(hammer_mount_t hmp)
{
        hammer_flusher_info_t info;

        /*
         * Kill the master
         */
        hmp->flusher.exiting = 1;
        while (hmp->flusher.td) {
                ++hmp->flusher.signal;
                wakeup(&hmp->flusher.signal);
                tsleep(&hmp->flusher.exiting, 0, "hmrwex", hz);
        }

        /*
         * Kill the slaves
         */
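        /*
         * A runstate of -1 tells a slave to exit.  The slave clears
         * info->td on its way out, which is what we wait on before
         * freeing its info structure.
         */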
        while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) != NULL) {
                KKASSERT(info->runstate == 0);
                TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
                info->runstate = -1;
                wakeup(&info->runstate);
                while (info->td)
                        tsleep(&info->td, 0, "hmrwwc", 0);
                kfree(info, M_HAMMER);
        }
}

/*
 * The master flusher thread manages the flusher sequence id and
 * synchronization with the slave work threads.
 */
static void
hammer_flusher_master_thread(void *arg)
{
        hammer_flush_group_t flg;
        hammer_mount_t hmp;

        hmp = arg;

        for (;;) {
                /*
                 * Do at least one flush cycle.  We may have to update the
                 * UNDO FIFO even if no inodes are queued.
                 */
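                /*
                 * Each pass assigns the next sequence number to
                 * flusher.act, runs the flush, then advances flusher.done
                 * so waiters in hammer_flusher_wait() can resume.  Keep
                 * looping as long as a closed flush group remains at the
                 * head of the list.
                 */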
                for (;;) {
                        while (hmp->flusher.group_lock)
                                tsleep(&hmp->flusher.group_lock, 0, "hmrhld", 0);
                        hmp->flusher.act = hmp->flusher.next;
                        ++hmp->flusher.next;
                        hammer_flusher_clean_loose_ios(hmp);
                        hammer_flusher_flush(hmp);
                        hmp->flusher.done = hmp->flusher.act;
                        wakeup(&hmp->flusher.done);
                        flg = TAILQ_FIRST(&hmp->flush_group_list);
                        if (flg == NULL || flg->closed == 0)
                                break;
                }

                /*
                 * Wait for activity.
                 */
                if (hmp->flusher.exiting && TAILQ_EMPTY(&hmp->flush_group_list))
                        break;
                while (hmp->flusher.signal == 0)
                        tsleep(&hmp->flusher.signal, 0, "hmrwwa", 0);
                hmp->flusher.signal = 0;
        }

        /*
         * And we are done.
         */
        hmp->flusher.td = NULL;
        wakeup(&hmp->flusher.exiting);
        lwkt_exit();
}

/*
 * Flush all inodes in the current flush group.
 */
static void
hammer_flusher_flush(hammer_mount_t hmp)
{
        hammer_flusher_info_t info;
        hammer_flush_group_t flg;
        hammer_reserve_t resv;
        hammer_inode_t ip;
        hammer_inode_t next_ip;
        int slave_index;
        int count;

        /*
         * Just in case there's a flush race on mount
         */
        if (TAILQ_FIRST(&hmp->flusher.ready_list) == NULL)
                return;

        /*
         * We only do one flg but we may have to loop/retry.
         */
        count = 0;
        while ((flg = TAILQ_FIRST(&hmp->flush_group_list)) != NULL) {
                ++count;
                if (hammer_debug_general & 0x0001) {
                        kprintf("hammer_flush %d ttl=%d recs=%d\n",
                                hmp->flusher.act,
                                flg->total_count, flg->refs);
                }
                hammer_start_transaction_fls(&hmp->flusher.trans, hmp);

                /*
                 * If the previous flush cycle just about exhausted our
                 * UNDO space we may have to do a dummy cycle to move the
                 * first_offset up before actually digging into a new cycle,
                 * or the new cycle will not have sufficient undo space.
                 */
                if (hammer_flusher_undo_exhausted(&hmp->flusher.trans, 3))
                        hammer_flusher_finalize(&hmp->flusher.trans, 0);

                /*
                 * Ok, we are running this flush group now (this prevents new
                 * additions to it).
                 */
                flg->running = 1;
                if (hmp->next_flush_group == flg)
                        hmp->next_flush_group = TAILQ_NEXT(flg, flush_entry);

                /*
                 * Iterate the inodes in the flg's flush_list and assign
                 * them to slaves.
                 */
                slave_index = 0;
                info = TAILQ_FIRST(&hmp->flusher.ready_list);
                next_ip = TAILQ_FIRST(&flg->flush_list);

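                /*
                 * Distribute the inodes in this flush group across the
                 * slave threads.  A slave is dispatched each time its
                 * work_array fills (HAMMER_FLUSH_GROUP_SIZE inodes); any
                 * partial batch is dispatched after the loop.
                 */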
                while ((ip = next_ip) != NULL) {
                        next_ip = TAILQ_NEXT(ip, flush_entry);

                        /*
                         * Add ip to the slave's work array.  The slave is
                         * not currently running.
                         */
                        info->work_array[info->count++] = ip;
                        if (info->count != HAMMER_FLUSH_GROUP_SIZE)
                                continue;

                        /*
                         * Get the slave running
                         */
                        TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
                        TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
                        info->flg = flg;
                        info->runstate = 1;
                        wakeup(&info->runstate);

                        /*
                         * Get a new slave.  We may have to wait for one to
                         * finish running.
                         */
                        while ((info = TAILQ_FIRST(&hmp->flusher.ready_list)) == NULL) {
                                tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);
                        }
                }

                /*
                 * Run the current slave if necessary
                 */
                if (info->count) {
                        TAILQ_REMOVE(&hmp->flusher.ready_list, info, entry);
                        TAILQ_INSERT_TAIL(&hmp->flusher.run_list, info, entry);
                        info->flg = flg;
                        info->runstate = 1;
                        wakeup(&info->runstate);
                }

                /*
                 * Wait for all slaves to finish running
                 */
                while (TAILQ_FIRST(&hmp->flusher.run_list) != NULL)
                        tsleep(&hmp->flusher.ready_list, 0, "hmrfcc", 0);

                /*
                 * Do the final finalization, clean up
                 */
                hammer_flusher_finalize(&hmp->flusher.trans, 1);
                hmp->flusher.tid = hmp->flusher.trans.tid;

                hammer_done_transaction(&hmp->flusher.trans);

                /*
                 * Loop up on the same flg.  If the flg is done clean it up
                 * and break out.  We only flush one flg.
                 */
                if (TAILQ_FIRST(&flg->flush_list) == NULL) {
                        KKASSERT(TAILQ_EMPTY(&flg->flush_list));
                        KKASSERT(flg->refs == 0);
                        TAILQ_REMOVE(&hmp->flush_group_list, flg, flush_entry);
                        kfree(flg, M_HAMMER);
                        break;
                }
        }

        /*
         * We may have pure meta-data to flush, or we may have to finish
         * cycling the UNDO FIFO, even if there were no flush groups.
         */
        if (count == 0 && hammer_flusher_haswork(hmp)) {
                hammer_start_transaction_fls(&hmp->flusher.trans, hmp);
                hammer_flusher_finalize(&hmp->flusher.trans, 1);
                hammer_done_transaction(&hmp->flusher.trans);
        }

        /*
         * Clean up any freed big-blocks (typically zone-2).
         * resv->flush_group is typically set several flush groups ahead
         * of the free, so the freed block is not made available for reuse
         * while it may still be referenced.
         */
        while ((resv = TAILQ_FIRST(&hmp->delay_list)) != NULL) {
                if (resv->flush_group != hmp->flusher.act)
                        break;
                hammer_reserve_clrdelay(hmp, resv);
        }
}


/*
 * The slave flusher thread pulls work off the master flush_list until no
 * work is left.
 */
static void
hammer_flusher_slave_thread(void *arg)
{
        hammer_flush_group_t flg;
        hammer_flusher_info_t info;
        hammer_mount_t hmp;
        hammer_inode_t ip;
        int i;

        info = arg;
        hmp = info->hmp;

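        /*
         * runstate protocol: 0 means idle (sleep), 1 means a work_array
         * has been loaded for us by the master, -1 means exit.
         */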
        for (;;) {
                while (info->runstate == 0)
                        tsleep(&info->runstate, 0, "hmrssw", 0);
                if (info->runstate < 0)
                        break;
                flg = info->flg;

                for (i = 0; i < info->count; ++i) {
                        ip = info->work_array[i];
                        hammer_flusher_flush_inode(ip, &hmp->flusher.trans);
                        ++hammer_stats_inode_flushes;
                }
                info->count = 0;
                info->runstate = 0;
                TAILQ_REMOVE(&hmp->flusher.run_list, info, entry);
                TAILQ_INSERT_TAIL(&hmp->flusher.ready_list, info, entry);
                wakeup(&hmp->flusher.ready_list);
        }
        info->td = NULL;
        wakeup(&info->td);
        lwkt_exit();
}

void
hammer_flusher_clean_loose_ios(hammer_mount_t hmp)
{
        hammer_buffer_t buffer;
        hammer_io_t io;

        /*
         * loose ends - buffers without bp's aren't tracked by the kernel
         * and can build up, so clean them out.  This can occur when an
         * IO completes on a buffer with no references left.
         */
        if ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
                crit_enter();   /* biodone() race */
                while ((io = TAILQ_FIRST(&hmp->lose_list)) != NULL) {
                        KKASSERT(io->mod_list == &hmp->lose_list);
                        TAILQ_REMOVE(&hmp->lose_list, io, mod_entry);
                        io->mod_list = NULL;
                        if (io->lock.refs == 0)
                                ++hammer_count_refedbufs;
                        hammer_ref(&io->lock);
                        buffer = (void *)io;
                        hammer_rel_buffer(buffer, 0);
                }
                crit_exit();
        }
}

/*
 * Flush a single inode that is part of a flush group.
 *
 * NOTE!  The sync code can return EWOULDBLOCK if the flush operation
 * would otherwise blow out the buffer cache.  hammer_flush_inode_done()
 * will re-queue the inode for the next flush sequence and force the
 * flusher to run again if this occurs.
 */
static
void
hammer_flusher_flush_inode(hammer_inode_t ip, hammer_transaction_t trans)
{
        hammer_mount_t hmp = ip->hmp;
        int error;

        hammer_flusher_clean_loose_ios(hmp);
        error = hammer_sync_inode(trans, ip);
        if (error != EWOULDBLOCK)
                ip->error = error;
        hammer_flush_inode_done(ip);
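        /*
         * Stall while another thread is finalizing, then run a
         * finalization ourselves if the UNDO FIFO or the dirty meta-data
         * buffers are approaching their limits.
         */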
        while (hmp->flusher.finalize_want)
                tsleep(&hmp->flusher.finalize_want, 0, "hmrsxx", 0);
        if (hammer_flusher_undo_exhausted(trans, 1)) {
                kprintf("HAMMER: Warning: UNDO area too small!\n");
                hammer_flusher_finalize(trans, 1);
        } else if (hammer_flusher_meta_limit(trans->hmp)) {
                hammer_flusher_finalize(trans, 0);
        }
}

/*
 * Return non-zero if the UNDO area has less than (QUARTER / 4) of its
 * space left.
 *
 * 1/4 - Emergency free undo space level.  Below this point the flusher
 *       will finalize even if directory dependencies have not been resolved.
 *
 * 2/4 - Used by the pruning and reblocking code.  These functions may be
 *       running in parallel with a flush and cannot be allowed to drop
 *       available undo space to emergency levels.
 *
 * 3/4 - Used at the beginning of a flush to force-sync the volume header
 *       to give the flush plenty of runway to work in.
 */
int
hammer_flusher_undo_exhausted(hammer_transaction_t trans, int quarter)
{
        if (hammer_undo_space(trans) <
            hammer_undo_max(trans->hmp) * quarter / 4) {
                return(1);
        } else {
                return(0);
        }
}

/*
 * Flush all pending UNDOs, wait for write completion, update the volume
 * header with the new UNDO end position, and flush it.  Then
 * asynchronously flush the meta-data.
 *
 * If this is the last finalization in a flush group we also synchronize
 * our cached blockmap and set hmp->flusher_undo_start and our cached undo
 * fifo first_offset so the next flush resets the FIFO pointers.
 *
 * If this is not final it is being called because too many dirty meta-data
 * buffers have built up and must be flushed with UNDO synchronization to
 * avoid a buffer cache deadlock.
 */
void
hammer_flusher_finalize(hammer_transaction_t trans, int final)
{
        hammer_volume_t root_volume;
        hammer_blockmap_t cundomap, dundomap;
        hammer_mount_t hmp;
        hammer_io_t io;
        int count;
        int i;

        hmp = trans->hmp;
        root_volume = trans->rootvol;

        /*
         * Exclusively lock the flusher.  This guarantees that all dirty
         * buffers will be idled (have a mod-count of 0).
         */
        ++hmp->flusher.finalize_want;
        hammer_lock_ex(&hmp->flusher.finalize_lock);
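
        /*
         * A non-zero finalize_want also stalls the slave threads in
         * hammer_flusher_flush_inode() until this finalization completes.
         */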

        /*
         * If this isn't the final sync several threads may have hit the
         * meta-limit at the same time and raced.  Only sync if we really
         * have to, after acquiring the lock.
         */
        if (final == 0 && !hammer_flusher_meta_limit(hmp))
                goto done;

        /*
         * Flush data buffers.  This can occur asynchronously and at any
         * time.  We must interlock against the frontend direct-data write
         * but do not have to acquire the sync-lock yet.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->data_list)) != NULL) {
                if (io->lock.refs == 0)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                hammer_io_write_interlock(io);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_io_done_interlock(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * The sync-lock is required for the remaining sequence.  This lock
         * prevents meta-data from being modified.
         */
        hammer_sync_lock_ex(trans);

        /*
         * If we have been asked to finalize the volume header sync the
         * cached blockmap to the on-disk blockmap.  Generate an UNDO
         * record for the update.
         */
        if (final) {
                cundomap = &hmp->blockmap[0];
                dundomap = &root_volume->ondisk->vol0_blockmap[0];
                if (root_volume->io.modified) {
                        hammer_modify_volume(trans, root_volume,
                                             dundomap, sizeof(hmp->blockmap));
                        for (i = 0; i < HAMMER_MAX_ZONES; ++i)
                                hammer_crc_set_blockmap(&cundomap[i]);
                        bcopy(cundomap, dundomap, sizeof(hmp->blockmap));
                        hammer_modify_volume_done(root_volume);
                }
        }

        /*
         * Flush UNDOs
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->undo_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
                if (io->lock.refs == 0)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * Wait for I/Os to complete
         */
        hammer_flusher_clean_loose_ios(hmp);
        hammer_io_wait_all(hmp, "hmrfl1");

        /*
         * Update the on-disk volume header with new UNDO FIFO end position
         * (do not generate new UNDO records for this change).  We have to
         * do this for the UNDO FIFO whether (final) is set or not.
         *
         * Also update the on-disk next_tid field.  This does not require
         * an UNDO.  However, because our TID is generated before we get
         * the sync lock another sync may have beat us to the punch.
         *
         * This also has the side effect of updating first_offset based on
         * a prior finalization when the first finalization of the next flush
         * cycle occurs, removing any undo info from the prior finalization
         * from consideration.
         *
         * The volume header will be flushed out synchronously.
         */
        dundomap = &root_volume->ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
        cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];

        if (dundomap->first_offset != cundomap->first_offset ||
            dundomap->next_offset != cundomap->next_offset) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                dundomap->first_offset = cundomap->first_offset;
                dundomap->next_offset = cundomap->next_offset;
                hammer_crc_set_blockmap(dundomap);
                hammer_modify_volume_done(root_volume);
        }

        if (root_volume->io.modified) {
                hammer_modify_volume(NULL, root_volume, NULL, 0);
                if (root_volume->ondisk->vol0_next_tid < trans->tid)
                        root_volume->ondisk->vol0_next_tid = trans->tid;
                hammer_crc_set_volume(root_volume->ondisk);
                hammer_modify_volume_done(root_volume);
                hammer_io_flush(&root_volume->io);
        }

        /*
         * Wait for I/Os to complete
         */
        hammer_flusher_clean_loose_ios(hmp);
        hammer_io_wait_all(hmp, "hmrfl2");

        /*
         * Flush meta-data.  The meta-data will be undone if we crash
         * so we can safely flush it asynchronously.
         *
         * Repeated catchups will wind up flushing this update's meta-data
         * and the UNDO buffers for the next update simultaneously.  This
         * is ok.
         */
        count = 0;
        while ((io = TAILQ_FIRST(&hmp->meta_list)) != NULL) {
                KKASSERT(io->modify_refs == 0);
                if (io->lock.refs == 0)
                        ++hammer_count_refedbufs;
                hammer_ref(&io->lock);
                KKASSERT(io->type != HAMMER_STRUCTURE_VOLUME);
                hammer_io_flush(io);
                hammer_rel_buffer((hammer_buffer_t)io, 0);
                ++count;
        }

        /*
         * If this is the final finalization for the flush group set
         * up for the next sequence by setting a new first_offset in
         * our cached blockmap and clearing the undo history.
         *
         * Even though we have updated our cached first_offset, the on-disk
         * first_offset still governs available-undo-space calculations.
         */
        if (final) {
                cundomap = &hmp->blockmap[HAMMER_ZONE_UNDO_INDEX];
                if (cundomap->first_offset == cundomap->next_offset) {
                        hmp->hflags &= ~HMNT_UNDO_DIRTY;
                } else {
                        cundomap->first_offset = cundomap->next_offset;
                        hmp->hflags |= HMNT_UNDO_DIRTY;
                }
                hammer_clear_undo_history(hmp);
        }

        hammer_sync_unlock(trans);

done:
        hammer_unlock(&hmp->flusher.finalize_lock);
        if (--hmp->flusher.finalize_want == 0)
                wakeup(&hmp->flusher.finalize_want);
        hammer_stats_commits += final;
}

/*
 * Return non-zero if too many dirty meta-data buffers have built up.
 *
 * Since we cannot allow such buffers to flush until we have dealt with
 * the UNDOs, we risk deadlocking the kernel's buffer cache.
 */
int
hammer_flusher_meta_limit(hammer_mount_t hmp)
{
        if (hmp->locked_dirty_space + hmp->io_running_space >
            hammer_limit_dirtybufspace) {
                return(1);
        }
        return(0);
}

/*
 * Return non-zero if too many dirty meta-data buffers have built up.
 *
 * This version is used by background operations (mirror, prune, reblock)
 * to leave room for foreground operations.
 */
int
hammer_flusher_meta_halflimit(hammer_mount_t hmp)
{
        if (hmp->locked_dirty_space + hmp->io_running_space >
            hammer_limit_dirtybufspace / 2) {
                return(1);
        }
        return(0);
}

/*
 * Return non-zero if the flusher still has something to flush.
 */
int
hammer_flusher_haswork(hammer_mount_t hmp)
{
        if (TAILQ_FIRST(&hmp->flush_group_list) ||      /* dirty inodes */
            TAILQ_FIRST(&hmp->volu_list) ||             /* dirty buffers */
            TAILQ_FIRST(&hmp->undo_list) ||
            TAILQ_FIRST(&hmp->data_list) ||
            TAILQ_FIRST(&hmp->meta_list) ||
            (hmp->hflags & HMNT_UNDO_DIRTY)             /* UNDO FIFO sync */
        ) {
                return(1);
        }
        return(0);
}