Merge branch 'vendor/BMAKE'
[dragonfly.git] / sys / vfs / hammer2 / hammer2_io.c
1 /*
2  * Copyright (c) 2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer2.h"
36
37 /*
38  * Implements an abstraction layer for synchronous and asynchronous
39  * buffered device I/O.  Can be used for OS-abstraction but the main
40  * purpose is to allow larger buffers to be used against hammer2_chain's
41  * using smaller allocations, without causing deadlocks.
42  *
43  */
44 static void hammer2_io_callback(struct bio *bio);
45 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
46
47 static int
48 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
49 {
50         if (io2->pbase < io1->pbase)
51                 return(-1);
52         if (io2->pbase > io1->pbase)
53                 return(1);
54         return(0);
55 }
56
/* Red-black tree of DIOs per mount, indexed by physical offset (pbase) */
RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

/*
 * dio->refs packs state flags into the high bits and the reference
 * count into the low bits, allowing lock-free CAS state transitions.
 */
#define HAMMER2_DIO_INPROG      0x80000000	/* bp being acquired/disposed */
#define HAMMER2_DIO_GOOD        0x40000000	/* dio->bp is valid */
#define HAMMER2_DIO_WAITING     0x20000000	/* a thread sleeps on INPROG */
#define HAMMER2_DIO_DIRTY       0x10000000	/* write bp on last release */

#define HAMMER2_DIO_MASK        0x0FFFFFFF	/* reference count bits */
67
/*
 * Acquire the requested dio, set *ownerp based on state.  If state is good
 * *ownerp is set to 0, otherwise *ownerp is set to DIO_INPROG and the
 * caller must resolve the buffer.
 *
 * The dio is looked up (or created) in hmp->iotree keyed on the physical
 * device offset covering lbase.  On return the caller holds one ref on
 * the dio.  When *ownerp is DIO_INPROG the caller owns the in-progress
 * lock and must eventually resolve it via hammer2_io_complete().
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize, int *ownerp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	off_t pbase;
	off_t pmask;
	int psize = hammer2_devblksize(lsize);	/* device buffer size >= lsize */
	int refs;

	pmask = ~(hammer2_off_t)(psize - 1);

	/*
	 * The radix encoded in the low bits of lbase must agree with the
	 * requested logical size; strip it and align down to the device
	 * buffer boundary.  The logical range must not straddle buffers.
	 */
	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

	/*
	 * Access/Allocate the DIO.  A 0->1 ref transition takes the dio
	 * off the freeable count.
	 */
	spin_lock_shared(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		if (atomic_fetchadd_int(&dio->refs, 1) == 0)
			atomic_add_int(&dio->hmp->iofree_count, -1);
		spin_unlock_shared(&hmp->io_spin);
	} else {
		/* allocate outside the lock, then handle the insert race */
		spin_unlock_shared(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->refs = 1;
		spin_lock(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			spin_unlock(&hmp->io_spin);
		} else {
			/* lost the race, ref and use the existing dio */
			if (atomic_fetchadd_int(&xio->refs, 1) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			spin_unlock(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	}

	/*
	 * Obtain/Validate the buffer.  Lock-free state machine on
	 * dio->refs; each iteration snapshots refs and attempts one CAS.
	 */
	for (;;) {
		refs = dio->refs;
		cpu_ccfence();

		/*
		 * Stop if the buffer is good.  Once set GOOD the flag cannot
		 * be cleared until refs drops to 0.
		 */
		if (refs & HAMMER2_DIO_GOOD) {
			*ownerp = 0;
			return dio;
		}

		/*
		 * We need to acquire the in-progress lock on the buffer.
		 * If another thread holds it, flag WAITING and sleep;
		 * tsleep_interlock closes the race against the wakeup in
		 * hammer2_io_complete().
		 */
		if (refs & HAMMER2_DIO_INPROG) {
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_WAITING)) {
				tsleep(dio, PINTERLOCKED, "h2dio", 0);
			}
			/* retry */
		} else {
			if (atomic_cmpset_int(&dio->refs, refs,
					      refs | HAMMER2_DIO_INPROG)) {
				break;
			}
		}
		/* retry */
	}

	/*
	 * We need to do more work before the buffer is usable
	 */
	*ownerp = HAMMER2_DIO_INPROG;

	return(dio);
}
161
/*
 * If part of an asynchronous I/O the asynchronous I/O is biodone()'d.
 *
 * If the caller owned INPROG then the dio will be set GOOD or not
 * depending on whether the caller disposed of dio->bp or not.  The
 * WAITING and INPROG flags are cleared in a single CAS and any sleeper
 * from hammer2_io_getblk() is woken.
 */
static
void
hammer2_io_complete(hammer2_io_t *dio, int owner)
{
	int refs;
	int good;

	while (owner & HAMMER2_DIO_INPROG) {
		refs = dio->refs;
		cpu_ccfence();
		/* GOOD is set only if a buffer is actually attached */
		good = dio->bp ? HAMMER2_DIO_GOOD : 0;
		if (atomic_cmpset_int(&dio->refs, refs,
				      (refs & ~(HAMMER2_DIO_WAITING |
						HAMMER2_DIO_INPROG)) |
				      good)) {
			if (refs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			if (good)
				BUF_KERNPROC(dio->bp);
			break;
		}
		/* retry */
	}
}
192
/*
 * Release our ref on *diop, dispose of the underlying buffer.
 *
 * On the last-ref (1->0) transition we atomically take the INPROG lock
 * and clear GOOD/DIRTY, detach dio->bp, then write back (if dirty) or
 * release the buffer.  Finally the per-mount cache of idle DIOs is
 * trimmed if it has grown too large.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_mount_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t peof;
	off_t pbase;
	int psize;
	int refs;

	dio = *diop;
	*diop = NULL;

	/*
	 * Drop the ref.  Anything but the last ref is a simple decrement.
	 */
	for (;;) {
		refs = dio->refs;

		if ((refs & HAMMER2_DIO_MASK) == 1) {
			KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
			if (atomic_cmpset_int(&dio->refs, refs,
					      ((refs - 1) &
					       ~(HAMMER2_DIO_GOOD |
						 HAMMER2_DIO_DIRTY)) |
					      HAMMER2_DIO_INPROG)) {
				break;
			}
			/* retry */
		} else {
			if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
				return;
			/* retry */
		}
		/* retry */
	}

	/*
	 * Locked INPROG on 1->0 transition and we cleared DIO_GOOD (which is
	 * legal only on the last ref).  This allows us to dispose of the
	 * buffer.  refs is now 0.
	 *
	 * The instant we call io_complete dio is a free agent again and
	 * can be ripped out from under us.  Acquisition of the dio after
	 * this point will require a shared or exclusive spinlock.
	 */
	hmp = dio->hmp;
	bp = dio->bp;
	dio->bp = NULL;			/* we own bp from here on */
	pbase = dio->pbase;
	psize = dio->psize;
	hammer2_io_complete(dio, HAMMER2_DIO_INPROG);	/* clears INPROG */
	dio = NULL;	/* dio stale */

	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Dispose of the buffer.  Dirty buffers are clustered or
	 * delay-written; clean ones are released, destructively if
	 * flagged invalid or errored.
	 */
	if (refs & HAMMER2_DIO_GOOD) {
		KKASSERT(bp != NULL);
		if (refs & HAMMER2_DIO_DIRTY) {
			if (hammer2_cluster_enable) {
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				cluster_write(bp, peof, psize, 4);
			} else {
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.  The count is
	 * re-checked under the spinlock to avoid redundant scans.
	 */
	if (hmp->iofree_count > 1000) {
		struct hammer2_io_tree tmptree;

		RB_INIT(&tmptree);
		spin_lock(&hmp->io_spin);
		if (hmp->iofree_count > 1000) {
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &tmptree);
		}
		spin_unlock(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &tmptree);
	}
}
285
286 static
287 int
288 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
289 {
290         struct hammer2_io_tree *tmptree = arg;
291         hammer2_io_t *xio;
292
293         if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
294                 RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
295                 xio = RB_INSERT(hammer2_io_tree, tmptree, dio);
296                 KKASSERT(xio == NULL);
297         }
298         return 0;
299 }
300
301 void
302 hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
303 {
304         hammer2_io_t *dio;
305
306         while ((dio = RB_ROOT(tree)) != NULL) {
307                 RB_REMOVE(hammer2_io_tree, tree, dio);
308                 KKASSERT(dio->bp == NULL &&
309                     (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
310                 kfree(dio, M_HAMMER2);
311                 atomic_add_int(&hmp->iofree_count, -1);
312         }
313 }
314
315 char *
316 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
317 {
318         struct buf *bp;
319         int off;
320
321         bp = dio->bp;
322         KKASSERT(bp != NULL);
323         off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
324         KKASSERT(off >= 0 && off < bp->b_bufsize);
325         return(bp->b_data + off);
326 }
327
/*
 * Common backend for hammer2_io_new*().  Acquire the dio covering
 * (lbase,lsize) and instantiate a buffer for new (not pre-read) data.
 *
 * dozero - zero the logical range after the buffer is available.
 * quick  - best-effort mode: GETBLK_NOWAIT, never falls back to bread.
 *
 * Returns 0 or an errno from bread(); *diop is referenced either way
 * and must be released by the caller.
 */
static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
		hammer2_io_t **diop, int dozero, int quick)
{
	hammer2_io_t *dio;
	int owner;
	int error;

	dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		/* we own INPROG and must instantiate dio->bp ourselves */
		if (lsize == dio->psize) {
			/*
			 * Request covers the whole device buffer, no
			 * media read needed - getblk and clear it.
			 */
			dio->bp = getblk(hmp->devvp,
					     dio->pbase, dio->psize,
					     (quick ? GETBLK_NOWAIT : 0),
					     0);
			if (dio->bp) {
				vfs_bio_clrbuf(dio->bp);
				if (quick) {
					dio->bp->b_flags |= B_CACHE;
					bqrelse(dio->bp);
					dio->bp = NULL;
				}
			}
			error = 0;
		} else if (quick) {
			/* do nothing */
			error = 0;
		} else {
			/*
			 * Partial coverage; read the device buffer to
			 * preserve the portion outside (lbase,lsize).
			 */
			error = bread(hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
		hammer2_io_complete(dio, owner); /* sets GOOD iff bp != NULL */
	} else {
		error = 0;
	}
	if (dio->bp) {
		if (dozero)
			bzero(hammer2_io_data(dio, lbase), lsize);
		atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
	}
	return error;
}
375
376 int
377 hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
378                hammer2_io_t **diop)
379 {
380         return(_hammer2_io_new(hmp, lbase, lsize, diop, 1, 0));
381 }
382
383 int
384 hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
385                hammer2_io_t **diop)
386 {
387         return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 0));
388 }
389
390 int
391 hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
392                hammer2_io_t **diop)
393 {
394         return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 1));
395 }
396
/*
 * Synchronously read the device buffer backing (lbase,lsize), using
 * the clustered read heuristic when enabled.  Returns 0 or an errno;
 * *diop is referenced either way and must be put by the caller.
 */
int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
		hammer2_io_t **diop)
{
	hammer2_io_t *dio;
	off_t peof;
	int owner;
	int error;

	dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
	if (owner) {
		/* we own INPROG, must instantiate the buffer */
		if (hammer2_cluster_enable) {
			/* read-ahead up to the next segment boundary */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_read(hmp->devvp, peof, dio->pbase,
					     dio->psize,
					     dio->psize, HAMMER2_PBUFSIZE*4,
					     &dio->bp);
		} else {
			error = bread(hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
		hammer2_io_complete(dio, owner); /* sets GOOD iff bp != NULL */
	} else {
		/* buffer already GOOD */
		error = 0;
	}
	return error;
}
429
430 void
431 hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
432                   void (*callback)(hammer2_io_t *dio, hammer2_chain_t *arg_c,
433                                    void *arg_p, off_t arg_o),
434                   hammer2_chain_t *arg_c, void *arg_p, off_t arg_o)
435 {
436         hammer2_io_t *dio;
437         int owner;
438         int error;
439
440         dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
441         if (owner) {
442                 dio->callback = callback;
443                 dio->arg_c = arg_c;
444                 dio->arg_p = arg_p;
445                 dio->arg_o = arg_o;
446                 breadcb(hmp->devvp, dio->pbase, dio->psize,
447                         hammer2_io_callback, dio);
448         } else {
449                 error = 0;
450                 callback(dio, arg_c, arg_p, arg_o);
451                 hammer2_io_bqrelse(&dio);
452         }
453 }
454
/*
 * Device I/O completion callback for hammer2_io_breadcb().  Finishes
 * the buffer, attaches it to the dio (setting GOOD and waking waiters
 * via io_complete), runs the caller's callback, then drops the ref
 * acquired by breadcb's getblk.
 */
static void
hammer2_io_callback(struct bio *bio)
{
	struct buf *dbp = bio->bio_buf;
	hammer2_io_t *dio = bio->bio_caller_info1.ptr;

	/* ensure the bp is marked done before we take ownership of it */
	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(dbp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
	dio->bp = bio->bio_buf;
	KKASSERT((dio->bp->b_flags & B_ERROR) == 0); /* XXX */
	hammer2_io_complete(dio, HAMMER2_DIO_INPROG);	/* sets GOOD */
	dio->callback(dio, dio->arg_c, dio->arg_p, dio->arg_o);
	hammer2_io_bqrelse(&dio);
	/* TODO: async load meta-data and assign chain->dio */
}
471
472 void
473 hammer2_io_bawrite(hammer2_io_t **diop)
474 {
475         atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
476         hammer2_io_putblk(diop);
477 }
478
479 void
480 hammer2_io_bdwrite(hammer2_io_t **diop)
481 {
482         atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
483         hammer2_io_putblk(diop);
484 }
485
486 int
487 hammer2_io_bwrite(hammer2_io_t **diop)
488 {
489         atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
490         hammer2_io_putblk(diop);
491         return (0);     /* XXX */
492 }
493
/*
 * Flag the dio dirty so the buffer is written back when the last
 * reference is dropped in hammer2_io_putblk().
 */
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}
499
500 void
501 hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
502 {
503         if ((u_int)dio->psize == bytes)
504                 dio->bp->b_flags |= B_INVAL | B_RELBUF;
505 }
506
/*
 * Release the dio.  Actual buffer disposition (brelse vs bqrelse vs
 * write-back) is decided by the flag checks in hammer2_io_putblk().
 */
void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}
512
/*
 * Release the dio.  Actual buffer disposition (brelse vs bqrelse vs
 * write-back) is decided by the flag checks in hammer2_io_putblk().
 */
void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}
518
519 int
520 hammer2_io_isdirty(hammer2_io_t *dio)
521 {
522         return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
523 }