Merge branch 'vendor/TNFTP'
[dragonfly.git] / sys / vfs / hammer2 / hammer2_io.c
1 /*
2  * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34
35 #include "hammer2.h"
36
37 /*
38  * Implements an abstraction layer for synchronous and asynchronous
39  * buffered device I/O.  Can be used for OS-abstraction but the main
40  * purpose is to allow larger buffers to be used against hammer2_chain's
41  * using smaller allocations, without causing deadlocks.
42  *
43  */
44 static void hammer2_io_callback(struct bio *bio);
45 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
46
47 static int
48 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
49 {
50         if (io2->pbase < io1->pbase)
51                 return(-1);
52         if (io2->pbase > io1->pbase)
53                 return(1);
54         return(0);
55 }
56
57 RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
58 RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
59                 off_t, pbase);
60
61 struct hammer2_cleanupcb_info {
62         struct hammer2_io_tree tmptree;
63         int     count;
64 };
65
66
/*
 * Layout of dio->refs: the four high bits are state flags, the remaining
 * low bits (HAMMER2_DIO_MASK) hold the reference count.
 */
#define HAMMER2_DIO_INPROG	0x80000000	/* in-progress lock on the buffer */
#define HAMMER2_DIO_GOOD	0x40000000	/* buffer is good (dio->bp set) */
#define HAMMER2_DIO_WAITING	0x20000000	/* a thread is sleeping on the dio */
#define HAMMER2_DIO_DIRTY	0x10000000	/* write the buffer on last release */

#define HAMMER2_DIO_MASK	0x0FFFFFFF	/* reference count bits */
73
74 /*
75  * Acquire the requested dio, set *ownerp based on state.  If state is good
76  * *ownerp is set to 0, otherwise *ownerp is set to DIO_INPROG and the
77  * caller must resolve the buffer.
78  */
79 hammer2_io_t *
80 hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize, int *ownerp)
81 {
82         hammer2_io_t *dio;
83         hammer2_io_t *xio;
84         off_t pbase;
85         off_t pmask;
86         int psize = hammer2_devblksize(lsize);
87         int refs;
88
89         pmask = ~(hammer2_off_t)(psize - 1);
90
91         KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
92         lbase &= ~HAMMER2_OFF_MASK_RADIX;
93         pbase = lbase & pmask;
94         KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
95
96         /*
97          * Access/Allocate the DIO
98          */
99         spin_lock_shared(&hmp->io_spin);
100         dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
101         if (dio) {
102                 if ((atomic_fetchadd_int(&dio->refs, 1) &
103                      HAMMER2_DIO_MASK) == 0) {
104                         atomic_add_int(&dio->hmp->iofree_count, -1);
105                 }
106                 spin_unlock_shared(&hmp->io_spin);
107         } else {
108                 spin_unlock_shared(&hmp->io_spin);
109                 dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
110                 dio->hmp = hmp;
111                 dio->pbase = pbase;
112                 dio->psize = psize;
113                 dio->refs = 1;
114                 spin_lock(&hmp->io_spin);
115                 xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
116                 if (xio == NULL) {
117                         atomic_add_int(&hammer2_dio_count, 1);
118                         spin_unlock(&hmp->io_spin);
119                 } else {
120                         if ((atomic_fetchadd_int(&xio->refs, 1) &
121                              HAMMER2_DIO_MASK) == 0) {
122                                 atomic_add_int(&xio->hmp->iofree_count, -1);
123                         }
124                         spin_unlock(&hmp->io_spin);
125                         kfree(dio, M_HAMMER2);
126                         dio = xio;
127                 }
128         }
129
130         /*
131          * Obtain/Validate the buffer.
132          */
133         for (;;) {
134                 refs = dio->refs;
135                 cpu_ccfence();
136
137                 /*
138                  * Stop if the buffer is good.  Once set GOOD the flag cannot
139                  * be cleared until refs drops to 0.
140                  */
141                 if (refs & HAMMER2_DIO_GOOD) {
142                         *ownerp = 0;
143                         goto done;
144                 }
145
146                 /*
147                  * We need to acquire the in-progress lock on the buffer
148                  */
149                 if (refs & HAMMER2_DIO_INPROG) {
150                         tsleep_interlock(dio, 0);
151                         if (atomic_cmpset_int(&dio->refs, refs,
152                                               refs | HAMMER2_DIO_WAITING)) {
153                                 tsleep(dio, PINTERLOCKED, "h2dio", 0);
154                         }
155                         /* retry */
156                 } else {
157                         if (atomic_cmpset_int(&dio->refs, refs,
158                                               refs | HAMMER2_DIO_INPROG)) {
159                                 break;
160                         }
161                 }
162                 /* retry */
163         }
164
165         /*
166          * We need to do more work before the buffer is usable
167          */
168         *ownerp = HAMMER2_DIO_INPROG;
169 done:
170         if (dio->act < 5)
171                 ++dio->act;
172         return(dio);
173 }
174
175 /*
176  * If part of an asynchronous I/O the asynchronous I/O is biodone()'d.
177  *
178  * If the caller owned INPROG then the dio will be set GOOD or not
179  * depending on whether the caller disposed of dio->bp or not.
180  */
181 static
182 void
183 hammer2_io_complete(hammer2_io_t *dio, int owner)
184 {
185         int refs;
186         int good;
187
188         while (owner & HAMMER2_DIO_INPROG) {
189                 refs = dio->refs;
190                 cpu_ccfence();
191                 good = dio->bp ? HAMMER2_DIO_GOOD : 0;
192                 if (atomic_cmpset_int(&dio->refs, refs,
193                                       (refs & ~(HAMMER2_DIO_WAITING |
194                                                 HAMMER2_DIO_INPROG)) |
195                                       good)) {
196                         if (refs & HAMMER2_DIO_WAITING)
197                                 wakeup(dio);
198                         if (good)
199                                 BUF_KERNPROC(dio->bp);
200                         break;
201                 }
202                 /* retry */
203         }
204 }
205
206 /*
207  * Release our ref on *diop, dispose of the underlying buffer.
208  */
209 void
210 hammer2_io_putblk(hammer2_io_t **diop)
211 {
212         hammer2_mount_t *hmp;
213         hammer2_io_t *dio;
214         struct buf *bp;
215         off_t peof;
216         off_t pbase;
217         int psize;
218         int refs;
219
220         dio = *diop;
221         *diop = NULL;
222
223         for (;;) {
224                 refs = dio->refs;
225
226                 if ((refs & HAMMER2_DIO_MASK) == 1) {
227                         KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
228                         if (atomic_cmpset_int(&dio->refs, refs,
229                                               ((refs - 1) &
230                                                ~(HAMMER2_DIO_GOOD |
231                                                  HAMMER2_DIO_DIRTY)) |
232                                               HAMMER2_DIO_INPROG)) {
233                                 break;
234                         }
235                         /* retry */
236                 } else {
237                         if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
238                                 return;
239                         /* retry */
240                 }
241                 /* retry */
242         }
243
244         /*
245          * Locked INPROG on 1->0 transition and we cleared DIO_GOOD (which is
246          * legal only on the last ref).  This allows us to dispose of the
247          * buffer.  refs is now 0.
248          *
249          * The instant we call io_complete dio is a free agent again and
250          * can be ripped out from under us.  Acquisition of the dio after
251          * this point will require a shared or exclusive spinlock.
252          */
253         hmp = dio->hmp;
254         bp = dio->bp;
255         dio->bp = NULL;
256         pbase = dio->pbase;
257         psize = dio->psize;
258         atomic_add_int(&hmp->iofree_count, 1);
259         hammer2_io_complete(dio, HAMMER2_DIO_INPROG);   /* clears INPROG */
260         dio = NULL;     /* dio stale */
261
262         if (refs & HAMMER2_DIO_GOOD) {
263                 KKASSERT(bp != NULL);
264                 if (refs & HAMMER2_DIO_DIRTY) {
265                         if (hammer2_cluster_enable) {
266                                 peof = (pbase + HAMMER2_SEGMASK64) &
267                                        ~HAMMER2_SEGMASK64;
268                                 cluster_write(bp, peof, psize, 4);
269                         } else {
270                                 bp->b_flags |= B_CLUSTEROK;
271                                 bdwrite(bp);
272                         }
273                 } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
274                         brelse(bp);
275                 } else {
276                         bqrelse(bp);
277                 }
278         }
279
280         /*
281          * We cache free buffers so re-use cases can use a shared lock, but
282          * if too many build up we have to clean them out.
283          */
284         if (hmp->iofree_count > 1000) {
285                 struct hammer2_cleanupcb_info info;
286
287                 RB_INIT(&info.tmptree);
288                 spin_lock(&hmp->io_spin);
289                 if (hmp->iofree_count > 1000) {
290                         info.count = hmp->iofree_count / 2;
291                         RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
292                                 hammer2_io_cleanup_callback, &info);
293                 }
294                 spin_unlock(&hmp->io_spin);
295                 hammer2_io_cleanup(hmp, &info.tmptree);
296         }
297 }
298
299 /*
300  * Cleanup any dio's with no references which are not in-progress.
301  */
302 static
303 int
304 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
305 {
306         struct hammer2_cleanupcb_info *info = arg;
307         hammer2_io_t *xio;
308
309         if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
310                 if (dio->act > 0) {
311                         --dio->act;
312                         return 0;
313                 }
314                 KKASSERT(dio->bp == NULL);
315                 RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
316                 xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
317                 KKASSERT(xio == NULL);
318                 if (--info->count <= 0) /* limit scan */
319                         return(-1);
320         }
321         return 0;
322 }
323
324 void
325 hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
326 {
327         hammer2_io_t *dio;
328
329         while ((dio = RB_ROOT(tree)) != NULL) {
330                 RB_REMOVE(hammer2_io_tree, tree, dio);
331                 KKASSERT(dio->bp == NULL &&
332                     (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
333                 kfree(dio, M_HAMMER2);
334                 atomic_add_int(&hammer2_dio_count, -1);
335                 atomic_add_int(&hmp->iofree_count, -1);
336         }
337 }
338
339 char *
340 hammer2_io_data(hammer2_io_t *dio, off_t lbase)
341 {
342         struct buf *bp;
343         int off;
344
345         bp = dio->bp;
346         KKASSERT(bp != NULL);
347         off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
348         KKASSERT(off >= 0 && off < bp->b_bufsize);
349         return(bp->b_data + off);
350 }
351
352 static
353 int
354 _hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
355                 hammer2_io_t **diop, int dozero, int quick)
356 {
357         hammer2_io_t *dio;
358         int owner;
359         int error;
360
361         dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
362         if (owner) {
363                 if (lsize == dio->psize) {
364                         dio->bp = getblk(hmp->devvp,
365                                              dio->pbase, dio->psize,
366                                              (quick ? GETBLK_NOWAIT : 0),
367                                              0);
368                         if (dio->bp) {
369                                 vfs_bio_clrbuf(dio->bp);
370                                 if (quick) {
371                                         dio->bp->b_flags |= B_CACHE;
372                                         bqrelse(dio->bp);
373                                         dio->bp = NULL;
374                                 }
375                         }
376                         error = 0;
377                 } else if (quick) {
378                         /* do nothing */
379                         error = 0;
380                 } else {
381                         error = bread(hmp->devvp, dio->pbase,
382                                       dio->psize, &dio->bp);
383                 }
384                 if (error) {
385                         brelse(dio->bp);
386                         dio->bp = NULL;
387                 }
388                 hammer2_io_complete(dio, owner);
389         } else {
390                 error = 0;
391         }
392         if (dio->bp) {
393                 if (dozero)
394                         bzero(hammer2_io_data(dio, lbase), lsize);
395                 atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
396         }
397         return error;
398 }
399
400 int
401 hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
402                hammer2_io_t **diop)
403 {
404         return(_hammer2_io_new(hmp, lbase, lsize, diop, 1, 0));
405 }
406
407 int
408 hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
409                hammer2_io_t **diop)
410 {
411         return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 0));
412 }
413
414 int
415 hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
416                hammer2_io_t **diop)
417 {
418         return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 1));
419 }
420
421 int
422 hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
423                 hammer2_io_t **diop)
424 {
425         hammer2_io_t *dio;
426         off_t peof;
427         int owner;
428         int error;
429
430         dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
431         if (owner) {
432                 if (hammer2_cluster_enable) {
433                         peof = (dio->pbase + HAMMER2_SEGMASK64) &
434                                ~HAMMER2_SEGMASK64;
435                         error = cluster_read(hmp->devvp, peof, dio->pbase,
436                                              dio->psize,
437                                              dio->psize, HAMMER2_PBUFSIZE*4,
438                                              &dio->bp);
439                 } else {
440                         error = bread(hmp->devvp, dio->pbase,
441                                       dio->psize, &dio->bp);
442                 }
443                 if (error) {
444                         brelse(dio->bp);
445                         dio->bp = NULL;
446                 }
447                 hammer2_io_complete(dio, owner);
448         } else {
449                 error = 0;
450         }
451         return error;
452 }
453
454 void
455 hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
456                   void (*callback)(hammer2_io_t *dio,
457                                    hammer2_cluster_t *arg_l,
458                                    hammer2_chain_t *arg_c,
459                                    void *arg_p, off_t arg_o),
460                   hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c,
461                   void *arg_p, off_t arg_o)
462 {
463         hammer2_io_t *dio;
464         int owner;
465         int error;
466
467         dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
468         if (owner) {
469                 dio->callback = callback;
470                 dio->arg_l = arg_l;
471                 dio->arg_c = arg_c;
472                 dio->arg_p = arg_p;
473                 dio->arg_o = arg_o;
474                 breadcb(hmp->devvp, dio->pbase, dio->psize,
475                         hammer2_io_callback, dio);
476         } else {
477                 error = 0;
478                 callback(dio, arg_l, arg_c, arg_p, arg_o);
479                 hammer2_io_bqrelse(&dio);
480         }
481 }
482
483 static void
484 hammer2_io_callback(struct bio *bio)
485 {
486         struct buf *dbp = bio->bio_buf;
487         hammer2_io_t *dio = bio->bio_caller_info1.ptr;
488
489         if ((bio->bio_flags & BIO_DONE) == 0)
490                 bpdone(dbp, 0);
491         bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
492         dio->bp = bio->bio_buf;
493         KKASSERT((dio->bp->b_flags & B_ERROR) == 0); /* XXX */
494         hammer2_io_complete(dio, HAMMER2_DIO_INPROG);
495
496         /*
497          * We still have the ref and DIO_GOOD is now set so nothing else
498          * should mess with the callback fields until we release the dio.
499          */
500         dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
501         hammer2_io_bqrelse(&dio);
502         /* TODO: async load meta-data and assign chain->dio */
503 }
504
505 void
506 hammer2_io_bawrite(hammer2_io_t **diop)
507 {
508         atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
509         hammer2_io_putblk(diop);
510 }
511
512 void
513 hammer2_io_bdwrite(hammer2_io_t **diop)
514 {
515         atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
516         hammer2_io_putblk(diop);
517 }
518
519 int
520 hammer2_io_bwrite(hammer2_io_t **diop)
521 {
522         atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
523         hammer2_io_putblk(diop);
524         return (0);     /* XXX */
525 }
526
527 void
528 hammer2_io_setdirty(hammer2_io_t *dio)
529 {
530         atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
531 }
532
533 void
534 hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
535 {
536         if ((u_int)dio->psize == bytes)
537                 dio->bp->b_flags |= B_INVAL | B_RELBUF;
538 }
539
540 void
541 hammer2_io_brelse(hammer2_io_t **diop)
542 {
543         hammer2_io_putblk(diop);
544 }
545
546 void
547 hammer2_io_bqrelse(hammer2_io_t **diop)
548 {
549         hammer2_io_putblk(diop);
550 }
551
552 int
553 hammer2_io_isdirty(hammer2_io_t *dio)
554 {
555         return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
556 }