Merge branch 'vendor/LIBARCHIVE'
[dragonfly.git] / sys / vfs / hammer2 / hammer2.h
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35
36 /*
37  * This header file contains structures used internally by the HAMMER2
38  * implementation.  See hammer2_disk.h for on-disk structures.
39  */
40
41 #ifndef _VFS_HAMMER2_HAMMER2_H_
42 #define _VFS_HAMMER2_HAMMER2_H_
43
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
47 #include <sys/conf.h>
48 #include <sys/systm.h>
49 #include <sys/tree.h>
50 #include <sys/malloc.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/proc.h>
54 #include <sys/mountctl.h>
55 #include <sys/priv.h>
56 #include <sys/stat.h>
57 #include <sys/thread.h>
58 #include <sys/globaldata.h>
59 #include <sys/lockf.h>
60 #include <sys/buf.h>
61 #include <sys/queue.h>
62 #include <sys/limits.h>
63 #include <sys/buf2.h>
64 #include <sys/signal2.h>
65 #include <sys/tree.h>
66 #include <sys/dmsg.h>
67
68 #include "hammer2_disk.h"
69 #include "hammer2_mount.h"
70 #include "hammer2_ioctl.h"
71 #include "hammer2_ccms.h"
72
73 struct hammer2_chain;           /* defined below */
74 struct hammer2_inode;           /* defined below */
75 struct hammer2_mount;           /* defined below */
76 struct hammer2_pfsmount;        /* defined below */
77 struct hammer2_span;            /* opaque here: not defined in this header */
78 struct hammer2_state;           /* opaque here; pointed to by chain->state */
79 struct hammer2_msg;             /* opaque here: not defined in this header */
80
81 /*
82  * The chain structure tracks blockref recursions all the way to
83  * the root volume.  These consist of indirect blocks, inodes,
84  * and eventually the volume header.
85  *
86  * The chain structure is embedded in the hammer2_mount, hammer2_inode,
87  * and other system memory structures.  The chain structure typically
88  * implements the reference count and busy flag for the larger structure.
89  *
90  * It is always possible to track a chain element all the way back to the
91  * root by following the (parent) links.  (index) is a type-dependent index
92  * in the parent indicating where in the parent the chain element resides.
93  *
94  * When a blockref is added or deleted the related chain element is marked
95  * modified and all of its parents are marked SUBMODIFIED (the parent
96  * recursion can stop once we hit a node that is already marked SUBMODIFIED).
97  * A deleted chain element must remain intact until synchronized against
98  * its parent.
99  *
100  * The blockref at (parent, index) is not adjusted until the modified chain
101  * element is flushed and unmarked.  Until then the child's blockref may
102  * not match the blockref at (parent, index).
103  */
104 RB_HEAD(hammer2_chain_tree, hammer2_chain);     /* type of chain->rbhead */
105 TAILQ_HEAD(flush_deferral_list, hammer2_chain); /* links via flush_node */
106
107 struct hammer2_chain {
108         ccms_cst_t      cst;                    /* attr or data cst */
109         struct hammer2_blockref bref;           /* blockref for this element */
110         struct hammer2_blockref bref_flush;     /* synchronized w/MOVED bit */
111         struct hammer2_chain    *parent;        /* return chain to root */
112         struct hammer2_chain_tree rbhead;       /* RB tree of child chains */
113         struct hammer2_state    *state;         /* if active cache msg */
114         RB_ENTRY(hammer2_chain) rbnode;         /* node in parent's RB tree */
115         TAILQ_ENTRY(hammer2_chain) flush_node;  /* flush deferral list */
116
117         struct buf      *bp;            /* buffer cache (ro) */
118         hammer2_media_data_t *data;     /* modified copy of data (rw) */
119         u_int           bytes;          /* physical size of data */
120         int             index;          /* index in parent */
121         u_int           flushing;       /* element undergoing flush (count) */
122         u_int           refs;           /* reference count */
123         u_int           flags;          /* HAMMER2_CHAIN_* bits, see below */
124 };
125
126 typedef struct hammer2_chain hammer2_chain_t;
127 /* Comparator handed to RB_PROTOTYPE for hammer2_chain_tree (keyed via rbnode) */
128 int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
129 RB_PROTOTYPE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);
130
131 /*
132  * MOVED - This bit is set during the flush when the MODIFIED bit is cleared,
133  *         indicating that the parent's blocktable must inherit a change to
134  *         the bref (typically a block reallocation)
135  *
136  *         It must also be set in situations where a chain is not MODIFIED
137  *         but whose bref has changed (typically due to fields other than
138  *         a block reallocation).
139  */
140 #define HAMMER2_CHAIN_MODIFIED          0x00000001      /* active mods */
141 #define HAMMER2_CHAIN_UNUSED0002        0x00000002      /* (unused bit) */
142 #define HAMMER2_CHAIN_DIRTYBP           0x00000004      /* dirty on unlock */
143 #define HAMMER2_CHAIN_SUBMODIFIED       0x00000008      /* 1+ subs modified */
144 #define HAMMER2_CHAIN_DELETED           0x00000010      /* deleted chain */
145 #define HAMMER2_CHAIN_INITIAL           0x00000020      /* initial create */
146 #define HAMMER2_CHAIN_FLUSHED           0x00000040      /* flush on unlock */
147 #define HAMMER2_CHAIN_MOVED             0x00000080      /* bref changed */
148 #define HAMMER2_CHAIN_IOFLUSH           0x00000100      /* bawrite on put */
149 #define HAMMER2_CHAIN_DEFERRED          0x00000200      /* on a deferral list*/
150 #define HAMMER2_CHAIN_DESTROYED         0x00000400      /* destroying inode */
151 #define HAMMER2_CHAIN_MODIFIED_AUX      0x00000800      /* hmp->vchain only */
152 #define HAMMER2_CHAIN_MODIFY_TID        0x00001000      /* mod updates field */
153 #define HAMMER2_CHAIN_MOUNTED           0x00002000      /* PFS is mounted */
154 #define HAMMER2_CHAIN_ONRBTREE          0x00004000      /* on parent RB tree */
155
156 /*
157  * Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
158  */
159 #define HAMMER2_LOOKUP_NOLOCK           0x00000001      /* ref only */
160 #define HAMMER2_LOOKUP_NODATA           0x00000002      /* data left NULL */
161 #define HAMMER2_LOOKUP_SHARED           0x00000100      /* cf. RESOLVE_SHARED */
162
163 /*
164  * Flags passed to hammer2_chain_modify() and hammer2_chain_resize()
165  *
166  * NOTE: OPTDATA allows us to avoid instantiating buffers for INDIRECT
167  *       blocks in the INITIAL-create state.
168  *
169  * NOTE: NO_MODIFY_TID tells the function to not set HAMMER2_CHAIN_MODIFY_TID
170  *       when marking the chain modified (used when a sub-chain modification
171  *       propagates upward).
172  */
173 #define HAMMER2_MODIFY_NOSUB            0x00000001      /* do not set SUBMOD */
174 #define HAMMER2_MODIFY_OPTDATA          0x00000002      /* data can be NULL */
175 #define HAMMER2_MODIFY_NO_MODIFY_TID    0x00000004      /* leave MODIFY_TID */
176
177 /*
178  * Flags passed to hammer2_chain_lock()
179  */
180 #define HAMMER2_RESOLVE_NEVER           1
181 #define HAMMER2_RESOLVE_MAYBE           2
182 #define HAMMER2_RESOLVE_ALWAYS          3
183 #define HAMMER2_RESOLVE_MASK            0x0F    /* covers the 1-3 values above */
184
185 #define HAMMER2_RESOLVE_SHARED          0x10    /* separate bit, outside MASK */
186
187 /*
188  * Cluster different types of storage together for allocations
189  */
190 #define HAMMER2_FREECACHE_INODE         0
191 #define HAMMER2_FREECACHE_INDIR         1
192 #define HAMMER2_FREECACHE_DATA          2
193 #define HAMMER2_FREECACHE_UNUSED3       3       /* (unused slot) */
194 #define HAMMER2_FREECACHE_TYPES         4       /* first dim of freecache[][] */
195
196 /*
197  * BMAP read-ahead maximums: a count of PBUFSIZE units and the same in bytes
198  */
199 #define HAMMER2_BMAP_COUNT              16      /* max bmap read-ahead */
200 #define HAMMER2_BMAP_BYTES              (HAMMER2_PBUFSIZE * HAMMER2_BMAP_COUNT)
201
202 /*
203  * Miscellaneous limits
204  */
205 #define HAMMER2_FLUSH_DEPTH_LIMIT       40      /* stack recursion limit */
206
207 /*
208  * HAMMER2 IN-MEMORY CACHE OF MEDIA STRUCTURES
209  *
210  * There is an in-memory representation of all on-media data structures.
211  *
212  * When accessed read-only the data will be mapped to the related buffer
213  * cache buffer.
214  *
215  * When accessed read-write (marked modified) a kmalloc()'d copy of the
216  * data is created which can then be modified.  The copy is destroyed when a
217  * filesystem block is allocated to replace it.
218  *
219  * Active inodes (those with vnodes attached) will maintain the kmalloc()'d
220  * copy for both the read-only and the read-write case.  The combination of
221  * (bp) and (data) determines whether (data) was allocated or not.
222  *
223  * The in-memory representation may remain cached (for example in order to
224  * placemark clustering locks) even after the related data has been
225  * detached.
226  */
227
228 /*
229  * A hammer2 inode.
230  *
231  * NOTE: The inode's attribute CST which is also used to lock the inode
232  *       is embedded in the chain (chain.cst) and aliased w/ attr_cst.
233  */
234 struct hammer2_inode {
235         ccms_cst_t              topo_cst;       /* directory topology cst */
236         struct hammer2_mount    *hmp;           /* Global mount */
237         struct hammer2_pfsmount *pmp;           /* PFS mount */
238         struct hammer2_inode    *pip;           /* parent inode */
239         struct vnode            *vp;            /* vnode, see ITOV() */
240         hammer2_chain_t         *chain;         /* chain (carries chain.cst) */
241         struct lockf            advlock;        /* lockf state (<sys/lockf.h>) */
242         u_int                   flags;          /* HAMMER2_INODE_* bits */
243         u_int                   refs;           /* +vpref, +flushref */
244 };
245
246 typedef struct hammer2_inode hammer2_inode_t;
247 /* Bits for hammer2_inode.flags; NOTE(review): their users are not in this header */
248 #define HAMMER2_INODE_MODIFIED          0x0001
249 #define HAMMER2_INODE_DIRTYEMBED        0x0002
250 #define HAMMER2_INODE_RENAME_INPROG     0x0004
251
252 /*
253  * Freecache entry; hammer2_mount keeps one per [FREECACHE type][radix].  XXX
254  */
255 struct hammer2_freecache {
256         hammer2_off_t   bulk;
257         hammer2_off_t   single;
258 };
259
260 typedef struct hammer2_freecache hammer2_freecache_t;
261
262 /*
263  * Global (per device) mount structure for device (aka MPTOHMP(vp->v_mount))
264  */
265 struct hammer2_mount {
266         struct vnode    *devvp;         /* device vnode */
267         int             ronly;          /* read-only mount */
268         int             pmp_count;      /* PFS mounts backed by us */
269         TAILQ_ENTRY(hammer2_mount) mntentry; /* hammer2_mntlist */
270
271         struct malloc_type *minode;     /* malloc type (presumably inodes) */
272         int             ninodes;
273         int             maxinodes;
274
275         struct malloc_type *mchain;     /* malloc type (presumably chains) */
276         int             nipstacks;
277         int             maxipstacks;
278         hammer2_chain_t vchain;         /* anchor chain */
279         hammer2_chain_t *schain;        /* super-root */
280         hammer2_inode_t *sroot;         /* super-root inode */
281         struct lock     alloclk;        /* lockmgr lock */
282         struct lock     voldatalk;      /* lockmgr lock */
283
284         int             volhdrno;       /* last volhdrno written */
285         hammer2_volume_data_t voldata;  /* in-memory volume data */
286         hammer2_volume_data_t volsync;  /* synchronized voldata */
287         hammer2_freecache_t freecache[HAMMER2_FREECACHE_TYPES]
288                                      [HAMMER2_MAX_RADIX+1];  /* [type][radix] */
289 };
290
291 typedef struct hammer2_mount hammer2_mount_t;
292
293 /*
294  * Per-PFS mount structure for device (aka MPTOPMP(vp->v_mount))
295  */
296 struct hammer2_pfsmount {
297         struct mount            *mp;            /* kernel mount */
298         struct hammer2_mount    *hmp;           /* device global mount */
299         hammer2_chain_t         *rchain;        /* PFS root chain */
300         hammer2_inode_t         *iroot;         /* PFS root inode */
301         hammer2_off_t           inode_count;    /* copy of inode_count */
302         ccms_domain_t           ccms_dom;       /* CCMS domain state */
303         struct netexport        export;         /* nfs export */
304         int                     ronly;          /* read-only mount */
305         struct malloc_type      *mmsg;          /* malloc type (msgs? confirm) */
306         kdmsg_iocom_t           iocom;          /* kdmsg iocom state */
307 };
308
309 typedef struct hammer2_pfsmount hammer2_pfsmount_t;
310
311 #if defined(_KERNEL)
312
313 MALLOC_DECLARE(M_HAMMER2);
314
315 #define VTOI(vp)        ((hammer2_inode_t *)(vp)->v_data)      /* vnode -> inode */
316 #define ITOV(ip)        ((ip)->vp)                              /* inode -> vnode */
317 /* Return mp->mnt_data viewed as the per-PFS mount; (mp) is not NULL-checked. */
318 static __inline
319 hammer2_pfsmount_t *
320 MPTOPMP(struct mount *mp)
321 {
322         return ((hammer2_pfsmount_t *)mp->mnt_data);
323 }
324 /* Return the hmp field of the per-PFS mount stored in mp->mnt_data. */
325 static __inline
326 hammer2_mount_t *
327 MPTOHMP(struct mount *mp)
328 {
329         return (((hammer2_pfsmount_t *)mp->mnt_data)->hmp);
330 }
331
332 extern struct vop_ops hammer2_vnode_vops;
333 extern struct vop_ops hammer2_spec_vops;
334 extern struct vop_ops hammer2_fifo_vops;
335 /* Globals defined elsewhere. NOTE(review): iod_/ioa_ prefixes are not explained here */
336 extern int hammer2_debug;
337 extern int hammer2_cluster_enable;
338 extern int hammer2_hardlink_enable;
339 extern long hammer2_iod_file_read;
340 extern long hammer2_iod_meta_read;
341 extern long hammer2_iod_indr_read;
342 extern long hammer2_iod_file_write;
343 extern long hammer2_iod_meta_write;
344 extern long hammer2_iod_indr_write;
345 extern long hammer2_iod_fmap_write;
346 extern long hammer2_iod_volu_write;
347 extern long hammer2_ioa_file_read;
348 extern long hammer2_ioa_meta_read;
349 extern long hammer2_ioa_indr_read;
350 extern long hammer2_ioa_file_write;
351 extern long hammer2_ioa_meta_write;
352 extern long hammer2_ioa_indr_write;
353 extern long hammer2_ioa_fmap_write;
354 extern long hammer2_ioa_volu_write;
355
356 /*
357  * hammer2_subr.c
358  */
359 #define hammer2_icrc32(buf, size)       iscsi_crc32((buf), (size))
360 #define hammer2_icrc32c(buf, size, crc) iscsi_crc32_ext((buf), (size), (crc))
361 /* Locking.  hammer2_inode_lock_ex/_sh return a hammer2_chain_t pointer. */
362 hammer2_chain_t *hammer2_inode_lock_ex(hammer2_inode_t *ip);
363 hammer2_chain_t *hammer2_inode_lock_sh(hammer2_inode_t *ip);
364 void hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_chain_t *chain);
365 void hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_chain_t *chain);
366 void hammer2_voldata_lock(hammer2_mount_t *hmp);
367 void hammer2_voldata_unlock(hammer2_mount_t *hmp);
368 ccms_state_t hammer2_inode_lock_temp_release(hammer2_inode_t *ip);
369 ccms_state_t hammer2_inode_lock_upgrade(hammer2_inode_t *ip);
370 void hammer2_inode_lock_restore(hammer2_inode_t *ip, ccms_state_t ostate);
371
372 void hammer2_mount_exlock(hammer2_mount_t *hmp);
373 void hammer2_mount_shlock(hammer2_mount_t *hmp);
374 void hammer2_mount_unlock(hammer2_mount_t *hmp);
375
376 int hammer2_get_dtype(hammer2_chain_t *chain);
377 int hammer2_get_vtype(hammer2_chain_t *chain);
378 u_int8_t hammer2_get_obj_type(enum vtype vtype);
379 void hammer2_time_to_timespec(u_int64_t xtime, struct timespec *ts);
380 u_int64_t hammer2_timespec_to_time(struct timespec *ts);
381 u_int32_t hammer2_to_unix_xid(uuid_t *uuid);
382 void hammer2_guid_to_uuid(uuid_t *uuid, u_int32_t guid);
383
384 hammer2_key_t hammer2_dirhash(const unsigned char *name, size_t len);
385 int hammer2_allocsize(size_t bytes);
386
387 int hammer2_calc_logical(hammer2_inode_t *ip, hammer2_off_t uoff,
388                          hammer2_key_t *lbasep, hammer2_key_t *leofp);
389 void hammer2_update_time(uint64_t *timep);
390
391 /*
392  * hammer2_inode.c
393  */
394 struct vnode *hammer2_igetv(hammer2_inode_t *ip, int *errorp); /* inode -> vnode */
395
396 void hammer2_inode_lock_nlinks(hammer2_inode_t *ip);
397 void hammer2_inode_unlock_nlinks(hammer2_inode_t *ip);
398 hammer2_inode_t *hammer2_inode_get(hammer2_mount_t *hmp,
399                         hammer2_pfsmount_t *pmp, hammer2_inode_t *dip,
400                         hammer2_chain_t *chain);
401 void hammer2_inode_put(hammer2_inode_t *ip, hammer2_chain_t *passed_chain);
402 void hammer2_inode_free(hammer2_inode_t *ip);
403 void hammer2_inode_ref(hammer2_inode_t *ip);
404 void hammer2_inode_drop(hammer2_inode_t *ip);
405 int hammer2_inode_calc_alloc(hammer2_key_t filesize);
406 /* NOTE(review): (dip) below is presumably the directory inode; confirm in the .c */
407 int hammer2_inode_create(hammer2_inode_t *dip,
408                         struct vattr *vap, struct ucred *cred,
409                         const uint8_t *name, size_t name_len,
410                         hammer2_inode_t **nipp, hammer2_chain_t **nchainp);
411
412 int hammer2_inode_duplicate(hammer2_inode_t *dip,
413                         hammer2_chain_t *ochain, hammer2_chain_t **nchainp);
414 int hammer2_inode_connect(hammer2_inode_t *dip, hammer2_chain_t **chainp,
415                         const uint8_t *name, size_t name_len);
416 hammer2_inode_t *hammer2_inode_common_parent(hammer2_mount_t *hmp,
417                         hammer2_inode_t *fdip, hammer2_inode_t *tdip);
418
419 int hammer2_unlink_file(hammer2_inode_t *dip,
420                         const uint8_t *name, size_t name_len,
421                         int isdir, hammer2_chain_t *retain_chain);
422 int hammer2_hardlink_consolidate(hammer2_inode_t *ip, hammer2_chain_t **chainp,
423                         hammer2_inode_t *tdip, int linkcnt);
424 int hammer2_hardlink_deconsolidate(hammer2_inode_t *dip,
425                         hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
426 int hammer2_hardlink_find(hammer2_inode_t *dip,
427                         hammer2_chain_t **chainp, hammer2_chain_t **ochainp);
428
429 /*
430  * hammer2_chain.c - hammer2_chain_lock() takes HAMMER2_RESOLVE_* in (how)
431  */
432 void hammer2_modify_volume(hammer2_mount_t *hmp);
433 hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
434                                 hammer2_blockref_t *bref);
435 void hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain);
436 void hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain);
437 void hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain);
438 int hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain, int how);
439 void hammer2_chain_moved(hammer2_mount_t *hmp, hammer2_chain_t *chain);
440 void hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain,
441                                 int flags);     /* HAMMER2_MODIFY_* */
442 void hammer2_chain_resize(hammer2_inode_t *ip, hammer2_chain_t *chain,
443                                 int nradix, int flags); /* HAMMER2_MODIFY_* */
444 void hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain);
445 void hammer2_chain_wait(hammer2_mount_t *hmp, hammer2_chain_t *chain);
446 hammer2_chain_t *hammer2_chain_find(hammer2_mount_t *hmp,
447                                 hammer2_chain_t *parent, int index);
448 hammer2_chain_t *hammer2_chain_get(hammer2_mount_t *hmp,
449                                 hammer2_chain_t *parent,
450                                 int index, int flags);
451 hammer2_chain_t *hammer2_chain_lookup(hammer2_mount_t *hmp,
452                                 hammer2_chain_t **parentp,
453                                 hammer2_key_t key_beg, hammer2_key_t key_end,
454                                 int flags);     /* HAMMER2_LOOKUP_* */
455 hammer2_chain_t *hammer2_chain_next(hammer2_mount_t *hmp,
456                                 hammer2_chain_t **parentp,
457                                 hammer2_chain_t *chain,
458                                 hammer2_key_t key_beg, hammer2_key_t key_end,
459                                 int flags);     /* HAMMER2_LOOKUP_* */
460 hammer2_chain_t *hammer2_chain_create(hammer2_mount_t *hmp,
461                                 hammer2_chain_t *parent,
462                                 hammer2_chain_t *chain,
463                                 hammer2_key_t key, int keybits,
464                                 int type, size_t bytes,
465                                 int *errorp);
466 void hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent,
467                                 hammer2_chain_t *chain, int retain);
468 void hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
469                                 hammer2_tid_t modify_tid);
470 void hammer2_chain_commit(hammer2_mount_t *hmp, hammer2_chain_t *chain);
471 void hammer2_chain_parent_setsubmod(hammer2_mount_t *hmp,
472                                 hammer2_chain_t *chain);
473
474 /*
475  * hammer2_ioctl.c
476  */
477 int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
478                                 int fflag, struct ucred *cred);
479
480 /*
481  * hammer2_msgops.c
482  */
483 int hammer2_msg_dbg_rcvmsg(kdmsg_msg_t *msg);
484 int hammer2_msg_adhoc_input(kdmsg_msg_t *msg);
485
486 /*
487  * hammer2_vfsops.c
488  */
489 void hammer2_clusterctl_wakeup(kdmsg_iocom_t *iocom);   /* cf. pfsmount.iocom */
490 void hammer2_volconf_update(hammer2_pfsmount_t *pmp, int index);
491 void hammer2_cluster_reconnect(hammer2_pfsmount_t *pmp, struct file *fp);
492
493 /*
494  * hammer2_freemap.c
495  */
496 hammer2_off_t hammer2_freemap_alloc(hammer2_mount_t *hmp,
497                                 int type, size_t bytes);
498 void hammer2_freemap_free(hammer2_mount_t *hmp, hammer2_off_t data_off,
499                                 int type);
500
501 #endif /* _KERNEL */
502 #endif /* !_VFS_HAMMER2_HAMMER2_H_ */