use rb-tree for directory lookups
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vfsops.c
CommitLineData
7a2de9a4
MD
1/* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */
2
3/*-
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * Efficient memory file system.
35 *
36 * tmpfs is a file system that uses NetBSD's virtual memory sub-system
37 * (the well-known UVM) to store file data and metadata in an efficient
38 * way. This means that it does not follow the structure of an on-disk
39 * file system because it simply does not need to. Instead, it uses
40 * memory-specific data structures and algorithms to automatically
41 * allocate and release resources.
42 */
651eeb07 43
7a2de9a4
MD
44#include <sys/conf.h>
45#include <sys/param.h>
46#include <sys/limits.h>
47#include <sys/lock.h>
48#include <sys/mutex.h>
49#include <sys/kernel.h>
50#include <sys/stat.h>
51#include <sys/systm.h>
52#include <sys/sysctl.h>
53#include <sys/objcache.h>
54
55#include <vm/vm.h>
56#include <vm/vm_object.h>
57#include <vm/vm_param.h>
58
59#include <vfs/tmpfs/tmpfs.h>
60#include <vfs/tmpfs/tmpfs_vnops.h>
29ffeb28 61#include <vfs/tmpfs/tmpfs_args.h>
7a2de9a4
MD
62
/*
 * Default permission bits for the root node: read/write/execute for the
 * owner, read/execute for group and others (rwxr-xr-x).
 */
#define TMPFS_DEFAULT_ROOT_MODE \
	(S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)
67
68MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
7a2de9a4
MD
69
70/* --------------------------------------------------------------------- */
71
72static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
73static int tmpfs_unmount(struct mount *, int);
74static int tmpfs_root(struct mount *, struct vnode **);
75static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
76static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);
77
78/* --------------------------------------------------------------------- */
0786baf1 79int
7a2de9a4
MD
80tmpfs_node_ctor(void *obj, void *privdata, int flags)
81{
82 struct tmpfs_node *node = (struct tmpfs_node *)obj;
83
84 node->tn_gen++;
85 node->tn_size = 0;
86 node->tn_status = 0;
87 node->tn_flags = 0;
88 node->tn_links = 0;
89 node->tn_vnode = NULL;
90 node->tn_vpstate = TMPFS_VNODE_WANT;
0786baf1 91 bzero(&node->tn_spec, sizeof(node->tn_spec));
7a2de9a4
MD
92
93 return (1);
94}
95
96static void
97tmpfs_node_dtor(void *obj, void *privdata)
98{
99 struct tmpfs_node *node = (struct tmpfs_node *)obj;
100 node->tn_type = VNON;
101 node->tn_vpstate = TMPFS_VNODE_DOOMED;
102}
103
104static void*
105tmpfs_node_init(void *args, int flags)
106{
107 struct tmpfs_node *node = (struct tmpfs_node *)objcache_malloc_alloc(args, flags);
881dac8b
VS
108 if (node == NULL)
109 return (NULL);
7a2de9a4
MD
110 node->tn_id = 0;
111
112 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
113 node->tn_gen = karc4random();
114
115 return node;
116}
117
118static void
119tmpfs_node_fini(void *obj, void *args)
120{
121 struct tmpfs_node *node = (struct tmpfs_node *)obj;
122 lockuninit(&node->tn_interlock);
123 objcache_malloc_free(obj, args);
124}
125
7a2de9a4
MD
126static int
127tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
128{
129 struct tmpfs_mount *tmp;
130 struct tmpfs_node *root;
29ffeb28
MD
131 struct tmpfs_args args;
132 vm_pindex_t pages;
133 vm_pindex_t pages_limit;
7a2de9a4 134 ino_t nodes;
817a2fd9 135 u_int64_t maxfsize;
7a2de9a4
MD
136 int error;
137 /* Size counters. */
29ffeb28
MD
138 ino_t nodes_max;
139 off_t size_max;
817a2fd9 140 size_t maxfsize_max;
29ffeb28 141 size_t size;
7a2de9a4
MD
142
143 /* Root node attributes. */
144 uid_t root_uid = cred->cr_uid;
145 gid_t root_gid = cred->cr_gid;
146 mode_t root_mode = (VREAD | VWRITE);
147
148 if (mp->mnt_flag & MNT_UPDATE) {
149 /* XXX: There is no support yet to update file system
150 * settings. Should be added. */
151
152 return EOPNOTSUPP;
153 }
154
29ffeb28
MD
155 /*
156 * mount info
157 */
158 bzero(&args, sizeof(args));
159 size_max = 0;
160 nodes_max = 0;
817a2fd9 161 maxfsize_max = 0;
29ffeb28
MD
162
163 if (path) {
164 if (data) {
165 error = copyin(data, &args, sizeof(args));
166 if (error)
167 return (error);
168 }
169 size_max = args.ta_size_max;
170 nodes_max = args.ta_nodes_max;
817a2fd9 171 maxfsize_max = args.ta_maxfsize_max;
1b5c5deb
MD
172 root_uid = args.ta_root_uid;
173 root_gid = args.ta_root_gid;
174 root_mode = args.ta_root_mode;
29ffeb28 175 }
7a2de9a4
MD
176
177 /*
178 * If mount by non-root, then verify that user has necessary
179 * permissions on the device.
180 */
181 if (cred->cr_uid != 0) {
182 root_mode = VREAD;
183 if ((mp->mnt_flag & MNT_RDONLY) == 0)
184 root_mode |= VWRITE;
185 }
186
29ffeb28
MD
187 pages_limit = vm_swap_max + vmstats.v_page_count / 2;
188
2a3a6ffd 189 if (size_max == 0) {
29ffeb28 190 pages = pages_limit / 2;
2a3a6ffd 191 } else if (size_max < PAGE_SIZE) {
29ffeb28 192 pages = 1;
2a3a6ffd
MD
193 } else if (OFF_TO_IDX(size_max) > pages_limit) {
194 /*
195 * do not force pages = pages_limit for this case, otherwise
196 * we might not honor tmpfs size requests from /etc/fstab
197 * during boot because they are mounted prior to swap being
198 * turned on.
199 */
200 pages = OFF_TO_IDX(size_max);
201 } else {
29ffeb28 202 pages = OFF_TO_IDX(size_max);
2a3a6ffd 203 }
7a2de9a4 204
29ffeb28 205 if (nodes_max == 0)
7a2de9a4 206 nodes = 3 + pages * PAGE_SIZE / 1024;
29ffeb28
MD
207 else if (nodes_max < 3)
208 nodes = 3;
209 else if (nodes_max > pages)
210 nodes = pages;
7a2de9a4
MD
211 else
212 nodes = nodes_max;
7a2de9a4 213
817a2fd9
MD
214 maxfsize = IDX_TO_OFF(pages_limit);
215 if (maxfsize_max != 0 && maxfsize > maxfsize_max)
216 maxfsize = maxfsize_max;
217
7a2de9a4 218 /* Allocate the tmpfs mount structure and fill it. */
29ffeb28 219 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO);
7a2de9a4
MD
220
221 lockinit(&(tmp->allnode_lock), "tmpfs allnode lock", 0, LK_CANRECURSE);
222 tmp->tm_nodes_max = nodes;
223 tmp->tm_nodes_inuse = 0;
817a2fd9 224 tmp->tm_maxfilesize = maxfsize;
7a2de9a4
MD
225 LIST_INIT(&tmp->tm_nodes_used);
226
227 tmp->tm_pages_max = pages;
228 tmp->tm_pages_used = 0;
29ffeb28 229
dcaa8a41 230 kmalloc_create(&tmp->tm_node_zone, "tmpfs node");
8e771504 231 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent");
d00cd01c 232 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone");
dcaa8a41
VS
233
234 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) *
c01f27eb
VS
235 tmp->tm_nodes_max);
236
dcaa8a41
VS
237 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node);
238 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone;
239
8e771504
VS
240 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent);
241 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone;
242
7a2de9a4
MD
243 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache",
244 0, 0,
245 NULL, NULL, NULL,
246 objcache_malloc_alloc, objcache_malloc_free,
8e771504 247 &tmp->tm_dirent_zone_malloc_args);
7a2de9a4
MD
248 tmp->tm_node_pool = objcache_create( "tmpfs node cache",
249 0, 0,
250 tmpfs_node_ctor, tmpfs_node_dtor, NULL,
251 tmpfs_node_init, tmpfs_node_fini,
dcaa8a41 252 &tmp->tm_node_zone_malloc_args);
7a2de9a4 253
f7db522f
VS
254 tmp->tm_ino = 2;
255
7a2de9a4 256 /* Allocate the root node. */
d4623db3
MD
257 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid,
258 root_mode & ALLPERMS, NULL, NULL,
259 VNOVAL, VNOVAL, &root);
260
261 /*
262 * We are backed by swap, set snocache chflags flag so we
263 * don't trip over swapcache.
264 */
265 root->tn_flags = SF_NOCACHE;
7a2de9a4
MD
266
267 if (error != 0 || root == NULL) {
268 objcache_destroy(tmp->tm_node_pool);
269 objcache_destroy(tmp->tm_dirent_pool);
270 kfree(tmp, M_TMPFSMNT);
271 return error;
272 }
273 KASSERT(root->tn_id >= 0, ("tmpfs root with invalid ino: %d", (int)root->tn_id));
274 tmp->tm_root = root;
275
276 mp->mnt_flag |= MNT_LOCAL;
9fc94b5f 277#if 0
7a2de9a4
MD
278 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | MNTK_GA_MPSAFE |
279 MNTK_IN_MPSAFE | MNTK_SG_MPSAFE;
9fc94b5f 280#endif
e575e508 281 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_SG_MPSAFE;
1be4932c 282 mp->mnt_kern_flag |= MNTK_WR_MPSAFE;
f96f2f39 283 mp->mnt_kern_flag |= MNTK_NOMSYNC;
7a2de9a4
MD
284 mp->mnt_data = (qaddr_t)tmp;
285 vfs_getnewfsid(mp);
286
287
288 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
289 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);
290
291 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
292 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size);
293 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
294 copyinstr(path, mp->mnt_stat.f_mntonname,
295 sizeof(mp->mnt_stat.f_mntonname) -1,
296 &size);
297
298 tmpfs_statfs(mp, &mp->mnt_stat, cred);
299
300 return 0;
301}
302
303/* --------------------------------------------------------------------- */
304
305/* ARGSUSED2 */
306static int
307tmpfs_unmount(struct mount *mp, int mntflags)
308{
309 int error;
310 int flags = 0;
0786baf1 311 int found;
7a2de9a4
MD
312 struct tmpfs_mount *tmp;
313 struct tmpfs_node *node;
7a2de9a4
MD
314
315 /* Handle forced unmounts. */
316 if (mntflags & MNT_FORCE)
317 flags |= FORCECLOSE;
318
9fc94b5f 319 tmp = VFS_TO_TMPFS(mp);
9fc94b5f 320
d4623db3
MD
321 /*
322 * Finalize all pending I/O. In the case of tmpfs we want
323 * to throw all the data away so clean out the buffer cache
324 * and vm objects before calling vflush().
325 */
326 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
327 if (node->tn_type == VREG && node->tn_vnode) {
328 ++node->tn_links;
329 TMPFS_NODE_LOCK(node);
330 vx_get(node->tn_vnode);
331 tmpfs_truncate(node->tn_vnode, 0);
332 vx_put(node->tn_vnode);
333 TMPFS_NODE_UNLOCK(node);
334 --node->tn_links;
335 }
336 }
7a2de9a4
MD
337 error = vflush(mp, 0, flags);
338 if (error != 0)
339 return error;
340
0786baf1
MD
341 /*
342 * First pass get rid of all the directory entries and
343 * vnode associations. The directory structure will
344 * remain via the extra link count representing tn_dir.tn_parent.
345 *
346 * No vnodes should remain after the vflush above.
347 */
348 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
349 ++node->tn_links;
350 TMPFS_NODE_LOCK(node);
7a2de9a4
MD
351 if (node->tn_type == VDIR) {
352 struct tmpfs_dirent *de;
353
29ca4fd6
JH
354 while (!RB_EMPTY(&node->tn_dir.tn_dirtree)) {
355 de = RB_FIRST(tmpfs_dirtree, &node->tn_dir.tn_dirtree);
22d3b394 356 tmpfs_dir_detach(node, de);
0786baf1 357 tmpfs_free_dirent(tmp, de);
7a2de9a4
MD
358 node->tn_size -= sizeof(struct tmpfs_dirent);
359 }
360 }
0786baf1
MD
361 KKASSERT(node->tn_vnode == NULL);
362#if 0
7a2de9a4
MD
363 vp = node->tn_vnode;
364 if (vp != NULL) {
365 tmpfs_free_vp(vp);
366 vrecycle(vp);
0786baf1 367 node->tn_vnode = NULL;
7a2de9a4 368 }
0786baf1
MD
369#endif
370 TMPFS_NODE_UNLOCK(node);
371 --node->tn_links;
7a2de9a4
MD
372 }
373
0786baf1
MD
374 /*
375 * Now get rid of all nodes. We can remove any node with a
376 * link count of 0 or any directory node with a link count of
377 * 1. The parents will not be destroyed until all their children
378 * have been destroyed.
379 *
380 * Recursion in tmpfs_free_node() can further modify the list so
381 * we cannot use a next pointer here.
382 *
383 * The root node will be destroyed by this loop (it will be last).
384 */
385 while (!LIST_EMPTY(&tmp->tm_nodes_used)) {
386 found = 0;
387 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
388 if (node->tn_links == 0 ||
389 (node->tn_links == 1 && node->tn_type == VDIR)) {
390 TMPFS_NODE_LOCK(node);
391 tmpfs_free_node(tmp, node);
392 /* eats lock */
393 found = 1;
394 break;
395 }
396 }
397 if (found == 0) {
398 kprintf("tmpfs: Cannot free entire node tree!");
399 break;
400 }
401 }
402
403 KKASSERT(tmp->tm_root == NULL);
404
7a2de9a4
MD
405 objcache_destroy(tmp->tm_dirent_pool);
406 objcache_destroy(tmp->tm_node_pool);
407
d00cd01c 408 kmalloc_destroy(&tmp->tm_name_zone);
8e771504 409 kmalloc_destroy(&tmp->tm_dirent_zone);
dcaa8a41
VS
410 kmalloc_destroy(&tmp->tm_node_zone);
411
8e771504
VS
412 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL;
413
7a2de9a4
MD
414 lockuninit(&tmp->allnode_lock);
415 KKASSERT(tmp->tm_pages_used == 0);
416 KKASSERT(tmp->tm_nodes_inuse == 0);
417
418 /* Throw away the tmpfs_mount structure. */
0786baf1 419 kfree(tmp, M_TMPFSMNT);
7a2de9a4
MD
420 mp->mnt_data = NULL;
421
422 mp->mnt_flag &= ~MNT_LOCAL;
423 return 0;
424}
425
426/* --------------------------------------------------------------------- */
427
428static int
429tmpfs_root(struct mount *mp, struct vnode **vpp)
430{
0786baf1 431 struct tmpfs_mount *tmp;
7a2de9a4 432 int error;
7a2de9a4 433
0786baf1
MD
434 tmp = VFS_TO_TMPFS(mp);
435 if (tmp->tm_root == NULL) {
436 kprintf("tmpfs_root: called without root node %p\n", mp);
7ce2998e 437 print_backtrace(-1);
0786baf1
MD
438 *vpp = NULL;
439 error = EINVAL;
440 } else {
441 error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp);
442 (*vpp)->v_flag |= VROOT;
443 (*vpp)->v_type = VDIR;
444 }
7a2de9a4
MD
445 return error;
446}
447
448/* --------------------------------------------------------------------- */
449
450static int
451tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp)
452{
453 boolean_t found;
454 struct tmpfs_fid *tfhp;
455 struct tmpfs_mount *tmp;
456 struct tmpfs_node *node;
457
458 tmp = VFS_TO_TMPFS(mp);
459
460 tfhp = (struct tmpfs_fid *)fhp;
461 if (tfhp->tf_len != sizeof(struct tmpfs_fid))
462 return EINVAL;
463
464 if (tfhp->tf_id >= tmp->tm_nodes_max)
465 return EINVAL;
466
467 found = FALSE;
468
469 TMPFS_LOCK(tmp);
470 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
471 if (node->tn_id == tfhp->tf_id &&
472 node->tn_gen == tfhp->tf_gen) {
473 found = TRUE;
474 break;
475 }
476 }
477 TMPFS_UNLOCK(tmp);
478
479 if (found)
480 return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp));
481
482 return (EINVAL);
483}
484
485/* --------------------------------------------------------------------- */
486
487/* ARGSUSED2 */
488static int
489tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
490{
491 fsfilcnt_t freenodes;
492 struct tmpfs_mount *tmp;
493
494 tmp = VFS_TO_TMPFS(mp);
495
496 sbp->f_iosize = PAGE_SIZE;
497 sbp->f_bsize = PAGE_SIZE;
498
29ffeb28
MD
499 sbp->f_blocks = tmp->tm_pages_max;
500 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used;
501 sbp->f_bfree = sbp->f_bavail;
7a2de9a4 502
29ffeb28 503 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
7a2de9a4
MD
504
505 sbp->f_files = freenodes + tmp->tm_nodes_inuse;
506 sbp->f_ffree = freenodes;
817a2fd9 507 sbp->f_owner = tmp->tm_root->tn_uid;
7a2de9a4
MD
508
509 return 0;
510}
511
190c11cc
SZ
512/* --------------------------------------------------------------------- */
513
514static int
515tmpfs_vptofh(struct vnode *vp, struct fid *fhp)
516{
517 struct tmpfs_node *node;
518 struct tmpfs_fid tfh;
519 node = VP_TO_TMPFS_NODE(vp);
520 memset(&tfh, 0, sizeof(tfh));
521 tfh.tf_len = sizeof(struct tmpfs_fid);
522 tfh.tf_gen = node->tn_gen;
523 tfh.tf_id = node->tn_id;
524 memcpy(fhp, &tfh, sizeof(tfh));
525 return (0);
526}
527
7a2de9a4
MD
528/* --------------------------------------------------------------------- */
529
66fa44e7
VS
530static int
531tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
532 struct ucred **credanonp)
533{
534 struct tmpfs_mount *tmp;
535 struct netcred *nc;
536
537 tmp = (struct tmpfs_mount *) mp->mnt_data;
538 nc = vfs_export_lookup(mp, &tmp->tm_export, nam);
539 if (nc == NULL)
540 return (EACCES);
541
542 *exflagsp = nc->netc_exflags;
543 *credanonp = &nc->netc_anon;
544
545 return (0);
546}
547
548/* --------------------------------------------------------------------- */
549
7a2de9a4
MD
550/*
551 * tmpfs vfs operations.
552 */
553
554static struct vfsops tmpfs_vfsops = {
555 .vfs_mount = tmpfs_mount,
556 .vfs_unmount = tmpfs_unmount,
557 .vfs_root = tmpfs_root,
558 .vfs_statfs = tmpfs_statfs,
559 .vfs_fhtovp = tmpfs_fhtovp,
190c11cc 560 .vfs_vptofh = tmpfs_vptofh,
66fa44e7
VS
561 .vfs_sync = vfs_stdsync,
562 .vfs_checkexp = tmpfs_checkexp,
7a2de9a4
MD
563};
564
565VFS_SET(tmpfs_vfsops, tmpfs, 0);
e5e63c20 566MODULE_VERSION(tmpfs, 1);