tmpfs - Fix numerous races and adjust to use cache_unlink()
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vfsops.c
/* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */

/*-
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system.
 *
 * tmpfs is a file system that uses NetBSD's virtual memory sub-system
 * (the well-known UVM) to store file data and metadata in an efficient
 * way.  This means that it does not follow the structure of an on-disk
 * file system because it simply does not need to.  Instead, it uses
 * memory-specific data structures and algorithms to automatically
 * allocate and release resources.
 */

#include <sys/conf.h>
#include <sys/param.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/objcache.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>

#include <vfs/tmpfs/tmpfs.h>
#include <vfs/tmpfs/tmpfs_vnops.h>
#include <vfs/tmpfs/tmpfs_args.h>

/*
 * Default permission for root node
 */
#define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");

/* --------------------------------------------------------------------- */

static int	tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
static int	tmpfs_unmount(struct mount *, int);
static int	tmpfs_root(struct mount *, struct vnode **);
static int	tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *,
			     struct vnode **);
static int	tmpfs_statfs(struct mount *, struct statfs *,
			     struct ucred *cred);

/* --------------------------------------------------------------------- */
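/*
 * Constructor/destructor and backing-store init/fini callbacks for the
 * per-mount tmpfs node objcache (tm_node_pool, created in tmpfs_mount()
 * below).  init/fini allocate and free the memory and set up the node
 * interlock once per object; ctor/dtor reset the reusable fields each
 * time a node is handed out or returned to the cache.
 */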
int
tmpfs_node_ctor(void *obj, void *privdata, int flags)
{
	struct tmpfs_node *node = (struct tmpfs_node *)obj;

	node->tn_gen++;
	node->tn_size = 0;
	node->tn_status = 0;
	node->tn_flags = 0;
	node->tn_links = 0;
	node->tn_vnode = NULL;
	node->tn_vpstate = TMPFS_VNODE_WANT;
	bzero(&node->tn_spec, sizeof(node->tn_spec));

	return (1);
}

static void
tmpfs_node_dtor(void *obj, void *privdata)
{
	struct tmpfs_node *node = (struct tmpfs_node *)obj;

	node->tn_type = VNON;
	node->tn_vpstate = TMPFS_VNODE_DOOMED;
}

static void *
tmpfs_node_init(void *args, int flags)
{
	struct tmpfs_node *node = (struct tmpfs_node *)objcache_malloc_alloc(args, flags);

	if (node == NULL)
		return (NULL);
	node->tn_id = 0;

	lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
	node->tn_gen = karc4random();

	return node;
}

static void
tmpfs_node_fini(void *obj, void *args)
{
	struct tmpfs_node *node = (struct tmpfs_node *)obj;

	lockuninit(&node->tn_interlock);
	objcache_malloc_free(obj, args);
}

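/*
 * VFS mount entry point.  Copies in the user-supplied tmpfs_args, derives
 * the page, node and maximum-file-size limits from them (falling back to
 * defaults computed from available swap and physical memory), sets up the
 * per-mount object caches, and allocates the root directory node.
 */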
static int
tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *root;
	struct tmpfs_args args;
	vm_pindex_t pages;
	vm_pindex_t pages_limit;
	ino_t nodes;
	u_int64_t maxfsize;
	int error;
	/* Size counters. */
	ino_t nodes_max;
	off_t size_max;
	size_t maxfsize_max;
	size_t size;

	/* Root node attributes. */
	uid_t root_uid = cred->cr_uid;
	gid_t root_gid = cred->cr_gid;
	mode_t root_mode = (VREAD | VWRITE);

	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * XXX: There is no support yet to update file system
		 * settings.  Should be added.
		 */
		return EOPNOTSUPP;
	}

	/*
	 * mount info
	 */
	bzero(&args, sizeof(args));
	size_max = 0;
	nodes_max = 0;
	maxfsize_max = 0;

	if (path) {
		if (data) {
			error = copyin(data, &args, sizeof(args));
			if (error)
				return (error);
		}
		size_max = args.ta_size_max;
		nodes_max = args.ta_nodes_max;
		maxfsize_max = args.ta_maxfsize_max;
		root_uid = args.ta_root_uid;
		root_gid = args.ta_root_gid;
		root_mode = args.ta_root_mode;
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	if (cred->cr_uid != 0) {
		root_mode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			root_mode |= VWRITE;
	}

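	/*
	 * Determine the page limit for this mount.  The absolute backing
	 * limit is the configured swap space plus half of physical memory;
	 * if no size was requested, half of that limit is used by default.
	 */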
	pages_limit = vm_swap_max + vmstats.v_page_count / 2;

	if (size_max == 0) {
		pages = pages_limit / 2;
	} else if (size_max < PAGE_SIZE) {
		pages = 1;
	} else if (OFF_TO_IDX(size_max) > pages_limit) {
		/*
		 * do not force pages = pages_limit for this case, otherwise
		 * we might not honor tmpfs size requests from /etc/fstab
		 * during boot because they are mounted prior to swap being
		 * turned on.
		 */
		pages = OFF_TO_IDX(size_max);
	} else {
		pages = OFF_TO_IDX(size_max);
	}

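	/*
	 * Determine the node (inode) limit.  By default allow roughly one
	 * node per KB of data space, and clamp explicit requests to the
	 * range [3, pages].
	 */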
29ffeb28 205 if (nodes_max == 0)
7a2de9a4 206 nodes = 3 + pages * PAGE_SIZE / 1024;
29ffeb28
MD
207 else if (nodes_max < 3)
208 nodes = 3;
209 else if (nodes_max > pages)
210 nodes = pages;
7a2de9a4
MD
211 else
212 nodes = nodes_max;
7a2de9a4 213
817a2fd9
MD
214 maxfsize = IDX_TO_OFF(pages_limit);
215 if (maxfsize_max != 0 && maxfsize > maxfsize_max)
216 maxfsize = maxfsize_max;
217
7a2de9a4 218 /* Allocate the tmpfs mount structure and fill it. */
29ffeb28 219 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO);
7a2de9a4
MD
220
221 lockinit(&(tmp->allnode_lock), "tmpfs allnode lock", 0, LK_CANRECURSE);
222 tmp->tm_nodes_max = nodes;
223 tmp->tm_nodes_inuse = 0;
817a2fd9 224 tmp->tm_maxfilesize = maxfsize;
7a2de9a4
MD
225 LIST_INIT(&tmp->tm_nodes_used);
226
227 tmp->tm_pages_max = pages;
228 tmp->tm_pages_used = 0;
29ffeb28 229
dcaa8a41 230 kmalloc_create(&tmp->tm_node_zone, "tmpfs node");
8e771504 231 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent");
d00cd01c 232 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone");
dcaa8a41
VS
233
234 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) *
c01f27eb
VS
235 tmp->tm_nodes_max);
236
dcaa8a41
VS
237 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node);
238 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone;
239
8e771504
VS
240 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent);
241 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone;
242
7a2de9a4
MD
243 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache",
244 0, 0,
245 NULL, NULL, NULL,
246 objcache_malloc_alloc, objcache_malloc_free,
8e771504 247 &tmp->tm_dirent_zone_malloc_args);
7a2de9a4
MD
248 tmp->tm_node_pool = objcache_create( "tmpfs node cache",
249 0, 0,
250 tmpfs_node_ctor, tmpfs_node_dtor, NULL,
251 tmpfs_node_init, tmpfs_node_fini,
dcaa8a41 252 &tmp->tm_node_zone_malloc_args);
7a2de9a4 253
f7db522f
VS
254 tmp->tm_ino = 2;
255
7a2de9a4 256 /* Allocate the root node. */
d4623db3 257 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid,
6e0c5aab 258 root_mode & ALLPERMS, NULL,
d4623db3
MD
259 VNOVAL, VNOVAL, &root);
260
	if (error != 0 || root == NULL) {
		objcache_destroy(tmp->tm_node_pool);
		objcache_destroy(tmp->tm_dirent_pool);
		kfree(tmp, M_TMPFSMNT);
		return error;
	}
	KASSERT(root->tn_id >= 0,
		("tmpfs root with invalid ino: %d", (int)root->tn_id));

	/*
	 * We are backed by swap, set snocache chflags flag so we
	 * don't trip over swapcache.  Only touch the root node after
	 * the allocation is known to have succeeded.
	 */
	root->tn_flags = SF_NOCACHE;
	++root->tn_links;	/* prevent destruction */
	tmp->tm_root = root;

	mp->mnt_flag |= MNT_LOCAL;
#if 0
	mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | MNTK_GA_MPSAFE |
			     MNTK_IN_MPSAFE | MNTK_SG_MPSAFE;
#endif
	mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_SG_MPSAFE;
	mp->mnt_kern_flag |= MNTK_WR_MPSAFE;
	mp->mnt_kern_flag |= MNTK_NOMSYNC;
	mp->mnt_data = (qaddr_t)tmp;
	vfs_getnewfsid(mp);

	vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);

	copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
	copyinstr(path, mp->mnt_stat.f_mntonname,
		  sizeof(mp->mnt_stat.f_mntonname) - 1,
		  &size);

	tmpfs_statfs(mp, &mp->mnt_stat, cred);

	return 0;
}

/* --------------------------------------------------------------------- */

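/*
 * VFS unmount entry point.  Truncate all regular files to release their
 * backing pages, flush the remaining vnodes, tear down the directory
 * topology so that all link counts drop to zero, then free every node
 * (including the root) and destroy the per-mount caches.
 */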
/* ARGSUSED2 */
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
	int error;
	int flags = 0;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;

	/* Handle forced unmounts. */
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	tmp = VFS_TO_TMPFS(mp);

	/*
	 * Finalize all pending I/O.  In the case of tmpfs we want
	 * to throw all the data away so clean out the buffer cache
	 * and vm objects before calling vflush().
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		if (node->tn_type == VREG && node->tn_vnode) {
			++node->tn_links;
			TMPFS_NODE_LOCK(node);
			vx_get(node->tn_vnode);
			tmpfs_truncate(node->tn_vnode, 0);
			vx_put(node->tn_vnode);
			TMPFS_NODE_UNLOCK(node);
			--node->tn_links;
		}
	}
	error = vflush(mp, 0, flags);
	if (error != 0)
		return error;

	/*
	 * First pass get rid of all the directory entries and
	 * vnode associations.  This will also destroy the
	 * directory topology and should drop all link counts
	 * to 0 except for the root.
	 *
	 * No vnodes should remain after the vflush above.
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		++node->tn_links;
		TMPFS_NODE_LOCK(node);
		if (node->tn_type == VDIR) {
			struct tmpfs_dirent *de;

			while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) {
				tmpfs_dir_detach(node, de);
				tmpfs_free_dirent(tmp, de);
			}
		}
		KKASSERT(node->tn_vnode == NULL);
#if 0
		vp = node->tn_vnode;
		if (vp != NULL) {
			tmpfs_free_vp(vp);
			vrecycle(vp);
			node->tn_vnode = NULL;
		}
#endif
		TMPFS_NODE_UNLOCK(node);
		--node->tn_links;
	}

	/*
	 * Allow the root node to be destroyed by dropping the link count
	 * we bumped in the mount code.
	 */
	KKASSERT(tmp->tm_root);
	--tmp->tm_root->tn_links;

	/*
	 * At this point all nodes, including the root node, should have a
	 * link count of 0.  The root is not necessarily going to be last.
	 */
	while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) {
		if (node->tn_links)
			panic("tmpfs: Dangling nodes during umount (%p)!\n", node);
		TMPFS_NODE_LOCK(node);
		tmpfs_free_node(tmp, node);
		/* eats lock */
	}
	KKASSERT(tmp->tm_root == NULL);

	objcache_destroy(tmp->tm_dirent_pool);
	objcache_destroy(tmp->tm_node_pool);

	kmalloc_destroy(&tmp->tm_name_zone);
	kmalloc_destroy(&tmp->tm_dirent_zone);
	kmalloc_destroy(&tmp->tm_node_zone);

	tmp->tm_node_zone = tmp->tm_dirent_zone = NULL;

	lockuninit(&tmp->allnode_lock);
	KKASSERT(tmp->tm_pages_used == 0);
	KKASSERT(tmp->tm_nodes_inuse == 0);

	/* Throw away the tmpfs_mount structure. */
	kfree(tmp, M_TMPFSMNT);
	mp->mnt_data = NULL;

	mp->mnt_flag &= ~MNT_LOCAL;
	return 0;
}

/* --------------------------------------------------------------------- */

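/*
 * Return the root vnode for the filesystem, locked exclusively.
 */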
static int
tmpfs_root(struct mount *mp, struct vnode **vpp)
{
	struct tmpfs_mount *tmp;
	int error;

	tmp = VFS_TO_TMPFS(mp);
	if (tmp->tm_root == NULL) {
		kprintf("tmpfs_root: called without root node %p\n", mp);
		print_backtrace(-1);
		*vpp = NULL;
		error = EINVAL;
	} else {
		error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp);
		(*vpp)->v_flag |= VROOT;
		(*vpp)->v_type = VDIR;
	}
	return error;
}

/* --------------------------------------------------------------------- */

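/*
 * NFS export support: convert a file handle back into a vnode by scanning
 * the node list for a matching inode number and generation.
 */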
static int
tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp,
	     struct vnode **vpp)
{
	boolean_t found;
	struct tmpfs_fid *tfhp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;

	tmp = VFS_TO_TMPFS(mp);

	tfhp = (struct tmpfs_fid *)fhp;
	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
		return EINVAL;

	if (tfhp->tf_id >= tmp->tm_nodes_max)
		return EINVAL;

	found = FALSE;

	TMPFS_LOCK(tmp);
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		if (node->tn_id == tfhp->tf_id &&
		    node->tn_gen == tfhp->tf_gen) {
			found = TRUE;
			break;
		}
	}
	TMPFS_UNLOCK(tmp);

	if (found)
		return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp));

	return (EINVAL);
}

/* --------------------------------------------------------------------- */

476static int
477tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
478{
479 fsfilcnt_t freenodes;
480 struct tmpfs_mount *tmp;
481
482 tmp = VFS_TO_TMPFS(mp);
483
484 sbp->f_iosize = PAGE_SIZE;
485 sbp->f_bsize = PAGE_SIZE;
486
29ffeb28
MD
487 sbp->f_blocks = tmp->tm_pages_max;
488 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used;
489 sbp->f_bfree = sbp->f_bavail;
7a2de9a4 490
29ffeb28 491 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
7a2de9a4
MD
492
493 sbp->f_files = freenodes + tmp->tm_nodes_inuse;
494 sbp->f_ffree = freenodes;
817a2fd9 495 sbp->f_owner = tmp->tm_root->tn_uid;
7a2de9a4
MD
496
497 return 0;
498}
499
190c11cc
SZ
500/* --------------------------------------------------------------------- */
501
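/*
 * NFS export support: generate a file handle (inode number plus generation)
 * for the given vnode.
 */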
static int
tmpfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	struct tmpfs_node *node;
	struct tmpfs_fid tfh;

	node = VP_TO_TMPFS_NODE(vp);
	memset(&tfh, 0, sizeof(tfh));
	tfh.tf_len = sizeof(struct tmpfs_fid);
	tfh.tf_gen = node->tn_gen;
	tfh.tf_id = node->tn_id;
	memcpy(fhp, &tfh, sizeof(tfh));
	return (0);
}

/* --------------------------------------------------------------------- */

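/*
 * NFS export support: check whether the client address is permitted to
 * access this export and return its export flags and anonymous credentials.
 */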
static int
tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
	       struct ucred **credanonp)
{
	struct tmpfs_mount *tmp;
	struct netcred *nc;

	tmp = (struct tmpfs_mount *)mp->mnt_data;
	nc = vfs_export_lookup(mp, &tmp->tm_export, nam);
	if (nc == NULL)
		return (EACCES);

	*exflagsp = nc->netc_exflags;
	*credanonp = &nc->netc_anon;

	return (0);
}

/* --------------------------------------------------------------------- */

/*
 * tmpfs vfs operations.
 */

static struct vfsops tmpfs_vfsops = {
	.vfs_mount =		tmpfs_mount,
	.vfs_unmount =		tmpfs_unmount,
	.vfs_root =		tmpfs_root,
	.vfs_statfs =		tmpfs_statfs,
	.vfs_fhtovp =		tmpfs_fhtovp,
	.vfs_vptofh =		tmpfs_vptofh,
	.vfs_sync =		vfs_stdsync,
	.vfs_checkexp =		tmpfs_checkexp,
};

VFS_SET(tmpfs_vfsops, tmpfs, 0);
MODULE_VERSION(tmpfs, 1);