tmpfs - Remove size cap
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vfsops.c
CommitLineData
7a2de9a4
MD
/*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/

/*-
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
32
/*
 * Efficient memory file system.
 *
 * tmpfs is a file system that uses NetBSD's virtual memory sub-system
 * (the well-known UVM) to store file data and metadata in an efficient
 * way.  This means that it does not follow the structure of an on-disk
 * file system because it simply does not need to.  Instead, it uses
 * memory-specific data structures and algorithms to automatically
 * allocate and release resources.
 */
651eeb07 43
7a2de9a4
MD
44#include <sys/conf.h>
45#include <sys/param.h>
46#include <sys/limits.h>
47#include <sys/lock.h>
48#include <sys/mutex.h>
49#include <sys/kernel.h>
50#include <sys/stat.h>
51#include <sys/systm.h>
52#include <sys/sysctl.h>
53#include <sys/objcache.h>
54
55#include <vm/vm.h>
56#include <vm/vm_object.h>
57#include <vm/vm_param.h>
58
59#include <vfs/tmpfs/tmpfs.h>
60#include <vfs/tmpfs/tmpfs_vnops.h>
29ffeb28 61#include <vfs/tmpfs/tmpfs_args.h>
7a2de9a4
MD
62
63/*
64 * Default permission for root node
65 */
66#define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
67
68MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
7a2de9a4
MD
69
70/* --------------------------------------------------------------------- */
71
72static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
73static int tmpfs_unmount(struct mount *, int);
74static int tmpfs_root(struct mount *, struct vnode **);
75static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
76static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);
77
7a2de9a4 78/* --------------------------------------------------------------------- */
0786baf1 79int
7a2de9a4
MD
80tmpfs_node_ctor(void *obj, void *privdata, int flags)
81{
82 struct tmpfs_node *node = (struct tmpfs_node *)obj;
83
84 node->tn_gen++;
85 node->tn_size = 0;
86 node->tn_status = 0;
87 node->tn_flags = 0;
88 node->tn_links = 0;
89 node->tn_vnode = NULL;
90 node->tn_vpstate = TMPFS_VNODE_WANT;
0786baf1 91 bzero(&node->tn_spec, sizeof(node->tn_spec));
7a2de9a4
MD
92
93 return (1);
94}
95
96static void
97tmpfs_node_dtor(void *obj, void *privdata)
98{
99 struct tmpfs_node *node = (struct tmpfs_node *)obj;
100 node->tn_type = VNON;
101 node->tn_vpstate = TMPFS_VNODE_DOOMED;
102}
103
104static void*
105tmpfs_node_init(void *args, int flags)
106{
107 struct tmpfs_node *node = (struct tmpfs_node *)objcache_malloc_alloc(args, flags);
881dac8b
VS
108 if (node == NULL)
109 return (NULL);
7a2de9a4
MD
110 node->tn_id = 0;
111
112 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
113 node->tn_gen = karc4random();
114
115 return node;
116}
117
118static void
119tmpfs_node_fini(void *obj, void *args)
120{
121 struct tmpfs_node *node = (struct tmpfs_node *)obj;
122 lockuninit(&node->tn_interlock);
123 objcache_malloc_free(obj, args);
124}
125
7a2de9a4
MD
126static int
127tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
128{
129 struct tmpfs_mount *tmp;
130 struct tmpfs_node *root;
29ffeb28
MD
131 struct tmpfs_args args;
132 vm_pindex_t pages;
133 vm_pindex_t pages_limit;
7a2de9a4 134 ino_t nodes;
817a2fd9 135 u_int64_t maxfsize;
7a2de9a4
MD
136 int error;
137 /* Size counters. */
29ffeb28
MD
138 ino_t nodes_max;
139 off_t size_max;
817a2fd9 140 size_t maxfsize_max;
29ffeb28 141 size_t size;
7a2de9a4
MD
142
143 /* Root node attributes. */
144 uid_t root_uid = cred->cr_uid;
145 gid_t root_gid = cred->cr_gid;
146 mode_t root_mode = (VREAD | VWRITE);
147
148 if (mp->mnt_flag & MNT_UPDATE) {
149 /* XXX: There is no support yet to update file system
150 * settings. Should be added. */
151
152 return EOPNOTSUPP;
153 }
154
29ffeb28
MD
155 /*
156 * mount info
157 */
158 bzero(&args, sizeof(args));
159 size_max = 0;
160 nodes_max = 0;
817a2fd9 161 maxfsize_max = 0;
29ffeb28
MD
162
163 if (path) {
164 if (data) {
165 error = copyin(data, &args, sizeof(args));
166 if (error)
167 return (error);
168 }
169 size_max = args.ta_size_max;
170 nodes_max = args.ta_nodes_max;
817a2fd9 171 maxfsize_max = args.ta_maxfsize_max;
1b5c5deb
MD
172 root_uid = args.ta_root_uid;
173 root_gid = args.ta_root_gid;
174 root_mode = args.ta_root_mode;
29ffeb28 175 }
7a2de9a4
MD
176
177 /*
178 * If mount by non-root, then verify that user has necessary
179 * permissions on the device.
180 */
181 if (cred->cr_uid != 0) {
182 root_mode = VREAD;
183 if ((mp->mnt_flag & MNT_RDONLY) == 0)
184 root_mode |= VWRITE;
185 }
186
29ffeb28
MD
187 pages_limit = vm_swap_max + vmstats.v_page_count / 2;
188
2a3a6ffd 189 if (size_max == 0) {
29ffeb28 190 pages = pages_limit / 2;
2a3a6ffd 191 } else if (size_max < PAGE_SIZE) {
29ffeb28 192 pages = 1;
2a3a6ffd
MD
193 } else if (OFF_TO_IDX(size_max) > pages_limit) {
194 /*
195 * do not force pages = pages_limit for this case, otherwise
196 * we might not honor tmpfs size requests from /etc/fstab
197 * during boot because they are mounted prior to swap being
198 * turned on.
199 */
200 pages = OFF_TO_IDX(size_max);
201 } else {
29ffeb28 202 pages = OFF_TO_IDX(size_max);
2a3a6ffd 203 }
7a2de9a4 204
29ffeb28 205 if (nodes_max == 0)
7a2de9a4 206 nodes = 3 + pages * PAGE_SIZE / 1024;
29ffeb28
MD
207 else if (nodes_max < 3)
208 nodes = 3;
209 else if (nodes_max > pages)
210 nodes = pages;
7a2de9a4
MD
211 else
212 nodes = nodes_max;
7a2de9a4 213
817a2fd9
MD
214 maxfsize = IDX_TO_OFF(pages_limit);
215 if (maxfsize_max != 0 && maxfsize > maxfsize_max)
216 maxfsize = maxfsize_max;
217
7a2de9a4 218 /* Allocate the tmpfs mount structure and fill it. */
29ffeb28 219 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO);
7a2de9a4
MD
220
221 lockinit(&(tmp->allnode_lock), "tmpfs allnode lock", 0, LK_CANRECURSE);
222 tmp->tm_nodes_max = nodes;
223 tmp->tm_nodes_inuse = 0;
817a2fd9 224 tmp->tm_maxfilesize = maxfsize;
7a2de9a4
MD
225 LIST_INIT(&tmp->tm_nodes_used);
226
227 tmp->tm_pages_max = pages;
228 tmp->tm_pages_used = 0;
29ffeb28 229
dcaa8a41 230 kmalloc_create(&tmp->tm_node_zone, "tmpfs node");
8e771504 231 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent");
d00cd01c 232 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone");
dcaa8a41
VS
233
234 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) *
c01f27eb
VS
235 tmp->tm_nodes_max);
236
dcaa8a41
VS
237 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node);
238 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone;
239
8e771504
VS
240 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent);
241 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone;
242
7a2de9a4
MD
243 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache",
244 0, 0,
245 NULL, NULL, NULL,
246 objcache_malloc_alloc, objcache_malloc_free,
8e771504 247 &tmp->tm_dirent_zone_malloc_args);
7a2de9a4
MD
248 tmp->tm_node_pool = objcache_create( "tmpfs node cache",
249 0, 0,
250 tmpfs_node_ctor, tmpfs_node_dtor, NULL,
251 tmpfs_node_init, tmpfs_node_fini,
dcaa8a41 252 &tmp->tm_node_zone_malloc_args);
7a2de9a4
MD
253
254 /* Allocate the root node. */
d4623db3
MD
255 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid,
256 root_mode & ALLPERMS, NULL, NULL,
257 VNOVAL, VNOVAL, &root);
258
259 /*
260 * We are backed by swap, set snocache chflags flag so we
261 * don't trip over swapcache.
262 */
263 root->tn_flags = SF_NOCACHE;
7a2de9a4
MD
264
265 if (error != 0 || root == NULL) {
266 objcache_destroy(tmp->tm_node_pool);
267 objcache_destroy(tmp->tm_dirent_pool);
268 kfree(tmp, M_TMPFSMNT);
269 return error;
270 }
271 KASSERT(root->tn_id >= 0, ("tmpfs root with invalid ino: %d", (int)root->tn_id));
272 tmp->tm_root = root;
273
274 mp->mnt_flag |= MNT_LOCAL;
9fc94b5f 275#if 0
7a2de9a4
MD
276 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | MNTK_GA_MPSAFE |
277 MNTK_IN_MPSAFE | MNTK_SG_MPSAFE;
9fc94b5f 278#endif
e575e508 279 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_SG_MPSAFE;
1be4932c 280 mp->mnt_kern_flag |= MNTK_WR_MPSAFE;
f96f2f39 281 mp->mnt_kern_flag |= MNTK_NOMSYNC;
7a2de9a4
MD
282 mp->mnt_data = (qaddr_t)tmp;
283 vfs_getnewfsid(mp);
284
285
286 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
287 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);
288
289 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
290 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size);
291 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
292 copyinstr(path, mp->mnt_stat.f_mntonname,
293 sizeof(mp->mnt_stat.f_mntonname) -1,
294 &size);
295
296 tmpfs_statfs(mp, &mp->mnt_stat, cred);
297
298 return 0;
299}
300
301/* --------------------------------------------------------------------- */
302
303/* ARGSUSED2 */
304static int
305tmpfs_unmount(struct mount *mp, int mntflags)
306{
307 int error;
308 int flags = 0;
0786baf1 309 int found;
7a2de9a4
MD
310 struct tmpfs_mount *tmp;
311 struct tmpfs_node *node;
7a2de9a4
MD
312
313 /* Handle forced unmounts. */
314 if (mntflags & MNT_FORCE)
315 flags |= FORCECLOSE;
316
9fc94b5f 317 tmp = VFS_TO_TMPFS(mp);
9fc94b5f 318
d4623db3
MD
319 /*
320 * Finalize all pending I/O. In the case of tmpfs we want
321 * to throw all the data away so clean out the buffer cache
322 * and vm objects before calling vflush().
323 */
324 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
325 if (node->tn_type == VREG && node->tn_vnode) {
326 ++node->tn_links;
327 TMPFS_NODE_LOCK(node);
328 vx_get(node->tn_vnode);
329 tmpfs_truncate(node->tn_vnode, 0);
330 vx_put(node->tn_vnode);
331 TMPFS_NODE_UNLOCK(node);
332 --node->tn_links;
333 }
334 }
7a2de9a4
MD
335 error = vflush(mp, 0, flags);
336 if (error != 0)
337 return error;
338
0786baf1
MD
339 /*
340 * First pass get rid of all the directory entries and
341 * vnode associations. The directory structure will
342 * remain via the extra link count representing tn_dir.tn_parent.
343 *
344 * No vnodes should remain after the vflush above.
345 */
346 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
347 ++node->tn_links;
348 TMPFS_NODE_LOCK(node);
7a2de9a4
MD
349 if (node->tn_type == VDIR) {
350 struct tmpfs_dirent *de;
351
0786baf1
MD
352 while (!TAILQ_EMPTY(&node->tn_dir.tn_dirhead)) {
353 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
22d3b394 354 tmpfs_dir_detach(node, de);
0786baf1 355 tmpfs_free_dirent(tmp, de);
7a2de9a4
MD
356 node->tn_size -= sizeof(struct tmpfs_dirent);
357 }
358 }
0786baf1
MD
359 KKASSERT(node->tn_vnode == NULL);
360#if 0
7a2de9a4
MD
361 vp = node->tn_vnode;
362 if (vp != NULL) {
363 tmpfs_free_vp(vp);
364 vrecycle(vp);
0786baf1 365 node->tn_vnode = NULL;
7a2de9a4 366 }
0786baf1
MD
367#endif
368 TMPFS_NODE_UNLOCK(node);
369 --node->tn_links;
7a2de9a4
MD
370 }
371
0786baf1
MD
372 /*
373 * Now get rid of all nodes. We can remove any node with a
374 * link count of 0 or any directory node with a link count of
375 * 1. The parents will not be destroyed until all their children
376 * have been destroyed.
377 *
378 * Recursion in tmpfs_free_node() can further modify the list so
379 * we cannot use a next pointer here.
380 *
381 * The root node will be destroyed by this loop (it will be last).
382 */
383 while (!LIST_EMPTY(&tmp->tm_nodes_used)) {
384 found = 0;
385 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
386 if (node->tn_links == 0 ||
387 (node->tn_links == 1 && node->tn_type == VDIR)) {
388 TMPFS_NODE_LOCK(node);
389 tmpfs_free_node(tmp, node);
390 /* eats lock */
391 found = 1;
392 break;
393 }
394 }
395 if (found == 0) {
396 kprintf("tmpfs: Cannot free entire node tree!");
397 break;
398 }
399 }
400
401 KKASSERT(tmp->tm_root == NULL);
402
7a2de9a4
MD
403 objcache_destroy(tmp->tm_dirent_pool);
404 objcache_destroy(tmp->tm_node_pool);
405
d00cd01c 406 kmalloc_destroy(&tmp->tm_name_zone);
8e771504 407 kmalloc_destroy(&tmp->tm_dirent_zone);
dcaa8a41
VS
408 kmalloc_destroy(&tmp->tm_node_zone);
409
8e771504
VS
410 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL;
411
7a2de9a4
MD
412 lockuninit(&tmp->allnode_lock);
413 KKASSERT(tmp->tm_pages_used == 0);
414 KKASSERT(tmp->tm_nodes_inuse == 0);
415
416 /* Throw away the tmpfs_mount structure. */
0786baf1 417 kfree(tmp, M_TMPFSMNT);
7a2de9a4
MD
418 mp->mnt_data = NULL;
419
420 mp->mnt_flag &= ~MNT_LOCAL;
421 return 0;
422}
423
424/* --------------------------------------------------------------------- */
425
426static int
427tmpfs_root(struct mount *mp, struct vnode **vpp)
428{
0786baf1 429 struct tmpfs_mount *tmp;
7a2de9a4 430 int error;
7a2de9a4 431
0786baf1
MD
432 tmp = VFS_TO_TMPFS(mp);
433 if (tmp->tm_root == NULL) {
434 kprintf("tmpfs_root: called without root node %p\n", mp);
7ce2998e 435 print_backtrace(-1);
0786baf1
MD
436 *vpp = NULL;
437 error = EINVAL;
438 } else {
439 error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp);
440 (*vpp)->v_flag |= VROOT;
441 (*vpp)->v_type = VDIR;
442 }
7a2de9a4
MD
443 return error;
444}
445
446/* --------------------------------------------------------------------- */
447
448static int
449tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp)
450{
451 boolean_t found;
452 struct tmpfs_fid *tfhp;
453 struct tmpfs_mount *tmp;
454 struct tmpfs_node *node;
455
456 tmp = VFS_TO_TMPFS(mp);
457
458 tfhp = (struct tmpfs_fid *)fhp;
459 if (tfhp->tf_len != sizeof(struct tmpfs_fid))
460 return EINVAL;
461
462 if (tfhp->tf_id >= tmp->tm_nodes_max)
463 return EINVAL;
464
465 found = FALSE;
466
467 TMPFS_LOCK(tmp);
468 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
469 if (node->tn_id == tfhp->tf_id &&
470 node->tn_gen == tfhp->tf_gen) {
471 found = TRUE;
472 break;
473 }
474 }
475 TMPFS_UNLOCK(tmp);
476
477 if (found)
478 return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp));
479
480 return (EINVAL);
481}
482
483/* --------------------------------------------------------------------- */
484
485/* ARGSUSED2 */
486static int
487tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
488{
489 fsfilcnt_t freenodes;
490 struct tmpfs_mount *tmp;
491
492 tmp = VFS_TO_TMPFS(mp);
493
494 sbp->f_iosize = PAGE_SIZE;
495 sbp->f_bsize = PAGE_SIZE;
496
29ffeb28
MD
497 sbp->f_blocks = tmp->tm_pages_max;
498 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used;
499 sbp->f_bfree = sbp->f_bavail;
7a2de9a4 500
29ffeb28 501 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
7a2de9a4
MD
502
503 sbp->f_files = freenodes + tmp->tm_nodes_inuse;
504 sbp->f_ffree = freenodes;
817a2fd9 505 sbp->f_owner = tmp->tm_root->tn_uid;
7a2de9a4
MD
506
507 return 0;
508}
509
190c11cc
SZ
510/* --------------------------------------------------------------------- */
511
512static int
513tmpfs_vptofh(struct vnode *vp, struct fid *fhp)
514{
515 struct tmpfs_node *node;
516 struct tmpfs_fid tfh;
517 node = VP_TO_TMPFS_NODE(vp);
518 memset(&tfh, 0, sizeof(tfh));
519 tfh.tf_len = sizeof(struct tmpfs_fid);
520 tfh.tf_gen = node->tn_gen;
521 tfh.tf_id = node->tn_id;
522 memcpy(fhp, &tfh, sizeof(tfh));
523 return (0);
524}
525
7a2de9a4
MD
526/* --------------------------------------------------------------------- */
527
66fa44e7
VS
528static int
529tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
530 struct ucred **credanonp)
531{
532 struct tmpfs_mount *tmp;
533 struct netcred *nc;
534
535 tmp = (struct tmpfs_mount *) mp->mnt_data;
536 nc = vfs_export_lookup(mp, &tmp->tm_export, nam);
537 if (nc == NULL)
538 return (EACCES);
539
540 *exflagsp = nc->netc_exflags;
541 *credanonp = &nc->netc_anon;
542
543 return (0);
544}
545
546/* --------------------------------------------------------------------- */
547
7a2de9a4
MD
548/*
549 * tmpfs vfs operations.
550 */
551
552static struct vfsops tmpfs_vfsops = {
553 .vfs_mount = tmpfs_mount,
554 .vfs_unmount = tmpfs_unmount,
555 .vfs_root = tmpfs_root,
556 .vfs_statfs = tmpfs_statfs,
557 .vfs_fhtovp = tmpfs_fhtovp,
190c11cc 558 .vfs_vptofh = tmpfs_vptofh,
66fa44e7
VS
559 .vfs_sync = vfs_stdsync,
560 .vfs_checkexp = tmpfs_checkexp,
7a2de9a4
MD
561};
562
563VFS_SET(tmpfs_vfsops, tmpfs, 0);
e5e63c20 564MODULE_VERSION(tmpfs, 1);