2 * Copyright (c) 1989, 1991, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95
34 * $FreeBSD: src/sys/ufs/ffs/ffs_vfsops.c,v 1.117.2.10 2002/06/23 22:34:52 iedowse Exp $
37 #include "opt_quota.h"
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/namei.h>
43 #include <sys/kernel.h>
44 #include <sys/vnode.h>
45 #include <sys/mount.h>
48 #include <sys/fcntl.h>
49 #include <sys/disklabel.h>
50 #include <sys/malloc.h>
52 #include <ufs/ufs/quota.h>
53 #include <ufs/ufs/ufsmount.h>
54 #include <ufs/ufs/inode.h>
55 #include <ufs/ufs/ufs_extern.h>
57 #include <ufs/ffs/fs.h>
58 #include <ufs/ffs/ffs_extern.h>
61 #include <vm/vm_page.h>
62 #include <vm/vm_zone.h>
/*
 * File-scope declarations: the malloc type handed to ffs_mountfs() for the
 * private part of each vnode, prototypes for the static routines of this
 * file, and the registration of the ufs_vfsops table under the name "ufs".
 * NOTE(review): the initializer of ufs_vfsops is not present in this view,
 * and the number at the start of each line looks like extraction residue;
 * confirm against the pristine file.
 */
64 static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part");
66 static int ffs_sbupdate __P((struct ufsmount *, int));
67 static int ffs_reload __P((struct mount *,struct ucred *,struct proc *));
68 static int ffs_oldfscompat __P((struct fs *));
69 static int ffs_mount __P((struct mount *, char *, caddr_t,
70 struct nameidata *, struct proc *));
71 static int ffs_init __P((struct vfsconf *));
73 static struct vfsops ufs_vfsops = {
90 VFS_SET(ufs_vfsops, ufs, 0);
95 * Called when mounting local physical media
99 * mp mount point structure
100 * path NULL (flag for root mount!!!)
103 * p process (user credentials check [statfs])
106 * mp mount point structure
107 * path path to mount point
108 * data pointer to argument struct in user space
109 * ndp mount point namei() return (used for
110 * credentials on reload), reused to look
112 * p process (user credentials check)
115 * !0 error number (errno.h)
120 * mount point is locked
122 * mount point is locked
125 * A NULL path can be used for a flag since the mount
126 * system call will fail with EFAULT in copyinstr in
127 * namei() if it is a genuine NULL from the user.
/*
 * ffs_mount: mount-time entry point for root, new, and MNT_UPDATE mounts.
 *
 * As far as this extract shows, the routine:
 *  - for a root mount, resolves rootdev with bdevvp() and hands the vnode
 *    to ffs_mountfs();
 *  - otherwise copies the ufs_args structure in from user space;
 *  - for MNT_UPDATE, handles the downgrade from read-write to read-only
 *    (sync, then flush files), an optional MNT_RELOAD, the upgrade from
 *    read-only to read-write (device access check for non-root callers,
 *    clean-flag check, soft updates start-up), and export requests when no
 *    device name is supplied;
 *  - for a new mount, looks up args.fspec, checks that it is a disk and
 *    that the caller may access it, records the mount-point and device
 *    names, and calls ffs_mountfs();
 *  - finally refreshes mp->mnt_stat through VFS_STATFS() and, after a
 *    successful update, rewrites the superblock if the read-only state
 *    changed.
 *
 * NOTE(review): many lines (braces, declarations, returns, gotos) are
 * absent from this view and each line carries a leading number that looks
 * like extraction residue, so the control flow described here is partial.
 */
130 ffs_mount( mp, path, data, ndp, p)
131 struct mount *mp; /* mount struct pointer*/
132 char *path; /* path to mount point*/
133 caddr_t data; /* arguments to FS specific mount*/
134 struct nameidata *ndp; /* mount point credentials*/
135 struct proc *p; /* process requesting mount*/
141 struct ufs_args args;
142 struct ufsmount *ump = 0;
143 register struct fs *fs;
144 int error, flags, ronly = 0;
148 * Use NULL path to flag a root mount
153 * Mounting root file system
157 if ((err = bdevvp(rootdev, &rootvp))) {
158 printf("ffs_mountroot: can't find rootvp\n");
162 if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
163 /* fs specific cleanup (if any)*/
167 goto dostatfs; /* success*/
173 * Mounting non-root file system or updating a file system
177 /* copy in user arguments*/
178 err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
180 goto error_1; /* can't get arguments*/
183 * If updating, check whether changing from read-only to
184 * read/write; if there is no device name, that's all we do.
186 if (mp->mnt_flag & MNT_UPDATE) {
189 devvp = ump->um_devvp;
191 ronly = fs->fs_ronly; /* MNT_RELOAD might change this */
/*
 * ronly holds the state before the update; MNT_RDONLY being set while
 * ronly is 0 means a downgrade to read-only was requested.
 */
192 if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
194 * Flush any dirty data.
196 VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
198 * Check for and optionally get rid of files open
202 if (mp->mnt_flag & MNT_FORCE)
/* Soft-updates mounts flush through softdep_flushfiles() instead. */
204 if (mp->mnt_flag & MNT_SOFTDEP) {
205 err = softdep_flushfiles(mp, flags, p);
207 err = ffs_flushfiles(mp, flags, p);
211 if (!err && (mp->mnt_flag & MNT_RELOAD))
212 err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
/* Upgrade path: currently read-only and MNTK_WANTRDWR is set. */
216 if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
218 * If upgrade to read-write by non-root, then verify
219 * that user has necessary permissions on the device.
221 if (p->p_ucred->cr_uid != 0) {
222 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
223 if ((error = VOP_ACCESS(devvp, VREAD | VWRITE,
224 p->p_ucred, p)) != 0) {
225 VOP_UNLOCK(devvp, 0, p);
228 VOP_UNLOCK(devvp, 0, p);
/* Recompute FS_UNCLEAN from the superblock's fs_clean field. */
231 fs->fs_flags &= ~FS_UNCLEAN;
232 if (fs->fs_clean == 0) {
233 fs->fs_flags |= FS_UNCLEAN;
234 if (mp->mnt_flag & MNT_FORCE) {
236 "WARNING: %s was not properly dismounted\n",
240 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
247 /* check to see if we need to start softdep */
248 if (fs->fs_flags & FS_DOSOFTDEP) {
249 err = softdep_mount(devvp, mp, fs, p->p_ucred);
257 * Soft updates is incompatible with "async",
258 * so if we are doing softupdates stop the user
259 * from setting the async flag in an update.
260 * Softdep_mount() clears it in an initial mount
263 if (mp->mnt_flag & MNT_SOFTDEP) {
264 mp->mnt_flag &= ~MNT_ASYNC;
266 /* if not updating name...*/
267 if (args.fspec == 0) {
269 * Process export requests. Jumping to "success"
270 * will return the vfs_export() error code.
272 err = vfs_export(mp, &ump->um_export, &args.export);
278 * Not an update, or updating the name: look up the name
279 * and verify that it refers to a sensible block device.
281 NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
284 /* can't get devvp!*/
288 NDFREE(ndp, NDF_ONLY_PNBUF);
/* vn_isdisk() stores the reason in err when devvp is not a disk. */
291 if (!vn_isdisk(devvp, &err))
295 * If mount by non-root, then verify that user has necessary
296 * permissions on the device.
298 if (p->p_ucred->cr_uid != 0) {
/* Write access is only demanded when the mount is not read-only. */
300 if ((mp->mnt_flag & MNT_RDONLY) == 0)
301 accessmode |= VWRITE;
302 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
303 if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) {
307 VOP_UNLOCK(devvp, 0, p);
310 if (mp->mnt_flag & MNT_UPDATE) {
314 * If it's not the same vnode, or at least the same device
315 * then it's not correct.
319 if (devvp != ump->um_devvp) {
320 if ( devvp->v_rdev == ump->um_devvp->v_rdev) {
323 err = EINVAL; /* needs translation */
328 * Update device name only on success
331 /* Save "mounted from" info for mount point (NULL pad)*/
332 copyinstr( args.fspec,
333 mp->mnt_stat.f_mntfromname,
336 bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
346 * Since this is a new mount, we want the names for
347 * the device and the mount point copied in. If an
348 * error occurs, the mountpoint is discarded by the
351 /* Save "last mounted on" info for mount point (NULL pad)*/
/*
 * NOTE(review): copyinstr() is called as a plain statement in the two
 * copies below, so its return value is not examined here.
 */
352 copyinstr( path, /* mount point*/
353 mp->mnt_stat.f_mntonname, /* save area*/
354 MNAMELEN - 1, /* max size*/
355 &size); /* real size*/
356 bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
358 /* Save "mounted from" info for mount point (NULL pad)*/
359 copyinstr( args.fspec, /* device name*/
360 mp->mnt_stat.f_mntfromname, /* save area*/
361 MNAMELEN - 1, /* max size*/
362 &size); /* real size*/
363 bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
365 err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
373 * Initialize FS stat information in mount struct; uses both
374 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
376 * This code is common to root and non-root mounts
378 (void)VFS_STATFS(mp, &mp->mnt_stat, p);
383 error_2: /* error with devvp held*/
385 /* release devvp before failing*/
388 error_1: /* no state to back out*/
/*
 * Runs only for a successful MNT_UPDATE with a non-NULL path (a NULL path
 * flags a root mount).
 */
391 if (!err && path && (mp->mnt_flag & MNT_UPDATE)) {
392 /* Update clean flag after changing read-onlyness. */
394 if (ronly != fs->fs_ronly) {
395 fs->fs_ronly = ronly;
/* Clean only when ending up read-only with FS_UNCLEAN not set. */
396 fs->fs_clean = ronly &&
397 (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0;
398 ffs_sbupdate(ump, MNT_WAIT);
405 * Reload all incore data for a filesystem (used after running fsck on
406 * the root filesystem and finding things to fix). The filesystem must
407 * be mounted read-only.
409 * Things to do to update the mount:
410 * 1) invalidate all cached meta-data.
411 * 2) re-read superblock from disk.
412 * 3) re-read summary information from disk.
413 * 4) invalidate all inactive vnodes.
414 * 5) invalidate all cached file data.
415 * 6) re-read inode data for all active vnodes.
/*
 * ffs_reload: re-read the in-core state of a mounted filesystem from disk.
 *
 * Visible steps: test MNT_RDONLY on the mount, invalidate the buffers of
 * the device vnode, re-read and validate the superblock, carry the pointer
 * fields (fs_csp, fs_maxcluster, fs_contigdirs) of the old in-core
 * superblock over into the freshly read one before overwriting the in-core
 * copy, re-read the cylinder-group summary area, reset the per-group
 * cluster maxima, and then walk the vnode list of the mount, recycling
 * vnodes, invalidating their buffers and re-reading their on-disk inodes.
 *
 * NOTE(review): braces, returns and several statements are absent from
 * this view and each line carries a leading number that looks like
 * extraction residue, so this description is partial.
 */
418 ffs_reload(mp, cred, p)
419 register struct mount *mp;
423 register struct vnode *vp, *nvp, *devvp;
427 struct fs *fs, *newfs;
428 struct partinfo dpart;
430 int i, blks, size, error;
433 if ((mp->mnt_flag & MNT_RDONLY) == 0)
436 * Step 1: invalidate all cached meta-data.
438 devvp = VFSTOUFS(mp)->um_devvp;
439 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
440 error = vinvalbuf(devvp, 0, cred, p, 0, 0);
441 VOP_UNLOCK(devvp, 0, p);
443 panic("ffs_reload: dirty1");
/*
 * NOTE(review): the remnant text below refers to ffs_mountmfs(); the
 * matching explanation in this file sits in ffs_mountfs(), so the name is
 * presumably a typo -- confirm.
 */
448 * Only VMIO the backing device if the backing device is a real
449 * block device. See ffs_mountmfs() for more details.
451 if (devvp->v_tag != VT_MFS && vn_isdisk(devvp, NULL)) {
452 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
453 vfs_object_create(devvp, p, p->p_ucred);
454 simple_lock(&devvp->v_interlock);
455 VOP_UNLOCK(devvp, LK_INTERLOCK, p);
459 * Step 2: re-read superblock from disk.
461 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0)
/* The sector size from the disklabel converts SBOFF to a block number. */
464 size = dpart.disklab->d_secsize;
465 if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0)
467 newfs = (struct fs *)bp->b_data;
/*
 * Reject a block that does not look like an FFS superblock: wrong magic,
 * or a block size above MAXBSIZE or below sizeof(struct fs).
 */
468 if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE ||
469 newfs->fs_bsize < sizeof(struct fs)) {
471 return (EIO); /* XXX needs translation */
473 fs = VFSTOUFS(mp)->um_fs;
475 * Copy pointer fields back into superblock before copying in XXX
476 * new superblock. These should really be in the ufsmount. XXX
477 * Note that important parameters (eg fs_ncg) are unchanged.
479 newfs->fs_csp = fs->fs_csp;
480 newfs->fs_maxcluster = fs->fs_maxcluster;
481 newfs->fs_contigdirs = fs->fs_contigdirs;
/* The copy length is the fs_sbsize of the in-core copy being replaced. */
482 bcopy(newfs, fs, (u_int)fs->fs_sbsize);
483 if (fs->fs_sbsize < SBSIZE)
484 bp->b_flags |= B_INVAL;
486 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
488 /* An old fsck may have zeroed these fields, so recheck them. */
489 if (fs->fs_avgfilesize <= 0) /* XXX */
490 fs->fs_avgfilesize = AVFILESIZ; /* XXX */
491 if (fs->fs_avgfpdir <= 0) /* XXX */
492 fs->fs_avgfpdir = AFPDIR; /* XXX */
495 * Step 3: re-read summary information from disk.
/* blks is the summary area size in fragments, rounded up. */
497 blks = howmany(fs->fs_cssize, fs->fs_fsize);
499 for (i = 0; i < blks; i += fs->fs_frag) {
/* The last chunk may be shorter than a full block. */
501 if (i + fs->fs_frag > blks)
502 size = (blks - i) * fs->fs_fsize;
503 error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
507 bcopy(bp->b_data, space, (u_int)size);
508 space = (char *)space + size;
512 * We no longer know anything about clusters per cylinder group.
514 if (fs->fs_contigsumsize > 0) {
515 lp = fs->fs_maxcluster;
516 for (i = 0; i < fs->fs_ncg; i++)
517 *lp++ = fs->fs_contigsumsize;
/* Walk every vnode on the mount's list under mntvnode_slock. */
521 simple_lock(&mntvnode_slock);
522 for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
523 if (vp->v_mount != mp) {
524 simple_unlock(&mntvnode_slock);
/* The successor is fetched before vp can be recycled or slept on. */
527 nvp = TAILQ_NEXT(vp, v_nmntvnodes);
529 * Step 4: invalidate all inactive vnodes.
531 if (vrecycle(vp, &mntvnode_slock, p))
534 * Step 5: invalidate all cached file data.
536 simple_lock(&vp->v_interlock);
537 simple_unlock(&mntvnode_slock);
538 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
541 if (vinvalbuf(vp, 0, cred, p, 0, 0))
542 panic("ffs_reload: dirty2");
544 * Step 6: re-read inode data for all active vnodes.
548 bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
549 (int)fs->fs_bsize, NOCRED, &bp);
/* Copy this inode's dinode out of the block that was just read. */
554 ip->i_din = *((struct dinode *)bp->b_data +
555 ino_to_fsbo(fs, ip->i_number));
556 ip->i_effnlink = ip->i_nlink;
559 simple_lock(&mntvnode_slock);
561 simple_unlock(&mntvnode_slock);
566 * Common code for mount and mountroot
/*
 * ffs_mountfs: mount work shared by the root and non-root paths of
 * ffs_mount(); malloctype is stored in the ufsmount for later inode
 * allocations.
 *
 * Visible steps: check that the device is not already mounted or in use
 * and flush its old buffers, open the device, read and validate the
 * superblock, allocate the ufsmount and an in-core copy of the superblock,
 * read the cylinder-group summary area into one allocation that also holds
 * the fs_maxcluster and fs_contigdirs arrays, fill in the mount and
 * ufsmount fields, clamp fs_maxfilesize, and start soft updates when
 * FS_DOSOFTDEP is set.  The trailing lines clear v_specmountpoint, close
 * the device and free the ufsmount state -- presumably the error exit.
 *
 * NOTE(review): braces, labels, returns and several statements are absent
 * from this view and each line carries a leading number that looks like
 * extraction residue, so this description is partial.
 */
569 ffs_mountfs(devvp, mp, p, malloctype)
570 register struct vnode *devvp;
573 struct malloc_type *malloctype;
575 register struct ufsmount *ump;
577 register struct fs *fs;
579 struct partinfo dpart;
581 int error, i, blks, size, ronly;
584 u_int64_t maxfilesize; /* XXX */
/* A NULL p yields NOCRED instead of the credentials of the process. */
589 cred = p ? p->p_ucred : NOCRED;
591 * Disallow multiple mounts of the same device.
592 * Disallow mounting of a device that is currently in use
593 * (except for root, which might share swap device for miniroot).
594 * Flush out any old buffers remaining from a previous use.
596 error = vfs_mountedon(devvp);
599 ncount = vcount(devvp);
601 if (ncount > 1 && devvp != rootvp)
603 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
604 error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
605 VOP_UNLOCK(devvp, 0, p);
610 * Only VMIO the backing device if the backing device is a real
611 * block device. This excludes the original MFS implementation.
612 * Note that it is optional that the backing device be VMIOed. This
613 * increases the opportunity for metadata caching.
615 if (devvp->v_tag != VT_MFS && vn_isdisk(devvp, NULL)) {
616 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
/*
 * NOTE(review): cred above is computed with a NULL check on p, but this
 * call dereferences p unconditionally -- confirm p cannot be NULL here.
 */
617 vfs_object_create(devvp, p, p->p_ucred);
618 simple_lock(&devvp->v_interlock);
619 VOP_UNLOCK(devvp, LK_INTERLOCK, p);
/* The device is opened read-only exactly when the mount is read-only. */
622 ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
623 vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
624 error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
625 VOP_UNLOCK(devvp, 0, p);
/* Take the device's I/O size limit when it has one, capped at MAXPHYS. */
628 if (devvp->v_rdev->si_iosize_max != 0)
629 mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
630 if (mp->mnt_iosize_max > MAXPHYS)
631 mp->mnt_iosize_max = MAXPHYS;
633 if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0)
636 size = dpart.disklab->d_secsize;
640 if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0)
642 fs = (struct fs *)bp->b_data;
/*
 * Reject a block that does not look like an FFS superblock: wrong magic,
 * or a block size above MAXBSIZE or below sizeof(struct fs).
 */
643 if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE ||
644 fs->fs_bsize < sizeof(struct fs)) {
645 error = EINVAL; /* XXX needs translation */
/* Recompute FS_UNCLEAN from the superblock's fs_clean field. */
649 fs->fs_flags &= ~FS_UNCLEAN;
650 if (fs->fs_clean == 0) {
651 fs->fs_flags |= FS_UNCLEAN;
652 if (ronly || (mp->mnt_flag & MNT_FORCE)) {
654 "WARNING: %s was not properly dismounted\n",
658 "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
664 /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */
665 if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) {
666 error = EROFS; /* needs translation */
/* Allocate and zero the per-mount structure, then fill in its hooks. */
669 ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK);
670 bzero((caddr_t)ump, sizeof *ump);
671 ump->um_malloctype = malloctype;
672 ump->um_i_effnlink_valid = 1;
673 ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT,
675 ump->um_blkatoff = ffs_blkatoff;
676 ump->um_truncate = ffs_truncate;
677 ump->um_update = ffs_update;
678 ump->um_valloc = ffs_valloc;
679 ump->um_vfree = ffs_vfree;
/* Keep a private in-core copy of the superblock just read. */
680 bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
681 if (fs->fs_sbsize < SBSIZE)
682 bp->b_flags |= B_INVAL;
686 fs->fs_ronly = ronly;
/*
 * One allocation is sized for the summary area plus one int32_t per
 * cylinder group (only when fs_contigsumsize > 0) plus one u_int8_t per
 * cylinder group.
 */
687 size = fs->fs_cssize;
688 blks = howmany(size, fs->fs_fsize);
689 if (fs->fs_contigsumsize > 0)
690 size += fs->fs_ncg * sizeof(int32_t);
691 size += fs->fs_ncg * sizeof(u_int8_t);
692 space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
694 for (i = 0; i < blks; i += fs->fs_frag) {
/* The last chunk may be shorter than a full block. */
696 if (i + fs->fs_frag > blks)
697 size = (blks - i) * fs->fs_fsize;
698 if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
700 free(fs->fs_csp, M_UFSMNT);
703 bcopy(bp->b_data, space, (u_int)size);
704 space = (char *)space + size;
/* The cluster maxima array follows the summary data in the allocation. */
708 if (fs->fs_contigsumsize > 0) {
709 fs->fs_maxcluster = lp = space;
710 for (i = 0; i < fs->fs_ncg; i++)
711 *lp++ = fs->fs_contigsumsize;
/* fs_contigdirs gets one zeroed byte per cylinder group. */
714 size = fs->fs_ncg * sizeof(u_int8_t);
715 fs->fs_contigdirs = (u_int8_t *)space;
716 bzero(fs->fs_contigdirs, size);
717 /* Compatibility for old filesystems XXX */
718 if (fs->fs_avgfilesize <= 0) /* XXX */
719 fs->fs_avgfilesize = AVFILESIZ; /* XXX */
720 if (fs->fs_avgfpdir <= 0) /* XXX */
721 fs->fs_avgfpdir = AFPDIR; /* XXX */
722 mp->mnt_data = (qaddr_t)ump;
723 mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
724 mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
/*
 * Tests for a zero id word or an fsid that vfs_getvfs() already finds;
 * the action taken is on a line not shown here.
 */
725 if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
726 vfs_getvfs(&mp->mnt_stat.f_fsid))
728 mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
729 mp->mnt_flag |= MNT_LOCAL;
732 ump->um_devvp = devvp;
733 ump->um_nindir = fs->fs_nindir;
734 ump->um_bptrtodb = fs->fs_fsbtodb;
735 ump->um_seqinc = fs->fs_frag;
736 for (i = 0; i < MAXQUOTAS; i++)
737 ump->um_quotas[i] = NULLVP;
738 devvp->v_specmountpoint = mp;
742 * Set FS local "last mounted on" information (NULL pad)
744 copystr( mp->mnt_stat.f_mntonname, /* mount point*/
745 fs->fs_fsmnt, /* copy area*/
746 sizeof(fs->fs_fsmnt) - 1, /* max size*/
747 &strsize); /* real size*/
748 bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize);
750 if( mp->mnt_flag & MNT_ROOTFS) {
752 * Root mount; update timestamp in mount structure.
753 * this will be used by the common root mount code
754 * to update the system clock.
756 mp->mnt_time = fs->fs_time;
/*
 * The on-disk fs_maxfilesize is saved in the ufsmount before the in-core
 * value is clamped below.
 */
759 ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */
760 maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */
761 /* Enforce limit caused by vm object backing (32 bits vm_pindex_t). */
762 if (maxfilesize > (u_int64_t)0x80000000u * PAGE_SIZE - 1)
763 maxfilesize = (u_int64_t)0x80000000u * PAGE_SIZE - 1;
764 if (fs->fs_maxfilesize > maxfilesize) /* XXX */
765 fs->fs_maxfilesize = maxfilesize; /* XXX */
767 if ((fs->fs_flags & FS_DOSOFTDEP) &&
768 (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
769 free(fs->fs_csp, M_UFSMNT);
774 (void) ffs_sbupdate(ump, MNT_WAIT);
778 devvp->v_specmountpoint = NULL;
781 (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
783 free(ump->um_fs, M_UFSMNT);
785 mp->mnt_data = (qaddr_t)0;
791 * Sanity checks for old file systems.
793 * XXX - goes away some day.
/*
 * Compatibility fix-ups applied to an older-format superblock.
 * NOTE(review): the function header is not present in this view; the
 * prototype list near the top of the file suggests this is the body of
 * ffs_oldfscompat(fs) -- confirm.
 *
 * Visible effects: fs_npsect is raised to at least fs_nsect, fs_interleave
 * to at least 1, and fs_nrpos is set to 8 for FS_42POSTBLFMT.  Under the
 * test for inode formats older than FS_44INODEFMT, fs_maxfilesize is built
 * up from the direct and indirect block capacities and is also assigned
 * 1 << 39, and fs_qbmask and fs_qfmask are set to the complements of
 * fs_bmask and fs_fmask.  Lines between these statements are missing, so
 * which fs_maxfilesize assignment is live cannot be told from here.
 */
800 fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */
801 fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */
802 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
803 fs->fs_nrpos = 8; /* XXX */
804 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
807 u_int64_t sizepb = fs->fs_bsize; /* XXX */
/* Start with the bytes reachable through the NDADDR direct blocks. */
809 fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */
/* Each indirection level multiplies the reachable size by NINDIR(fs). */
810 for (i = 0; i < NIADDR; i++) { /* XXX */
811 sizepb *= NINDIR(fs); /* XXX */
812 fs->fs_maxfilesize += sizepb; /* XXX */
815 fs->fs_maxfilesize = (u_quad_t) 1LL << 39;
816 fs->fs_qbmask = ~fs->fs_bmask; /* XXX */
817 fs->fs_qfmask = ~fs->fs_fmask; /* XXX */
823 * unmount system call
/*
 * ffs_unmount: tear down a mounted FFS filesystem.
 *
 * Visible steps: flush open files (through softdep_flushfiles() when
 * MNT_SOFTDEP is set, otherwise ffs_flushfiles()), write the superblock
 * with a recomputed clean flag if the filesystem was not read-only, detach
 * the mount from the device vnode, flush and close the device, release the
 * device vnode and the summary-area allocation, and clear mnt_data and
 * MNT_LOCAL on the mount.
 *
 * NOTE(review): braces, returns and some statements are absent from this
 * view and each line carries a leading number that looks like extraction
 * residue.
 */
826 ffs_unmount(mp, mntflags, p)
831 register struct ufsmount *ump;
832 register struct fs *fs;
836 if (mntflags & MNT_FORCE) {
839 if (mp->mnt_flag & MNT_SOFTDEP) {
840 if ((error = softdep_flushfiles(mp, flags, p)) != 0)
843 if ((error = ffs_flushfiles(mp, flags, p)) != 0)
848 if (fs->fs_ronly == 0) {
/* The filesystem is marked clean unless FS_UNCLEAN was recorded. */
849 fs->fs_clean = fs->fs_flags & FS_UNCLEAN ? 0 : 1;
850 error = ffs_sbupdate(ump, MNT_WAIT);
856 ump->um_devvp->v_specmountpoint = NULL;
858 vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0);
/* Close with the same mode the device was opened with at mount time. */
859 error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
862 vrele(ump->um_devvp);
864 free(fs->fs_csp, M_UFSMNT);
867 mp->mnt_data = (qaddr_t)0;
868 mp->mnt_flag &= ~MNT_LOCAL;
873 * Flush out all the files in a filesystem.
/*
 * ffs_flushfiles: flush every vnode of the mount, then sync the device.
 *
 * When MNT_QUOTA is set, a first vflush() pass skips system vnodes
 * (SKIPSYSTEM) and the quota slots are walked; a second vflush() then runs
 * with the caller's flags only.  Finally the device vnode is fsynced with
 * MNT_WAIT while locked.
 *
 * NOTE(review): the statements that act on each quota slot, plus braces and
 * returns, are absent from this view; each line carries a leading number
 * that looks like extraction residue.
 */
876 ffs_flushfiles(mp, flags, p)
877 register struct mount *mp;
881 register struct ufsmount *ump;
886 if (mp->mnt_flag & MNT_QUOTA) {
888 error = vflush(mp, 0, SKIPSYSTEM|flags);
891 for (i = 0; i < MAXQUOTAS; i++) {
/* Tests for a quota type with no quota vnode attached. */
892 if (ump->um_quotas[i] == NULLVP)
897 * Here we fall through to vflush again to ensure
898 * that we have gotten rid of all the system vnodes.
903 * Flush all the files.
905 if ((error = vflush(mp, 0, flags)) != 0)
908 * Flush filesystem metadata.
910 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
911 error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
912 VOP_UNLOCK(ump->um_devvp, 0, p);
917 * Get file system statistics.
/*
 * ffs_statfs: fill *sbp with statistics taken from the in-core superblock.
 *
 * f_bsize is the fragment size and f_iosize the block size; f_blocks,
 * f_bfree, f_bavail, f_files and f_ffree come from the superblock totals.
 * When sbp is not the mount's own mnt_stat, the filesystem type number and
 * the two mount names are copied from mp as well.
 *
 * NOTE(review): the assignments of ump and fs, the action taken on a bad
 * magic number, and the return are absent from this view; each line
 * carries a leading number that looks like extraction residue.
 */
920 ffs_statfs(mp, sbp, p)
922 register struct statfs *sbp;
925 register struct ufsmount *ump;
926 register struct fs *fs;
930 if (fs->fs_magic != FS_MAGIC)
932 sbp->f_bsize = fs->fs_fsize;
933 sbp->f_iosize = fs->fs_bsize;
934 sbp->f_blocks = fs->fs_dsize;
/* Free space in fragments: whole free blocks plus loose free fragments. */
935 sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
936 fs->fs_cstotal.cs_nffree;
937 sbp->f_bavail = freespace(fs, fs->fs_minfree);
/* Total inode slots over all cylinder groups, less ROOTINO. */
938 sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
939 sbp->f_ffree = fs->fs_cstotal.cs_nifree;
/* Nothing needs copying when the caller passed the mount's own buffer. */
940 if (sbp != &mp->mnt_stat) {
941 sbp->f_type = mp->mnt_vfc->vfc_typenum;
942 bcopy((caddr_t)mp->mnt_stat.f_mntonname,
943 (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
944 bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
945 (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
951 * Go through the disk queues to initiate sandbagged IO;
952 * go through the inodes to write those that have been modified;
953 * initiate the writing of the super block if it has been modified.
955 * Note: we are always called with the filesystem marked `MPBUSY'.
/*
 * ffs_sync: write back modified inodes, the device's buffers and the
 * superblock of one mounted filesystem.
 *
 * Visible steps: panic if a read-only filesystem has fs_fmod set, walk the
 * vnode list of the mount under mntvnode_slock and fsync vnodes that pass
 * the cheap dirty test, fsync the device vnode unless waitfor is MNT_LAZY,
 * and call ffs_sbupdate() when fs_fmod is set.
 *
 * NOTE(review): the loop restart label, continue statements, error
 * bookkeeping and the return are absent from this view; each line carries
 * a leading number that looks like extraction residue.
 */
958 ffs_sync(mp, waitfor, cred, p)
964 struct vnode *nvp, *vp;
966 struct ufsmount *ump = VFSTOUFS(mp);
968 int error, allerror = 0;
/* A modified superblock on a read-only filesystem is treated as fatal. */
971 if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */
972 printf("fs = %s\n", fs->fs_fsmnt);
973 panic("ffs_sync: rofs mod");
976 * Write back each (modified) inode.
978 simple_lock(&mntvnode_slock);
980 for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
982 * If the vnode that we are about to sync is no longer
983 * associated with this mount point, start over.
985 if (vp->v_mount != mp)
989 * Depend on the mntvnode_slock to keep things stable enough
990 * for a quick test. Since there might be hundreds of
991 * thousands of vnodes, we cannot afford even a subroutine
992 * call unless there's a good chance that we have work to do.
994 nvp = TAILQ_NEXT(vp, v_nmntvnodes);
/*
 * Cheap filter: type VNON, or none of the access/change/modified/update
 * flags set together with an empty dirty buffer list.  The action taken
 * is on a line not shown here.
 */
996 if (vp->v_type == VNON || ((ip->i_flag &
997 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
998 TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
/* Vnodes that are not VCHR are locked with vget() and fsynced. */
1001 if (vp->v_type != VCHR) {
1002 simple_unlock(&mntvnode_slock);
1003 error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT, p);
1005 simple_lock(&mntvnode_slock);
1006 if (error == ENOENT)
1009 if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
1011 VOP_UNLOCK(vp, 0, p);
1013 simple_lock(&mntvnode_slock);
1017 * We must reference the vp to prevent it from
1018 * getting ripped out from under UFS_UPDATE, since
1019 * we are not holding a vnode lock. XXX why aren't
1020 * we holding a vnode lock?
1023 simple_unlock(&mntvnode_slock);
1024 /* UFS_UPDATE(vp, waitfor == MNT_WAIT); */
1027 simple_lock(&mntvnode_slock);
/* Detects that the list changed while mntvnode_slock was dropped. */
1029 if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
1032 simple_unlock(&mntvnode_slock);
1034 * Force stale file system control information to be flushed.
1036 if (waitfor != MNT_LAZY) {
/* With soft updates the device fsync is issued as MNT_NOWAIT. */
1037 if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
1038 waitfor = MNT_NOWAIT;
1039 vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
1040 if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
1042 VOP_UNLOCK(ump->um_devvp, 0, p);
1048 * Write back modified superblock.
1050 if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
1056 * Look up a FFS dinode number to find its incore vnode, otherwise read it
1057 * in from disk. If it is in core, wait for the lock bit to clear, then
1058 * return the inode locked. Detection and handling of mount points must be
1059 * done by the calling routine.
/*
 * ffs_vget: return the vnode for inode number ino on mount mp through
 * *vpp, creating and filling it from disk when it is not in the inode hash.
 *
 * ffs_inode_hash_lock serialises creation of new hash entries: it is 1
 * while held, set to -1 by a waiter before it sleeps, and the holder calls
 * wakeup() on release when it finds the value negative.
 *
 * Visible steps: look the inode up with ufs_ihashget(), take the hash lock,
 * allocate the in-core inode before the vnode (see the remnant comment on
 * why), initialise the inode and its lock, release the hash lock, read the
 * on-disk inode with bread(), finish set-up through ufs_vinit(), and apply
 * the fix-ups for a zero generation number and for old inode formats.
 *
 * NOTE(review): declarations, error exits, the hash insertion and the
 * return are absent from this view; each line carries a leading number
 * that looks like extraction residue.
 */
1061 static int ffs_inode_hash_lock;
1064 ffs_vget(mp, ino, vpp)
1071 struct ufsmount *ump;
/* Already hashed: the lookup result is stored straight into *vpp. */
1080 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
1085 * Lock out the creation of new entries in the FFS hash table in
1086 * case getnewvnode() or MALLOC() blocks, otherwise a duplicate
/* Mark the lock as contended (-1) and sleep until it is released. */
1089 if (ffs_inode_hash_lock) {
1090 while (ffs_inode_hash_lock) {
1091 ffs_inode_hash_lock = -1;
1092 tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0);
1096 ffs_inode_hash_lock = 1;
1099 * If this MALLOC() is performed after the getnewvnode()
1100 * it might block, leaving a vnode with a NULL v_data to be
1101 * found by ffs_sync() if a sync happens to fire right then,
1102 * which will cause a panic because ffs_sync() blindly
1103 * dereferences vp->v_data (as well it should).
1105 MALLOC(ip, struct inode *, sizeof(struct inode),
1106 ump->um_malloctype, M_WAITOK);
1108 /* Allocate a new vnode/inode. */
1109 error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
/* Release the hash lock (waking sleepers) and free the unused inode. */
1111 if (ffs_inode_hash_lock < 0)
1112 wakeup(&ffs_inode_hash_lock);
1113 ffs_inode_hash_lock = 0;
1115 FREE(ip, ump->um_malloctype);
1118 bzero((caddr_t)ip, sizeof(struct inode));
1119 lockinit(&ip->i_lock, PINOD, "inode", VLKTIMEOUT, LK_CANRECURSE);
1122 * FFS supports lock sharing in the stack of vnodes
1124 vp->v_vnlock = &ip->i_lock;
1126 ip->i_fs = fs = ump->um_fs;
1132 for (i = 0; i < MAXQUOTAS; i++)
1133 ip->i_dquot[i] = NODQUOT;
1137 * Put it onto its hash chain and lock it so that other requests for
1138 * this inode will block if they arrive while we are sleeping waiting
1139 * for old data structures to be purged or for the contents of the
1140 * disk portion of this inode to be read.
/* Release the hash lock, waking any sleeper that marked it contended. */
1144 if (ffs_inode_hash_lock < 0)
1145 wakeup(&ffs_inode_hash_lock);
1146 ffs_inode_hash_lock = 0;
1148 /* Read in the disk contents for the inode, copy into the inode. */
1149 error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)),
1150 (int)fs->fs_bsize, NOCRED, &bp);
1153 * The inode does not contain anything useful, so it would
1154 * be misleading to leave it on its hash chain. With mode
1155 * still zero, it will be unlinked and returned to the free
/* Copy this inode's dinode out of the block that was just read. */
1163 ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
1164 if (DOINGSOFTDEP(vp))
1165 softdep_load_inodeblock(ip);
1167 ip->i_effnlink = ip->i_nlink;
1171 * Initialize the vnode from the inode, check for aliases.
1172 * Note that the underlying vnode may have changed.
1174 error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp);
1181 * Finish inode initialization now that aliasing has been resolved.
1183 ip->i_devvp = ump->um_devvp;
1186 * Set up a generation number for this inode if it does not
1187 * already have one. This should only happen on old filesystems.
1189 if (ip->i_gen == 0) {
1190 ip->i_gen = random() / 2 + 1;
/* The inode is only marked modified when the mount is writable. */
1191 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
1192 ip->i_flag |= IN_MODIFIED;
1195 * Ensure that uid and gid are correct. This is a temporary
1196 * fix until fsck has been changed to do the update.
1198 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
1199 ip->i_uid = ip->i_din.di_ouid; /* XXX */
1200 ip->i_gid = ip->i_din.di_ogid; /* XXX */
1208 * File handle to vnode
1210 * Have to be really careful about stale file handles:
1211 * - check that the inode number is valid
1212 * - call ffs_vget() to get the locked inode
1213 * - check for an unallocated inode (i_mode == 0)
1214 * - check that the given client host has export rights and return
1215 * those rights via exflagsp and credanonp
/*
 * ffs_fhtovp: translate a file handle into a vnode.
 *
 * The handle is viewed as a struct ufid and its inode number is range
 * checked against ROOTINO and fs_ncg * fs_ipg; a handle that passes is
 * handed to ufs_fhtovp(), whose result is returned.
 *
 * NOTE(review): the statement executed for an out-of-range inode number is
 * absent from this view; each line carries a leading number that looks
 * like extraction residue.
 */
1218 ffs_fhtovp(mp, fhp, vpp)
1219 register struct mount *mp;
1223 register struct ufid *ufhp;
1226 ufhp = (struct ufid *)fhp;
1227 fs = VFSTOUFS(mp)->um_fs;
/* Valid inode numbers lie in [ROOTINO, fs_ncg * fs_ipg). */
1228 if (ufhp->ufid_ino < ROOTINO ||
1229 ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg)
1231 return (ufs_fhtovp(mp, ufhp, vpp));
1235 * Vnode pointer to File handle
/*
 * Body fragment of the vnode-to-file-handle routine.
 * NOTE(review): the function header, the assignment of ip and the return
 * are absent from this view -- confirm against the pristine file.
 *
 * The caller's handle buffer is viewed as a struct ufid and filled with
 * the structure length, the inode number and the inode generation number.
 */
1243 register struct inode *ip;
1244 register struct ufid *ufhp;
1247 ufhp = (struct ufid *)fhp;
1248 ufhp->ufid_len = sizeof(struct ufid);
1249 ufhp->ufid_ino = ip->i_number;
1250 ufhp->ufid_gen = ip->i_gen;
1255 * Initialize the filesystem; just use ufs_init.
/*
 * Filesystem initialisation: calls softdep_initialize() and then returns
 * whatever ufs_init(vfsp) returns.
 * NOTE(review): the function header is absent from this view; the
 * prototype list near the top of the file suggests this is ffs_init(vfsp)
 * -- confirm.
 */
1259 struct vfsconf *vfsp;
1262 softdep_initialize();
1263 return (ufs_init(vfsp));
1267 * Write a superblock and associated information back to disk.
1270 ffs_sbupdate(mp, waitfor)
1271 struct ufsmount *mp;
1274 register struct fs *dfs, *fs = mp->um_fs;
1275 register struct buf *bp;
1278 int i, size, error, allerror = 0;
1281 * First write back the summary information.
1283 blks = howmany(fs->fs_cssize, fs->fs_fsize);
1285 for (i = 0; i < blks; i += fs->fs_frag) {
1286 size = fs->fs_bsize;
1287 if (i + fs->fs_frag > blks)
1288 size = (blks - i) * fs->fs_fsize;
1289 bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
1291 bcopy(space, bp->b_data, (u_int)size);
1292 space = (char *)space + size;
1293 if (waitfor != MNT_WAIT)
1295 else if ((error = bwrite(bp)) != 0)
1299 * Now write back the superblock itself. If any errors occurred
1300 * up to this point, then fail so that the superblock avoids
1301 * being written out as clean.
1305 bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
1307 fs->fs_time = time_second;
1308 bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
1309 /* Restore compatibility to old file systems. XXX */
1310 dfs = (struct fs *)bp->b_data; /* XXX */
1311 if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
1312 dfs->fs_nrpos = -1; /* XXX */
1313 if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
1314 int32_t *lp, tmp; /* XXX */
1316 lp = (int32_t *)&dfs->fs_qbmask; /* XXX */
1317 tmp = lp[4]; /* XXX */
1318 for (i = 4; i > 0; i--) /* XXX */
1319 lp[i] = lp[i-1]; /* XXX */
1320 lp[0] = tmp; /* XXX */
1322 dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */
1323 if (waitfor != MNT_WAIT)
1325 else if ((error = bwrite(bp)) != 0)