2 * Copyright (c) 1997-1999 Erez Zadok
3 * Copyright (c) 1990 Jan-Simon Pendry
4 * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
5 * Copyright (c) 1990 The Regents of the University of California.
8 * This code is derived from software contributed to Berkeley by
9 * Jan-Simon Pendry at Imperial College, London.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgment:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
41 * $Id: map.c,v 1.5 1999/08/22 05:12:51 ezk Exp $
47 #endif /* HAVE_CONFIG_H */
/*
 * Sentinel time value that smallest_t() treats as "no time set".
 * The expansion is parenthesized as a whole so that it stays a single
 * operand wherever it is used (for example after sizeof or next to a
 * postfix operator).
 */
#define NEVER ((time_t) 0)

/*
 * Yield the earlier of two time values, ignoring NEVER: when one argument
 * is NEVER the other one is returned, and when both are NEVER the result is
 * NEVER.  Every use of an argument is parenthesized so that expressions with
 * low-precedence operators (such as a conditional expression) expand as the
 * caller intended.  Arguments are still evaluated more than once, so they
 * must be free of side effects.
 */
#define smallest_t(t1, t2) \
  ((t1) != NEVER ? ((t2) != NEVER ? ((t1) < (t2) ? (t1) : (t2)) : (t1)) : (t2))

/*
 * Mount flags that indicate activity on a filesystem; the timeout scan
 * leaves a filesystem alone while any of them is set.
 */
#define IGNORE_FLAGS (MFF_MOUNTING|MFF_UNMOUNTING|MFF_RESTART)

/* Return the current generation number and advance the counter. */
#define new_gen() (am_gen++)
59 * Generation numbers are allocated to every node created
60 * by amd. When a filehandle is computed and sent to the
61 * kernel, the generation number makes sure that it is safe
62 * to reallocate a node slot even when the kernel has a cached
63 * reference to its old incarnation.
64 * No garbage collection is done, since it is assumed that
65 * there is no way that 2^32 generation numbers could ever
66 * be allocated by a single run of amd - there is simply
67 * not enough cpu time available.
69 static u_int am_gen = 2; /* Initial generation number */
70 static int timeout_mp_id; /* Id from last call to timeout */
72 am_node *root_node; /* The root of the mount tree */
73 am_node **exported_ap = (am_node **) 0;
74 int exported_ap_size = 0;
75 int first_free_map = 0; /* First available free slot */
76 int last_used_map = -1; /* Last unavailable used slot */
80 * This is the default attributes field which
81 * is copied into every new node to be created.
82 * The individual filesystem fs_init() routines
83 * patch the copy to represent the particular
84 * details for the relevant filesystem type
/*
 * Template attribute record.  init_map() copies it into every new node
 * (mp->am_fattr = gen_fattr) and then overrides individual fields.
 *
 * NOTE(review): only the mode initializer survives in this extract; the
 * opening brace, the other initializers and the closing brace are missing.
 * Confirm the full initializer list against the complete source.
 */
86 static nfsfattr gen_fattr =
89 NFSMODE_LNK | 0777, /* mode */
104 /* forward declarations */
105 static int unmount_node(am_node *mp);
106 static void exported_ap_free(am_node *mp);
107 static void remove_am(am_node *mp);
111 * Resize exported_ap map
/*
 * exported_ap_realloc_map: resize the exported_ap table to nsize slots.
 *
 * nsize: requested number of am_node pointer slots.
 *
 * A negative size, or one equal to the current exported_ap_size, is caught
 * by the first test (the guarded statement is not visible here).  Otherwise
 * the table is resized with xrealloc(), any slots added beyond the old size
 * are zero-filled, and exported_ap_size is updated to nsize.
 *
 * NOTE(review): the return type, the braces and the return statements are
 * missing from this extract.  exported_ap_alloc() negates the result, so
 * presumably non-zero means the resize happened -- confirm against the
 * complete source.
 */
114 exported_ap_realloc_map(int nsize)
117 * this shouldn't happen, but...
119 if (nsize < 0 || nsize == exported_ap_size)
122 exported_ap = (am_node **) xrealloc((voidp) exported_ap, nsize * sizeof(am_node *));
124 if (nsize > exported_ap_size)
125 memset((char *) (exported_ap + exported_ap_size), 0,
126 (nsize - exported_ap_size) * sizeof(am_node *));
127 exported_ap_size = nsize;
134 * Allocate a new mount slot and create
136 * Fills in the map number of the node,
137 * but leaves everything else uninitialized.
/*
 * exported_ap_alloc: claim the slot at first_free_map and attach a newly
 * allocated, zero-filled am_node to it.
 *
 * When first_free_map has reached exported_ap_size, the table is first
 * extended by NEXP_AP slots through exported_ap_realloc_map().  The new node
 * records its slot index in am_mapno (first_free_map is post-incremented);
 * all other fields are left zeroed by the memset.  The while loop then tests
 * whether first_free_map still indexes an occupied slot, and last_used_map
 * is raised to first_free_map - 1 when the free index has moved past it.
 *
 * NOTE(review): the return type, braces, local declarations of mp and mpp,
 * the statement taken when the resize fails, the body of the while loop and
 * the final return are missing from this extract -- confirm against the
 * complete source.
 */
140 exported_ap_alloc(void)
145 * First check if there are any slots left, realloc if needed
147 if (first_free_map >= exported_ap_size)
148 if (!exported_ap_realloc_map(exported_ap_size + NEXP_AP))
152 * Grab the next free slot
154 mpp = exported_ap + first_free_map;
155 mp = *mpp = ALLOC(struct am_node);
156 memset((char *) mp, 0, sizeof(*mp));
158 mp->am_mapno = first_free_map++;
161 * Update free pointer
163 while (first_free_map < exported_ap_size && exported_ap[first_free_map])
166 if (first_free_map > last_used_map)
167 last_used_map = first_free_map - 1;
/*
 * exported_ap_free: give back the exported_ap slot owned by mp.
 *
 * mp: node whose slot (mp->am_mapno) is released.
 *
 * The slot entry is cleared first so that a stale pointer cannot be freed
 * twice.  If mp occupied the last used slot, last_used_map is re-examined
 * while it is non-negative and indexes an empty slot (the loop body is not
 * visible here).  first_free_map is lowered to the released index when that
 * index lies below it.
 *
 * NOTE(review): the storage class and return type, the braces, any argument
 * check ahead of the slot access, the body of the while loop and the
 * statement that frees the node itself are missing from this extract.  The
 * forward declaration earlier in the file reads
 * static void exported_ap_free(am_node *mp).
 */
177 exported_ap_free(am_node *mp)
186 * Zero the slot pointer to avoid double free's
188 exported_ap[mp->am_mapno] = 0;
191 * Update the free and last_used indices
193 if (mp->am_mapno == last_used_map)
194 while (last_used_map >= 0 && exported_ap[last_used_map] == 0)
197 if (first_free_map > mp->am_mapno)
198 first_free_map = mp->am_mapno;
201 * Free the mount node
208 * Insert mp into the correct place,
209 * where p_mp is its parent node.
210 * A new node gets placed as the youngest sibling
211 * of any other children, and the parent's child
212 * pointer is adjusted to point to the new child node.
/*
 * insert_am: link node mp into the mount tree beneath parent p_mp.
 *
 * mp:   node being inserted.
 * p_mp: parent node; it is dereferenced unconditionally, so it must not be
 *       a null pointer.
 *
 * A node placed directly under root_node is tagged AMF_ROOT.  mp then
 * records p_mp as its parent, takes the parent's current first child as its
 * older sibling (am_osib), and that sibling's am_ysib is pointed back at mp.
 *
 * NOTE(review): the return type, the braces, the guard (if any) in front of
 * the am_osib->am_ysib assignment and the statement that updates the
 * parent's child pointer are missing from this extract.
 */
215 insert_am(am_node *mp, am_node *p_mp)
218 * If this is going in at the root then flag it
219 * so that it cannot be unmounted by amq.
221 if (p_mp == root_node)
222 mp->am_flags |= AMF_ROOT;
224 * Fill in n-way links
226 mp->am_parent = p_mp;
227 mp->am_osib = p_mp->am_child;
229 mp->am_osib->am_ysib = mp;
235 * Remove am from its place in the mount tree
/*
 * remove_am: unlink mp from its parent and from its sibling chain.
 *
 * 1. A warning is logged when mp still has children and also has a parent;
 *    the removal goes ahead regardless.
 * 2. If mp is the child its parent points at, the parent is redirected to
 *    the older sibling of mp.
 * 3. The younger and older siblings of mp are joined to each other.
 *
 * NOTE(review): the storage class and return type, the braces and the
 * guards (if any) in front of the two sibling assignments are missing from
 * this extract; as shown, both am_ysib and am_osib would be dereferenced
 * unconditionally.  Confirm against the complete source.
 */
238 remove_am(am_node *mp)
241 * 1. Consistency check
243 if (mp->am_child && mp->am_parent) {
244 plog(XLOG_WARNING, "children of \"%s\" still exist - deleting anyway", mp->am_path);
248 * 2. Update parent's child pointer
250 if (mp->am_parent && mp->am_parent->am_child == mp)
251 mp->am_parent->am_child = mp->am_osib;
254 * 3. Unlink from sibling chain
257 mp->am_ysib->am_osib = mp->am_osib;
259 mp->am_osib->am_ysib = mp->am_ysib;
264 * Compute a new time to live value for a node.
/*
 * Body of the routine that computes a new time-to-live for node mp.
 *
 * The current clocktime() is stored in am_ttl and copied into the access
 * time of the node attributes; am_ttl is then pushed forward by the node
 * timeout am_timeo.
 *
 * NOTE(review): the function header, the braces and possibly further
 * statements are missing from this extract -- confirm the name and
 * signature against the complete source.
 */
270 mp->am_ttl = clocktime();
271 mp->am_fattr.na_atime.nt_seconds = mp->am_ttl;
272 mp->am_ttl += mp->am_timeo; /* sun's -tl option */
/*
 * mk_fattr: set the type-dependent fields of mp->am_fattr.
 *
 * mp:     node whose attribute record is modified.
 * vntype: requested NFS file type.
 *
 * Two sets of values are visible:
 *   directory: type NFDIR, mode NFSMODE_DIR | 0555, link count 2, size 512
 *   symlink:   type NFLNK, mode NFSMODE_LNK | 0777, link count 1, size 0
 * A type that matches neither is reported at XLOG_FATAL level; the message
 * says it is ignored.
 *
 * NOTE(review): the return type, the braces and the lines that choose
 * between the two sets are missing from this extract; presumably the choice
 * is made on vntype -- confirm against the complete source.
 */
277 mk_fattr(am_node *mp, nfsftype vntype)
281 mp->am_fattr.na_type = NFDIR;
282 mp->am_fattr.na_mode = NFSMODE_DIR | 0555;
283 mp->am_fattr.na_nlink = 2;
284 mp->am_fattr.na_size = 512;
287 mp->am_fattr.na_type = NFLNK;
288 mp->am_fattr.na_mode = NFSMODE_LNK | 0777;
289 mp->am_fattr.na_nlink = 1;
290 mp->am_fattr.na_size = 0;
293 plog(XLOG_FATAL, "Unknown fattr type %d - ignored", vntype);
300 * Initialize an allocated mount node.
301 * It is assumed that the mount node was b-zero'd
302 * before getting here so anything that would
303 * be set to zero isn't done here.
/*
 * init_map: give a newly allocated node its non-zero initial state.
 *
 * mp:  node to initialise.  It is expected to be zero-filled already, with
 *      am_mapno set by exported_ap_alloc().
 * dir: string that is duplicated into both am_name and am_path.
 *
 * The node receives a new mntfs from new_mntfs(), the next generation
 * number, the global default timeout gopt.am_timeo and status NFS_OK.  Its
 * attributes start as a copy of gen_fattr with fsid 42 and fileid 0; the
 * access time is set to the current clocktime() with zero microseconds, and
 * the modify and change times are copied from it.  The same seconds value
 * is stored in am_stats.s_mtime.
 *
 * NOTE(review): the return type, the braces and a few statements are
 * missing from this extract.  As far as the visible lines show, the results
 * of the two strdup() calls are stored without a check.
 */
306 init_map(am_node *mp, char *dir)
309 * mp->am_mapno is initialized by exported_ap_alloc
310 * other fields don't need to be set to zero.
312 mp->am_mnt = new_mntfs();
313 mp->am_name = strdup(dir);
314 mp->am_path = strdup(dir);
315 mp->am_gen = new_gen();
317 mp->am_timeo = gopt.am_timeo;
318 mp->am_attr.ns_status = NFS_OK;
319 mp->am_fattr = gen_fattr;
320 mp->am_fattr.na_fsid = 42;
321 mp->am_fattr.na_fileid = 0;
322 mp->am_fattr.na_atime.nt_seconds = clocktime();
323 mp->am_fattr.na_atime.nt_useconds = 0;
324 mp->am_fattr.na_mtime = mp->am_fattr.na_ctime = mp->am_fattr.na_atime;
327 mp->am_stats.s_mtime = mp->am_fattr.na_atime.nt_seconds;
333 * The node must be already unmounted.
/*
 * free_map: dispose of a node that has already been unmounted.
 *
 * Visible steps, in order: the am_transp buffer is released with XFREE(),
 * the reference to the mounted filesystem is dropped with free_mntfs(), and
 * the node slot is handed back through exported_ap_free().
 *
 * NOTE(review): the original line numbers jump from 336 to 350, so several
 * statements (and the guards around the visible ones, if any) are missing
 * from this extract, as are the return type and braces.
 */
336 free_map(am_node *mp)
350 XFREE(mp->am_transp);
353 free_mntfs(mp->am_mnt);
355 exported_ap_free(mp);
360 * Convert from file handle to automount node.
/*
 * fh_to_mp3: map an NFS file handle back to the automount node it names.
 *
 * fhp:    file handle, viewed through a struct am_fh overlay.
 * rp:     int pointer supplied by the caller; no use of it is visible in
 *         this extract (presumably it receives an error code -- confirm).
 * c_or_d: lookup mode; it is compared with VLOOK_CREATE and forwarded to
 *         the parent lookup routine.
 *
 * Checks visible in this extract, in order:
 *   - the pid stored in the handle is compared with am_mypid;
 *   - the slot index fhh_id is range-checked against exported_ap_size
 *     before exported_ap is indexed;
 *   - the generation number of the node is compared with the one in the
 *     handle;
 *   - when the node has a mounted filesystem whose server is down
 *     (FSRV_ISDOWN) and the node has a parent, the modify time of the
 *     original node is refreshed and, for VLOOK_CREATE, the lookuppn
 *     routine of the parent is called with the same name to find a
 *     replacement node;
 *   - a node whose filesystem is flagged MFF_UNMOUNTING is treated
 *     specially unless the node carries AMF_ROOT;
 *   - a missing node, or a node without a mounted filesystem, is treated
 *     specially while amd_state is Finishing.
 *
 * NOTE(review): the return type, the braces, the local declarations, every
 * goto or return and most branch bodies are missing from this extract, so
 * the outcome of each check cannot be stated from here.  The comment about
 * updating the modify time mentions the parent node, while the visible
 * assignment targets orig_ap itself -- confirm which one is intended.
 */
363 fh_to_mp3(am_nfs_fh *fhp, int *rp, int c_or_d)
365 struct am_fh *fp = (struct am_fh *) fhp;
369 * Check process id matches
370 * If it doesn't then it is probably
371 * from an old kernel cached filehandle
372 * which is now out of date.
374 if (fp->fhh_pid != am_mypid)
378 * Make sure the index is valid before
379 * exported_ap is referenced.
381 if (fp->fhh_id < 0 || fp->fhh_id >= exported_ap_size)
385 * Get hold of the supposed mount node
387 ap = exported_ap[fp->fhh_id];
390 * If it exists then maybe...
394 * Check the generation number in the node
395 * matches the one from the kernel. If not
396 * then the old node has been timed out and
397 * a new one allocated.
399 if (ap->am_gen != fp->fhh_gen) {
404 * If the node is hung then locate a new node
405 * for it. This implements the replicated filesystem
408 if (ap->am_mnt && FSRV_ISDOWN(ap->am_mnt->mf_server) && ap->am_parent) {
410 am_node *orig_ap = ap;
413 dlog("fh_to_mp3: %s (%s) is hung:- call lookup",
414 orig_ap->am_path, orig_ap->am_mnt->mf_info);
418 * Update modify time of parent node.
419 * With any luck the kernel will re-stat
420 * the child node and get new information.
422 orig_ap->am_fattr.na_mtime.nt_seconds = clocktime();
425 * Call the parent's lookup routine for an object
426 * with the same name. This may return -1 in error
427 * if a mount is in progress. In any case, if no
428 * mount node is returned the error code is propagated
431 if (c_or_d == VLOOK_CREATE) {
432 ap = (*orig_ap->am_parent->am_mnt->mf_ops->lookuppn)
433 (orig_ap->am_parent, orig_ap->am_name, &error, c_or_d);
439 if (error < 0 && amd_state == Finishing)
446 * Update last access to original node. This
447 * avoids timing it out and so sending ESTALE
448 * back to the kernel.
449 * XXX - Not sure we need this anymore (jsp, 90/10/6).
456 * Disallow references to objects being unmounted, unless
457 * they are automount points.
459 if (ap->am_mnt && (ap->am_mnt->mf_flags & MFF_UNMOUNTING) &&
460 !(ap->am_flags & AMF_ROOT)) {
461 if (amd_state == Finishing)
471 if (!ap || !ap->am_mnt) {
473 * If we are shutting down then it is likely
474 * that this node has disappeared because of
475 * a fast timeout. To avoid things thrashing
476 * just pretend it doesn't exist at all. If
477 * ESTALE is returned, some NFS clients just
478 * keep retrying (stupid or what - if it's
479 * stale now, what's it going to be in 5 minutes?)
481 if (amd_state == Finishing)
/*
 * fh_to_mp: convenience wrapper that resolves file handle fhp through
 * fh_to_mp2(), passing the address of a local named dummy as the second
 * argument and returning whatever fh_to_mp2() yields.
 *
 * NOTE(review): the return type, the braces and the declaration of dummy
 * are missing from this extract, and fh_to_mp2 is not defined in the visible
 * lines (presumably a macro or function declared elsewhere -- confirm).
 */
493 fh_to_mp(am_nfs_fh *fhp)
497 return fh_to_mp2(fhp, &dummy);
502 * Convert from automount node to file handle.
/*
 * mp_to_fh: build the file handle that identifies node mp.
 *
 * mp:  node to describe.
 * fhp: handle to fill in.  The whole am_nfs_fh is zeroed first and then
 *      written through a struct am_fh overlay.
 *
 * The handle is the triple (am_mypid, mp->am_mapno, mp->am_gen): the process
 * id of this amd, the slot index of the node and its generation number.
 *
 * NOTE(review): the return type and the braces are missing from this
 * extract.
 */
505 mp_to_fh(am_node *mp, am_nfs_fh *fhp)
507 struct am_fh *fp = (struct am_fh *) fhp;
509 memset((char *) fhp, 0, sizeof(am_nfs_fh));
512 * Take the process id
514 fp->fhh_pid = am_mypid;
519 fp->fhh_id = mp->am_mapno;
522 * ... and the generation number
524 fp->fhh_gen = mp->am_gen;
527 * ... to make a "unique" triple that will never
528 * be reallocated except across reboots (which doesn't matter)
529 * or if we are unlucky enough to be given the same
530 * pid as a previous amd (very unlikely).
/*
 * find_ap2: recursive search for a node matching dir, starting at mp.
 *
 * dir: path to look for.
 * mp:  node at which the search starts.
 *
 * A node matches when its am_path equals dir, or when its filesystem is
 * flagged MFF_MOUNTED and the mount location mf_mount equals dir.  The
 * search then recurses into the older sibling (am_osib) and finally into
 * the first child (am_child).
 *
 * NOTE(review): the return type, the braces, the declaration of mp2, the
 * null check on mp that the recursion needs in order to terminate, and the
 * return statements after each match are missing from this extract.
 */
536 find_ap2(char *dir, am_node *mp)
540 if (STREQ(mp->am_path, dir))
543 if ((mp->am_mnt->mf_flags & MFF_MOUNTED) &&
544 STREQ(mp->am_mnt->mf_mount, dir))
547 mp2 = find_ap2(dir, mp->am_osib);
550 return find_ap2(dir, mp->am_child);
558 * Find the mount node corresponding to dir. dir can match either the
559 * automount path or, if the node is mounted, the mount location.
/*
 * Loop from the routine that finds the mount node corresponding to dir.
 *
 * exported_ap is walked from last_used_map down to slot 0.  For every
 * occupied slot whose node carries AMF_ROOT, the subtree is searched with
 * find_ap2(dir, ...).
 *
 * NOTE(review): the function header, the declaration of i, the handling of
 * the find_ap2() result and the closing braces are missing from this
 * extract.  root_ap() calls find_ap(dir), so this is presumably the body of
 * find_ap -- confirm against the complete source.
 */
566 for (i = last_used_map; i >= 0; --i) {
567 am_node *mp = exported_ap[i];
568 if (mp && (mp->am_flags & AMF_ROOT)) {
569 mp = find_ap2(dir, exported_ap[i]);
581 * Find the mount node corresponding
582 * to the mntfs structure.
/*
 * Loop from the routine that finds the mount node belonging to a mntfs.
 *
 * exported_ap is walked from last_used_map down to slot 0, testing each
 * occupied slot for a node whose am_mnt pointer equals mf.
 *
 * NOTE(review): the function header, the declaration of i, the statement
 * taken on a match and the closing braces are missing from this extract --
 * confirm the name and signature against the complete source.
 */
589 for (i = last_used_map; i >= 0; --i) {
590 am_node *mp = exported_ap[i];
591 if (mp && mp->am_mnt == mf)
600 * Get the filehandle for a particular named directory.
601 * This is used during the bootstrap to tell the kernel
602 * the filehandles of the initial automount points.
/*
 * Body of the routine that produces the file handle for a named top-level
 * directory during bootstrap (the log text below names it root_fh).
 *
 * The node is looked up with root_ap(dir, TRUE).  The handle lives in the
 * static object nfh.  The pid stored in the handle is replaced by the value
 * of getppid(), and the substitution is logged.  The final plog() reports
 * that no root file handle could be found for dir.
 *
 * NOTE(review): the function header, the braces, the test on mp, the call
 * that fills nfh, the condition under which the pid is patched and the
 * return statements are missing from this extract.
 */
607 static am_nfs_fh nfh;
608 am_node *mp = root_ap(dir, TRUE);
612 * Patch up PID to match main server...
615 long pid = getppid();
616 ((struct am_fh *) &nfh)->fhh_pid = pid;
618 dlog("root_fh substitutes pid %ld", (long) pid);
625 * Should never get here...
627 plog(XLOG_ERROR, "Can't find root filehandle for %s", dir);
/*
 * root_ap: look up the node for dir with find_ap() and test whether it
 * exists and hangs directly below root_node.
 *
 * dir:  path to look up.
 * path: not referenced in the visible lines.
 *
 * NOTE(review): the return type, the braces and the return statements are
 * missing from this extract, so what is returned in either case cannot be
 * stated from here.
 */
634 root_ap(char *dir, int path)
636 am_node *mp = find_ap(dir);
638 if (mp && mp->am_parent == root_node)
646 * Timeout all nodes waiting on
/*
 * map_flush_srvr: expire every node whose mounted filesystem is served by
 * file server fs.
 *
 * exported_ap is walked from last_used_map down to slot 0.  Each node whose
 * mf_server equals fs is logged at XLOG_INFO level and has its am_ttl set to
 * the current clocktime().  reschedule_timeout_mp() is called afterwards.
 *
 * NOTE(review): the return type, the braces and the local declarations are
 * missing from this extract; whether the reschedule call is conditional on
 * a node having been flushed is not visible.
 */
650 map_flush_srvr(fserver *fs)
655 for (i = last_used_map; i >= 0; --i) {
656 am_node *mp = exported_ap[i];
657 if (mp && mp->am_mnt && mp->am_mnt->mf_server == fs) {
658 plog(XLOG_INFO, "Flushed %s; dependent on %s", mp->am_path, fs->fs_host);
659 mp->am_ttl = clocktime();
664 reschedule_timeout_mp();
669 * Mount a top level automount node
670 * by calling lookup in the parent
671 * (root) node which will cause the
672 * automount node to be automounted.
/*
 * mount_auto_node: trigger the automount of the top-level entry dir.
 *
 * dir: name to look up.
 * arg: opaque pointer that is cast to am_node * and used as the node in
 *      which the lookup is performed.
 *
 * The lookuppn routine of amfs_auto_ops is called with VLOOK_CREATE and its
 * result is discarded.  The error code it stores is copied into errno so
 * that the %m conversion in the log message can describe it.
 *
 * NOTE(review): the return type, the braces, the declaration of error and
 * the condition that guards the error report are missing from this extract.
 */
675 mount_auto_node(char *dir, voidp arg)
679 (void) amfs_auto_ops.lookuppn((am_node *) arg, dir, &error, VLOOK_CREATE);
681 errno = error; /* XXX */
682 plog(XLOG_ERROR, "Could not mount %s: %m", dir);
689 * Cause all the top-level mount nodes
/*
 * Body of the routine that starts all top-level mount nodes: it hands
 * mount_auto_node (cast to the callback type) and root_node to
 * root_keyiter() and returns the result of that call.
 *
 * NOTE(review): the function header and the braces are missing from this
 * extract -- confirm the name and return type against the complete source.
 */
696 * Iterate over all the nodes to be started
698 return root_keyiter((void (*)P((char *, voidp))) mount_auto_node, root_node);
703 * Construct top-level node
/*
 * Body of the routine that constructs the top-level node.
 *
 * Visible steps, in order:
 *   - a node is allocated with exported_ap_alloc() and stored in root_node;
 *   - it is initialised by init_map() with an empty name;
 *   - a mounted-filesystem record for amfs_root_ops is obtained from
 *     find_mntfs(), with ROOT_MAP as its fourth argument;
 *   - the mntfs that init_map() attached is released and replaced by it;
 *   - the fs_init routine of the filesystem type is called when present;
 *   - the mount_fs routine is called on root_node and its result is stored
 *     in mf_error.
 *
 * NOTE(review): the function header, the braces, the declaration of
 * root_mnt and any statements on the dropped lines are missing from this
 * extract.  No check of the exported_ap_alloc() result is visible.
 */
709 char *rootmap = ROOT_MAP;
710 root_node = exported_ap_alloc();
715 init_map(root_node, "");
718 * Allocate a new mounted filesystem
720 root_mnt = find_mntfs(&amfs_root_ops, (am_opts *) 0, "", rootmap, "", "", "");
723 * Replace the initial null reference
725 free_mntfs(root_node->am_mnt);
726 root_node->am_mnt = root_mnt;
729 * Initialize the root
731 if (root_mnt->mf_ops->fs_init)
732 (*root_mnt->mf_ops->fs_init) (root_mnt);
737 root_mnt->mf_error = (*root_mnt->mf_ops->mount_fs) (root_node);
742 * Cause all the nodes to be unmounted by timing
/*
 * umount_exported: prepare every node for unmounting at shutdown.
 *
 * exported_ap is walked from last_used_map down to slot 0.  For each node
 * the visible lines show these cases:
 *   - a filesystem already flagged MFF_UNMOUNTING is skipped;
 *   - a filesystem whose type lacks FS_DIRECTORY gets special treatment so
 *     that it looks like a directory (the statement itself is not visible);
 *   - when the immediate_abort counter has dropped below zero and the node
 *     is neither AMF_ROOT nor parentless, or when the filesystem is flagged
 *     MFF_RESTART, the node is discarded without being unmounted.  In that
 *     branch MFF_MKMNT is cleared when the server is not known to be up,
 *     and with CFM_UNMOUNT_ON_EXIT set an unmount attempt is logged;
 *   - any other node has AMF_NOTIMEOUT and MFF_RSTKEEP cleared so that it
 *     can be timed out.
 *
 * NOTE(review): the return type, the braces, the local declarations, the
 * continue and else lines and several statements inside each branch are
 * missing from this extract.  mf is dereferenced on the MFF_UNMOUNTING test
 * before the later test of mf itself -- confirm whether a null check sits on
 * one of the dropped lines.
 */
746 umount_exported(void)
750 for (i = last_used_map; i >= 0; --i) {
751 am_node *mp = exported_ap[i];
754 mntfs *mf = mp->am_mnt;
755 if (mf->mf_flags & MFF_UNMOUNTING) {
757 * If this node is being unmounted then just ignore it. However,
758 * this could prevent amd from finishing if the unmount gets blocked
759 * since the am_node will never be free'd. am_unmounted needs
760 * telling about this possibility. - XXX
765 if (mf && !(mf->mf_ops->fs_flags & FS_DIRECTORY)) {
767 * When shutting down this had better
768 * look like a directory, otherwise it
769 * can't be unmounted!
774 if ((--immediate_abort < 0 &&
775 !(mp->am_flags & AMF_ROOT) && mp->am_parent) ||
776 (mf->mf_flags & MFF_RESTART)) {
779 * Just throw this node away without bothering to unmount it. If
780 * the server is not known to be up then don't discard the mounted
781 * on directory or Amd might hang...
784 (mf->mf_server->fs_flags & (FSF_DOWN | FSF_VALID)) != FSF_VALID)
785 mf->mf_flags &= ~MFF_MKMNT;
786 if (gopt.flags & CFM_UNMOUNT_ON_EXIT) {
787 plog(XLOG_INFO, "on-exit attempt to unmount %s", mf->mf_mount);
794 * Any other node gets forcibly timed out.
796 mp->am_flags &= ~AMF_NOTIMEOUT;
797 mp->am_mnt->mf_flags &= ~MFF_RSTKEEP;
/*
 * unmount_node: attempt to unmount the filesystem attached to node mp.
 *
 * When the filesystem is flagged MFF_ERROR, or is referenced more than
 * once (mf_refc > 1), the umount_fs routine is not the path taken; for an
 * error node a debug message records the no-op.  Otherwise the unmount is
 * logged and the umount_fs routine of the filesystem type is called with
 * mp.  Its result is copied into errno so that the %m conversion in the
 * debug message can describe a failure.
 *
 * NOTE(review): the storage class and return type, the braces, the
 * declaration of error, the else line, the condition that guards the
 * failure message and the final return are missing from this extract.  The
 * forward declaration earlier in the file reads
 * static int unmount_node(am_node *mp).
 */
808 unmount_node(am_node *mp)
810 mntfs *mf = mp->am_mnt;
813 if ((mf->mf_flags & MFF_ERROR) || mf->mf_refc > 1) {
818 if (mf->mf_flags & MFF_ERROR)
819 dlog("No-op unmount of error node %s", mf->mf_info);
824 dlog("Unmounting %s (%s)", mf->mf_mount, mf->mf_info);
826 error = (*mf->mf_ops->umount_fs) (mp);
830 errno = error; /* XXX */
832 dlog("%s: unmount: %m", mf->mf_mount);
/*
 * unmount_node_wrap: adapter that lets unmount_node() be used where a
 * callback taking an untyped pointer is expected (unmount_mp() passes it to
 * run_task()).  vp is cast to am_node * and the result of unmount_node() is
 * returned unchanged.
 *
 * NOTE(review): the storage class, the return type and the braces are
 * missing from this extract.
 */
841 unmount_node_wrap(voidp vp)
843 return unmount_node((am_node *) vp);
846 * Below is the comment left from the old code
847 * that was dependent on the macro FLUSH_KERNEL_NAME_CACHE
850 * This code should just say:
851 * return unmount_node((am_node *) vp);
854 * The kernel keeps a cached copy of filehandles,
855 * and doesn't ever uncache them (apparently). So
856 * when Amd times out a node the kernel will have a
857 * stale filehandle. When the kernel next uses the
858 * filehandle it gets ESTALE.
861 * Arrange that when a node is removed an unlink or
862 * rmdir is done on that path so that the kernel
863 * cache is flushed. Yes - yuck.
865 * This can all be removed (and the background
866 * unmount flag in amfs_link_ops) if/when the kernel does
869 * If the unlink or rmdir failed then just log a warning,
870 * don't fail the unmount. This can occur if the kernel
871 * client code decides that the object is still referenced
872 * and should be renamed rather than discarded.
874 * There is still a race condition here...
875 * if another process is trying to access the same
876 * filesystem at the time we get here, then
877 * it will block, since the MFF_UNMOUNTING flag will
878 * be set. That may, or may not, cause the entire
879 * system to deadlock. Hmmm...
/*
 * free_map_if_success: completion handler for an unmount attempt.
 *
 * rc:      result of the unmount; it is compared with EBUSY and copied into
 *          errno, so it is treated as an errno-style code.
 * term:    printed as a signal number in the log message.
 * closure: opaque pointer that is cast back to the am_node being unmounted.
 *
 * MFF_UNMOUNTING is cleared first.  If a timeout was deferred in the
 * meantime (MFF_WANTTIMO), that flag is cleared and the timeout scan is
 * rescheduled.  The remaining visible lines log the outcome: termination by
 * a signal, a filesystem that is still active (type amfs_program_ops, or rc
 * equal to EBUSY), or any other failure reported through %m.
 *
 * NOTE(review): the return type, the braces, the if and else lines that
 * select between these reports, the body of the SIGTRAP case, the matching
 * #endif and the success path (which the name suggests frees the node) are
 * missing from this extract -- confirm against the complete source.
 */
885 free_map_if_success(int rc, int term, voidp closure)
887 am_node *mp = (am_node *) closure;
888 mntfs *mf = mp->am_mnt;
891 * Not unmounting any more
893 mf->mf_flags &= ~MFF_UNMOUNTING;
896 * If a timeout was deferred because the underlying filesystem
897 * was busy then arrange for a timeout as soon as possible.
899 if (mf->mf_flags & MFF_WANTTIMO) {
900 mf->mf_flags &= ~MFF_WANTTIMO;
901 reschedule_timeout_mp();
904 plog(XLOG_ERROR, "unmount for %s got signal %d", mp->am_path, term);
905 #if defined(DEBUG) && defined(SIGTRAP)
907 * dbx likes to put a trap on exit().
908 * Pretend it succeeded for now...
910 if (term == SIGTRAP) {
916 if (mf->mf_ops == &amfs_program_ops || rc == EBUSY) {
917 plog(XLOG_STATS, "\"%s\" on %s still active", mp->am_path, mf->mf_mount);
919 errno = rc; /* XXX */
920 plog(XLOG_ERROR, "%s: unmount: %m", mp->am_path);
928 * Wakeup anything waiting for this mount
/*
 * unmount_mp: start the unmount of a timed-out node, in the background
 * when the filesystem type asks for it.
 *
 * mp: node to unmount.
 *
 * Returns was_backgrounded: 1 when the unmount was handed to run_task(),
 * otherwise the initial value 0.
 *
 * For a mounted filesystem (MFF_MOUNTED) whose type sets FS_UBACKGROUND:
 *   - if this is the only reference and the server is not up, the timeout
 *     is ignored and that fact is logged once, tracked by MFF_LOGDOWN;
 *   - otherwise MFF_LOGDOWN is cleared, MFF_UNMOUNTING is set and
 *     unmount_node_wrap() is run as a background task with
 *     free_map_if_success() as its completion handler.
 * The foreground path sets MFF_UNMOUNTING, calls unmount_node() directly
 * and passes its result to free_map_if_success() with a term value of 0.
 *
 * NOTE(review): the return type, the braces, the else lines that separate
 * these paths and any preprocessor conditionals around the log calls are
 * missing from this extract.
 */
935 unmount_mp(am_node *mp)
937 int was_backgrounded = 0;
938 mntfs *mf = mp->am_mnt;
941 plog(XLOG_INFO, "\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
944 if ((mf->mf_ops->fs_flags & FS_UBACKGROUND) &&
945 (mf->mf_flags & MFF_MOUNTED)) {
946 if (mf->mf_refc == 1 && !FSRV_ISUP(mf->mf_server)) {
948 * Don't try to unmount from a server that is known to be down
950 if (!(mf->mf_flags & MFF_LOGDOWN)) {
951 /* Only log this once, otherwise gets a bit boring */
952 plog(XLOG_STATS, "file server %s is down - timeout of \"%s\" ignored", mf->mf_server->fs_host, mp->am_path);
953 mf->mf_flags |= MFF_LOGDOWN;
956 /* Clear logdown flag - since the server must be up */
957 mf->mf_flags &= ~MFF_LOGDOWN;
959 dlog("\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
960 /* dlog("Will background the unmount attempt"); */
963 * Note that we are unmounting this node
965 mf->mf_flags |= MFF_UNMOUNTING;
966 run_task(unmount_node_wrap, (voidp) mp,
967 free_map_if_success, (voidp) mp);
968 was_backgrounded = 1;
970 dlog("unmount attempt backgrounded");
975 dlog("\"%s\" on %s timed out", mp->am_path, mp->am_mnt->mf_mount);
976 dlog("Trying unmount in foreground");
978 mf->mf_flags |= MFF_UNMOUNTING;
979 free_map_if_success(unmount_node(mp), 0, (voidp) mp);
981 dlog("unmount attempt done");
985 return was_backgrounded;
/*
 * Body of the periodic timeout scan (the log text and the final timeout()
 * call below name it timeout_mp).
 *
 * now is the current clocktime(); backoff starts at NumChild / 4.
 *
 * exported_ap is walked from last_used_map down to slot 0.  Empty slots and
 * nodes flagged AMF_NOTIMEOUT are passed over, and the last reference to a
 * filesystem flagged MFF_RSTKEEP gets its own test.  For a filesystem with
 * none of the IGNORE_FLAGS set:
 *   - MFF_WANTTIMO is cleared;
 *   - once now has reached am_ttl, the per-node wait am_timeo_w grows by
 *     gopt.am_timeo_w until it reaches four times that value, and am_ttl is
 *     moved to now + am_timeo_w;
 *   - an assignment of now + backoff + 1 to am_ttl is also visible, which
 *     per the original comment staggers unmounts;
 *   - t is updated with smallest_t(t, mp->am_ttl);
 *   - a node without children, with mf_error >= 0 and marked expired, is
 *     passed to unmount_mp(), whose non-zero result means the unmount was
 *     backgrounded.
 * A filesystem flagged MFF_UNMOUNTING gets MFF_WANTTIMO set instead.
 *
 * After the loop the next scan is scheduled with timeout(t - now, ...) and
 * the returned id is kept in timeout_mp_id.  The visible lines also contain
 * a no-further-timeouts message, a fallback of now + 6 that is logged as a
 * zero interval, and a test for amd_state having reached Finishing.
 *
 * NOTE(review): the function header, the braces, the declarations of i, t,
 * mf and expired, the continue and else lines and most branch conditions
 * are missing from this extract, so the exact conditions under which each
 * visible statement runs must be confirmed against the complete source.
 */
994 time_t now = clocktime();
995 int backoff = NumChild / 4;
998 dlog("Timing out automount points...");
1001 for (i = last_used_map; i >= 0; --i) {
1002 am_node *mp = exported_ap[i];
1006 * Just continue if nothing mounted, or can't be timed out.
1008 if (!mp || (mp->am_flags & AMF_NOTIMEOUT))
1012 * Pick up mounted filesystem
1019 * Don't delete last reference to a restarted filesystem.
1021 if ((mf->mf_flags & MFF_RSTKEEP) && mf->mf_refc == 1)
1025 * If there is action on this filesystem then ignore it
1027 if (!(mf->mf_flags & IGNORE_FLAGS)) {
1029 mf->mf_flags &= ~MFF_WANTTIMO;
1030 if (now >= mp->am_ttl) {
1035 * Move the ttl forward to avoid thrashing effects
1036 * on the next call to timeout!
1038 /* sun's -tw option */
1039 if (mp->am_timeo_w < 4 * gopt.am_timeo_w)
1040 mp->am_timeo_w += gopt.am_timeo_w;
1041 mp->am_ttl = now + mp->am_timeo_w;
1045 * Just backoff this unmount for
1046 * a couple of seconds to avoid
1047 * many multiple unmounts being
1048 * started in parallel.
1050 mp->am_ttl = now + backoff + 1;
1055 * If the next ttl is smallest, use that
1057 t = smallest_t(t, mp->am_ttl);
1059 if (!mp->am_child && mf->mf_error >= 0 && expired) {
1061 * If the unmount was backgrounded then
1062 * bump the backoff counter.
1064 if (unmount_mp(mp)) {
1068 } else if (mf->mf_flags & MFF_UNMOUNTING) {
1069 mf->mf_flags |= MFF_WANTTIMO;
1075 dlog("No further timeouts");
1081 * Sanity check to avoid runaways.
1082 * Absolutely should never get this but
1083 * if you do without this trap amd will thrash.
1086 t = now + 6; /* XXX */
1087 plog(XLOG_ERROR, "Got a zero interval in timeout_mp()!");
1091 * XXX - when shutting down, make things happen faster
1093 if ((int) amd_state >= (int) Finishing)
1096 dlog("Next mount timeout in %lds", (long) (t - now));
1099 timeout_mp_id = timeout(t - now, timeout_mp, 0);
1104 * Cause timeout_mp to be called soonest
/*
 * reschedule_timeout_mp: make the timeout scan run as soon as possible.
 *
 * The pending callout identified by timeout_mp_id is cancelled with
 * untimeout(), and timeout_mp is queued again with a delay of 0; the new id
 * replaces timeout_mp_id.
 *
 * NOTE(review): the return type and the braces are missing from this
 * extract, and the untimeout() call may be guarded by a test on a dropped
 * line -- confirm against the complete source.
 */
1107 reschedule_timeout_mp(void)
1110 untimeout(timeout_mp_id);
1111 timeout_mp_id = timeout(0, timeout_mp, 0);