Remove advertising clause from all that isn't contrib or userland bin.
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
33  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
34  */
35
36 /*
37  * nfs version 2 and 3 server calls to vnode ops
38  * - these routines generally have 3 phases
39  *   1 - break down and validate rpc request in mbuf list
40  *   2 - do the vnode ops for the request
41  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
42  *   3 - build the rpc reply in an mbuf list
43  *   nb:
44  *      - do not mix the phases, since the nfsm_?? macros can return failures
45  *        on a bad rpc or similar and do not do any vrele() or vput()'s
46  *
47  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
48  *      error number iff error != 0 whereas
49  *      returning an error from the server function implies a fatal error
50  *      such as a badly constructed rpc request that should be dropped without
51  *      a reply.
52  *      For Version 3, nfsm_reply() does not return for the error case, since
53  *      most version 3 rpcs return more than the status for error cases.
54  *
55  * Other notes:
56  *      Warning: always pay careful attention to resource cleanup on return
57  *      and note that nfsm_*() macros can terminate a procedure on certain
58  *      errors.
59  */
60
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/proc.h>
64 #include <sys/priv.h>
65 #include <sys/nlookup.h>
66 #include <sys/namei.h>
67 #include <sys/unistd.h>
68 #include <sys/vnode.h>
69 #include <sys/mount.h>
70 #include <sys/socket.h>
71 #include <sys/socketvar.h>
72 #include <sys/malloc.h>
73 #include <sys/mbuf.h>
74 #include <sys/dirent.h>
75 #include <sys/stat.h>
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <sys/buf.h>
79
80 #include <vm/vm.h>
81 #include <vm/vm_extern.h>
82 #include <vm/vm_object.h>
83
84 #include <sys/buf2.h>
85
86 #include <sys/thread2.h>
87
88 #include "nfsproto.h"
89 #include "rpcv2.h"
90 #include "nfs.h"
91 #include "xdr_subs.h"
92 #include "nfsm_subs.h"
93
/*
 * nfsdbprintf() takes a single, fully parenthesized kprintf() argument
 * list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)).  It expands to
 * a kprintf() call when NFSRV_DEBUG is defined and to nothing otherwise.
 */
94 #ifdef NFSRV_DEBUG
95 #define nfsdbprintf(info)       kprintf info
96 #else
97 #define nfsdbprintf(info)
98 #endif
99
/*
 * MAX_REORDERED_RPC scales the window (in units of the larger of the
 * filesystem I/O size and the request size) inside which an out-of-order
 * offset is treated as a reordered RPC by nfsrv_sequential_heuristic().
 * MAX_COMMIT_COUNT is not referenced in this part of the file.
 */
100 #define MAX_REORDERED_RPC       (16)
101 #define MAX_COMMIT_COUNT        (1024 * 1024)
102
/*
 * Sequential-access heuristic table parameters:
 *   NUM_HEURISTIC - number of slots in nfsheur[]
 *   NHUSE_INIT    - nh_use value given to a freshly claimed slot
 *   NHUSE_INC     - amount added to nh_use each time a slot is used
 *   NHUSE_MAX     - upper bound for nh_use
 */
103 #define NUM_HEURISTIC           1031
104 #define NHUSE_INIT              64
105 #define NHUSE_INC               16
106 #define NHUSE_MAX               2048
107
/*
 * One slot per tracked vnode.  Slots are located by hashing the vnode
 * pointer and probing linearly; see nfsrv_sequential_heuristic().
 */
108 static struct nfsheur {
109     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
110     off_t nh_nextoff;           /* next offset for sequential detection */
111     int nh_use;                 /* use count for selection */
112     int nh_seqcount;            /* heuristic */
113 } nfsheur[NUM_HEURISTIC];
114
/*
 * Per-protocol-version tables of nfstype values, nine entries each.
 * The two tables differ only in entries 6 and 7 (NFSOCK/NFFIFO for v3,
 * NFNON/NFCHR for v2).
 * NOTE(review): presumably indexed by vnode type -- confirm against the
 * users of these tables.
 */
115 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
116                       NFFIFO, NFNON };
117 #ifndef NFS_NOSERVER 
118 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
119                       NFCHR, NFNON };
120
/*
 * Write-gathering delays, one for v2 and one for v3 (v3 defaults to 0).
 * NOTE(review): units are not visible here; the "* 1000" suggests a
 * conversion from NFS_GATHERDELAY's unit -- confirm.
 */
121 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
122 int nfsrvw_procrastinate_v3 = 0;
123
/* Not referenced in this part of the file. */
124 static struct timespec  nfsver;
125
126 SYSCTL_DECL(_vfs_nfs);
127
/* vfs.nfs.* sysctl knobs and counters; see each description string. */
128 int nfs_async;
129 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
130     "Enable unstable and fast writes");
131 static int nfs_commit_blks;
132 static int nfs_commit_miss;
133 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
134     "Number of committed blocks");
135 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
136     "Number of nfs blocks committed from dirty buffers");
137
/* Forward declarations for file-local helpers defined later in the file. */
138 static int nfsrv_access (struct mount *, struct vnode *, int,
139                         struct ucred *, int, struct thread *, int);
140 static void nfsrvw_coalesce (struct nfsrv_descript *,
141                 struct nfsrv_descript *);
142
143 /*
144  * Heuristic to detect sequential operation.
145  */
146 static struct nfsheur *
147 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp, int writeop)
148 {
149         struct nfsheur *nh;
150         int hi, try;
151
152         /* Locate best candidate */
153         try = 32;
154         hi = ((int)(vm_offset_t) vp / sizeof(struct vnode)) % NUM_HEURISTIC;
155         nh = &nfsheur[hi];
156
157         while (try--) {
158                 if (nfsheur[hi].nh_vp == vp) {
159                         nh = &nfsheur[hi];
160                         break;
161                 }
162                 if (nfsheur[hi].nh_use > 0)
163                         --nfsheur[hi].nh_use;
164                 hi = (hi + 1) % NUM_HEURISTIC;
165                 if (nfsheur[hi].nh_use < nh->nh_use)
166                         nh = &nfsheur[hi];
167         }
168
169         /* Initialize hint if this is a new file */
170         if (nh->nh_vp != vp) {
171                 nh->nh_vp = vp;
172                 nh->nh_nextoff = uio->uio_offset;
173                 nh->nh_use = NHUSE_INIT;
174                 if (uio->uio_offset == 0)
175                         nh->nh_seqcount = 4;
176                 else
177                         nh->nh_seqcount = 1;
178         }
179
180         /*
181          * Calculate heuristic
182          *
183          * See vfs_vnops.c:sequential_heuristic().
184          */
185         if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
186             uio->uio_offset == nh->nh_nextoff) {
187                 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
188                 if (nh->nh_seqcount > IO_SEQMAX)
189                         nh->nh_seqcount = IO_SEQMAX;
190         } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
191                 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
192                     /* Probably a reordered RPC, leave seqcount alone. */
193         } else if (nh->nh_seqcount > 1) {
194                 nh->nh_seqcount /= 2;
195         } else {
196                 nh->nh_seqcount = 0;
197         }
198         nh->nh_use += NHUSE_INC;
199         if (nh->nh_use > NHUSE_MAX)
200                 nh->nh_use = NHUSE_MAX;
201         return (nh);
202 }
203
204 /*
205  * nfs v3 access service
 *
 * Decodes a file handle and a 32-bit access mask from the request,
 * resolves the handle to a vnode, clears from the mask every access
 * class for which nfsrv_access() reports a failure, and builds a reply
 * carrying post-op attributes followed by the resulting mask.
 *
 * The reply mbuf chain (possibly NULL) is handed back through *mrq.
 * Returns the value left in 'error' on the path taken through the
 * function (0 on every path that ends in an explicit reply here).
 *
 * NOTE(review): NEGREPLYOUT/NULLOUT/NEGKEEPOUT are defined elsewhere;
 * per the warning at the top of this file they can leave the function
 * early (the nfsmout label below is the common exit).
206  */
207 int
208 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
209               struct thread *td, struct mbuf **mrq)
210 {
211         struct sockaddr *nam = nfsd->nd_nam;
212         struct ucred *cred = &nfsd->nd_cr;
213         struct vnode *vp = NULL;
214         struct mount *mp = NULL;
215         nfsfh_t nfh;
216         fhandle_t *fhp;
217         int error = 0, rdonly, getret;
218         struct vattr vattr, *vap = &vattr;
219         u_long testmode, nfsmode;
220         struct nfsm_info info;
221         u_int32_t *tl;
222
            /* Seed the parse state from the request descriptor. */
223         info.dpos = nfsd->nd_dpos;
224         info.md = nfsd->nd_md;
225         info.mrep = nfsd->nd_mrep;
226         info.mreq = NULL;
227
228         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
229         fhp = &nfh.fh_generic;
            /* Phase 1: file handle, then the access mask (left in *tl). */
230         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
231         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
232         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
233             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
234         if (error) {
                    /*
                     * Handle did not resolve: reply with the status only.
                     * NOTE(review): the (1, NULL) arguments presumably
                     * mean "no attributes available" -- confirm.
                     */
235                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
236                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
237                 error = 0;
238                 goto nfsmout;
239         }
            /*
             * Phase 2: test READ, then the write-class bits (DELETE is
             * included only for directories), then LOOKUP (directories)
             * or EXECUTE (everything else).  A non-zero nfsrv_access()
             * result clears the bits under test.
             */
240         nfsmode = fxdr_unsigned(u_int32_t, *tl);
241         if ((nfsmode & NFSV3ACCESS_READ) &&
242                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
243                 nfsmode &= ~NFSV3ACCESS_READ;
244         if (vp->v_type == VDIR)
245                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
246                         NFSV3ACCESS_DELETE);
247         else
248                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
249         if ((nfsmode & testmode) &&
250                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
251                 nfsmode &= ~testmode;
252         if (vp->v_type == VDIR)
253                 testmode = NFSV3ACCESS_LOOKUP;
254         else
255                 testmode = NFSV3ACCESS_EXECUTE;
256         if ((nfsmode & testmode) &&
257                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
258                 nfsmode &= ~testmode;
            /*
             * Fetch attributes, then drop vp before building the reply;
             * vp is NULLed so nfsmout does not vput() it a second time.
             */
259         getret = VOP_GETATTR(vp, vap);
260         vput(vp);
261         vp = NULL;
            /* Phase 3: status, post-op attributes, surviving mask bits. */
262         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
263                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
264         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
265         tl = nfsm_build(&info, NFSX_UNSIGNED);
266         *tl = txdr_unsigned(nfsmode);
267 nfsmout:
            /* Common exit: publish the reply and release vp if still held. */
268         *mrq = info.mreq;
269         if (vp)
270                 vput(vp);
271         return(error);
272 }
273
274 /*
275  * nfs getattr service
 *
 * Decodes a file handle, resolves it to a vnode, reads its attributes
 * with VOP_GETATTR() and, on success, appends them to the reply in the
 * layout selected by the request's protocol version (ND_NFSV3 flag).
 *
 * The reply mbuf chain (possibly NULL) is handed back through *mrq.
 * 'td' is not used in this function.
 *
 * NOTE(review): NEGREPLYOUT/NEGKEEPOUT are defined elsewhere; per the
 * warning at the top of this file they can leave via nfsmout.
276  */
277 int
278 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
279               struct thread *td, struct mbuf **mrq)
280 {
281         struct sockaddr *nam = nfsd->nd_nam;
282         struct ucred *cred = &nfsd->nd_cr;
283         struct nfs_fattr *fp;
284         struct vattr va;
285         struct vattr *vap = &va;
286         struct vnode *vp = NULL;
287         struct mount *mp = NULL;
288         nfsfh_t nfh;
289         fhandle_t *fhp;
290         int error = 0, rdonly;
291         struct nfsm_info info;
292
            /* Seed the parse state from the request descriptor. */
293         info.mrep = nfsd->nd_mrep;
294         info.md = nfsd->nd_md;
295         info.dpos = nfsd->nd_dpos;
296         info.mreq = NULL;
297
298         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
299         fhp = &nfh.fh_generic;
300         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
301         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
302                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
303         if (error) {
                    /* Handle did not resolve: status-only reply. */
304                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
305                 error = 0;
306                 goto nfsmout;
307         }
            /*
             * Read the attributes and drop vp before replying; vp is
             * NULLed so nfsmout does not vput() it a second time.
             */
308         error = VOP_GETATTR(vp, vap);
309         vput(vp);
310         vp = NULL;
311         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
312                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
313         if (error) {
                    /* The error status is already in the reply. */
314                 error = 0;
315                 goto nfsmout;
316         }
317         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
318         nfsm_srvfattr(nfsd, vap, fp);
319         /* fall through */
320
321 nfsmout:
322         *mrq = info.mreq;
323         if (vp)
324                 vput(vp);
325         return(error);
326 }
327
328 /*
329  * nfs setattr service
 *
 * Decodes a file handle and the attributes to set (v3 sattr plus an
 * optional ctime guard, or a v2 sattr), resolves the handle, applies
 * the attributes with VOP_SETATTR() and replies with wcc data (v3) or
 * the resulting attributes (v2).
 *
 * The reply mbuf chain (possibly NULL) is handed back through *mrq.
 *
 * NOTE(review): NEGREPLYOUT/ERROROUT/NULLOUT/NEGKEEPOUT are defined
 * elsewhere; per the warning at the top of this file they can leave
 * via nfsmout.
330  */
331 int
332 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
333               struct thread *td, struct mbuf **mrq)
334 {
335         struct sockaddr *nam = nfsd->nd_nam;
336         struct ucred *cred = &nfsd->nd_cr;
337         struct vattr va, preat;
338         struct vattr *vap = &va;
339         struct nfsv2_sattr *sp;
340         struct nfs_fattr *fp;
341         struct vnode *vp = NULL;
342         struct mount *mp = NULL;
343         nfsfh_t nfh;
344         fhandle_t *fhp;
345         u_int32_t *tl;
            /*
             * preat_ret/postat_ret start non-zero and are overwritten with
             * the VOP_GETATTR() results below.
             * NOTE(review): non-zero presumably marks the matching
             * attribute block as unavailable -- confirm.
             */
346         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
347         int gcheck = 0;
348         struct timespec guard;
349         struct nfsm_info info;
350
351         info.mrep = nfsd->nd_mrep;
352         info.mreq = NULL;
353         info.md = nfsd->nd_md;
354         info.dpos = nfsd->nd_dpos;
355         info.v3 = (nfsd->nd_flag & ND_NFSV3);
356
357         guard.tv_sec = 0;       /* fix compiler warning */
358         guard.tv_nsec = 0;
359
360         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
361         fhp = &nfh.fh_generic;
362         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
            /* Start from an all-"null" vattr and fill in what was sent. */
363         VATTR_NULL(vap);
364         if (info.v3) {
                    /* v3: sattr, then a guard flag and optional guard time. */
365                 ERROROUT(nfsm_srvsattr(&info, vap));
366                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
367                 gcheck = fxdr_unsigned(int, *tl);
368                 if (gcheck) {
369                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
370                         fxdr_nfsv3time(tl, &guard);
371                 }
372         } else {
                    /*
                     * v2: each field is copied only when it does not hold
                     * the "minus one" marker (nfs_xdrneg1).
                     */
373                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
374                 /*
375                  * Nah nah nah nah na nah
376                  * There is a bug in the Sun client that puts 0xffff in the mode
377                  * field of sattr when it should put in 0xffffffff. The u_short
378                  * doesn't sign extend.
379                  * --> check the low order 2 bytes for 0xffff
380                  */
381                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
382                         vap->va_mode = nfstov_mode(sp->sa_mode);
383                 if (sp->sa_uid != nfs_xdrneg1)
384                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
385                 if (sp->sa_gid != nfs_xdrneg1)
386                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
387                 if (sp->sa_size != nfs_xdrneg1)
388                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
389                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
390 #ifdef notyet
391                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
392 #else
                            /* Only the seconds are taken from the request. */
393                         vap->va_atime.tv_sec =
394                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
395                         vap->va_atime.tv_nsec = 0;
396 #endif
397                 }
398                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
399                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
400
401         }
402
403         /*
404          * Now that we have all the fields, lets do it.
405          */
406         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
407                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
408         if (error) {
                    /*
                     * Handle did not resolve.  preat_ret and postat_ret are
                     * still 1 here and preat has not been filled in.
                     */
409                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
410                                       2 * NFSX_UNSIGNED, &error));
411                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
412                                  postat_ret, vap);
413                 error = 0;
414                 goto nfsmout;
415         }
416
417         /*
418          * vp now an active resource, pay careful attention to cleanup
419          */
420
421         if (info.v3) {
                    /*
                     * v3: capture pre-op attributes and, when a guard was
                     * supplied, require the current ctime to match it
                     * exactly; otherwise fail with NFSERR_NOT_SYNC.
                     */
422                 error = preat_ret = VOP_GETATTR(vp, &preat);
423                 if (!error && gcheck &&
424                         (preat.va_ctime.tv_sec != guard.tv_sec ||
425                          preat.va_ctime.tv_nsec != guard.tv_nsec))
426                         error = NFSERR_NOT_SYNC;
427                 if (error) {
428                         vput(vp);
429                         vp = NULL;
430                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
431                                               NFSX_WCCDATA(info.v3), &error));
432                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
433                                          postat_ret, vap);
434                         error = 0;
435                         goto nfsmout;
436                 }
437         }
438
439         /*
440          * If the size is being changed write access is required, otherwise
441          * just check for a read only file system.
             *
             * va_size still holding the all-ones value means the size was
             * not supplied in the request (nothing above assigned it).
442          */
443         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
444                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
445                         error = EROFS;
446                         goto out;
447                 }
448         } else {
449                 if (vp->v_type == VDIR) {
450                         error = EISDIR;
451                         goto out;
452                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
453                             td, 0)) != 0){ 
454                         goto out;
455                 }
456         }
            /*
             * Apply the change, then re-read the attributes into *vap for
             * the reply.  A setattr error takes precedence over a getattr
             * error.
             */
457         error = VOP_SETATTR(vp, vap, cred);
458         postat_ret = VOP_GETATTR(vp, vap);
459         if (!error)
460                 error = postat_ret;
461 out:
            /* vp is released before the reply is built. */
462         vput(vp);
463         vp = NULL;
464         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
465                    NFSX_WCCORFATTR(info.v3), &error));
466         if (info.v3) {
                    /* v3 always returns wcc data, error or not. */
467                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
468                                  postat_ret, vap);
469                 error = 0;
470                 goto nfsmout;
471         } else {
472                 fp = nfsm_build(&info, NFSX_V2FATTR);
473                 nfsm_srvfattr(nfsd, vap, fp);
474         }
475         /* fall through */
476
477 nfsmout:
478         *mrq = info.mreq;
479         if (vp)
480                 vput(vp);
481         return(error);
482 }
483
484 /*
485  * nfs lookup rpc
486  */
487 int
488 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
489              struct thread *td, struct mbuf **mrq)
490 {
491         struct sockaddr *nam = nfsd->nd_nam;
492         struct ucred *cred = &nfsd->nd_cr;
493         struct nfs_fattr *fp;
494         struct nlookupdata nd;
495         struct vnode *vp;
496         struct vnode *dirp;
497         struct nchandle nch;
498         nfsfh_t nfh;
499         fhandle_t *fhp;
500         int error = 0, len, dirattr_ret = 1;
501         int pubflag;
502         struct vattr va, dirattr, *vap = &va;
503         struct nfsm_info info;
504
505         info.mrep = nfsd->nd_mrep;
506         info.mreq = NULL;
507         info.md = nfsd->nd_md;
508         info.dpos = nfsd->nd_dpos;
509         info.v3 = (nfsd->nd_flag & ND_NFSV3);
510
511         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
512         nlookup_zero(&nd);
513         dirp = NULL;
514         vp = NULL;
515
516         fhp = &nfh.fh_generic;
517         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
518         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
519
520         pubflag = nfs_ispublicfh(fhp);
521
522         error = nfs_namei(&nd, cred, 0, NULL, &vp,
523                 fhp, len, slp, nam, &info.md, &info.dpos,
524                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
525
526         /*
527          * namei failure, only dirp to cleanup.  Clear out garbarge from
528          * structure in case macros jump to nfsmout.
529          */
530
531         if (error) {
532                 if (dirp) {
533                         if (info.v3)
534                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
535                         vrele(dirp);
536                         dirp = NULL;
537                 }
538                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
539                                       NFSX_POSTOPATTR(info.v3), &error));
540                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
541                 error = 0;
542                 goto nfsmout;
543         }
544
545         /*
546          * Locate index file for public filehandle
547          *
548          * error is 0 on entry and 0 on exit from this block.
549          */
550
551         if (pubflag) {
552                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
553                         /*
554                          * Setup call to lookup() to see if we can find
555                          * the index file. Arguably, this doesn't belong
556                          * in a kernel.. Ugh.  If an error occurs, do not
557                          * try to install an index file and then clear the
558                          * error.
559                          *
560                          * When we replace nd with ind and redirect ndp,
561                          * maintenance of ni_startdir and ni_vp shift to
562                          * ind and we have to clean them up in the old nd.
563                          * However, the cnd resource continues to be maintained
564                          * via the original nd.  Confused?  You aren't alone!
565                          */
566                         vn_unlock(vp);
567                         cache_copy(&nd.nl_nch, &nch);
568                         nlookup_done(&nd);
569                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
570                                                 UIO_SYSSPACE, 0, cred, &nch);
571                         cache_drop(&nch);
572                         if (error == 0)
573                                 error = nlookup(&nd);
574
575                         if (error == 0) {
576                                 /*
577                                  * Found an index file. Get rid of
578                                  * the old references.  transfer vp and
579                                  * load up the new vp.  Fortunately we do
580                                  * not have to deal with dvp, that would be
581                                  * a huge mess.
582                                  */
583                                 if (dirp)       
584                                         vrele(dirp);
585                                 dirp = vp;
586                                 vp = NULL;
587                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
588                                                         LK_EXCLUSIVE, &vp);
589                                 KKASSERT(error == 0);
590                         }
591                         error = 0;
592                 }
593                 /*
594                  * If the public filehandle was used, check that this lookup
595                  * didn't result in a filehandle outside the publicly exported
596                  * filesystem.  We clear the poor vp here to avoid lockups due
597                  * to NFS I/O.
598                  */
599
600                 if (vp->v_mount != nfs_pub.np_mount) {
601                         vput(vp);
602                         vp = NULL;
603                         error = EPERM;
604                 }
605         }
606
607         if (dirp) {
608                 if (info.v3)
609                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
610                 vrele(dirp);
611                 dirp = NULL;
612         }
613
614         /*
615          * Resources at this point:
616          *      ndp->ni_vp      may not be NULL
617          *
618          */
619
620         if (error) {
621                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
622                                       NFSX_POSTOPATTR(info.v3), &error));
623                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
624                 error = 0;
625                 goto nfsmout;
626         }
627
628         /*
629          * Clear out some resources prior to potentially blocking.  This
630          * is not as critical as ni_dvp resources in other routines, but
631          * it helps.
632          */
633         nlookup_done(&nd);
634
635         /*
636          * Get underlying attribute, then release remaining resources ( for
637          * the same potential blocking reason ) and reply.
638          */
639         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
640         error = VFS_VPTOFH(vp, &fhp->fh_fid);
641         if (!error)
642                 error = VOP_GETATTR(vp, vap);
643
644         vput(vp);
645         vp = NULL;
646         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
647                               NFSX_SRVFH(info.v3) +
648                               NFSX_POSTOPORFATTR(info.v3) +
649                               NFSX_POSTOPATTR(info.v3),
650                               &error));
651         if (error) {
652                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
653                 error = 0;
654                 goto nfsmout;
655         }
656         nfsm_srvfhtom(&info, fhp);
657         if (info.v3) {
658                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
659                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
660         } else {
661                 fp = nfsm_build(&info, NFSX_V2FATTR);
662                 nfsm_srvfattr(nfsd, vap, fp);
663         }
664
665 nfsmout:
666         *mrq = info.mreq;
667         if (dirp)
668                 vrele(dirp);
669         nlookup_done(&nd);              /* may be called twice */
670         if (vp)
671                 vput(vp);
672         return (error);
673 }
674
675 /*
676  * nfs readlink service
 *
 * Decodes a file handle, pre-allocates an mbuf cluster chain able to
 * hold NFS_MAXPATHLEN bytes, reads the symbolic link's target straight
 * into that chain with VOP_READLINK(), trims the chain to the length
 * actually read (rounded up via nfsm_rndup()) and appends it to the
 * reply after the length word.
 *
 * The reply mbuf chain (possibly NULL) is handed back through *mrq.
 * 'td' is not used in this function.
 *
 * NOTE(review): NEGREPLYOUT/NEGKEEPOUT are defined elsewhere; per the
 * warning at the top of this file they can leave via nfsmout.
677  */
678 int
679 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
680                struct thread *td, struct mbuf **mrq)
681 {
682         struct sockaddr *nam = nfsd->nd_nam;
683         struct ucred *cred = &nfsd->nd_cr;
684         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
685         struct iovec *ivp = iv;
686         u_int32_t *tl;
687         int error = 0, rdonly, i, tlen, len, getret;
            /* mp1: newest mbuf, mp2: chain tail, mp3: chain head. */
688         struct mbuf *mp1, *mp2, *mp3;
689         struct vnode *vp = NULL;
690         struct mount *mp = NULL;
691         struct vattr attr;
692         nfsfh_t nfh;
693         fhandle_t *fhp;
694         struct uio io, *uiop = &io;
695         struct nfsm_info info;
696
697         info.mrep = nfsd->nd_mrep;
698         info.mreq = NULL;
699         info.md = nfsd->nd_md;
700         info.dpos = nfsd->nd_dpos;
701         info.v3 = (nfsd->nd_flag & ND_NFSV3);
702
703         bzero(&io, sizeof(struct uio));
704
705         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
706 #ifndef nolint
707         mp2 = NULL;
708 #endif
            /* mp3 must be NULL before any macro can jump to nfsmout. */
709         mp3 = NULL;
710         fhp = &nfh.fh_generic;
711         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
            /*
             * Build the receive chain: one cluster mbuf per iteration, with
             * the last one shortened so the total is exactly
             * NFS_MAXPATHLEN, and one iovec entry per mbuf.
             */
712         len = 0;
713         i = 0;
714         while (len < NFS_MAXPATHLEN) {
715                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
716                 mp1->m_len = MCLBYTES;
717                 if (len == 0)
718                         mp3 = mp2 = mp1;
719                 else {
720                         mp2->m_next = mp1;
721                         mp2 = mp1;
722                 }
723                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
724                         mp1->m_len = NFS_MAXPATHLEN-len;
725                         len = NFS_MAXPATHLEN;
726                 } else
727                         len += mp1->m_len;
728                 ivp->iov_base = mtod(mp1, caddr_t);
729                 ivp->iov_len = mp1->m_len;
730                 i++;
731                 ivp++;
732         }
            /* Describe the chain as a kernel-space read of 'len' bytes. */
733         uiop->uio_iov = iv;
734         uiop->uio_iovcnt = i;
735         uiop->uio_offset = 0;
736         uiop->uio_resid = len;
737         uiop->uio_rw = UIO_READ;
738         uiop->uio_segflg = UIO_SYSSPACE;
739         uiop->uio_td = NULL;
740         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
741                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
742         if (error) {
                    /* Handle did not resolve; nfsmout frees the mp3 chain. */
743                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
744                                       2 * NFSX_UNSIGNED, &error));
745                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
746                 error = 0;
747                 goto nfsmout;
748         }
            /* Only symbolic links can be read; the errno differs by version. */
749         if (vp->v_type != VLNK) {
750                 if (info.v3)
751                         error = EINVAL;
752                 else
753                         error = ENXIO;
754                 goto out;
755         }
756         error = VOP_READLINK(vp, uiop, cred);
757 out:
            /*
             * Attributes are fetched on both the success and the error
             * path, then vp is dropped before the reply is built.
             */
758         getret = VOP_GETATTR(vp, &attr);
759         vput(vp);
760         vp = NULL;
761         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
762                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
763                              &error));
764         if (info.v3) {
765                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
766                 if (error) {
767                         error = 0;
768                         goto nfsmout;
769                 }
770         }
            /*
             * uio_resid is what VOP_READLINK() left unread: shrink len to
             * the bytes actually read and trim the chain accordingly.
             */
771         if (uiop->uio_resid > 0) {
772                 len -= uiop->uio_resid;
773                 tlen = nfsm_rndup(len);
774                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
775         }
776         tl = nfsm_build(&info, NFSX_UNSIGNED);
777         *tl = txdr_unsigned(len);
            /*
             * Hand the data chain over to the reply; clearing mp3 keeps
             * nfsmout from freeing mbufs that now belong to the reply.
             */
778         info.mb->m_next = mp3;
779         mp3 = NULL;
780 nfsmout:
781         *mrq = info.mreq;
782         if (mp3)
783                 m_freem(mp3);
784         if (vp)
785                 vput(vp);
786         return(error);
787 }
788
789 /*
790  * nfs read service
791  */
792 int
793 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
794            struct thread *td, struct mbuf **mrq)
795 {
796         struct nfsm_info info;
797         struct sockaddr *nam = nfsd->nd_nam;
798         struct ucred *cred = &nfsd->nd_cr;
799         struct iovec *iv;
800         struct iovec *iv2;
801         struct mbuf *m;
802         struct nfs_fattr *fp;
803         u_int32_t *tl;
804         int i;
805         int reqlen;
806         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
807         struct mbuf *m2;
808         struct vnode *vp = NULL;
809         struct mount *mp = NULL;
810         nfsfh_t nfh;
811         fhandle_t *fhp;
812         struct uio io, *uiop = &io;
813         struct vattr va, *vap = &va;
814         struct nfsheur *nh;
815         off_t off;
816         int ioflag = 0;
817
818         info.mrep = nfsd->nd_mrep;
819         info.mreq = NULL;
820         info.md = nfsd->nd_md;
821         info.dpos = nfsd->nd_dpos;
822         info.v3 = (nfsd->nd_flag & ND_NFSV3);
823
824         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
825         fhp = &nfh.fh_generic;
826         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
827         if (info.v3) {
828                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
829                 off = fxdr_hyper(tl);
830         } else {
831                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
832                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
833         }
834         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
835                                             NFS_SRVMAXDATA(nfsd), &error));
836
837         /*
838          * Reference vp.  If an error occurs, vp will be invalid, but we
839          * have to NULL it just in case.  The macros might goto nfsmout
840          * as well.
841          */
842
843         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
844                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
845         if (error) {
846                 vp = NULL;
847                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
848                                       2 * NFSX_UNSIGNED, &error));
849                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
850                 error = 0;
851                 goto nfsmout;
852         }
853
854         if (vp->v_type != VREG) {
855                 if (info.v3)
856                         error = EINVAL;
857                 else
858                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
859         }
860         if (!error) {
861             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
862                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
863         }
864         getret = VOP_GETATTR(vp, vap);
865         if (!error)
866                 error = getret;
867         if (error) {
868                 vput(vp);
869                 vp = NULL;
870                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
871                                       NFSX_POSTOPATTR(info.v3), &error));
872                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
873                 error = 0;
874                 goto nfsmout;
875         }
876
877         /*
878          * Calculate byte count to read
879          */
880
881         if (off >= vap->va_size)
882                 cnt = 0;
883         else if ((off + reqlen) > vap->va_size)
884                 cnt = vap->va_size - off;
885         else
886                 cnt = reqlen;
887
888         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
889                               NFSX_POSTOPORFATTR(info.v3) +
890                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
891                               &error));
892         if (info.v3) {
893                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
894                 *tl++ = nfs_true;
895                 fp = (struct nfs_fattr *)tl;
896                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
897         } else {
898                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
899                 fp = (struct nfs_fattr *)tl;
900                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
901         }
902         len = left = nfsm_rndup(cnt);
903         if (cnt > 0) {
904                 /*
905                  * Generate the mbuf list with the uio_iov ref. to it.
906                  */
907                 i = 0;
908                 m = m2 = info.mb;
909                 while (left > 0) {
910                         siz = min(M_TRAILINGSPACE(m), left);
911                         if (siz > 0) {
912                                 left -= siz;
913                                 i++;
914                         }
915                         if (left > 0) {
916                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
917                                 m->m_len = 0;
918                                 m2->m_next = m;
919                                 m2 = m;
920                         }
921                 }
922                 iv = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
923                 uiop->uio_iov = iv2 = iv;
924                 m = info.mb;
925                 left = len;
926                 i = 0;
927                 while (left > 0) {
928                         if (m == NULL)
929                                 panic("nfsrv_read iov");
930                         siz = min(M_TRAILINGSPACE(m), left);
931                         if (siz > 0) {
932                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
933                                 iv->iov_len = siz;
934                                 m->m_len += siz;
935                                 left -= siz;
936                                 iv++;
937                                 i++;
938                         }
939                         m = m->m_next;
940                 }
941                 uiop->uio_iovcnt = i;
942                 uiop->uio_offset = off;
943                 uiop->uio_resid = len;
944                 uiop->uio_rw = UIO_READ;
945                 uiop->uio_segflg = UIO_SYSSPACE;
946                 nh = nfsrv_sequential_heuristic(uiop, vp, 0);
947                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
948                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
949                 if (error == 0) {
950                         off = uiop->uio_offset;
951                         nh->nh_nextoff = off;
952                 }
953                 kfree((caddr_t)iv2, M_TEMP);
954                 if (error || (getret = VOP_GETATTR(vp, vap))) {
955                         if (!error)
956                                 error = getret;
957                         m_freem(info.mreq);
958                         info.mreq = NULL;
959                         vput(vp);
960                         vp = NULL;
961                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
962                                               NFSX_POSTOPATTR(info.v3),
963                                               &error));
964                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
965                         error = 0;
966                         goto nfsmout;
967                 }
968         } else {
969                 uiop->uio_resid = 0;
970         }
971         vput(vp);
972         vp = NULL;
973         nfsm_srvfattr(nfsd, vap, fp);
974         tlen = len - uiop->uio_resid;
975         cnt = cnt < tlen ? cnt : tlen;
976         tlen = nfsm_rndup(cnt);
977         if (len != tlen || tlen != cnt)
978                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
979         if (info.v3) {
980                 *tl++ = txdr_unsigned(cnt);
981                 if (cnt < reqlen)
982                         *tl++ = nfs_true;
983                 else
984                         *tl++ = nfs_false;
985         }
986         *tl = txdr_unsigned(cnt);
987 nfsmout:
988         *mrq = info.mreq;
989         if (vp)
990                 vput(vp);
991         return(error);
992 }
993
994 /*
995  * nfs write service
996  */
997 int
998 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
999             struct thread *td, struct mbuf **mrq)
1000 {
1001         struct sockaddr *nam = nfsd->nd_nam;
1002         struct ucred *cred = &nfsd->nd_cr;
1003         struct iovec *ivp;
1004         int i, cnt;
1005         struct mbuf *mp1;
1006         struct nfs_fattr *fp;
1007         struct iovec *iv;
1008         struct vattr va, forat;
1009         struct vattr *vap = &va;
1010         u_int32_t *tl;
1011         int error = 0, rdonly, len, forat_ret = 1;
1012         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1013         int stable = NFSV3WRITE_FILESYNC;
1014         struct vnode *vp = NULL;
1015         struct mount *mp = NULL;
1016         struct nfsheur *nh;
1017         nfsfh_t nfh;
1018         fhandle_t *fhp;
1019         struct uio io, *uiop = &io;
1020         struct nfsm_info info;
1021         off_t off;
1022
1023         info.mrep = nfsd->nd_mrep;
1024         info.mreq = NULL;
1025         info.md = nfsd->nd_md;
1026         info.dpos = nfsd->nd_dpos;
1027         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1028
1029         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1030         if (info.mrep == NULL) {
1031                 error = 0;
1032                 goto nfsmout;
1033         }
1034         fhp = &nfh.fh_generic;
1035         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1036         if (info.v3) {
1037                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1038                 off = fxdr_hyper(tl);
1039                 tl += 3;
1040                 stable = fxdr_unsigned(int, *tl++);
1041         } else {
1042                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1043                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1044                 tl += 2;
1045                 if (nfs_async)
1046                         stable = NFSV3WRITE_UNSTABLE;
1047         }
1048         retlen = len = fxdr_unsigned(int32_t, *tl);
1049         cnt = i = 0;
1050
1051         /*
1052          * For NFS Version 2, it is not obvious what a write of zero length
1053          * should do, but I might as well be consistent with Version 3,
1054          * which is to return ok so long as there are no permission problems.
1055          */
1056         if (len > 0) {
1057             zeroing = 1;
1058             mp1 = info.mrep;
1059             while (mp1) {
1060                 if (mp1 == info.md) {
1061                         zeroing = 0;
1062                         adjust = info.dpos - mtod(mp1, caddr_t);
1063                         mp1->m_len -= adjust;
1064                         if (mp1->m_len > 0 && adjust > 0)
1065                                 mp1->m_data += adjust;
1066                 }
1067                 if (zeroing)
1068                         mp1->m_len = 0;
1069                 else if (mp1->m_len > 0) {
1070                         i += mp1->m_len;
1071                         if (i > len) {
1072                                 mp1->m_len -= (i - len);
1073                                 zeroing = 1;
1074                         }
1075                         if (mp1->m_len > 0)
1076                                 cnt++;
1077                 }
1078                 mp1 = mp1->m_next;
1079             }
1080         }
1081         if (len > NFS_MAXDATA || len < 0 || i < len) {
1082                 error = EIO;
1083                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1084                                       2 * NFSX_UNSIGNED, &error));
1085                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1086                                  aftat_ret, vap);
1087                 error = 0;
1088                 goto nfsmout;
1089         }
1090         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1091                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1092         if (error) {
1093                 vp = NULL;
1094                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1095                                       2 * NFSX_UNSIGNED, &error));
1096                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1097                                  aftat_ret, vap);
1098                 error = 0;
1099                 goto nfsmout;
1100         }
1101         if (info.v3)
1102                 forat_ret = VOP_GETATTR(vp, &forat);
1103         if (vp->v_type != VREG) {
1104                 if (info.v3)
1105                         error = EINVAL;
1106                 else
1107                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1108         }
1109         if (!error) {
1110                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1111         }
1112         if (error) {
1113                 vput(vp);
1114                 vp = NULL;
1115                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1116                                       NFSX_WCCDATA(info.v3), &error));
1117                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1118                                  aftat_ret, vap);
1119                 error = 0;
1120                 goto nfsmout;
1121         }
1122
1123         if (len > 0) {
1124             ivp = kmalloc(cnt * sizeof(struct iovec), M_TEMP, M_WAITOK);
1125             uiop->uio_iov = iv = ivp;
1126             uiop->uio_iovcnt = cnt;
1127             mp1 = info.mrep;
1128             while (mp1) {
1129                 if (mp1->m_len > 0) {
1130                         ivp->iov_base = mtod(mp1, caddr_t);
1131                         ivp->iov_len = mp1->m_len;
1132                         ivp++;
1133                 }
1134                 mp1 = mp1->m_next;
1135             }
1136
1137             /*
1138              * XXX
1139              * The IO_METASYNC flag indicates that all metadata (and not just
1140              * enough to ensure data integrity) mus be written to stable storage
1141              * synchronously.
1142              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1143              */
1144             if (stable == NFSV3WRITE_UNSTABLE)
1145                 ioflags = IO_NODELOCKED;
1146             else if (stable == NFSV3WRITE_DATASYNC)
1147                 ioflags = (IO_SYNC | IO_NODELOCKED);
1148             else
1149                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1150             uiop->uio_resid = len;
1151             uiop->uio_rw = UIO_WRITE;
1152             uiop->uio_segflg = UIO_SYSSPACE;
1153             uiop->uio_td = NULL;
1154             uiop->uio_offset = off;
1155             nh = nfsrv_sequential_heuristic(uiop, vp, 1);
1156             ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1157             error = VOP_WRITE(vp, uiop, ioflags, cred);
1158             if (error == 0)
1159                 nh->nh_nextoff = uiop->uio_offset;
1160             nfsstats.srvvop_writes++;
1161             kfree((caddr_t)iv, M_TEMP);
1162         }
1163         aftat_ret = VOP_GETATTR(vp, vap);
1164         vput(vp);
1165         vp = NULL;
1166         if (!error)
1167                 error = aftat_ret;
1168         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1169                               NFSX_PREOPATTR(info.v3) +
1170                               NFSX_POSTOPORFATTR(info.v3) +
1171                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1172                               &error));
1173         if (info.v3) {
1174                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1175                                  aftat_ret, vap);
1176                 if (error) {
1177                         error = 0;
1178                         goto nfsmout;
1179                 }
1180                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1181                 *tl++ = txdr_unsigned(retlen);
1182                 /*
1183                  * If nfs_async is set, then pretend the write was FILESYNC.
1184                  */
1185                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1186                         *tl++ = txdr_unsigned(stable);
1187                 else
1188                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1189                 /*
1190                  * Actually, there is no need to txdr these fields,
1191                  * but it may make the values more human readable,
1192                  * for debugging purposes.
1193                  */
1194                 if (nfsver.tv_sec == 0)
1195                         nfsver = boottime;
1196                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1197                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1198         } else {
1199                 fp = nfsm_build(&info, NFSX_V2FATTR);
1200                 nfsm_srvfattr(nfsd, vap, fp);
1201         }
1202 nfsmout:
1203         *mrq = info.mreq;
1204         if (vp)
1205                 vput(vp);
1206         return(error);
1207 }
1208
1209 /*
1210  * NFS write service with write gathering support. Called when
1211  * nfsrvw_procrastinate > 0.
1212  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1213  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1214  * Jan. 1994.
      *
      * If *ndp is non-NULL on entry, that request is parsed, trimmed down
      * to its write data and queued on slp->ns_tq (kept ordered by nd_time)
      * and on the delay hash, where it may be coalesced with a request
      * that is already queued.  Every queued request whose nd_time has
      * passed is then written out, and a reply is built for it and for
      * each request that was coalesced into it.  Finally one request that
      * has a reply, if there is one, is handed back through *ndp and *mrq;
      * both are set to NULL otherwise.  The function always returns 0.
1215  */
1216 int
1217 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1218                   struct thread *td, struct mbuf **mrq)
1219 {
1220         struct iovec *ivp;
1221         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1222         struct nfs_fattr *fp;
1223         int i;
1224         struct iovec *iov;
1225         struct nfsrvw_delayhash *wpp;
1226         struct ucred *cred;
1227         struct vattr va, forat;
1228         u_int32_t *tl;
1229         int error = 0, rdonly, len, forat_ret = 1;
1230         int ioflags, aftat_ret = 1, adjust, zeroing;
1231         struct mbuf *mp1;
1232         struct vnode *vp = NULL;
1233         struct mount *mp = NULL;
1234         struct uio io, *uiop = &io;
1235         u_quad_t cur_usec;
1236         struct nfsm_info info;
1237
1238         info.mreq = NULL;
1239
1240         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1241 #ifndef nolint
1242         i = 0;
1243         len = 0;
1244 #endif
1245         if (*ndp) {
             /*
              * A new request was passed in.  Take it over and stamp it
              * with the time at which it is due to be written.
              */
1246             nfsd = *ndp;
1247             *ndp = NULL;
1248             info.mrep = nfsd->nd_mrep;
1249             info.mreq = NULL;
1250             info.md = nfsd->nd_md;
1251             info.dpos = nfsd->nd_dpos;
1252             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1253             cred = &nfsd->nd_cr;
1254             LIST_INIT(&nfsd->nd_coalesce);
1255             nfsd->nd_mreq = NULL;
1256             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1257             cur_usec = nfs_curusec();
1258             nfsd->nd_time = cur_usec +
1259                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1260     
1261             /*
1262              * Now, get the write header..
1263              */
1264             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1265             if (info.v3) {
1266                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1267                 nfsd->nd_off = fxdr_hyper(tl);
1268                 tl += 3;
1269                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1270             } else {
1271                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1272                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1273                 tl += 2;
1274                 if (nfs_async)
1275                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1276             }
1277             len = fxdr_unsigned(int32_t, *tl);
1278             nfsd->nd_len = len;
1279             nfsd->nd_eoff = nfsd->nd_off + len;
1280     
1281             /*
1282              * Trim the header out of the mbuf list and trim off any trailing
1283              * junk so that the mbuf list has only the write data.
1284              */
1285             zeroing = 1;
1286             i = 0;
1287             mp1 = info.mrep;
1288             while (mp1) {
1289                 if (mp1 == info.md) {
1290                     zeroing = 0;
1291                     adjust = info.dpos - mtod(mp1, caddr_t);
1292                     mp1->m_len -= adjust;
1293                     if (mp1->m_len > 0 && adjust > 0)
1294                         mp1->m_data += adjust;
1295                 }
1296                 if (zeroing)
1297                     mp1->m_len = 0;
1298                 else {
1299                     i += mp1->m_len;
1300                     if (i > len) {
1301                         mp1->m_len -= (i - len);
1302                         zeroing = 1;
1303                     }
1304                 }
1305                 mp1 = mp1->m_next;
1306             }
1307             if (len > NFS_MAXDATA || len < 0  || i < len) {
                 /*
                  * Bad length, or less data in the request than claimed.
                  * NOTE(review): nfsmout is presumably also the target of
                  * the NEGREPLYOUT()/NULLOUT() macros used above -- confirm
                  * against their definitions.
                  *
                  * The request is answered with EIO.  Clearing nd_mrep
                  * keeps it off the delay hash below, and nd_time = 0
                  * sorts it to the front of the time queue.
                  */
1308 nfsmout:
1309                 m_freem(info.mrep);
1310                 info.mrep = NULL;
1311                 error = EIO;
1312                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1313                 if (info.v3) {
1314                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1315                                      aftat_ret, &va);
1316                 }
1317                 nfsd->nd_mreq = info.mreq;
1318                 nfsd->nd_mrep = NULL;
1319                 nfsd->nd_time = 0;
1320             }
1321     
1322             /*
1323              * Add this entry to the hash and time queues.
1324              */
1325             owp = NULL;
1326             wp = slp->ns_tq.lh_first;
1327             while (wp && wp->nd_time < nfsd->nd_time) {
1328                 owp = wp;
1329                 wp = wp->nd_tq.le_next;
1330             }
1331             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1332             if (owp) {
1333                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1334             } else {
1335                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1336             }
1337             if (nfsd->nd_mrep) {
                 /*
                  * Find the insertion point in the hash chain: first skip
                  * entries for other file handles, then entries for the
                  * same file handle that start at a lower offset.
                  */
1338                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1339                 owp = NULL;
1340                 wp = wpp->lh_first;
1341                 while (wp &&
1342                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1343                     owp = wp;
1344                     wp = wp->nd_hash.le_next;
1345                 }
1346                 while (wp && wp->nd_off < nfsd->nd_off &&
1347                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1348                     owp = wp;
1349                     wp = wp->nd_hash.le_next;
1350                 }
1351                 if (owp) {
1352                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1353
1354                     /*
1355                      * Search the hash list for overlapping entries and
1356                      * coalesce.
1357                      */
1358                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
                         /* fetch the successor first: coalescing unlinks nfsd */
1359                         wp = nfsd->nd_hash.le_next;
1360                         if (NFSW_SAMECRED(owp, nfsd))
1361                             nfsrvw_coalesce(owp, nfsd);
1362                     }
1363                 } else {
1364                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1365                 }
1366             }
1367         }
1368     
1369         /*
1370          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1371          * and generate the associated reply mbuf list(s).
1372          */
1373 loop1:
1374         cur_usec = nfs_curusec();
1375         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1376                 owp = nfsd->nd_tq.le_next;
                 /* the queue is ordered by nd_time: nothing further is due */
1377                 if (nfsd->nd_time > cur_usec)
1378                     break;
                 /* already has a reply, nothing left to write */
1379                 if (nfsd->nd_mreq)
1380                     continue;
1381                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1382                 LIST_REMOVE(nfsd, nd_tq);
1383                 LIST_REMOVE(nfsd, nd_hash);
1384                 info.mrep = nfsd->nd_mrep;
1385                 info.mreq = NULL;
1386                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1387                 nfsd->nd_mrep = NULL;
1388                 cred = &nfsd->nd_cr;
1389                 forat_ret = aftat_ret = 1;
1390                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1391                                      nfsd->nd_nam, &rdonly,
1392                                      (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1393                 if (!error) {
1394                     if (info.v3)
1395                         forat_ret = VOP_GETATTR(vp, &forat);
1396                     if (vp->v_type != VREG) {
1397                         if (info.v3)
1398                             error = EINVAL;
1399                         else
1400                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1401                     }
1402                 } else {
1403                     vp = NULL;
1404                 }
1405                 if (!error) {
1406                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1407                 }
1408     
1409                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1410                     ioflags = IO_NODELOCKED;
1411                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1412                     ioflags = (IO_SYNC | IO_NODELOCKED);
1413                 else
1414                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1415                 uiop->uio_rw = UIO_WRITE;
1416                 uiop->uio_segflg = UIO_SYSSPACE;
1417                 uiop->uio_td = NULL;
1418                 uiop->uio_offset = nfsd->nd_off;
1419                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1420                 if (uiop->uio_resid > 0) {
                     /*
                      * One iovec per non-empty mbuf.  The array is built
                      * even when error is already set; VOP_WRITE() itself
                      * is only issued when there has been no error.
                      */
1421                     mp1 = info.mrep;
1422                     i = 0;
1423                     while (mp1) {
1424                         if (mp1->m_len > 0)
1425                             i++;
1426                         mp1 = mp1->m_next;
1427                     }
1428                     uiop->uio_iovcnt = i;
1429                     iov = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
1430                     uiop->uio_iov = ivp = iov;
1431                     mp1 = info.mrep;
1432                     while (mp1) {
1433                         if (mp1->m_len > 0) {
1434                             ivp->iov_base = mtod(mp1, caddr_t);
1435                             ivp->iov_len = mp1->m_len;
1436                             ivp++;
1437                         }
1438                         mp1 = mp1->m_next;
1439                     }
1440                     if (!error) {
1441                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1442                         nfsstats.srvvop_writes++;
1443                     }
1444                     kfree((caddr_t)iov, M_TEMP);
1445                 }
1446                 m_freem(info.mrep);
1447                 info.mrep = NULL;
1448                 if (vp) {
1449                     aftat_ret = VOP_GETATTR(vp, &va);
1450                     vput(vp);
1451                     vp = NULL;
1452                 }
1453
1454                 /*
1455                  * Loop around generating replies for all write rpcs that have
1456                  * now been completed.
                  *
                  * swp stays on the request that was just written; nfsd
                  * walks over swp and then over each entry taken off
                  * swp's nd_coalesce list.
1457                  */
1458                 swp = nfsd;
1459                 do {
1460                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1461                     if (error) {
1462                         nfsm_writereply(&info, nfsd, slp, error,
1463                                         NFSX_WCCDATA(info.v3));
1464                         if (info.v3) {
1465                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1466                                              aftat_ret, &va);
1467                         }
1468                     } else {
1469                         nfsm_writereply(&info, nfsd, slp, error,
1470                                         NFSX_PREOPATTR(info.v3) +
1471                                         NFSX_POSTOPORFATTR(info.v3) +
1472                                         2 * NFSX_UNSIGNED +
1473                                         NFSX_WRITEVERF(info.v3));
1474                         if (info.v3) {
1475                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1476                                              aftat_ret, &va);
1477                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
                             /* each reply reports its own length but swp's stability */
1478                             *tl++ = txdr_unsigned(nfsd->nd_len);
1479                             *tl++ = txdr_unsigned(swp->nd_stable);
1480                             /*
1481                              * Actually, there is no need to txdr these fields,
1482                              * but it may make the values more human readable,
1483                              * for debugging purposes.
1484                              */
1485                             if (nfsver.tv_sec == 0)
1486                                     nfsver = boottime;
1487                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1488                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1489                         } else {
1490                             fp = nfsm_build(&info, NFSX_V2FATTR);
1491                             nfsm_srvfattr(nfsd, &va, fp);
1492                         }
1493                     }
1494                     nfsd->nd_mreq = info.mreq;
1495                     if (nfsd->nd_mrep)
1496                         panic("nfsrv_write: nd_mrep not free");
1497
1498                     /*
1499                      * Done. Put it at the head of the timer queue so that
1500                      * the final phase can return the reply.
1501                      */
1502                     if (nfsd != swp) {
1503                         nfsd->nd_time = 0;
1504                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1505                     }
1506                     nfsd = swp->nd_coalesce.lh_first;
1507                     if (nfsd) {
1508                         LIST_REMOVE(nfsd, nd_tq);
1509                     }
1510                 } while (nfsd);
1511                 swp->nd_time = 0;
1512                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
                 /* the time queue was modified above: rescan it from the head */
1513                 goto loop1;
1514         }
1515
1516         /*
1517          * Search for a reply to return.
1518          */
1519         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1520                 if (nfsd->nd_mreq) {
1521                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1522                     LIST_REMOVE(nfsd, nd_tq);
1523                     break;
1524                 }
1525         }
1526         if (nfsd) {
1527                 *ndp = nfsd;
1528                 *mrq = nfsd->nd_mreq;
1529         } else {
1530                 *ndp = NULL;
1531                 *mrq = NULL;
1532         }
1533         return (0);
1534 }
1535
1536 /*
1537  * Coalesce the write request nfsd into owp. To do this we must:
1538  * - remove nfsd from the queues
1539  * - merge nfsd->nd_mrep into owp->nd_mrep
1540  * - update the nd_eoff and nd_stable for owp
1541  * - put nfsd on owp's nd_coalesce list
1542  * NB: Must be called at splsoftclock().
1543  */
1544 static void
1545 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1546 {
1547         int overlap;
1548         struct mbuf *mp1;
1549         struct nfsrv_descript *p;
1550
1551         NFS_DPF(WG, ("C%03x-%03x",
1552                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1553         LIST_REMOVE(nfsd, nd_hash);
1554         LIST_REMOVE(nfsd, nd_tq);
1555         if (owp->nd_eoff < nfsd->nd_eoff) {
1556             overlap = owp->nd_eoff - nfsd->nd_off;
1557             if (overlap < 0)
1558                 panic("nfsrv_coalesce: bad off");
1559             if (overlap > 0)
1560                 m_adj(nfsd->nd_mrep, overlap);
1561             mp1 = owp->nd_mrep;
1562             while (mp1->m_next)
1563                 mp1 = mp1->m_next;
1564             mp1->m_next = nfsd->nd_mrep;
1565             owp->nd_eoff = nfsd->nd_eoff;
1566         } else
1567             m_freem(nfsd->nd_mrep);
1568         nfsd->nd_mrep = NULL;
1569         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1570             owp->nd_stable = NFSV3WRITE_FILESYNC;
1571         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1572             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1573             owp->nd_stable = NFSV3WRITE_DATASYNC;
1574         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1575
1576         /*
1577          * If nfsd had anything else coalesced into it, transfer them
1578          * to owp, otherwise their replies will never get sent.
1579          */
1580         for (p = nfsd->nd_coalesce.lh_first; p;
1581              p = nfsd->nd_coalesce.lh_first) {
1582             LIST_REMOVE(p, nd_tq);
1583             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1584         }
1585 }
1586
1587 /*
1588  * nfs create service
1589  * now does a truncate to 0 length via setattr if it already exists
      *
      * Serves both protocol versions; info.v3 (taken from ND_NFSV3) selects
      * the v3 or v2 argument parsing and reply layout.  The name is looked
      * up with nfs_namei(NLC_CREATE).  A missing target is created with
      * VOP_NCREATE (VREG/VSOCK) or VOP_NMKNOD (VCHR/VBLK/VFIFO); an existing
      * target is left in place and only gets a VOP_SETATTR of the size when
      * va_size is not -1.  The reply mbuf chain is handed back via *mrq.
      *
      * NOTE(review): the *OUT() macros used below are defined outside this
      * function and may jump to nfsmout; confirm their exact semantics
      * before reordering any statement here.
1590  */
1591 int
1592 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1593              struct thread *td, struct mbuf **mrq)
1594 {
1595         struct sockaddr *nam = nfsd->nd_nam;
1596         struct ucred *cred = &nfsd->nd_cr;
1597         struct nfs_fattr *fp;
1598         struct vattr va, dirfor, diraft;
1599         struct vattr *vap = &va;
1600         struct nfsv2_sattr *sp;
1601         u_int32_t *tl;
1602         struct nlookupdata nd;
1603         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1604         udev_t rdev = NOUDEV;
1605         caddr_t cp;
1606         int how, exclusive_flag = 0;
1607         struct vnode *dirp;
1608         struct vnode *dvp;
1609         struct vnode *vp;
1610         struct mount *mp;
1611         nfsfh_t nfh;
1612         fhandle_t *fhp;
1613         u_quad_t tempsize;
1614         u_char cverf[NFSX_V3CREATEVERF];
1615         struct nfsm_info info;
1616
1617         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /*
              * Put nd and the vnode pointers into a known state first so
              * the nfsmout cleanup is safe no matter where we bail out.
              */
1618         nlookup_zero(&nd);
1619         dirp = NULL;
1620         dvp = NULL;
1621         vp = NULL;
1622
1623         info.mrep = nfsd->nd_mrep;
1624         info.mreq = NULL;
1625         info.md = nfsd->nd_md;
1626         info.dpos = nfsd->nd_dpos;
1627         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1628
1629         fhp = &nfh.fh_generic;
1630         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1631         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1632
1633         /*
1634          * Call namei and do initial cleanup to get a few things
1635          * out of the way.  If we get an initial error we cleanup
1636          * and return here to avoid special-casing the invalid nd
1637          * structure through the rest of the case.  dirp may be
1638          * set even if an error occurs, but the nd structure will not
1639          * be valid at all if an error occurs so we have to invalidate it
1640          * prior to calling nfsm_reply ( which might goto nfsmout ).
1641          */
1642         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1643                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1644                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* mp is only consumed by the nfsrv_access() call further down */
1645         mp = vfs_getvfs(&fhp->fh_fsid);
1646
             /*
              * v3 samples the directory attributes before the operation
              * (they go into the wcc data of the reply); v2 has no further
              * use for dirp and drops its reference immediately.
              */
1647         if (dirp) {
1648                 if (info.v3) {
1649                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1650                 } else {
1651                         vrele(dirp);
1652                         dirp = NULL;
1653                 }
1654         }
1655         if (error) {
1656                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1657                                       NFSX_WCCDATA(info.v3), &error));
1658                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1659                                  diraft_ret, &diraft);
1660                 error = 0;
1661                 goto nfsmout;
1662         }
1663
1664         /*
1665          * No error.  Continue.  State:
1666          *
1667          *      dirp            may be valid
1668          *      vp              may be valid or NULL if the target does not
1669          *                      exist.
1670          *      dvp             is valid
1671          *
1672          * The error state is set through the code and we may also do some
1673          * opportunistic releasing of vnodes to avoid holding locks through
1674          * NFS I/O.  The cleanup at the end is a catch-all
1675          */
1676
1677         VATTR_NULL(vap);
1678         if (info.v3) {
                     /* v3: the first word of the arguments is the create mode */
1679                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1680                 how = fxdr_unsigned(int, *tl);
1681                 switch (how) {
1682                 case NFSV3CREATE_GUARDED:
1683                         if (vp) {
1684                                 error = EEXIST;
1685                                 break;
1686                         }
1687                         /* fall through */
1688                 case NFSV3CREATE_UNCHECKED:
1689                         ERROROUT(nfsm_srvsattr(&info, vap));
1690                         break;
1691                 case NFSV3CREATE_EXCLUSIVE:
                             /* keep the client's verifier for the checks below */
1692                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1693                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
1694                         exclusive_flag = 1;
1695                         break;
1696                 };
1697                 vap->va_type = VREG;
1698         } else {
                     /*
                      * v2: type and mode come from sa_mode; sa_size carries
                      * either the file size (VREG) or the device number.
                      */
1699                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1700                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1701                 if (vap->va_type == VNON)
1702                         vap->va_type = VREG;
1703                 vap->va_mode = nfstov_mode(sp->sa_mode);
1704                 switch (vap->va_type) {
1705                 case VREG:
1706                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1707                         if (tsize != -1)
1708                                 vap->va_size = (u_quad_t)tsize;
1709                         break;
1710                 case VCHR:
1711                 case VBLK:
1712                 case VFIFO:
1713                         rdev = fxdr_unsigned(long, sp->sa_size);
1714                         break;
1715                 default:
1716                         break;
1717                 };
1718         }
1719
1720         /*
1721          * Iff doesn't exist, create it
1722          * otherwise just truncate to 0 length
1723          *   should I set the mode too ?
1724          *
1725          * The only possible error we can have at this point is EEXIST.
1726          * vp will also be non-NULL in that case.
1727          */
1728         if (vp == NULL) {
1729                 if (vap->va_mode == (mode_t)VNOVAL)
1730                         vap->va_mode = 0;
1731                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1732                         vn_unlock(dvp);
1733                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1734                                             nd.nl_cred, vap);
1735                         vrele(dvp);
1736                         dvp = NULL;
1737                         if (error == 0) {
                                     /*
                                      * Exclusive create: store the verifier
                                      * in va_atime of the new file; clearing
                                      * exclusive_flag skips the comparison
                                      * done further down.
                                      */
1738                                 if (exclusive_flag) {
1739                                         exclusive_flag = 0;
1740                                         VATTR_NULL(vap);
1741                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1742                                                 NFSX_V3CREATEVERF);
1743                                         error = VOP_SETATTR(vp, vap, cred);
1744                                 }
1745                         }
1746                 } else if (
1747                         vap->va_type == VCHR || 
1748                         vap->va_type == VBLK ||
1749                         vap->va_type == VFIFO
1750                 ) {
1751                         /*
1752                          * Handle SysV FIFO node special cases.  All other
1753                          * devices require super user to access.
1754                          */
1755                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1756                                 vap->va_type = VFIFO;
1757                         if (vap->va_type != VFIFO &&
1758                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1759                                 goto nfsmreply0;
1760                         }
1761                         vap->va_rmajor = umajor(rdev);
1762                         vap->va_rminor = uminor(rdev);
1763
1764                         vn_unlock(dvp);
1765                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1766                         vrele(dvp);
1767                         dvp = NULL;
1768                         if (error)
1769                                 goto nfsmreply0;
1770 #if 0
1771                         /*
1772                          * XXX what is this junk supposed to do ?
1773                          */
1774
1775                         vput(vp);
1776                         vp = NULL;
1777
1778                         /*
1779                          * release dvp prior to lookup
1780                          */
1781                         vput(dvp);
1782                         dvp = NULL;
1783
1784                         /*
1785                          * Setup for lookup. 
1786                          *
1787                          * Even though LOCKPARENT was cleared, ni_dvp may
1788                          * be garbage. 
1789                          */
1790                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1791                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1792                         nd.ni_cnd.cn_td = td;
1793                         nd.ni_cnd.cn_cred = cred;
1794
1795                         error = lookup(&nd);
1796                         nd.ni_dvp = NULL;
1797
1798                         if (error != 0) {
1799                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1800                                                       0, &error));
1801                                 /* fall through on certain errors */
1802                         }
1803                         nfsrv_object_create(nd.ni_vp);
1804                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1805                                 error = EINVAL;
1806                                 goto nfsmreply0;
1807                         }
1808 #endif
1809                 } else {
1810                         error = ENXIO;
1811                 }
1812         } else {
                     /*
                      * Target already exists: require write access, then
                      * apply only the size through VOP_SETATTR.
                      */
1813                 if (vap->va_size != -1) {
1814                         error = nfsrv_access(mp, vp, VWRITE, cred,
1815                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1816                         if (!error) {
1817                                 tempsize = vap->va_size;
1818                                 VATTR_NULL(vap);
1819                                 vap->va_size = tempsize;
1820                                 error = VOP_SETATTR(vp, vap, cred);
1821                         }
1822                 }
1823         }
1824
             /* Build the file handle and fetch attributes for the reply */
1825         if (!error) {
1826                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1827                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1828                 if (!error)
1829                         error = VOP_GETATTR(vp, vap);
1830         }
1831         if (info.v3) {
                     /*
                      * exclusive_flag is still set only when the target
                      * already existed; a verifier mismatch against its
                      * va_atime is reported as EEXIST.
                      */
1832                 if (exclusive_flag && !error &&
1833                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1834                         error = EEXIST;
                     /* NOTE(review): dirp is used here without a NULL check */
1835                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1836                 vrele(dirp);
1837                 dirp = NULL;
1838         }
1839         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1840                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1841                               NFSX_WCCDATA(info.v3),
1842                               &error));
1843         if (info.v3) {
1844                 if (!error) {
1845                         nfsm_srvpostop_fh(&info, fhp);
1846                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1847                 }
1848                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1849                                  diraft_ret, &diraft);
1850                 error = 0;
1851         } else {
1852                 nfsm_srvfhtom(&info, fhp);
1853                 fp = nfsm_build(&info, NFSX_V2FATTR);
1854                 nfsm_srvfattr(nfsd, vap, fp);
1855         }
1856         goto nfsmout;
1857
             /* Reply built with a payload size of 0, used by the mknod path */
1858 nfsmreply0:
1859         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1860         error = 0;
1861         /* fall through */
1862
             /* Catch-all cleanup: release whatever is still held */
1863 nfsmout:
1864         *mrq = info.mreq;
1865         if (dirp)
1866                 vrele(dirp);
1867         nlookup_done(&nd);
1868         if (dvp) {
1869                 if (dvp == vp)
1870                         vrele(dvp);
1871                 else
1872                         vput(dvp);
1873         }
1874         if (vp)
1875                 vput(vp);
1876         return (error);
1877 }
1878
1879 /*
1880  * nfs v3 mknod service
1881  */
1882 int
1883 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1884             struct thread *td, struct mbuf **mrq)
1885 {
1886         struct sockaddr *nam = nfsd->nd_nam;
1887         struct ucred *cred = &nfsd->nd_cr;
1888         struct vattr va, dirfor, diraft;
1889         struct vattr *vap = &va;
1890         u_int32_t *tl;
1891         struct nlookupdata nd;
1892         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1893         enum vtype vtyp;
1894         struct vnode *dirp;
1895         struct vnode *dvp;
1896         struct vnode *vp;
1897         nfsfh_t nfh;
1898         fhandle_t *fhp;
1899         struct nfsm_info info;
1900
1901         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1902         nlookup_zero(&nd);
1903         dirp = NULL;
1904         dvp = NULL;
1905         vp = NULL;
1906
1907         info.mrep = nfsd->nd_mrep;
1908         info.mreq = NULL;
1909         info.md = nfsd->nd_md;
1910         info.dpos = nfsd->nd_dpos;
1911
1912         fhp = &nfh.fh_generic;
1913         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1914         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1915
1916         /*
1917          * Handle nfs_namei() call.  If an error occurs, the nd structure
1918          * is not valid.  However, nfsm_*() routines may still jump to
1919          * nfsmout.
1920          */
1921
1922         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1923                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1924                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1925         if (dirp)
1926                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1927         if (error) {
1928                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1929                            NFSX_WCCDATA(1), &error));
1930                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1931                                  diraft_ret, &diraft);
1932                 error = 0;
1933                 goto nfsmout;
1934         }
1935         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1936         vtyp = nfsv3tov_type(*tl);
1937         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1938                 error = NFSERR_BADTYPE;
1939                 goto out;
1940         }
1941         VATTR_NULL(vap);
1942         ERROROUT(nfsm_srvsattr(&info, vap));
1943         if (vtyp == VCHR || vtyp == VBLK) {
1944                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1945                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1946                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1947         }
1948
1949         /*
1950          * Iff doesn't exist, create it.
1951          */
1952         if (vp) {
1953                 error = EEXIST;
1954                 goto out;
1955         }
1956         vap->va_type = vtyp;
1957         if (vap->va_mode == (mode_t)VNOVAL)
1958                 vap->va_mode = 0;
1959         if (vtyp == VSOCK) {
1960                 vn_unlock(dvp);
1961                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1962                 vrele(dvp);
1963                 dvp = NULL;
1964         } else {
1965                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1966                         goto out;
1967
1968                 vn_unlock(dvp);
1969                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1970                 vrele(dvp);
1971                 dvp = NULL;
1972                 if (error)
1973                         goto out;
1974         }
1975
1976         /*
1977          * send response, cleanup, return.
1978          */
1979 out:
1980         nlookup_done(&nd);
1981         if (dvp) {
1982                 if (dvp == vp)
1983                         vrele(dvp);
1984                 else
1985                         vput(dvp);
1986                 dvp = NULL;
1987         }
1988         if (!error) {
1989                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1990                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1991                 if (!error)
1992                         error = VOP_GETATTR(vp, vap);
1993         }
1994         if (vp) {
1995                 vput(vp);
1996                 vp = NULL;
1997         }
1998         diraft_ret = VOP_GETATTR(dirp, &diraft);
1999         if (dirp) {
2000                 vrele(dirp);
2001                 dirp = NULL;
2002         }
2003         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2004                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
2005                               NFSX_WCCDATA(1), &error));
2006         if (!error) {
2007                 nfsm_srvpostop_fh(&info, fhp);
2008                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2009         }
2010         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2011                          diraft_ret, &diraft);
2012         *mrq = info.mreq;
2013         return (0);
2014 nfsmout:
2015         *mrq = info.mreq;
2016         if (dirp)
2017                 vrele(dirp);
2018         nlookup_done(&nd);
2019         if (dvp) {
2020                 if (dvp == vp)
2021                         vrele(dvp);
2022                 else
2023                         vput(dvp);
2024         }
2025         if (vp)
2026                 vput(vp);
2027         return (error);
2028 }
2029
2030 /*
2031  * nfs remove service
2032  */
2033 int
2034 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2035              struct thread *td, struct mbuf **mrq)
2036 {
2037         struct sockaddr *nam = nfsd->nd_nam;
2038         struct ucred *cred = &nfsd->nd_cr;
2039         struct nlookupdata nd;
2040         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2041         struct vnode *dirp;
2042         struct vnode *dvp;
2043         struct vnode *vp;
2044         struct vattr dirfor, diraft;
2045         nfsfh_t nfh;
2046         fhandle_t *fhp;
2047         struct nfsm_info info;
2048
2049         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2050         nlookup_zero(&nd);
2051         dirp = NULL;
2052         dvp = NULL;
2053         vp = NULL;
2054
2055         info.mrep = nfsd->nd_mrep;
2056         info.mreq = NULL;
2057         info.md = nfsd->nd_md;
2058         info.dpos = nfsd->nd_dpos;
2059         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2060
2061         fhp = &nfh.fh_generic;
2062         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2063         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2064
2065         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2066                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2067                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2068         if (dirp) {
2069                 if (info.v3)
2070                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2071         }
2072         if (error == 0) {
2073                 if (vp->v_type == VDIR) {
2074                         error = EPERM;          /* POSIX */
2075                         goto out;
2076                 }
2077                 /*
2078                  * The root of a mounted filesystem cannot be deleted.
2079                  */
2080                 if (vp->v_flag & VROOT) {
2081                         error = EBUSY;
2082                         goto out;
2083                 }
2084 out:
2085                 if (!error) {
2086                         if (dvp != vp)
2087                                 vn_unlock(dvp);
2088                         if (vp) {
2089                                 vput(vp);
2090                                 vp = NULL;
2091                         }
2092                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2093                         vrele(dvp);
2094                         dvp = NULL;
2095                 }
2096         }
2097         if (dirp && info.v3)
2098                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2099         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2100         if (info.v3) {
2101                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2102                                  diraft_ret, &diraft);
2103                 error = 0;
2104         }
2105 nfsmout:
2106         *mrq = info.mreq;
2107         nlookup_done(&nd);
2108         if (dirp)
2109                 vrele(dirp);
2110         if (dvp) {
2111                 if (dvp == vp)
2112                         vrele(dvp);
2113                 else
2114                         vput(dvp);
2115         }
2116         if (vp)
2117                 vput(vp);
2118         return(error);
2119 }
2120
2121 /*
2122  * nfs rename service
      *
      * Performs two nfs_namei() lookups (NLC_RENAME_SRC, then
      * NLC_RENAME_DST), validates the source/target combination and hands
      * the two namecache handles to VOP_NRENAME.  For v3 the attributes of
      * both directories are sampled before and after the operation for the
      * wcc data.  The reply mbuf chain is handed back through *mrq.
      *
      * NOTE(review): the *OUT() macros are defined outside this function
      * and may jump to nfsmout; the namecache lock/unlock sequence below is
      * order sensitive.
2123  */
2124 int
2125 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2126              struct thread *td, struct mbuf **mrq)
2127 {
2128         struct sockaddr *nam = nfsd->nd_nam;
2129         struct ucred *cred = &nfsd->nd_cr;
2130         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2131         int tdirfor_ret = 1, tdiraft_ret = 1;
2132         struct nlookupdata fromnd, tond;
2133         struct vnode *fvp, *fdirp, *fdvp;
2134         struct vnode *tvp, *tdirp, *tdvp;
2135         struct namecache *ncp;
2136         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2137         nfsfh_t fnfh, tnfh;
2138         fhandle_t *ffhp, *tfhp;
2139         uid_t saved_uid;
2140         struct nfsm_info info;
2141
2142         info.mrep = nfsd->nd_mrep;
2143         info.mreq = NULL;
2144         info.md = nfsd->nd_md;
2145         info.dpos = nfsd->nd_dpos;
2146         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2147
2148         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2149 #ifndef nolint
2150         fvp = NULL;
2151 #endif
2152         ffhp = &fnfh.fh_generic;
2153         tfhp = &tnfh.fh_generic;
2154
2155         /*
2156          * Clear fields in case goto nfsmout occurs from macro.
2157          */
2158
2159         nlookup_zero(&fromnd);
2160         nlookup_zero(&tond);
2161         fdirp = NULL;
2162         tdirp = NULL;
2163
2164         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2165         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2166
2167         /*
2168          * Remember our original uid so that we can reset cr_uid before
2169          * the second nfs_namei() call, in case it is remapped.
2170          */
2171         saved_uid = cred->cr_uid;
2172         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2173                           NULL, NULL,
2174                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2175                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2176         if (fdirp) {
2177                 if (info.v3)
2178                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2179         }
             /*
              * Source lookup failed: reply with wcc data for both
              * directories (the target side still holds its initial
              * "no attributes" return values).
              */
2180         if (error) {
2181                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2182                                       2 * NFSX_WCCDATA(info.v3), &error));
2183                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2184                                  fdiraft_ret, &fdiraft);
2185                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2186                                  tdiraft_ret, &tdiraft);
2187                 error = 0;
2188                 goto nfsmout;
2189         }
2190
2191         /*
2192          * We have to unlock the from ncp before we can safely lookup
2193          * the target ncp.
2194          */
2195         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2196         cache_unlock(&fromnd.nl_nch);
2197         fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2198         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2199         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2200         cred->cr_uid = saved_uid;
2201
2202         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2203                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2204                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2205         if (tdirp) {
2206                 if (info.v3)
2207                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2208         }
2209         if (error)
2210                 goto out1;
2211
2212         /*
2213          * relock the source
              *
              * Try without blocking first.  If that fails, block on the
              * source directly when its ncp address is the higher one;
              * otherwise release the target, take the source, then retake
              * the target.  Every blocking (re)lock is followed by a
              * cache_resolve().
2214          */
2215         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2216                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2217         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2218                 cache_lock(&fromnd.nl_nch);
2219                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2220         } else {
2221                 cache_unlock(&tond.nl_nch);
2222                 cache_lock(&fromnd.nl_nch);
2223                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2224                 cache_lock(&tond.nl_nch);
2225                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2226         }
2227         fromnd.nl_flags |= NLC_NCPISLOCKED;
2228
2229         fvp = fromnd.nl_nch.ncp->nc_vp;
2230         tvp = tond.nl_nch.ncp->nc_vp;
2231
2232         /*
2233          * Set fdvp and tdvp.  We haven't done all the topology checks
2234          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2235          * point).  If we get through the checks these will be guaranteed
2236          * to be non-NULL.
2237          *
2238          * Holding the children ncp's should be sufficient to prevent
2239          * fdvp and tdvp ripouts.
2240          */
2241         if (fromnd.nl_nch.ncp->nc_parent)
2242                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2243         else
2244                 fdvp = NULL;
2245         if (tond.nl_nch.ncp->nc_parent)
2246                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2247         else
2248                 tdvp = NULL;
2249
             /*
              * Validity checks.  Each failure picks a v3 or a v2 error code
              * depending on info.v3.
              */
2250         if (tvp != NULL) {
                     /* directory <-> non-directory mismatches */
2251                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2252                         if (info.v3)
2253                                 error = EEXIST;
2254                         else
2255                                 error = EISDIR;
2256                         goto out;
2257                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2258                         if (info.v3)
2259                                 error = EEXIST;
2260                         else
2261                                 error = ENOTDIR;
2262                         goto out;
2263                 }
                     /* the target directory is a mount point */
2264                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2265                         if (info.v3)
2266                                 error = EXDEV;
2267                         else
2268                                 error = ENOTEMPTY;
2269                         goto out;
2270                 }
2271         }
             /* the source directory is a mount point */
2272         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2273                 if (info.v3)
2274                         error = EXDEV;
2275                 else
2276                         error = ENOTEMPTY;
2277                 goto out;
2278         }
             /* source and target live on different mounts */
2279         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2280                 if (info.v3)
2281                         error = EXDEV;
2282                 else
2283                         error = ENOTEMPTY;
2284                 goto out;
2285         }
             /* the source is the directory that contains the target */
2286         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2287                 if (info.v3)
2288                         error = EINVAL;
2289                 else
2290                         error = ENOTEMPTY;
2291         }
2292
2293         /*
2294          * You cannot rename a source into itself or a subdirectory of itself.
2295          * We check this by traversing the target directory upwards looking
2296          * for a match against the source.
2297          */
2298         if (error == 0) {
2299                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2300                         if (fromnd.nl_nch.ncp == ncp) {
2301                                 error = EINVAL;
2302                                 break;
2303                         }
2304                 }
2305         }
2306
2307         /*
2308          * If source is the same as the destination (that is the
2309          * same vnode with the same name in the same directory),
2310          * then there is nothing to do.
              *
              * -1 is a private marker: it overrides any error set above and
              * is turned into success (0) right below.
2311          */
2312         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2313                 error = -1;
2314 out:
2315         if (!error) {
2316                 /*
2317                  * The VOP_NRENAME function releases all vnode references &
2318                  * locks prior to returning so we need to clear the pointers
2319                  * to bypass cleanup code later on.
                      *
                      * NOTE(review): no pointers are actually cleared here
                      * and the cleanup below only touches fdirp/tdirp and
                      * the two nlookupdata structures, so this comment looks
                      * stale - confirm.
2320                  */
2321                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2322                                     fdvp, tdvp, tond.nl_cred);
2323         } else {
2324                 if (error == -1)
2325                         error = 0;
2326         }
2327         /* fall through */
2328
             /* Build the reply; also the landing point for a failed target lookup */
2329 out1:
2330         if (fdirp)
2331                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2332         if (tdirp)
2333                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2334         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2335                               2 * NFSX_WCCDATA(info.v3), &error));
2336         if (info.v3) {
2337                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2338                                  fdiraft_ret, &fdiraft);
2339                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2340                                  tdiraft_ret, &tdiraft);
2341         }
2342         error = 0;
2343         /* fall through */
2344
             /* Common cleanup: drop directory references and finish both lookups */
2345 nfsmout:
2346         *mrq = info.mreq;
2347         if (tdirp)
2348                 vrele(tdirp);
2349         nlookup_done(&tond);
2350         if (fdirp)
2351                 vrele(fdirp);
2352         nlookup_done(&fromnd);
2353         return (error);
2354 }
2355
2356 /*
2357  * nfs link service
      *
      * Creates a hard link.  The request supplies the file handle of the
      * existing object, the file handle of the target directory and the new
      * name.  The existing object is resolved to xp with nfsrv_fhtovp() and
      * the directory/name pair with nfs_namei(NLC_CREATE); VOP_NLINK() is
      * then issued if no error has been recorded.
      *
      * Errors set locally: EPERM if xp is a directory, EEXIST if the name
      * already resolves to a vnode, EXDEV if xp and dvp are on different
      * mounts.
      *
      * The reply mbuf chain is handed back through *mrq.  For NFSv3 it
      * carries the post-op attributes of xp followed by wcc data for the
      * directory.  The function's return value is the error left at nfsmout.
      *
      * NOTE(review): NEGREPLYOUT/NEGKEEPOUT are defined outside this chunk;
      * they presumably jump to nfsmout on a negative result - confirm.
2358  */
2359 int
2360 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2361            struct thread *td, struct mbuf **mrq)
2362 {
2363         struct sockaddr *nam = nfsd->nd_nam;
2364         struct ucred *cred = &nfsd->nd_cr;
2365         struct nlookupdata nd;
             /*
              * dirfor_ret/diraft_ret/getret start at 1 and are overwritten
              * with the VOP_GETATTR() result when the attributes are fetched.
              */
2366         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2367         int getret = 1;
2368         struct vnode *dirp;
2369         struct vnode *dvp;
2370         struct vnode *vp;
2371         struct vnode *xp;
2372         struct mount *xmp;
2373         struct vattr dirfor, diraft, at;
2374         nfsfh_t nfh, dnfh;
2375         fhandle_t *fhp, *dfhp;
2376         struct nfsm_info info;
2377
2378         info.mrep = nfsd->nd_mrep;
2379         info.mreq = NULL;
2380         info.md = nfsd->nd_md;
2381         info.dpos = nfsd->nd_dpos;
2382         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2383
2384         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2385         nlookup_zero(&nd);
2386         dirp = dvp = vp = xp = NULL;
2387         xmp = NULL;
2388
             /* fhp: existing object, dfhp: target directory, len: name length */
2389         fhp = &nfh.fh_generic;
2390         dfhp = &dnfh.fh_generic;
2391         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2392         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2393         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2394
2395         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2396                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2397         if (error) {
                     /*
                      * Could not resolve the source handle: reply with the
                      * still-unset attribute flags and leave via nfsmout.
                      */
2398                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2399                                       NFSX_POSTOPATTR(info.v3) +
2400                                       NFSX_WCCDATA(info.v3),
2401                                       &error));
2402                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2403                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2404                                  diraft_ret, &diraft);
2405                 xp = NULL;
2406                 error = 0;
2407                 goto nfsmout;
2408         }
2409         if (xp->v_type == VDIR) {
2410                 error = EPERM;          /* POSIX */
2411                 goto out1;
2412         }
2413
2414         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2415                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2416                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* Pre-op directory attributes are only collected for NFSv3. */
2417         if (dirp) {
2418                 if (info.v3)
2419                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2420         }
2421         if (error)
2422                 goto out1;
2423
             /* A non-NULL vp means the target name already exists. */
2424         if (vp != NULL) {
2425                 error = EEXIST;
2426                 goto out;
2427         }
2428         if (xp->v_mount != dvp->v_mount)
2429                 error = EXDEV;
2430 out:
             /*
              * Only issue the link when nothing failed above.  dvp is
              * unlocked before the VOP and its reference dropped afterwards;
              * clearing dvp keeps nfsmout from releasing it again.
              */
2431         if (!error) {
2432                 vn_unlock(dvp);
2433                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2434                 vrele(dvp);
2435                 dvp = NULL;
2436         }
2437         /* fall through */
2438
2439 out1:
             /* Collect post-op attributes, then build the reply. */
2440         if (info.v3)
2441                 getret = VOP_GETATTR(xp, &at);
2442         if (dirp)
2443                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2444         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2445                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2446                               &error));
2447         if (info.v3) {
2448                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2449                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2450                                  diraft_ret, &diraft);
2451                 error = 0;
2452         }
2453         /* fall through */
2454
2455 nfsmout:
             /* Common exit: publish the reply and drop whatever is still held. */
2456         *mrq = info.mreq;
2457         nlookup_done(&nd);
2458         if (dirp)
2459                 vrele(dirp);
2460         if (xp)
2461                 vrele(xp);
2462         if (dvp) {
2463                 if (dvp == vp)
2464                         vrele(dvp);
2465                 else
2466                         vput(dvp);
2467         }
2468         if (vp)
2469                 vput(vp);
2470         return(error);
2471 }
2472
2473 /*
2474  * nfs symbolic link service
2475  */
2476 int
2477 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2478               struct thread *td, struct mbuf **mrq)
2479 {
2480         struct sockaddr *nam = nfsd->nd_nam;
2481         struct ucred *cred = &nfsd->nd_cr;
2482         struct vattr va, dirfor, diraft;
2483         struct nlookupdata nd;
2484         struct vattr *vap = &va;
2485         struct nfsv2_sattr *sp;
2486         char *pathcp = NULL;
2487         struct uio io;
2488         struct iovec iv;
2489         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2490         struct vnode *dirp;
2491         struct vnode *vp;
2492         struct vnode *dvp;
2493         nfsfh_t nfh;
2494         fhandle_t *fhp;
2495         struct nfsm_info info;
2496
2497         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2498         nlookup_zero(&nd);
2499         dirp = NULL;
2500         dvp = NULL;
2501         vp = NULL;
2502
2503         info.mrep = nfsd->nd_mrep;
2504         info.mreq =  NULL;
2505         info.md = nfsd->nd_md;
2506         info.dpos = nfsd->nd_dpos;
2507         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2508
2509         fhp = &nfh.fh_generic;
2510         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2511         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2512
2513         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2514                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2515                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2516         if (dirp) {
2517                 if (info.v3)
2518                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2519         }
2520         if (error)
2521                 goto out;
2522
2523         VATTR_NULL(vap);
2524         if (info.v3) {
2525                 ERROROUT(nfsm_srvsattr(&info, vap));
2526         }
2527         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2528         pathcp = kmalloc(len2 + 1, M_TEMP, M_WAITOK);
2529         iv.iov_base = pathcp;
2530         iv.iov_len = len2;
2531         io.uio_resid = len2;
2532         io.uio_offset = 0;
2533         io.uio_iov = &iv;
2534         io.uio_iovcnt = 1;
2535         io.uio_segflg = UIO_SYSSPACE;
2536         io.uio_rw = UIO_READ;
2537         io.uio_td = NULL;
2538         ERROROUT(nfsm_mtouio(&info, &io, len2));
2539         if (info.v3 == 0) {
2540                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2541                 vap->va_mode = nfstov_mode(sp->sa_mode);
2542         }
2543         *(pathcp + len2) = '\0';
2544         if (vp) {
2545                 error = EEXIST;
2546                 goto out;
2547         }
2548
2549         if (vap->va_mode == (mode_t)VNOVAL)
2550                 vap->va_mode = 0;
2551         if (dvp != vp)
2552                 vn_unlock(dvp);
2553         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2554         vrele(dvp);
2555         dvp = NULL;
2556         if (error == 0) {
2557                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2558                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2559                 if (!error)
2560                         error = VOP_GETATTR(vp, vap);
2561         }
2562
2563 out:
2564         if (dvp) {
2565                 if (dvp == vp)
2566                         vrele(dvp);
2567                 else
2568                         vput(dvp);
2569         }
2570         if (vp) {
2571                 vput(vp);
2572                 vp = NULL;
2573         }
2574         if (pathcp) {
2575                 kfree(pathcp, M_TEMP);
2576                 pathcp = NULL;
2577         }
2578         if (dirp) {
2579                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2580                 vrele(dirp);
2581                 dirp = NULL;
2582         }
2583         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2584                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2585                               NFSX_WCCDATA(info.v3),
2586                               &error));
2587         if (info.v3) {
2588                 if (!error) {
2589                         nfsm_srvpostop_fh(&info, fhp);
2590                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2591                 }
2592                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2593                                  diraft_ret, &diraft);
2594         }
2595         error = 0;
2596         /* fall through */
2597
2598 nfsmout:
2599         *mrq = info.mreq;
2600         nlookup_done(&nd);
2601         if (vp)
2602                 vput(vp);
2603         if (dirp)
2604                 vrele(dirp);
2605         if (pathcp)
2606                 kfree(pathcp, M_TEMP);
2607         return (error);
2608 }
2609
2610 /*
2611  * nfs mkdir service
      *
      * Resolves the directory file handle and new name from the request with
      * nfs_namei(NLC_CREATE), decodes the requested attributes (a full sattr
      * for NFSv3, a single mode word for NFSv2) and creates the directory
      * with VOP_NMKDIR().  Fails with EEXIST if the name already resolves to
      * a vnode.  On success the new directory's file handle and attributes
      * are fetched for the reply.
      *
      * The reply mbuf chain is handed back through *mrq; the return value is
      * the error left at nfsmout.
      *
      * NOTE(review): the *OUT() macros are defined outside this chunk; they
      * presumably jump to nfsmout on failure - confirm.
2612  */
2613 int
2614 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2615             struct thread *td, struct mbuf **mrq)
2616 {
2617         struct sockaddr *nam = nfsd->nd_nam;
2618         struct ucred *cred = &nfsd->nd_cr;
2619         struct vattr va, dirfor, diraft;
2620         struct vattr *vap = &va;
2621         struct nfs_fattr *fp;
2622         struct nlookupdata nd;
2623         u_int32_t *tl;
             /* *_ret start at 1 and are overwritten by VOP_GETATTR() results */
2624         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2625         struct vnode *dirp;
2626         struct vnode *dvp;
2627         struct vnode *vp;
2628         nfsfh_t nfh;
2629         fhandle_t *fhp;
2630         struct nfsm_info info;
2631
2632         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2633         nlookup_zero(&nd);
2634         dirp = NULL;
2635         dvp = NULL;
2636         vp = NULL;
2637
2638         info.dpos = nfsd->nd_dpos;
2639         info.mrep = nfsd->nd_mrep;
2640         info.mreq =  NULL;
2641         info.md = nfsd->nd_md;
2642         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2643
             /* Directory file handle and length of the new name. */
2644         fhp = &nfh.fh_generic;
2645         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2646         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2647
2648         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2649                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2650                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* Pre-op directory attributes are only collected for NFSv3. */
2651         if (dirp) {
2652                 if (info.v3)
2653                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2654         }
2655         if (error) {
                     /* Lookup failed: reply with wcc data only and leave. */
2656                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2657                                       NFSX_WCCDATA(info.v3), &error));
2658                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2659                                  diraft_ret, &diraft);
2660                 error = 0;
2661                 goto nfsmout;
2662         }
2663         VATTR_NULL(vap);
2664         if (info.v3) {
2665                 ERROROUT(nfsm_srvsattr(&info, vap));
2666         } else {
2667                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2668                 vap->va_mode = nfstov_mode(*tl++);
2669         }
2670
2671         /*
2672          * At this point nd.ni_dvp is referenced and exclusively locked and
2673          * nd.ni_vp, if it exists, is referenced but not locked.
              *
              * NOTE(review): nd.ni_dvp / nd.ni_vp do not appear in this
              * function; the corresponding locals here are dvp and vp.  The
              * lock state described above should be re-verified against
              * nfs_namei().
2674          */
2675
2676         vap->va_type = VDIR;
2677         if (vp != NULL) {
2678                 error = EEXIST;
2679                 goto out;
2680         }
2681
2682         /*
2683          * Issue mkdir op.  Since SAVESTART is not set, the pathname 
2684          * component is freed by the VOP call.  This will fill-in
2685          * nd.ni_vp, reference, and exclusively lock it.
              *
              * NOTE(review): no SAVESTART flag is used in this function and
              * the result is returned in the local vp (passed as &vp), not
              * nd.ni_vp.  dvp is unlocked before the VOP and its reference
              * dropped afterwards; clearing dvp keeps nfsmout from releasing
              * it again.
2686          */
2687         if (vap->va_mode == (mode_t)VNOVAL)
2688                 vap->va_mode = 0;
2689         vn_unlock(dvp);
2690         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2691         vrele(dvp);
2692         dvp = NULL;
2693
             /* On success fetch the new directory's file handle and attributes. */
2694         if (error == 0) {
2695                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2696                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2697                 if (error == 0)
2698                         error = VOP_GETATTR(vp, vap);
2699         }
2700 out:
             /* Collect post-op directory attributes, then build the reply. */
2701         if (dirp)
2702                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2703         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2704                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2705                               NFSX_WCCDATA(info.v3),
2706                               &error));
2707         if (info.v3) {
                     /* v3: handle + attributes only on success, wcc data always */
2708                 if (!error) {
2709                         nfsm_srvpostop_fh(&info, fhp);
2710                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2711                 }
2712                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2713                                  diraft_ret, &diraft);
2714         } else {
                     /* v2: file handle followed by a v2 fattr block */
2715                 nfsm_srvfhtom(&info, fhp);
2716                 fp = nfsm_build(&info, NFSX_V2FATTR);
2717                 nfsm_srvfattr(nfsd, vap, fp);
2718         }
2719         error = 0;
2720         /* fall through */
2721
2722 nfsmout:
             /* Common exit: publish the reply and drop whatever is still held. */
2723         *mrq = info.mreq;
2724         nlookup_done(&nd);
2725         if (dirp)
2726                 vrele(dirp);
2727         if (dvp) {
2728                 if (dvp == vp)
2729                         vrele(dvp);
2730                 else
2731                         vput(dvp);
2732         }
2733         if (vp)
2734                 vput(vp);
2735         return (error);
2736 }
2737
2738 /*
2739  * nfs rmdir service
      *
      * Resolves the directory file handle and name from the request with
      * nfs_namei(NLC_DELETE) and removes the named directory with
      * VOP_NRMDIR().  Fails with ENOTDIR if the target vnode is not a
      * directory and with EBUSY if it has VROOT set.
      *
      * The reply mbuf chain is handed back through *mrq; for NFSv3 it
      * carries wcc data for the parent directory.  The return value is the
      * error left at nfsmout.
      *
      * NOTE(review): NEGREPLYOUT/NEGKEEPOUT are defined outside this chunk;
      * they presumably jump to nfsmout on a negative result - confirm.
2740  */
2741 int
2742 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2743             struct thread *td, struct mbuf **mrq)
2744 {
2745         struct sockaddr *nam = nfsd->nd_nam;
2746         struct ucred *cred = &nfsd->nd_cr;
             /* *_ret start at 1 and are overwritten by VOP_GETATTR() results */
2747         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2748         struct vnode *dirp;
2749         struct vnode *dvp;
2750         struct vnode *vp;
2751         struct vattr dirfor, diraft;
2752         nfsfh_t nfh;
2753         fhandle_t *fhp;
2754         struct nlookupdata nd;
2755         struct nfsm_info info;
2756
2757         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2758         nlookup_zero(&nd);
2759         dirp = NULL;
2760         dvp = NULL;
2761         vp = NULL;
2762
2763         info.mrep = nfsd->nd_mrep;
2764         info.mreq = NULL;
2765         info.md = nfsd->nd_md;
2766         info.dpos = nfsd->nd_dpos;
2767         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2768
             /* Parent directory file handle and length of the name. */
2769         fhp = &nfh.fh_generic;
2770         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2771         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2772
2773         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2774                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2775                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* Pre-op directory attributes are only collected for NFSv3. */
2776         if (dirp) {
2777                 if (info.v3)
2778                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2779         }
2780         if (error) {
                     /* Lookup failed: reply with wcc data only and leave. */
2781                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2782                                       NFSX_WCCDATA(info.v3), &error));
2783                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2784                                  diraft_ret, &diraft);
2785                 error = 0;
2786                 goto nfsmout;
2787         }
2788         if (vp->v_type != VDIR) {
2789                 error = ENOTDIR;
2790                 goto out;
2791         }
2792
2793         /*
2794          * The root of a mounted filesystem cannot be deleted.
2795          */
2796         if (vp->v_flag & VROOT)
2797                 error = EBUSY;
2798 out:
2799         /*
2800          * Issue or abort op.  Since SAVESTART is not set, path name
2801          * component is freed by the VOP after either.
              *
              * NOTE(review): no SAVESTART flag is used in this function.
              * What the code does: when no error is pending, dvp is unlocked
              * (unless it is the same vnode as vp), vp is released with
              * vput(), the VOP is issued and dvp's reference is dropped.
              * Both pointers are cleared so nfsmout does not release them
              * again.  On error the VOP is skipped and nfsmout does the
              * releasing.
2802          */
2803         if (!error) {
2804                 if (dvp != vp)
2805                         vn_unlock(dvp);
2806                 vput(vp);
2807                 vp = NULL;
2808                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2809                 vrele(dvp);
2810                 dvp = NULL;
2811         }
             /*
              * NOTE(review): nlookup_done() is called here and again at
              * nfsmout; presumably it tolerates being called twice - confirm.
              */
2812         nlookup_done(&nd);
2813
2814         if (dirp)
2815                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2816         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2817         if (info.v3) {
2818                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2819                                  diraft_ret, &diraft);
2820                 error = 0;
2821         }
2822         /* fall through */
2823
2824 nfsmout:
             /* Common exit: publish the reply and drop whatever is still held. */
2825         *mrq = info.mreq;
2826         if (dvp) {
2827                 if (dvp == vp)
2828                         vrele(dvp);
2829                 else
2830                         vput(dvp);
2831         }
2832         nlookup_done(&nd);
2833         if (dirp)
2834                 vrele(dirp);
2835         if (vp)
2836                 vput(vp);
2837         return(error);
2838 }
2839
2840 /*
2841  * nfs readdir service
2842  * - mallocs what it thinks is enough to read
2843  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2844  * - calls VOP_READDIR()
2845  * - loops around building the reply
2846  *      if the output generated exceeds count break out of loop
2847  *      The nfsm_clget macro is used here so that the reply will be packed
2848  *      tightly in mbuf clusters.
2849  * - it only knows that it has encountered eof when the VOP_READDIR()
2850  *      reads nothing
2851  * - as such one readdir rpc will return eof false although you are there
2852  *      and then the next will return eof
2853  * - it trims out records with d_fileno == 0
2854  *      this doesn't matter for Unix clients, but they might confuse clients
2855  *      for other os'.
2856  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2857  *      than requested, but this may not apply to all filesystems. For
2858  *      example, client NFS does not { although it is never remote mounted
2859  *      anyhow }
2860  *     The alternate call nfsrv_readdirplus() does lookups as well.
2861  * PS: The NFS protocol spec. does not clarify what the "count" byte
2862  *      argument is a count of.. just name strings and file id's or the
2863  *      entire reply rpc or ...
2864  *      I tried just file name and id sizes and it confused the Sun client,
2865  *      so I am using the full rpc size now. The "paranoia.." comment refers
2866  *      to including the status longwords that are not a part of the dir.
2867  *      "entry" structures, but are in the rpc.
2868  */
/*
 * Fixed-layout record built from 32-bit words.  nfsrv_readdirplus()
 * declares one ("struct flrep fl") and aliases fl_nfh as a fhandle_t.
 * The two arrays are sized in u_int32_t units from the byte constants
 * NFSX_V3FATTR and NFSX_V3FH.
 *
 * NOTE(review): the per-field meanings below are inferred from the field
 * names only - confirm against nfsrv_readdirplus().
 */
2869 struct flrep {
2870         nfsuint64       fl_off;         /* presumably the entry's offset cookie */
2871         u_int32_t       fl_postopok;    /* presumably "attributes follow" flag */
2872         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2873         u_int32_t       fl_fhok;        /* presumably "file handle follows" flag */
2874         u_int32_t       fl_fhsize;      /* presumably the file handle length */
2875         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2876 };
2877
/*
 * nfsrv_readdir - READDIR service for NFSv2 and NFSv3.
 *
 * Request: a file handle, a directory offset (64-bit for v3, 32-bit for
 * v2) and a byte count.  The v3 request carries two further words between
 * offset and count; the code that would read them as a verifier is under
 * #if 0, so they are skipped.
 *
 * The directory is read with VOP_READDIR() into a temporary buffer (rbuf)
 * together with an array of per-entry offset cookies.  Entries with
 * d_ino == 0, whiteouts (DT_WHT) and entries whose cookie is <= the
 * requested offset are skipped; the remaining ones are encoded into the
 * reply until the running length would exceed the requested count.
 *
 * The reply mbuf chain is handed back through *mrq; the return value is
 * the error left at nfsmout.
 *
 * NOTE(review): NEGREPLYOUT/NULLOUT/NEGKEEPOUT and nfsm_clget() are defined
 * outside this chunk; the *OUT() macros presumably jump to nfsmout on
 * failure, and nfsm_clget() presumably returns the current write position
 * (switching mbufs when bp reaches be) - confirm.
 */
2878 int
2879 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2880               struct thread *td, struct mbuf **mrq)
2881 {
2882         struct sockaddr *nam = nfsd->nd_nam;
2883         struct ucred *cred = &nfsd->nd_cr;
2884         char *bp, *be;
2885         struct dirent *dp;
2886         caddr_t cp;
2887         u_int32_t *tl;
2888         struct mbuf *mp1, *mp2;
2889         char *cpos, *cend, *rbuf;
2890         struct vnode *vp = NULL;
2891         struct mount *mp = NULL;
2892         struct vattr at;
2893         nfsfh_t nfh;
2894         fhandle_t *fhp;
2895         struct uio io;
2896         struct iovec iv;
2897         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2898         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2899         u_quad_t off, toff;
2900 #if 0
2901         u_quad_t verf;
2902 #endif
2903         off_t *cookies = NULL, *cookiep;
2904         struct nfsm_info info;
2905
2906         info.mrep = nfsd->nd_mrep;
2907         info.mreq = NULL;
2908         info.md = nfsd->nd_md;
2909         info.dpos = nfsd->nd_dpos;
2910         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2911
2912         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2913         fhp = &nfh.fh_generic;
2914         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
             /*
              * Decode offset and count.  After either branch tl points at
              * the count word.
              */
2915         if (info.v3) {
2916                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2917                 toff = fxdr_hyper(tl);
2918                 tl += 2;
2919 #if 0
2920                 verf = fxdr_hyper(tl);
2921 #endif
2922                 tl += 2;
2923         } else {
2924                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2925                 toff = fxdr_unsigned(u_quad_t, *tl++);
2926 #if 0
2927                 verf = 0;       /* shut up gcc */
2928 #endif
2929         }
             /*
              * toff: offset requested by the client; off: current read
              * position.  cnt is the client's byte limit; siz is cnt rounded
              * up with the DIRBLKSIZ mask.  Both are capped at
              * NFS_SRVMAXDATA(nfsd).  fullsiz remembers the read size for
              * retries.
              */
2930         off = toff;
2931         cnt = fxdr_unsigned(int, *tl);
2932         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2933         xfer = NFS_SRVMAXDATA(nfsd);
2934         if ((unsigned)cnt > xfer)
2935                 cnt = xfer;
2936         if ((unsigned)siz > xfer)
2937                 siz = xfer;
2938         fullsiz = siz;
2939         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2940                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
             /* Only directories can be read this way. */
2941         if (!error && vp->v_type != VDIR) {
2942                 error = ENOTDIR;
2943                 vput(vp);
2944                 vp = NULL;
2945         }
2946         if (error) {
2947                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2948                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2949                 error = 0;
2950                 goto nfsmout;
2951         }
2952
2953         /*
2954          * Obtain lock on vnode for this section of the code
2955          */
2956
2957         if (info.v3) {
2958                 error = getret = VOP_GETATTR(vp, &at);
2959 #if 0
2960                 /*
2961                  * XXX This check may be too strict for Solaris 2.5 clients.
2962                  */
2963                 if (!error && toff && verf && verf != at.va_filerev)
2964                         error = NFSERR_BAD_COOKIE;
2965 #endif
2966         }
             /* The caller needs VEXEC access on the directory. */
2967         if (!error)
2968                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2969         if (error) {
2970                 vput(vp);
2971                 vp = NULL;
2972                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2973                                       NFSX_POSTOPATTR(info.v3), &error));
2974                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2975                 error = 0;
2976                 goto nfsmout;
2977         }
             /*
              * From here on vp is unlocked but still referenced, which is
              * why the paths below use vrele() instead of vput().
              */
2978         vn_unlock(vp);
2979
2980         /*
2981          * end section.  Allocate rbuf and continue
2982          */
2983         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
2984 again:
             /*
              * (Re)initialise a single-segment uio over rbuf starting at off,
              * and discard any cookie array left over from a previous pass.
              */
2985         iv.iov_base = rbuf;
2986         iv.iov_len = fullsiz;
2987         io.uio_iov = &iv;
2988         io.uio_iovcnt = 1;
2989         io.uio_offset = (off_t)off;
2990         io.uio_resid = fullsiz;
2991         io.uio_segflg = UIO_SYSSPACE;
2992         io.uio_rw = UIO_READ;
2993         io.uio_td = NULL;
2994         eofflag = 0;
2995         if (cookies) {
2996                 kfree((caddr_t)cookies, M_TEMP);
2997                 cookies = NULL;
2998         }
2999         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3000         off = (off_t)io.uio_offset;
             /* A successful read without a cookie array is reported as NFSERR_PERM. */
3001         if (!cookies && !error)
3002                 error = NFSERR_PERM;
3003         if (info.v3) {
3004                 getret = VOP_GETATTR(vp, &at);
3005                 if (!error)
3006                         error = getret;
3007         }
3008         if (error) {
3009                 vrele(vp);
3010                 vp = NULL;
3011                 kfree((caddr_t)rbuf, M_TEMP);
3012                 if (cookies)
3013                         kfree((caddr_t)cookies, M_TEMP);
3014                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3015                                       NFSX_POSTOPATTR(info.v3), &error));
3016                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3017                 error = 0;
3018                 goto nfsmout;
3019         }
             /*
              * A non-zero residual means the buffer was not filled; shrink
              * siz to the number of bytes actually produced.
              */
3020         if (io.uio_resid) {
3021                 siz -= io.uio_resid;
3022
3023                 /*
3024                  * If nothing read, return eof
3025                  * rpc reply
3026                  */
3027                 if (siz == 0) {
3028                         vrele(vp);
3029                         vp = NULL;
3030                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3031                                               NFSX_POSTOPATTR(info.v3) +
3032                                               NFSX_COOKIEVERF(info.v3) +
3033                                               2 * NFSX_UNSIGNED,
3034                                               &error));
3035                         if (info.v3) {
3036                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3037                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3038                                 txdr_hyper(at.va_filerev, tl);
3039                                 tl += 2;
3040                         } else
3041                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
                             /* no entry follows, eof = true */
3042                         *tl++ = nfs_false;
3043                         *tl = nfs_true;
3044                         kfree((caddr_t)rbuf, M_TEMP);
3045                         kfree((caddr_t)cookies, M_TEMP);
3046                         error = 0;
3047                         goto nfsmout;
3048                 }
3049         }
3050
3051         /*
3052          * Check for degenerate cases of nothing useful read.
3053          * If so go try again
3054          */
3055         cpos = rbuf;
3056         cend = rbuf + siz;
3057         dp = (struct dirent *)cpos;
3058         cookiep = cookies;
3059         /*
3060          * For some reason FreeBSD's ufs_readdir() chooses to back the
3061          * directory offset up to a block boundary, so it is necessary to
3062          * skip over the records that precede the requested offset. This
3063          * requires the assumption that file offset cookies monotonically
3064          * increase.
3065          */
3066         while (cpos < cend && ncookies > 0 &&
3067                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3068                  ((u_quad_t)(*cookiep)) <= toff)) {
3069                 dp = _DIRENT_NEXT(dp);
3070                 cpos = (char *)dp;
3071                 cookiep++;
3072                 ncookies--;
3073         }
             /*
              * Nothing usable in this chunk: continue reading from where
              * VOP_READDIR() left off, with the full buffer size again.
              */
3074         if (cpos >= cend || ncookies == 0) {
3075                 toff = off;
3076                 siz = fullsiz;
3077                 goto again;
3078         }
3079
3080         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3081         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3082                               NFSX_POSTOPATTR(info.v3) +
3083                               NFSX_COOKIEVERF(info.v3) + siz,
3084                               &error));
3085         if (info.v3) {
3086                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3087                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3088                 txdr_hyper(at.va_filerev, tl);
3089         }
             /*
              * mp1/mp2 and bp/be are the mbuf and byte cursors handed to
              * nfsm_clget(); be is the end of the space available in mp1.
              */
3090         mp1 = mp2 = info.mb;
3091         bp = info.bpos;
3092         be = bp + M_TRAILINGSPACE(mp1);
3093
3094         /* Loop through the records and build reply */
3095         while (cpos < cend && ncookies > 0) {
3096                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
                             /*
                              * len is the running reply size.  Stop, and
                              * clear eofflag, once adding this entry would
                              * exceed the client's count.
                              */
3097                         nlen = dp->d_namlen;
3098                         rem = nfsm_rndup(nlen) - nlen;
3099                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3100                         if (info.v3)
3101                                 len += 2 * NFSX_UNSIGNED;
3102                         if (len > cnt) {
3103                                 eofflag = 0;
3104                                 break;
3105                         }
3106                         /*
3107                          * Build the directory record xdr from
3108                          * the dirent entry.
3109                          */
                             /* "entry follows" flag */
3110                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3111                         *tl = nfs_true;
3112                         bp += NFSX_UNSIGNED;
                             /* file id: v3 sends the high 32 bits first */
3113                         if (info.v3) {
3114                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3115                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3116                                 bp += NFSX_UNSIGNED;
3117                         }
3118                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3119                         *tl = txdr_unsigned(dp->d_ino);
3120                         bp += NFSX_UNSIGNED;
                             /* name length */
3121                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3122                         *tl = txdr_unsigned(nlen);
3123                         bp += NFSX_UNSIGNED;
3124
3125                         /* And loop around copying the name */
3126                         xfer = nlen;
3127                         cp = dp->d_name;
3128                         while (xfer > 0) {
3129                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
                                     /* copy at most what fits before be */
3130                                 if ((bp+xfer) > be)
3131                                         tsiz = be-bp;
3132                                 else
3133                                         tsiz = xfer;
3134                                 bcopy(cp, bp, tsiz);
3135                                 bp += tsiz;
3136                                 xfer -= tsiz;
3137                                 if (xfer > 0)
3138                                         cp += tsiz;
3139                         }
3140                         /* And null pad to a int32_t boundary */
3141                         for (i = 0; i < rem; i++)
3142                                 *bp++ = '\0';
3143                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3144
3145                         /* Finish off the record */
                             /* offset cookie: v3 sends the high 32 bits first */
3146                         if (info.v3) {
3147                                 *tl = txdr_unsigned(*cookiep >> 32);
3148                                 bp += NFSX_UNSIGNED;
3149                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3150                         }
3151                         *tl = txdr_unsigned(*cookiep);
3152                         bp += NFSX_UNSIGNED;
3153                 }
3154                 dp = _DIRENT_NEXT(dp);
3155                 cpos = (char *)dp;
3156                 cookiep++;
3157                 ncookies--;
3158         }
3159         vrele(vp);
3160         vp = NULL;
             /* Trailer: "no more entries" flag followed by the eof flag. */
3161         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3162         *tl = nfs_false;
3163         bp += NFSX_UNSIGNED;
3164         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3165         if (eofflag)
3166                 *tl = nfs_true;
3167         else
3168                 *tl = nfs_false;
3169         bp += NFSX_UNSIGNED;
             /*
              * Fix up the length of the last mbuf written: if the cursor
              * moved to a different mbuf its length is set from bp,
              * otherwise the bytes written since info.bpos are added.
              */
3170         if (mp1 != info.mb) {
3171                 if (bp < be)
3172                         mp1->m_len = bp - mtod(mp1, caddr_t);
3173         } else
3174                 mp1->m_len += bp - info.bpos;
3175         kfree((caddr_t)rbuf, M_TEMP);
3176         kfree((caddr_t)cookies, M_TEMP);
3177
3178 nfsmout:
             /* Common exit: publish the reply and drop vp if still referenced. */
3179         *mrq = info.mreq;
3180         if (vp)
3181                 vrele(vp);
3182         return(error);
3183 }
3184
3185 int
3186 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3187                   struct thread *td, struct mbuf **mrq)
3188 {
3189         struct sockaddr *nam = nfsd->nd_nam;
3190         struct ucred *cred = &nfsd->nd_cr;
3191         char *bp, *be;
3192         struct dirent *dp;
3193         caddr_t cp;
3194         u_int32_t *tl;
3195         struct mbuf *mp1, *mp2;
3196         char *cpos, *cend, *rbuf;
3197         struct vnode *vp = NULL, *nvp;
3198         struct mount *mp = NULL;
3199         struct flrep fl;
3200         nfsfh_t nfh;
3201         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3202         struct uio io;
3203         struct iovec iv;
3204         struct vattr va, at, *vap = &va;
3205         struct nfs_fattr *fp;
3206         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3207         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3208         u_quad_t off, toff;
3209 #if 0
3210         u_quad_t verf;
3211 #endif
3212         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3213         struct nfsm_info info;
3214
3215         info.mrep = nfsd->nd_mrep;
3216         info.mreq = NULL;
3217         info.md = nfsd->nd_md;
3218         info.dpos = nfsd->nd_dpos;
3219         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3220
3221         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3222         fhp = &nfh.fh_generic;
3223         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3224         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3225         toff = fxdr_hyper(tl);
3226         tl += 2;
3227 #if 0
3228         verf = fxdr_hyper(tl);
3229 #endif
3230         tl += 2;
3231         siz = fxdr_unsigned(int, *tl++);
3232         cnt = fxdr_unsigned(int, *tl);
3233         off = toff;
3234         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3235         xfer = NFS_SRVMAXDATA(nfsd);
3236         if ((unsigned)cnt > xfer)
3237                 cnt = xfer;
3238         if ((unsigned)siz > xfer)
3239                 siz = xfer;
3240         fullsiz = siz;
3241         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3242                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3243         if (!error && vp->v_type != VDIR) {
3244                 error = ENOTDIR;
3245                 vput(vp);
3246                 vp = NULL;
3247         }
3248         if (error) {
3249                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3250                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3251                 error = 0;
3252                 goto nfsmout;
3253         }
3254         error = getret = VOP_GETATTR(vp, &at);
3255 #if 0
3256         /*
3257          * XXX This check may be too strict for Solaris 2.5 clients.
3258          */
3259         if (!error && toff && verf && verf != at.va_filerev)
3260                 error = NFSERR_BAD_COOKIE;
3261 #endif
3262         if (!error) {
3263                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3264         }
3265         if (error) {
3266                 vput(vp);
3267                 vp = NULL;
3268                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3269                                       NFSX_V3POSTOPATTR, &error));
3270                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3271                 error = 0;
3272                 goto nfsmout;
3273         }
3274         vn_unlock(vp);
3275         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
3276 again:
3277         iv.iov_base = rbuf;
3278         iv.iov_len = fullsiz;
3279         io.uio_iov = &iv;
3280         io.uio_iovcnt = 1;
3281         io.uio_offset = (off_t)off;
3282         io.uio_resid = fullsiz;
3283         io.uio_segflg = UIO_SYSSPACE;
3284         io.uio_rw = UIO_READ;
3285         io.uio_td = NULL;
3286         eofflag = 0;
3287         if (cookies) {
3288                 kfree((caddr_t)cookies, M_TEMP);
3289                 cookies = NULL;
3290         }
3291         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3292         off = (u_quad_t)io.uio_offset;
3293         getret = VOP_GETATTR(vp, &at);
3294         if (!cookies && !error)
3295                 error = NFSERR_PERM;
3296         if (!error)
3297                 error = getret;
3298         if (error) {
3299                 vrele(vp);
3300                 vp = NULL;
3301                 if (cookies)
3302                         kfree((caddr_t)cookies, M_TEMP);
3303                 kfree((caddr_t)rbuf, M_TEMP);
3304                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3305                                       NFSX_V3POSTOPATTR, &error));
3306                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3307                 error = 0;
3308                 goto nfsmout;
3309         }
3310         if (io.uio_resid) {
3311                 siz -= io.uio_resid;
3312
3313                 /*
3314                  * If nothing read, return eof
3315                  * rpc reply
3316                  */
3317                 if (siz == 0) {
3318                         vrele(vp);
3319                         vp = NULL;
3320                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3321                                               NFSX_V3POSTOPATTR +
3322                                               NFSX_V3COOKIEVERF +
3323                                               2 * NFSX_UNSIGNED,
3324                                               &error));
3325                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3326                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3327                         txdr_hyper(at.va_filerev, tl);
3328                         tl += 2;
3329                         *tl++ = nfs_false;
3330                         *tl = nfs_true;
3331                         kfree((caddr_t)cookies, M_TEMP);
3332                         kfree((caddr_t)rbuf, M_TEMP);
3333                         error = 0;
3334                         goto nfsmout;
3335                 }
3336         }
3337
3338         /*
3339          * Check for degenerate cases of nothing useful read.
3340          * If so go try again
3341          */
3342         cpos = rbuf;
3343         cend = rbuf + siz;
3344         dp = (struct dirent *)cpos;
3345         cookiep = cookies;
3346         /*
3347          * For some reason FreeBSD's ufs_readdir() chooses to back the
3348          * directory offset up to a block boundary, so it is necessary to
3349          * skip over the records that preceed the requested offset. This
3350          * requires the assumption that file offset cookies monotonically
3351          * increase.
3352          */
3353         while (cpos < cend && ncookies > 0 &&
3354                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3355                  ((u_quad_t)(*cookiep)) <= toff)) {
3356                 dp = _DIRENT_NEXT(dp);
3357                 cpos = (char *)dp;
3358                 cookiep++;
3359                 ncookies--;
3360         }
3361         if (cpos >= cend || ncookies == 0) {
3362                 toff = off;
3363                 siz = fullsiz;
3364                 goto again;
3365         }
3366
3367         /*
3368          * Probe one of the directory entries to see if the filesystem
3369          * supports VGET.
3370          */
3371         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3372                 error = NFSERR_NOTSUPP;
3373                 vrele(vp);
3374                 vp = NULL;
3375                 kfree((caddr_t)cookies, M_TEMP);
3376                 kfree((caddr_t)rbuf, M_TEMP);
3377                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3378                                       NFSX_V3POSTOPATTR, &error));
3379                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3380                 error = 0;
3381                 goto nfsmout;
3382         }
3383         if (nvp) {
3384                 vput(nvp);
3385                 nvp = NULL;
3386         }
3387             
3388         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3389                         2 * NFSX_UNSIGNED;
3390         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3391         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3392         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3393         txdr_hyper(at.va_filerev, tl);
3394         mp1 = mp2 = info.mb;
3395         bp = info.bpos;
3396         be = bp + M_TRAILINGSPACE(mp1);
3397
3398         /* Loop through the records and build reply */
3399         while (cpos < cend && ncookies > 0) {
3400                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3401                         nlen = dp->d_namlen;
3402                         rem = nfsm_rndup(nlen) - nlen;
3403
3404                         /*
3405                          * For readdir_and_lookup get the vnode using
3406                          * the file number.
3407                          */
3408                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3409                                 goto invalid;
3410                         bzero((caddr_t)nfhp, NFSX_V3FH);
3411                         nfhp->fh_fsid = fhp->fh_fsid;
3412                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3413                                 vput(nvp);
3414                                 nvp = NULL;
3415                                 goto invalid;
3416                         }
3417                         if (VOP_GETATTR(nvp, vap)) {
3418                                 vput(nvp);
3419                                 nvp = NULL;
3420                                 goto invalid;
3421                         }
3422                         vput(nvp);
3423                         nvp = NULL;
3424
3425                         /*
3426                          * If either the dircount or maxcount will be
3427                          * exceeded, get out now. Both of these lengths
3428                          * are calculated conservatively, including all
3429                          * XDR overheads.
3430                          */
3431                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3432                                 NFSX_V3POSTOPATTR);
3433                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3434                         if (len > cnt || dirlen > fullsiz) {
3435                                 eofflag = 0;
3436                                 break;
3437                         }
3438
3439                         /*
3440                          * Build the directory record xdr from
3441                          * the dirent entry.
3442                          */
3443                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3444                         nfsm_srvfattr(nfsd, vap, fp);
3445                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3446                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3447                         fl.fl_postopok = nfs_true;
3448                         fl.fl_fhok = nfs_true;
3449                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3450
3451                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3452                         *tl = nfs_true;
3453                         bp += NFSX_UNSIGNED;
3454                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3455                         *tl = txdr_unsigned(dp->d_ino >> 32);
3456                         bp += NFSX_UNSIGNED;
3457                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3458                         *tl = txdr_unsigned(dp->d_ino);
3459                         bp += NFSX_UNSIGNED;
3460                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3461                         *tl = txdr_unsigned(nlen);
3462                         bp += NFSX_UNSIGNED;
3463
3464                         /* And loop around copying the name */
3465                         xfer = nlen;
3466                         cp = dp->d_name;
3467                         while (xfer > 0) {
3468                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3469                                 if ((bp + xfer) > be)
3470                                         tsiz = be - bp;
3471                                 else
3472                                         tsiz = xfer;
3473                                 bcopy(cp, bp, tsiz);
3474                                 bp += tsiz;
3475                                 xfer -= tsiz;
3476                                 cp += tsiz;
3477                         }
3478                         /* And null pad to a int32_t boundary */
3479                         for (i = 0; i < rem; i++)
3480                                 *bp++ = '\0';
3481         
3482                         /*
3483                          * Now copy the flrep structure out.
3484                          */
3485                         xfer = sizeof (struct flrep);
3486                         cp = (caddr_t)&fl;
3487                         while (xfer > 0) {
3488                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3489                                 if ((bp + xfer) > be)
3490                                         tsiz = be - bp;
3491                                 else
3492                                         tsiz = xfer;
3493                                 bcopy(cp, bp, tsiz);
3494                                 bp += tsiz;
3495                                 xfer -= tsiz;
3496                                 cp += tsiz;
3497                         }
3498                 }
3499 invalid:
3500                 dp = _DIRENT_NEXT(dp);
3501                 cpos = (char *)dp;
3502                 cookiep++;
3503                 ncookies--;
3504         }
3505         vrele(vp);
3506         vp = NULL;
3507         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3508         *tl = nfs_false;
3509         bp += NFSX_UNSIGNED;
3510         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3511         if (eofflag)
3512                 *tl = nfs_true;
3513         else
3514                 *tl = nfs_false;
3515         bp += NFSX_UNSIGNED;
3516         if (mp1 != info.mb) {
3517                 if (bp < be)
3518                         mp1->m_len = bp - mtod(mp1, caddr_t);
3519         } else
3520                 mp1->m_len += bp - info.bpos;
3521         kfree((caddr_t)cookies, M_TEMP);
3522         kfree((caddr_t)rbuf, M_TEMP);
3523 nfsmout:
3524         *mrq = info.mreq;
3525         if (vp)
3526                 vrele(vp);
3527         return(error);
3528 }
3529
3530 /*
3531  * nfs commit service
3532  */
3533 int
3534 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3535              struct thread *td, struct mbuf **mrq)
3536 {
3537         struct sockaddr *nam = nfsd->nd_nam;
3538         struct ucred *cred = &nfsd->nd_cr;
3539         struct vattr bfor, aft;
3540         struct vnode *vp = NULL;
3541         struct mount *mp = NULL;
3542         nfsfh_t nfh;
3543         fhandle_t *fhp;
3544         u_int32_t *tl;
3545         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3546         u_quad_t off;
3547         struct nfsm_info info;
3548
3549         info.mrep = nfsd->nd_mrep;
3550         info.mreq = NULL;
3551         info.md = nfsd->nd_md;
3552         info.dpos = nfsd->nd_dpos;
3553
3554         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3555         fhp = &nfh.fh_generic;
3556         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3557         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3558
3559         /*
3560          * XXX At this time VOP_FSYNC() does not accept offset and byte
3561          * count parameters, so these arguments are useless (someday maybe).
3562          */
3563         off = fxdr_hyper(tl);
3564         tl += 2;
3565         cnt = fxdr_unsigned(int, *tl);
3566         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3567                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3568         if (error) {
3569                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3570                                       2 * NFSX_UNSIGNED, &error));
3571                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3572                                  aft_ret, &aft);
3573                 error = 0;
3574                 goto nfsmout;
3575         }
3576         for_ret = VOP_GETATTR(vp, &bfor);
3577
3578         /*
3579          * RFC 1813 3.3.21: If count is 0, a flush from offset to the end of
3580          * file is done. At this time VOP_FSYNC does not accept offset and
3581          * byte count parameters, so call VOP_FSYNC the whole file for now.
3582          */
3583         if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
3584                 /*
3585                  * Give up and do the whole thing
3586                  */
3587                 if (vp->v_object &&
3588                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3589                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3590                 }
3591                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3592         } else {
3593                 /*
3594                  * Locate and synchronously write any buffers that fall
3595                  * into the requested range.  Note:  we are assuming that
3596                  * f_iosize is a power of 2.
3597                  */
3598                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3599                 int iomask = iosize - 1;
3600                 off_t loffset;
3601
3602                 /*
3603                  * Align to iosize boundry, super-align to page boundry.
3604                  */
3605                 if (off & iomask) {
3606                         cnt += off & iomask;
3607                         off &= ~(u_quad_t)iomask;
3608                 }
3609                 if (off & PAGE_MASK) {
3610                         cnt += off & PAGE_MASK;
3611                         off &= ~(u_quad_t)PAGE_MASK;
3612                 }
3613                 loffset = off;
3614
3615                 if (vp->v_object &&
3616                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3617                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3618                             (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3619                 }
3620
3621                 crit_enter();
3622                 while (error == 0 || cnt > 0) {
3623                         struct buf *bp;
3624
3625                         /*
3626                          * If we have a buffer and it is marked B_DELWRI we
3627                          * have to lock and write it.  Otherwise the prior
3628                          * write is assumed to have already been committed.
3629                          *
3630                          * WARNING: FINDBLK_TEST buffers represent stable
3631                          *          storage but not necessarily stable
3632                          *          content.  It is ok in this case.
3633                          */
3634                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3635                                 if (bp->b_flags & B_DELWRI)
3636                                         bp = findblk(vp, loffset, 0);
3637                                 else
3638                                         bp = NULL;
3639                         }
3640                         if (bp) {
3641                                 if (bp->b_flags & B_DELWRI) {
3642                                         bremfree(bp);
3643                                         error = bwrite(bp);
3644                                         ++nfs_commit_miss;
3645                                 } else {
3646                                         BUF_UNLOCK(bp);
3647                                 }
3648                         }
3649                         ++nfs_commit_blks;
3650                         if (cnt < iosize)
3651                                 break;
3652                         cnt -= iosize;
3653                         loffset += iosize;
3654                 }
3655                 crit_exit();
3656         }
3657
3658         aft_ret = VOP_GETATTR(vp, &aft);
3659         vput(vp);
3660         vp = NULL;
3661         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3662                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3663                               &error));
3664         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3665                          aft_ret, &aft);
3666         if (!error) {
3667                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3668                 if (nfsver.tv_sec == 0)
3669                         nfsver = boottime;
3670                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3671                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3672         } else {
3673                 error = 0;
3674         }
3675 nfsmout:
3676         *mrq = info.mreq;
3677         if (vp)
3678                 vput(vp);
3679         return(error);
3680 }
3681
3682 /*
3683  * nfs statfs service
3684  */
3685 int
3686 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3687              struct thread *td, struct mbuf **mrq)
3688 {
3689         struct sockaddr *nam = nfsd->nd_nam;
3690         struct ucred *cred = &nfsd->nd_cr;
3691         struct statfs *sf;
3692         struct nfs_statfs *sfp;
3693         int error = 0, rdonly, getret = 1;
3694         struct vnode *vp = NULL;
3695         struct mount *mp = NULL;
3696         struct vattr at;
3697         nfsfh_t nfh;
3698         fhandle_t *fhp;
3699         struct statfs statfs;
3700         u_quad_t tval;
3701         struct nfsm_info info;
3702
3703         info.mrep = nfsd->nd_mrep;
3704         info.mreq = NULL;
3705         info.md = nfsd->nd_md;
3706         info.dpos = nfsd->nd_dpos;
3707         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3708
3709         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3710         fhp = &nfh.fh_generic;
3711         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3712         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3713                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3714         if (error) {
3715                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3716                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3717                 error = 0;
3718                 goto nfsmout;
3719         }
3720         sf = &statfs;
3721         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3722         getret = VOP_GETATTR(vp, &at);
3723         vput(vp);
3724         vp = NULL;
3725         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3726                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3727                               &error));
3728         if (info.v3)
3729                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3730         if (error) {
3731                 error = 0;
3732                 goto nfsmout;
3733         }
3734         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3735         if (info.v3) {
3736                 tval = (u_quad_t)sf->f_blocks;
3737                 tval *= (u_quad_t)sf->f_bsize;
3738                 txdr_hyper(tval, &sfp->sf_tbytes);
3739                 tval = (u_quad_t)sf->f_bfree;
3740                 tval *= (u_quad_t)sf->f_bsize;
3741                 txdr_hyper(tval, &sfp->sf_fbytes);
3742                 tval = (u_quad_t)sf->f_bavail;
3743                 tval *= (u_quad_t)sf->f_bsize;
3744                 txdr_hyper(tval, &sfp->sf_abytes);
3745                 sfp->sf_tfiles.nfsuquad[0] = 0;
3746                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3747                 sfp->sf_ffiles.nfsuquad[0] = 0;
3748                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3749                 sfp->sf_afiles.nfsuquad[0] = 0;
3750                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3751                 sfp->sf_invarsec = 0;
3752         } else {
3753                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3754                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3755                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3756                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3757                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3758         }
3759 nfsmout:
3760         *mrq = info.mreq;
3761         if (vp)
3762                 vput(vp);
3763         return(error);
3764 }
3765
3766 /*
3767  * nfs fsinfo service
3768  */
3769 int
3770 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3771              struct thread *td, struct mbuf **mrq)
3772 {
3773         struct sockaddr *nam = nfsd->nd_nam;
3774         struct ucred *cred = &nfsd->nd_cr;
3775         struct nfsv3_fsinfo *sip;
3776         int error = 0, rdonly, getret = 1, pref;
3777         struct vnode *vp = NULL;
3778         struct mount *mp = NULL;
3779         struct vattr at;
3780         nfsfh_t nfh;
3781         fhandle_t *fhp;
3782         u_quad_t maxfsize;
3783         struct statfs sb;
3784         struct nfsm_info info;
3785
3786         info.mrep = nfsd->nd_mrep;
3787         info.mreq = NULL;
3788         info.md = nfsd->nd_md;
3789         info.dpos = nfsd->nd_dpos;
3790
3791         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3792         fhp = &nfh.fh_generic;
3793         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3794         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3795                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3796         if (error) {
3797                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3798                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3799                 error = 0;
3800                 goto nfsmout;
3801         }
3802
3803         /* XXX Try to make a guess on the max file size. */
3804         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3805         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3806
3807         getret = VOP_GETATTR(vp, &at);
3808         vput(vp);
3809         vp = NULL;
3810         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3811                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3812         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3813         sip = nfsm_build(&info, NFSX_V3FSINFO);
3814
3815         /*
3816          * XXX
3817          * There should be file system VFS OP(s) to get this information.
3818          * For now, assume ufs.
3819          */
3820         if (slp->ns_so->so_type == SOCK_DGRAM)
3821                 pref = NFS_MAXDGRAMDATA;
3822         else
3823                 pref = NFS_MAXDATA;
3824         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3825         sip->fs_rtpref = txdr_unsigned(pref);
3826         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3827         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3828         sip->fs_wtpref = txdr_unsigned(pref);
3829         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3830         sip->fs_dtpref = txdr_unsigned(pref);
3831         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3832         sip->fs_timedelta.nfsv3_sec = 0;
3833         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3834         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3835                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3836                 NFSV3FSINFO_CANSETTIME);
3837 nfsmout:
3838         *mrq = info.mreq;
3839         if (vp)
3840                 vput(vp);
3841         return(error);
3842 }
3843
3844 /*
3845  * nfs pathconf service
3846  */
3847 int
3848 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3849                struct thread *td, struct mbuf **mrq)
3850 {
3851         struct sockaddr *nam = nfsd->nd_nam;
3852         struct ucred *cred = &nfsd->nd_cr;
3853         struct nfsv3_pathconf *pc;
3854         int error = 0, rdonly, getret = 1;
3855         register_t linkmax, namemax, chownres, notrunc;
3856         struct vnode *vp = NULL;
3857         struct mount *mp = NULL;
3858         struct vattr at;
3859         nfsfh_t nfh;
3860         fhandle_t *fhp;
3861         struct nfsm_info info;
3862
3863         info.mrep = nfsd->nd_mrep;
3864         info.mreq = NULL;
3865         info.md = nfsd->nd_md;
3866         info.dpos = nfsd->nd_dpos;
3867
3868         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3869         fhp = &nfh.fh_generic;
3870         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3871         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3872                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3873         if (error) {
3874                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3875                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3876                 error = 0;
3877                 goto nfsmout;
3878         }
3879         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3880         if (!error)
3881                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3882         if (!error)
3883                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3884         if (!error)
3885                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3886         getret = VOP_GETATTR(vp, &at);
3887         vput(vp);
3888         vp = NULL;
3889         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3890                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3891                               &error));
3892         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3893         if (error) {
3894                 error = 0;
3895                 goto nfsmout;
3896         }
3897         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3898
3899         pc->pc_linkmax = txdr_unsigned(linkmax);
3900         pc->pc_namemax = txdr_unsigned(namemax);
3901         pc->pc_notrunc = txdr_unsigned(notrunc);
3902         pc->pc_chownrestricted = txdr_unsigned(chownres);
3903
3904         /*
3905          * These should probably be supported by VOP_PATHCONF(), but
3906          * until msdosfs is exportable (why would you want to?), the
3907          * Unix defaults should be ok.
3908          */
3909         pc->pc_caseinsensitive = nfs_false;
3910         pc->pc_casepreserving = nfs_true;
3911 nfsmout:
3912         *mrq = info.mreq;
3913         if (vp) 
3914                 vput(vp);
3915         return(error);
3916 }
3917
3918 /*
3919  * Null operation, used by clients to ping server
3920  */
3921 /* ARGSUSED */
3922 int
3923 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3924            struct thread *td, struct mbuf **mrq)
3925 {
3926         struct nfsm_info info;
3927         int error = NFSERR_RETVOID;
3928
3929         info.mrep = nfsd->nd_mrep;
3930         info.mreq = NULL;
3931
3932         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3933         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3934 nfsmout:
3935         *mrq = info.mreq;
3936         return (error);
3937 }
3938
3939 /*
3940  * No operation, used for obsolete procedures
3941  */
3942 /* ARGSUSED */
3943 int
3944 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3945            struct thread *td, struct mbuf **mrq)
3946 {
3947         struct nfsm_info info;
3948         int error;
3949
3950         info.mrep = nfsd->nd_mrep;
3951         info.mreq = NULL;
3952
3953         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3954         if (nfsd->nd_repstat)
3955                 error = nfsd->nd_repstat;
3956         else
3957                 error = EPROCUNAVAIL;
3958         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3959         error = 0;
3960 nfsmout:
3961         *mrq = info.mreq;
3962         return (error);
3963 }
3964
3965 /*
3966  * Perform access checking for vnodes obtained from file handles that would
3967  * refer to files already opened by a Unix client. You cannot just use
3968  * vn_writechk() and VOP_ACCESS() for two reasons.
3969  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3970  * 2 - The owner is to be given access irrespective of mode bits for some
3971  *     operations, so that processes that chmod after opening a file don't
3972  *     break. I don't like this because it opens a security hole, but since
3973  *     the nfs server opens a security hole the size of a barn door anyhow,
3974  *     what the heck.
3975  *
3976  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3977  * will return EPERM instead of EACCESS. EPERM is always an error.
3978  */
3979 static int
3980 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3981              int rdonly, struct thread *td, int override)
3982 {
3983         struct vattr vattr;
3984         int error;
3985
3986         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3987         if (flags & VWRITE) {
3988                 /* Just vn_writechk() changed to check rdonly */
3989                 /*
3990                  * Disallow write attempts on read-only file systems;
3991                  * unless the file is a socket or a block or character
3992                  * device resident on the file system.
3993                  */
3994                 if (rdonly || 
3995                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3996                         switch (vp->v_type) {
3997                         case VREG:
3998                         case VDIR:
3999                         case VLNK:
4000                                 return (EROFS);
4001                         default:
4002                                 break;
4003                         }
4004                 }
4005                 /*
4006                  * If there's shared text associated with
4007                  * the inode, we can't allow writing.
4008                  */
4009                 if (vp->v_flag & VTEXT)
4010                         return (ETXTBSY);
4011         }
4012         error = VOP_GETATTR(vp, &vattr);
4013         if (error)
4014                 return (error);
4015         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
4016         /*
4017          * Allow certain operations for the owner (reads and writes
4018          * on files that are already open).
4019          */
4020         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4021                 error = 0;
4022         return error;
4023 }
4024 #endif /* NFS_NOSERVER */
4025