NFS server: Import various fixes from FreeBSD.
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  */
39
40 /*
41  * nfs version 2 and 3 server calls to vnode ops
42  * - these routines generally have 3 phases
43  *   1 - break down and validate rpc request in mbuf list
44  *   2 - do the vnode ops for the request
45  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
46  *   3 - build the rpc reply in an mbuf list
47  *   nb:
48  *      - do not mix the phases, since the nfsm_?? macros can return failures
49  *        on a bad rpc or similar and do not do any vrele() or vput()'s
50  *
51  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
52  *      error number iff error != 0 whereas
53  *      returning an error from the server function implies a fatal error
54  *      such as a badly constructed rpc request that should be dropped without
55  *      a reply.
56  *      For Version 3, nfsm_reply() does not return for the error case, since
57  *      most version 3 rpcs return more than the status for error cases.
58  *
59  * Other notes:
60  *      Warning: always pay careful attention to resource cleanup on return
61  *      and note that nfsm_*() macros can terminate a procedure on certain
62  *      errors.
63  */
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/proc.h>
68 #include <sys/priv.h>
69 #include <sys/nlookup.h>
70 #include <sys/namei.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 #include <sys/mount.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/dirent.h>
79 #include <sys/stat.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82 #include <sys/buf.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87
88 #include <sys/buf2.h>
89
90 #include <sys/thread2.h>
91
92 #include "nfsproto.h"
93 #include "rpcv2.h"
94 #include "nfs.h"
95 #include "xdr_subs.h"
96 #include "nfsm_subs.h"
97
98 #ifdef NFSRV_DEBUG
99 #define nfsdbprintf(info)       kprintf info
100 #else
101 #define nfsdbprintf(info)
102 #endif
103
104 #define MAX_COMMIT_COUNT        (1024 * 1024)
105
106 #define NUM_HEURISTIC           1031
107 #define NHUSE_INIT              64
108 #define NHUSE_INC               16
109 #define NHUSE_MAX               2048
110
111 static struct nfsheur {
112     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
113     off_t nh_nextoff;           /* next offset for sequential detection */
114     int nh_use;                 /* use count for selection */
115     int nh_seqcount;            /* heuristic */
116 } nfsheur[NUM_HEURISTIC];
117
118 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
119                       NFFIFO, NFNON };
120 #ifndef NFS_NOSERVER 
121 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
122                       NFCHR, NFNON };
123
124 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
125 int nfsrvw_procrastinate_v3 = 0;
126
127 static struct timespec  nfsver;
128
129 SYSCTL_DECL(_vfs_nfs);
130
131 int nfs_async;
132 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
133     "Enable unstable and fast writes");
134 static int nfs_commit_blks;
135 static int nfs_commit_miss;
136 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
137     "Number of committed blocks");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
139     "Number of nfs blocks committed from dirty buffers");
140
141 static int nfsrv_access (struct mount *, struct vnode *, int,
142                         struct ucred *, int, struct thread *, int);
143 static void nfsrvw_coalesce (struct nfsrv_descript *,
144                 struct nfsrv_descript *);
145
146 /*
147  * Heuristic to detect sequential operation.
148  */
149 static struct nfsheur *
150 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp, int writeop)
151 {
152         struct nfsheur *nh;
153         int hi, try;
154
155         /* Locate best candidate */
156         try = 32;
157         hi = ((int)(vm_offset_t) vp / sizeof(struct vnode)) % NUM_HEURISTIC;
158         nh = &nfsheur[hi];
159
160         while (try--) {
161                 if (nfsheur[hi].nh_vp == vp) {
162                         nh = &nfsheur[hi];
163                         break;
164                 }
165                 if (nfsheur[hi].nh_use > 0)
166                         --nfsheur[hi].nh_use;
167                 hi = (hi + 1) % NUM_HEURISTIC;
168                 if (nfsheur[hi].nh_use < nh->nh_use)
169                         nh = &nfsheur[hi];
170         }
171
172         /* Initialize hint if this is a new file */
173         if (nh->nh_vp != vp) {
174                 nh->nh_vp = vp;
175                 nh->nh_nextoff = uio->uio_offset;
176                 nh->nh_use = NHUSE_INIT;
177                 if (uio->uio_offset == 0)
178                         nh->nh_seqcount = 4;
179                 else
180                         nh->nh_seqcount = 1;
181         }
182
183         /*
184          * Calculate heuristic
185          *
186          * See vfs_vnops.c:sequential_heuristic().
187          */
188         if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
189             uio->uio_offset == nh->nh_nextoff) {
190                 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
191                 if (nh->nh_seqcount > IO_SEQMAX)
192                         nh->nh_seqcount = IO_SEQMAX;
193         } else if (nh->nh_seqcount > 1) {
194                 nh->nh_seqcount = 1;
195         } else {
196                 nh->nh_seqcount = 0;
197         }
198         nh->nh_use += NHUSE_INC;
199         if (nh->nh_use > NHUSE_MAX)
200                 nh->nh_use = NHUSE_MAX;
201         return (nh);
202 }
203
204 /*
205  * nfs v3 access service
206  */
207 int
208 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
209               struct thread *td, struct mbuf **mrq)
210 {
211         struct sockaddr *nam = nfsd->nd_nam;
212         struct ucred *cred = &nfsd->nd_cr;
213         struct vnode *vp = NULL;
214         struct mount *mp = NULL;
215         nfsfh_t nfh;
216         fhandle_t *fhp;
217         int error = 0, rdonly, getret;
218         struct vattr vattr, *vap = &vattr;
219         u_long testmode, nfsmode;
220         struct nfsm_info info;
221         u_int32_t *tl;
222
223         info.dpos = nfsd->nd_dpos;
224         info.md = nfsd->nd_md;
225         info.mrep = nfsd->nd_mrep;
226         info.mreq = NULL;
227
228         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
229         fhp = &nfh.fh_generic;
230         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
231         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
232         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
233             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
234         if (error) {
235                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
236                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
237                 error = 0;
238                 goto nfsmout;
239         }
240         nfsmode = fxdr_unsigned(u_int32_t, *tl);
241         if ((nfsmode & NFSV3ACCESS_READ) &&
242                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
243                 nfsmode &= ~NFSV3ACCESS_READ;
244         if (vp->v_type == VDIR)
245                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
246                         NFSV3ACCESS_DELETE);
247         else
248                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
249         if ((nfsmode & testmode) &&
250                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
251                 nfsmode &= ~testmode;
252         if (vp->v_type == VDIR)
253                 testmode = NFSV3ACCESS_LOOKUP;
254         else
255                 testmode = NFSV3ACCESS_EXECUTE;
256         if ((nfsmode & testmode) &&
257                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
258                 nfsmode &= ~testmode;
259         getret = VOP_GETATTR(vp, vap);
260         vput(vp);
261         vp = NULL;
262         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
263                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
264         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
265         tl = nfsm_build(&info, NFSX_UNSIGNED);
266         *tl = txdr_unsigned(nfsmode);
267 nfsmout:
268         *mrq = info.mreq;
269         if (vp)
270                 vput(vp);
271         return(error);
272 }
273
274 /*
275  * nfs getattr service
276  */
277 int
278 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
279               struct thread *td, struct mbuf **mrq)
280 {
281         struct sockaddr *nam = nfsd->nd_nam;
282         struct ucred *cred = &nfsd->nd_cr;
283         struct nfs_fattr *fp;
284         struct vattr va;
285         struct vattr *vap = &va;
286         struct vnode *vp = NULL;
287         struct mount *mp = NULL;
288         nfsfh_t nfh;
289         fhandle_t *fhp;
290         int error = 0, rdonly;
291         struct nfsm_info info;
292
293         info.mrep = nfsd->nd_mrep;
294         info.md = nfsd->nd_md;
295         info.dpos = nfsd->nd_dpos;
296         info.mreq = NULL;
297
298         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
299         fhp = &nfh.fh_generic;
300         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
301         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
302                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
303         if (error) {
304                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
305                 error = 0;
306                 goto nfsmout;
307         }
308         error = VOP_GETATTR(vp, vap);
309         vput(vp);
310         vp = NULL;
311         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
312                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
313         if (error) {
314                 error = 0;
315                 goto nfsmout;
316         }
317         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
318         nfsm_srvfattr(nfsd, vap, fp);
319         /* fall through */
320
321 nfsmout:
322         *mrq = info.mreq;
323         if (vp)
324                 vput(vp);
325         return(error);
326 }
327
328 /*
329  * nfs setattr service
330  */
331 int
332 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
333               struct thread *td, struct mbuf **mrq)
334 {
335         struct sockaddr *nam = nfsd->nd_nam;
336         struct ucred *cred = &nfsd->nd_cr;
337         struct vattr va, preat;
338         struct vattr *vap = &va;
339         struct nfsv2_sattr *sp;
340         struct nfs_fattr *fp;
341         struct vnode *vp = NULL;
342         struct mount *mp = NULL;
343         nfsfh_t nfh;
344         fhandle_t *fhp;
345         u_int32_t *tl;
346         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
347         int gcheck = 0;
348         struct timespec guard;
349         struct nfsm_info info;
350
351         info.mrep = nfsd->nd_mrep;
352         info.mreq = NULL;
353         info.md = nfsd->nd_md;
354         info.dpos = nfsd->nd_dpos;
355         info.v3 = (nfsd->nd_flag & ND_NFSV3);
356
357         guard.tv_sec = 0;       /* fix compiler warning */
358         guard.tv_nsec = 0;
359
360         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
361         fhp = &nfh.fh_generic;
362         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
363         VATTR_NULL(vap);
364         if (info.v3) {
365                 ERROROUT(nfsm_srvsattr(&info, vap));
366                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
367                 gcheck = fxdr_unsigned(int, *tl);
368                 if (gcheck) {
369                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
370                         fxdr_nfsv3time(tl, &guard);
371                 }
372         } else {
373                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
374                 /*
375                  * Nah nah nah nah na nah
376                  * There is a bug in the Sun client that puts 0xffff in the mode
377                  * field of sattr when it should put in 0xffffffff. The u_short
378                  * doesn't sign extend.
379                  * --> check the low order 2 bytes for 0xffff
380                  */
381                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
382                         vap->va_mode = nfstov_mode(sp->sa_mode);
383                 if (sp->sa_uid != nfs_xdrneg1)
384                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
385                 if (sp->sa_gid != nfs_xdrneg1)
386                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
387                 if (sp->sa_size != nfs_xdrneg1)
388                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
389                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
390 #ifdef notyet
391                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
392 #else
393                         vap->va_atime.tv_sec =
394                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
395                         vap->va_atime.tv_nsec = 0;
396 #endif
397                 }
398                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
399                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
400
401         }
402
403         /*
404          * Now that we have all the fields, lets do it.
405          */
406         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
407                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
408         if (error) {
409                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
410                                       2 * NFSX_UNSIGNED, &error));
411                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
412                                  postat_ret, vap);
413                 error = 0;
414                 goto nfsmout;
415         }
416
417         /*
418          * vp now an active resource, pay careful attention to cleanup
419          */
420
421         if (info.v3) {
422                 error = preat_ret = VOP_GETATTR(vp, &preat);
423                 if (!error && gcheck &&
424                         (preat.va_ctime.tv_sec != guard.tv_sec ||
425                          preat.va_ctime.tv_nsec != guard.tv_nsec))
426                         error = NFSERR_NOT_SYNC;
427                 if (error) {
428                         vput(vp);
429                         vp = NULL;
430                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
431                                               NFSX_WCCDATA(info.v3), &error));
432                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
433                                          postat_ret, vap);
434                         error = 0;
435                         goto nfsmout;
436                 }
437         }
438
439         /*
440          * If the size is being changed write acces is required, otherwise
441          * just check for a read only file system.
442          */
443         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
444                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
445                         error = EROFS;
446                         goto out;
447                 }
448         } else {
449                 if (vp->v_type == VDIR) {
450                         error = EISDIR;
451                         goto out;
452                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
453                             td, 0)) != 0){ 
454                         goto out;
455                 }
456         }
457         error = VOP_SETATTR(vp, vap, cred);
458         postat_ret = VOP_GETATTR(vp, vap);
459         if (!error)
460                 error = postat_ret;
461 out:
462         vput(vp);
463         vp = NULL;
464         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
465                    NFSX_WCCORFATTR(info.v3), &error));
466         if (info.v3) {
467                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
468                                  postat_ret, vap);
469                 error = 0;
470                 goto nfsmout;
471         } else {
472                 fp = nfsm_build(&info, NFSX_V2FATTR);
473                 nfsm_srvfattr(nfsd, vap, fp);
474         }
475         /* fall through */
476
477 nfsmout:
478         *mrq = info.mreq;
479         if (vp)
480                 vput(vp);
481         return(error);
482 }
483
484 /*
485  * nfs lookup rpc
486  */
487 int
488 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
489              struct thread *td, struct mbuf **mrq)
490 {
491         struct sockaddr *nam = nfsd->nd_nam;
492         struct ucred *cred = &nfsd->nd_cr;
493         struct nfs_fattr *fp;
494         struct nlookupdata nd;
495         struct vnode *vp;
496         struct vnode *dirp;
497         struct nchandle nch;
498         nfsfh_t nfh;
499         fhandle_t *fhp;
500         int error = 0, len, dirattr_ret = 1;
501         int pubflag;
502         struct vattr va, dirattr, *vap = &va;
503         struct nfsm_info info;
504
505         info.mrep = nfsd->nd_mrep;
506         info.mreq = NULL;
507         info.md = nfsd->nd_md;
508         info.dpos = nfsd->nd_dpos;
509         info.v3 = (nfsd->nd_flag & ND_NFSV3);
510
511         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
512         nlookup_zero(&nd);
513         dirp = NULL;
514         vp = NULL;
515
516         fhp = &nfh.fh_generic;
517         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
518         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
519
520         pubflag = nfs_ispublicfh(fhp);
521
522         error = nfs_namei(&nd, cred, 0, NULL, &vp,
523                 fhp, len, slp, nam, &info.md, &info.dpos,
524                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
525
526         /*
527          * namei failure, only dirp to cleanup.  Clear out garbarge from
528          * structure in case macros jump to nfsmout.
529          */
530
531         if (error) {
532                 if (dirp) {
533                         if (info.v3)
534                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
535                         vrele(dirp);
536                         dirp = NULL;
537                 }
538                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
539                                       NFSX_POSTOPATTR(info.v3), &error));
540                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
541                 error = 0;
542                 goto nfsmout;
543         }
544
545         /*
546          * Locate index file for public filehandle
547          *
548          * error is 0 on entry and 0 on exit from this block.
549          */
550
551         if (pubflag) {
552                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
553                         /*
554                          * Setup call to lookup() to see if we can find
555                          * the index file. Arguably, this doesn't belong
556                          * in a kernel.. Ugh.  If an error occurs, do not
557                          * try to install an index file and then clear the
558                          * error.
559                          *
560                          * When we replace nd with ind and redirect ndp,
561                          * maintenance of ni_startdir and ni_vp shift to
562                          * ind and we have to clean them up in the old nd.
563                          * However, the cnd resource continues to be maintained
564                          * via the original nd.  Confused?  You aren't alone!
565                          */
566                         vn_unlock(vp);
567                         cache_copy(&nd.nl_nch, &nch);
568                         nlookup_done(&nd);
569                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
570                                                 UIO_SYSSPACE, 0, cred, &nch);
571                         cache_drop(&nch);
572                         if (error == 0)
573                                 error = nlookup(&nd);
574
575                         if (error == 0) {
576                                 /*
577                                  * Found an index file. Get rid of
578                                  * the old references.  transfer vp and
579                                  * load up the new vp.  Fortunately we do
580                                  * not have to deal with dvp, that would be
581                                  * a huge mess.
582                                  */
583                                 if (dirp)       
584                                         vrele(dirp);
585                                 dirp = vp;
586                                 vp = NULL;
587                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
588                                                         LK_EXCLUSIVE, &vp);
589                                 KKASSERT(error == 0);
590                         }
591                         error = 0;
592                 }
593                 /*
594                  * If the public filehandle was used, check that this lookup
595                  * didn't result in a filehandle outside the publicly exported
596                  * filesystem.  We clear the poor vp here to avoid lockups due
597                  * to NFS I/O.
598                  */
599
600                 if (vp->v_mount != nfs_pub.np_mount) {
601                         vput(vp);
602                         vp = NULL;
603                         error = EPERM;
604                 }
605         }
606
607         if (dirp) {
608                 if (info.v3)
609                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
610                 vrele(dirp);
611                 dirp = NULL;
612         }
613
614         /*
615          * Resources at this point:
616          *      ndp->ni_vp      may not be NULL
617          *
618          */
619
620         if (error) {
621                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
622                                       NFSX_POSTOPATTR(info.v3), &error));
623                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
624                 error = 0;
625                 goto nfsmout;
626         }
627
628         /*
629          * Clear out some resources prior to potentially blocking.  This
630          * is not as critical as ni_dvp resources in other routines, but
631          * it helps.
632          */
633         nlookup_done(&nd);
634
635         /*
636          * Get underlying attribute, then release remaining resources ( for
637          * the same potential blocking reason ) and reply.
638          */
639         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
640         error = VFS_VPTOFH(vp, &fhp->fh_fid);
641         if (!error)
642                 error = VOP_GETATTR(vp, vap);
643
644         vput(vp);
645         vp = NULL;
646         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
647                               NFSX_SRVFH(info.v3) +
648                               NFSX_POSTOPORFATTR(info.v3) +
649                               NFSX_POSTOPATTR(info.v3),
650                               &error));
651         if (error) {
652                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
653                 error = 0;
654                 goto nfsmout;
655         }
656         nfsm_srvfhtom(&info, fhp);
657         if (info.v3) {
658                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
659                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
660         } else {
661                 fp = nfsm_build(&info, NFSX_V2FATTR);
662                 nfsm_srvfattr(nfsd, vap, fp);
663         }
664
665 nfsmout:
666         *mrq = info.mreq;
667         if (dirp)
668                 vrele(dirp);
669         nlookup_done(&nd);              /* may be called twice */
670         if (vp)
671                 vput(vp);
672         return (error);
673 }
674
675 /*
676  * nfs readlink service
677  */
678 int
679 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
680                struct thread *td, struct mbuf **mrq)
681 {
682         struct sockaddr *nam = nfsd->nd_nam;
683         struct ucred *cred = &nfsd->nd_cr;
684         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
685         struct iovec *ivp = iv;
686         u_int32_t *tl;
687         int error = 0, rdonly, i, tlen, len, getret;
688         struct mbuf *mp1, *mp2, *mp3;
689         struct vnode *vp = NULL;
690         struct mount *mp = NULL;
691         struct vattr attr;
692         nfsfh_t nfh;
693         fhandle_t *fhp;
694         struct uio io, *uiop = &io;
695         struct nfsm_info info;
696
697         info.mrep = nfsd->nd_mrep;
698         info.mreq = NULL;
699         info.md = nfsd->nd_md;
700         info.dpos = nfsd->nd_dpos;
701         info.v3 = (nfsd->nd_flag & ND_NFSV3);
702
703         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
704 #ifndef nolint
705         mp2 = NULL;
706 #endif
707         mp3 = NULL;
708         fhp = &nfh.fh_generic;
709         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
710         len = 0;
711         i = 0;
712         while (len < NFS_MAXPATHLEN) {
713                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
714                 mp1->m_len = MCLBYTES;
715                 if (len == 0)
716                         mp3 = mp2 = mp1;
717                 else {
718                         mp2->m_next = mp1;
719                         mp2 = mp1;
720                 }
721                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
722                         mp1->m_len = NFS_MAXPATHLEN-len;
723                         len = NFS_MAXPATHLEN;
724                 } else
725                         len += mp1->m_len;
726                 ivp->iov_base = mtod(mp1, caddr_t);
727                 ivp->iov_len = mp1->m_len;
728                 i++;
729                 ivp++;
730         }
731         uiop->uio_iov = iv;
732         uiop->uio_iovcnt = i;
733         uiop->uio_offset = 0;
734         uiop->uio_resid = len;
735         uiop->uio_rw = UIO_READ;
736         uiop->uio_segflg = UIO_SYSSPACE;
737         uiop->uio_td = NULL;
738         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
739                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
740         if (error) {
741                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
742                                       2 * NFSX_UNSIGNED, &error));
743                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
744                 error = 0;
745                 goto nfsmout;
746         }
747         if (vp->v_type != VLNK) {
748                 if (info.v3)
749                         error = EINVAL;
750                 else
751                         error = ENXIO;
752                 goto out;
753         }
754         error = VOP_READLINK(vp, uiop, cred);
755 out:
756         getret = VOP_GETATTR(vp, &attr);
757         vput(vp);
758         vp = NULL;
759         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
760                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
761                              &error));
762         if (info.v3) {
763                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
764                 if (error) {
765                         error = 0;
766                         goto nfsmout;
767                 }
768         }
769         if (uiop->uio_resid > 0) {
770                 len -= uiop->uio_resid;
771                 tlen = nfsm_rndup(len);
772                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
773         }
774         tl = nfsm_build(&info, NFSX_UNSIGNED);
775         *tl = txdr_unsigned(len);
776         info.mb->m_next = mp3;
777         mp3 = NULL;
778 nfsmout:
779         *mrq = info.mreq;
780         if (mp3)
781                 m_freem(mp3);
782         if (vp)
783                 vput(vp);
784         return(error);
785 }
786
787 /*
788  * nfs read service
789  */
790 int
791 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
792            struct thread *td, struct mbuf **mrq)
793 {
794         struct nfsm_info info;
795         struct sockaddr *nam = nfsd->nd_nam;
796         struct ucred *cred = &nfsd->nd_cr;
797         struct iovec *iv;
798         struct iovec *iv2;
799         struct mbuf *m;
800         struct nfs_fattr *fp;
801         u_int32_t *tl;
802         int i;
803         int reqlen;
804         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
805         struct mbuf *m2;
806         struct vnode *vp = NULL;
807         struct mount *mp = NULL;
808         nfsfh_t nfh;
809         fhandle_t *fhp;
810         struct uio io, *uiop = &io;
811         struct vattr va, *vap = &va;
812         struct nfsheur *nh;
813         off_t off;
814         int ioflag = 0;
815
816         info.mrep = nfsd->nd_mrep;
817         info.mreq = NULL;
818         info.md = nfsd->nd_md;
819         info.dpos = nfsd->nd_dpos;
820         info.v3 = (nfsd->nd_flag & ND_NFSV3);
821
822         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
823         fhp = &nfh.fh_generic;
824         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
825         if (info.v3) {
826                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
827                 off = fxdr_hyper(tl);
828         } else {
829                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
830                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
831         }
832         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
833                                             NFS_SRVMAXDATA(nfsd), &error));
834
835         /*
836          * Reference vp.  If an error occurs, vp will be invalid, but we
837          * have to NULL it just in case.  The macros might goto nfsmout
838          * as well.
839          */
840
841         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
842                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
843         if (error) {
844                 vp = NULL;
845                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
846                                       2 * NFSX_UNSIGNED, &error));
847                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
848                 error = 0;
849                 goto nfsmout;
850         }
851
852         if (vp->v_type != VREG) {
853                 if (info.v3)
854                         error = EINVAL;
855                 else
856                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
857         }
858         if (!error) {
859             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
860                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
861         }
862         getret = VOP_GETATTR(vp, vap);
863         if (!error)
864                 error = getret;
865         if (error) {
866                 vput(vp);
867                 vp = NULL;
868                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
869                                       NFSX_POSTOPATTR(info.v3), &error));
870                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
871                 error = 0;
872                 goto nfsmout;
873         }
874
875         /*
876          * Calculate byte count to read
877          */
878
879         if (off >= vap->va_size)
880                 cnt = 0;
881         else if ((off + reqlen) > vap->va_size)
882                 cnt = vap->va_size - off;
883         else
884                 cnt = reqlen;
885
886         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
887                               NFSX_POSTOPORFATTR(info.v3) +
888                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
889                               &error));
890         if (info.v3) {
891                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
892                 *tl++ = nfs_true;
893                 fp = (struct nfs_fattr *)tl;
894                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
895         } else {
896                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
897                 fp = (struct nfs_fattr *)tl;
898                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
899         }
900         len = left = nfsm_rndup(cnt);
901         if (cnt > 0) {
902                 /*
903                  * Generate the mbuf list with the uio_iov ref. to it.
904                  */
905                 i = 0;
906                 m = m2 = info.mb;
907                 while (left > 0) {
908                         siz = min(M_TRAILINGSPACE(m), left);
909                         if (siz > 0) {
910                                 left -= siz;
911                                 i++;
912                         }
913                         if (left > 0) {
914                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
915                                 m->m_len = 0;
916                                 m2->m_next = m;
917                                 m2 = m;
918                         }
919                 }
920                 iv = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
921                 uiop->uio_iov = iv2 = iv;
922                 m = info.mb;
923                 left = len;
924                 i = 0;
925                 while (left > 0) {
926                         if (m == NULL)
927                                 panic("nfsrv_read iov");
928                         siz = min(M_TRAILINGSPACE(m), left);
929                         if (siz > 0) {
930                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
931                                 iv->iov_len = siz;
932                                 m->m_len += siz;
933                                 left -= siz;
934                                 iv++;
935                                 i++;
936                         }
937                         m = m->m_next;
938                 }
939                 uiop->uio_iovcnt = i;
940                 uiop->uio_offset = off;
941                 uiop->uio_resid = len;
942                 uiop->uio_rw = UIO_READ;
943                 uiop->uio_segflg = UIO_SYSSPACE;
944                 nh = nfsrv_sequential_heuristic(uiop, vp, 0);
945                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
946                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
947                 if (error == 0) {
948                         off = uiop->uio_offset;
949                         nh->nh_nextoff = off;
950                 }
951                 kfree((caddr_t)iv2, M_TEMP);
952                 if (error || (getret = VOP_GETATTR(vp, vap))) {
953                         if (!error)
954                                 error = getret;
955                         m_freem(info.mreq);
956                         info.mreq = NULL;
957                         vput(vp);
958                         vp = NULL;
959                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
960                                               NFSX_POSTOPATTR(info.v3),
961                                               &error));
962                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
963                         error = 0;
964                         goto nfsmout;
965                 }
966         } else {
967                 uiop->uio_resid = 0;
968         }
969         vput(vp);
970         vp = NULL;
971         nfsm_srvfattr(nfsd, vap, fp);
972         tlen = len - uiop->uio_resid;
973         cnt = cnt < tlen ? cnt : tlen;
974         tlen = nfsm_rndup(cnt);
975         if (len != tlen || tlen != cnt)
976                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
977         if (info.v3) {
978                 *tl++ = txdr_unsigned(cnt);
979                 if (cnt < reqlen)
980                         *tl++ = nfs_true;
981                 else
982                         *tl++ = nfs_false;
983         }
984         *tl = txdr_unsigned(cnt);
985 nfsmout:
986         *mrq = info.mreq;
987         if (vp)
988                 vput(vp);
989         return(error);
990 }
991
992 /*
993  * nfs write service
994  */
995 int
996 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
997             struct thread *td, struct mbuf **mrq)
998 {
999         struct sockaddr *nam = nfsd->nd_nam;
1000         struct ucred *cred = &nfsd->nd_cr;
1001         struct iovec *ivp;
1002         int i, cnt;
1003         struct mbuf *mp1;
1004         struct nfs_fattr *fp;
1005         struct iovec *iv;
1006         struct vattr va, forat;
1007         struct vattr *vap = &va;
1008         u_int32_t *tl;
1009         int error = 0, rdonly, len, forat_ret = 1;
1010         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1011         int stable = NFSV3WRITE_FILESYNC;
1012         struct vnode *vp = NULL;
1013         struct mount *mp = NULL;
1014         struct nfsheur *nh;
1015         nfsfh_t nfh;
1016         fhandle_t *fhp;
1017         struct uio io, *uiop = &io;
1018         struct nfsm_info info;
1019         off_t off;
1020
1021         info.mrep = nfsd->nd_mrep;
1022         info.mreq = NULL;
1023         info.md = nfsd->nd_md;
1024         info.dpos = nfsd->nd_dpos;
1025         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1026
1027         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1028         if (info.mrep == NULL) {
1029                 error = 0;
1030                 goto nfsmout;
1031         }
1032         fhp = &nfh.fh_generic;
1033         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1034         if (info.v3) {
1035                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1036                 off = fxdr_hyper(tl);
1037                 tl += 3;
1038                 stable = fxdr_unsigned(int, *tl++);
1039         } else {
1040                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1041                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1042                 tl += 2;
1043                 if (nfs_async)
1044                         stable = NFSV3WRITE_UNSTABLE;
1045         }
1046         retlen = len = fxdr_unsigned(int32_t, *tl);
1047         cnt = i = 0;
1048
1049         /*
1050          * For NFS Version 2, it is not obvious what a write of zero length
1051          * should do, but I might as well be consistent with Version 3,
1052          * which is to return ok so long as there are no permission problems.
1053          */
1054         if (len > 0) {
1055             zeroing = 1;
1056             mp1 = info.mrep;
1057             while (mp1) {
1058                 if (mp1 == info.md) {
1059                         zeroing = 0;
1060                         adjust = info.dpos - mtod(mp1, caddr_t);
1061                         mp1->m_len -= adjust;
1062                         if (mp1->m_len > 0 && adjust > 0)
1063                                 mp1->m_data += adjust;
1064                 }
1065                 if (zeroing)
1066                         mp1->m_len = 0;
1067                 else if (mp1->m_len > 0) {
1068                         i += mp1->m_len;
1069                         if (i > len) {
1070                                 mp1->m_len -= (i - len);
1071                                 zeroing = 1;
1072                         }
1073                         if (mp1->m_len > 0)
1074                                 cnt++;
1075                 }
1076                 mp1 = mp1->m_next;
1077             }
1078         }
1079         if (len > NFS_MAXDATA || len < 0 || i < len) {
1080                 error = EIO;
1081                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1082                                       2 * NFSX_UNSIGNED, &error));
1083                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1084                                  aftat_ret, vap);
1085                 error = 0;
1086                 goto nfsmout;
1087         }
1088         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1089                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1090         if (error) {
1091                 vp = NULL;
1092                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1093                                       2 * NFSX_UNSIGNED, &error));
1094                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1095                                  aftat_ret, vap);
1096                 error = 0;
1097                 goto nfsmout;
1098         }
1099         if (info.v3)
1100                 forat_ret = VOP_GETATTR(vp, &forat);
1101         if (vp->v_type != VREG) {
1102                 if (info.v3)
1103                         error = EINVAL;
1104                 else
1105                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1106         }
1107         if (!error) {
1108                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1109         }
1110         if (error) {
1111                 vput(vp);
1112                 vp = NULL;
1113                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1114                                       NFSX_WCCDATA(info.v3), &error));
1115                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1116                                  aftat_ret, vap);
1117                 error = 0;
1118                 goto nfsmout;
1119         }
1120
1121         if (len > 0) {
1122             ivp = kmalloc(cnt * sizeof(struct iovec), M_TEMP, M_WAITOK);
1123             uiop->uio_iov = iv = ivp;
1124             uiop->uio_iovcnt = cnt;
1125             mp1 = info.mrep;
1126             while (mp1) {
1127                 if (mp1->m_len > 0) {
1128                         ivp->iov_base = mtod(mp1, caddr_t);
1129                         ivp->iov_len = mp1->m_len;
1130                         ivp++;
1131                 }
1132                 mp1 = mp1->m_next;
1133             }
1134
1135             /*
1136              * XXX
1137              * The IO_METASYNC flag indicates that all metadata (and not just
1138              * enough to ensure data integrity) mus be written to stable storage
1139              * synchronously.
1140              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1141              */
1142             if (stable == NFSV3WRITE_UNSTABLE)
1143                 ioflags = IO_NODELOCKED;
1144             else if (stable == NFSV3WRITE_DATASYNC)
1145                 ioflags = (IO_SYNC | IO_NODELOCKED);
1146             else
1147                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1148             uiop->uio_resid = len;
1149             uiop->uio_rw = UIO_WRITE;
1150             uiop->uio_segflg = UIO_SYSSPACE;
1151             uiop->uio_td = NULL;
1152             uiop->uio_offset = off;
1153             nh = nfsrv_sequential_heuristic(uiop, vp, 1);
1154             ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1155             error = VOP_WRITE(vp, uiop, ioflags, cred);
1156             if (error == 0)
1157                 nh->nh_nextoff = uiop->uio_offset;
1158             nfsstats.srvvop_writes++;
1159             kfree((caddr_t)iv, M_TEMP);
1160         }
1161         aftat_ret = VOP_GETATTR(vp, vap);
1162         vput(vp);
1163         vp = NULL;
1164         if (!error)
1165                 error = aftat_ret;
1166         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1167                               NFSX_PREOPATTR(info.v3) +
1168                               NFSX_POSTOPORFATTR(info.v3) +
1169                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1170                               &error));
1171         if (info.v3) {
1172                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1173                                  aftat_ret, vap);
1174                 if (error) {
1175                         error = 0;
1176                         goto nfsmout;
1177                 }
1178                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1179                 *tl++ = txdr_unsigned(retlen);
1180                 /*
1181                  * If nfs_async is set, then pretend the write was FILESYNC.
1182                  */
1183                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1184                         *tl++ = txdr_unsigned(stable);
1185                 else
1186                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1187                 /*
1188                  * Actually, there is no need to txdr these fields,
1189                  * but it may make the values more human readable,
1190                  * for debugging purposes.
1191                  */
1192                 if (nfsver.tv_sec == 0)
1193                         nfsver = boottime;
1194                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1195                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1196         } else {
1197                 fp = nfsm_build(&info, NFSX_V2FATTR);
1198                 nfsm_srvfattr(nfsd, vap, fp);
1199         }
1200 nfsmout:
1201         *mrq = info.mreq;
1202         if (vp)
1203                 vput(vp);
1204         return(error);
1205 }
1206
1207 /*
1208  * NFS write service with write gathering support. Called when
1209  * nfsrvw_procrastinate > 0.
1210  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1211  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
1212  * Jan. 1994.
1213  */
1214 int
1215 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1216                   struct thread *td, struct mbuf **mrq)
1217 {
1218         struct iovec *ivp;
1219         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1220         struct nfs_fattr *fp;
1221         int i;
1222         struct iovec *iov;
1223         struct nfsrvw_delayhash *wpp;
1224         struct ucred *cred;
1225         struct vattr va, forat;
1226         u_int32_t *tl;
1227         int error = 0, rdonly, len, forat_ret = 1;
1228         int ioflags, aftat_ret = 1, adjust, zeroing;
1229         struct mbuf *mp1;
1230         struct vnode *vp = NULL;
1231         struct mount *mp = NULL;
1232         struct uio io, *uiop = &io;
1233         u_quad_t cur_usec;
1234         struct nfsm_info info;
1235
1236         info.mreq = NULL;
1237
1238         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1239 #ifndef nolint
1240         i = 0;
1241         len = 0;
1242 #endif
1243         if (*ndp) {
1244             nfsd = *ndp;
1245             *ndp = NULL;
1246             info.mrep = nfsd->nd_mrep;
1247             info.mreq = NULL;
1248             info.md = nfsd->nd_md;
1249             info.dpos = nfsd->nd_dpos;
1250             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1251             cred = &nfsd->nd_cr;
1252             LIST_INIT(&nfsd->nd_coalesce);
1253             nfsd->nd_mreq = NULL;
1254             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1255             cur_usec = nfs_curusec();
1256             nfsd->nd_time = cur_usec +
1257                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1258     
1259             /*
1260              * Now, get the write header..
1261              */
1262             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1263             if (info.v3) {
1264                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1265                 nfsd->nd_off = fxdr_hyper(tl);
1266                 tl += 3;
1267                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1268             } else {
1269                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1270                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1271                 tl += 2;
1272                 if (nfs_async)
1273                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1274             }
1275             len = fxdr_unsigned(int32_t, *tl);
1276             nfsd->nd_len = len;
1277             nfsd->nd_eoff = nfsd->nd_off + len;
1278     
1279             /*
1280              * Trim the header out of the mbuf list and trim off any trailing
1281              * junk so that the mbuf list has only the write data.
1282              */
1283             zeroing = 1;
1284             i = 0;
1285             mp1 = info.mrep;
1286             while (mp1) {
1287                 if (mp1 == info.md) {
1288                     zeroing = 0;
1289                     adjust = info.dpos - mtod(mp1, caddr_t);
1290                     mp1->m_len -= adjust;
1291                     if (mp1->m_len > 0 && adjust > 0)
1292                         mp1->m_data += adjust;
1293                 }
1294                 if (zeroing)
1295                     mp1->m_len = 0;
1296                 else {
1297                     i += mp1->m_len;
1298                     if (i > len) {
1299                         mp1->m_len -= (i - len);
1300                         zeroing = 1;
1301                     }
1302                 }
1303                 mp1 = mp1->m_next;
1304             }
1305             if (len > NFS_MAXDATA || len < 0  || i < len) {
1306 nfsmout:
1307                 m_freem(info.mrep);
1308                 info.mrep = NULL;
1309                 error = EIO;
1310                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1311                 if (info.v3) {
1312                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1313                                      aftat_ret, &va);
1314                 }
1315                 nfsd->nd_mreq = info.mreq;
1316                 nfsd->nd_mrep = NULL;
1317                 nfsd->nd_time = 0;
1318             }
1319     
1320             /*
1321              * Add this entry to the hash and time queues.
1322              */
1323             owp = NULL;
1324             wp = slp->ns_tq.lh_first;
1325             while (wp && wp->nd_time < nfsd->nd_time) {
1326                 owp = wp;
1327                 wp = wp->nd_tq.le_next;
1328             }
1329             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1330             if (owp) {
1331                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1332             } else {
1333                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1334             }
1335             if (nfsd->nd_mrep) {
1336                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1337                 owp = NULL;
1338                 wp = wpp->lh_first;
1339                 while (wp &&
1340                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1341                     owp = wp;
1342                     wp = wp->nd_hash.le_next;
1343                 }
1344                 while (wp && wp->nd_off < nfsd->nd_off &&
1345                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1346                     owp = wp;
1347                     wp = wp->nd_hash.le_next;
1348                 }
1349                 if (owp) {
1350                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1351
1352                     /*
1353                      * Search the hash list for overlapping entries and
1354                      * coalesce.
1355                      */
1356                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1357                         wp = nfsd->nd_hash.le_next;
1358                         if (NFSW_SAMECRED(owp, nfsd))
1359                             nfsrvw_coalesce(owp, nfsd);
1360                     }
1361                 } else {
1362                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1363                 }
1364             }
1365         }
1366     
1367         /*
1368          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1369          * and generate the associated reply mbuf list(s).
1370          */
1371 loop1:
1372         cur_usec = nfs_curusec();
1373         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1374                 owp = nfsd->nd_tq.le_next;
1375                 if (nfsd->nd_time > cur_usec)
1376                     break;
1377                 if (nfsd->nd_mreq)
1378                     continue;
1379                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1380                 LIST_REMOVE(nfsd, nd_tq);
1381                 LIST_REMOVE(nfsd, nd_hash);
1382                 info.mrep = nfsd->nd_mrep;
1383                 info.mreq = NULL;
1384                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1385                 nfsd->nd_mrep = NULL;
1386                 cred = &nfsd->nd_cr;
1387                 forat_ret = aftat_ret = 1;
1388                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1389                                      nfsd->nd_nam, &rdonly,
1390                                      (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1391                 if (!error) {
1392                     if (info.v3)
1393                         forat_ret = VOP_GETATTR(vp, &forat);
1394                     if (vp->v_type != VREG) {
1395                         if (info.v3)
1396                             error = EINVAL;
1397                         else
1398                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1399                     }
1400                 } else {
1401                     vp = NULL;
1402                 }
1403                 if (!error) {
1404                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1405                 }
1406     
1407                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1408                     ioflags = IO_NODELOCKED;
1409                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1410                     ioflags = (IO_SYNC | IO_NODELOCKED);
1411                 else
1412                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1413                 uiop->uio_rw = UIO_WRITE;
1414                 uiop->uio_segflg = UIO_SYSSPACE;
1415                 uiop->uio_td = NULL;
1416                 uiop->uio_offset = nfsd->nd_off;
1417                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1418                 if (uiop->uio_resid > 0) {
1419                     mp1 = info.mrep;
1420                     i = 0;
1421                     while (mp1) {
1422                         if (mp1->m_len > 0)
1423                             i++;
1424                         mp1 = mp1->m_next;
1425                     }
1426                     uiop->uio_iovcnt = i;
1427                     iov = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
1428                     uiop->uio_iov = ivp = iov;
1429                     mp1 = info.mrep;
1430                     while (mp1) {
1431                         if (mp1->m_len > 0) {
1432                             ivp->iov_base = mtod(mp1, caddr_t);
1433                             ivp->iov_len = mp1->m_len;
1434                             ivp++;
1435                         }
1436                         mp1 = mp1->m_next;
1437                     }
1438                     if (!error) {
1439                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1440                         nfsstats.srvvop_writes++;
1441                     }
1442                     kfree((caddr_t)iov, M_TEMP);
1443                 }
1444                 m_freem(info.mrep);
1445                 info.mrep = NULL;
1446                 if (vp) {
1447                     aftat_ret = VOP_GETATTR(vp, &va);
1448                     vput(vp);
1449                     vp = NULL;
1450                 }
1451
1452                 /*
1453                  * Loop around generating replies for all write rpcs that have
1454                  * now been completed.
1455                  */
1456                 swp = nfsd;
1457                 do {
1458                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1459                     if (error) {
1460                         nfsm_writereply(&info, nfsd, slp, error,
1461                                         NFSX_WCCDATA(info.v3));
1462                         if (info.v3) {
1463                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1464                                              aftat_ret, &va);
1465                         }
1466                     } else {
1467                         nfsm_writereply(&info, nfsd, slp, error,
1468                                         NFSX_PREOPATTR(info.v3) +
1469                                         NFSX_POSTOPORFATTR(info.v3) +
1470                                         2 * NFSX_UNSIGNED +
1471                                         NFSX_WRITEVERF(info.v3));
1472                         if (info.v3) {
1473                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1474                                              aftat_ret, &va);
1475                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1476                             *tl++ = txdr_unsigned(nfsd->nd_len);
1477                             *tl++ = txdr_unsigned(swp->nd_stable);
1478                             /*
1479                              * Actually, there is no need to txdr these fields,
1480                              * but it may make the values more human readable,
1481                              * for debugging purposes.
1482                              */
1483                             if (nfsver.tv_sec == 0)
1484                                     nfsver = boottime;
1485                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1486                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1487                         } else {
1488                             fp = nfsm_build(&info, NFSX_V2FATTR);
1489                             nfsm_srvfattr(nfsd, &va, fp);
1490                         }
1491                     }
1492                     nfsd->nd_mreq = info.mreq;
1493                     if (nfsd->nd_mrep)
1494                         panic("nfsrv_write: nd_mrep not free");
1495
1496                     /*
1497                      * Done. Put it at the head of the timer queue so that
1498                      * the final phase can return the reply.
1499                      */
1500                     if (nfsd != swp) {
1501                         nfsd->nd_time = 0;
1502                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1503                     }
1504                     nfsd = swp->nd_coalesce.lh_first;
1505                     if (nfsd) {
1506                         LIST_REMOVE(nfsd, nd_tq);
1507                     }
1508                 } while (nfsd);
1509                 swp->nd_time = 0;
1510                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1511                 goto loop1;
1512         }
1513
1514         /*
1515          * Search for a reply to return.
1516          */
1517         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1518                 if (nfsd->nd_mreq) {
1519                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1520                     LIST_REMOVE(nfsd, nd_tq);
1521                     break;
1522                 }
1523         }
1524         if (nfsd) {
1525                 *ndp = nfsd;
1526                 *mrq = nfsd->nd_mreq;
1527         } else {
1528                 *ndp = NULL;
1529                 *mrq = NULL;
1530         }
1531         return (0);
1532 }
1533
1534 /*
1535  * Coalesce the write request nfsd into owp. To do this we must:
1536  * - remove nfsd from the queues
1537  * - merge nfsd->nd_mrep into owp->nd_mrep
1538  * - update the nd_eoff and nd_stable for owp
1539  * - put nfsd on owp's nd_coalesce list
1540  * NB: Must be called at splsoftclock().
1541  */
1542 static void
1543 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1544 {
1545         int overlap;
1546         struct mbuf *mp1;
1547         struct nfsrv_descript *p;
1548
1549         NFS_DPF(WG, ("C%03x-%03x",
1550                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1551         LIST_REMOVE(nfsd, nd_hash);
1552         LIST_REMOVE(nfsd, nd_tq);
1553         if (owp->nd_eoff < nfsd->nd_eoff) {
1554             overlap = owp->nd_eoff - nfsd->nd_off;
1555             if (overlap < 0)
1556                 panic("nfsrv_coalesce: bad off");
1557             if (overlap > 0)
1558                 m_adj(nfsd->nd_mrep, overlap);
1559             mp1 = owp->nd_mrep;
1560             while (mp1->m_next)
1561                 mp1 = mp1->m_next;
1562             mp1->m_next = nfsd->nd_mrep;
1563             owp->nd_eoff = nfsd->nd_eoff;
1564         } else
1565             m_freem(nfsd->nd_mrep);
1566         nfsd->nd_mrep = NULL;
1567         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1568             owp->nd_stable = NFSV3WRITE_FILESYNC;
1569         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1570             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1571             owp->nd_stable = NFSV3WRITE_DATASYNC;
1572         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1573
1574         /*
1575          * If nfsd had anything else coalesced into it, transfer them
1576          * to owp, otherwise their replies will never get sent.
1577          */
1578         for (p = nfsd->nd_coalesce.lh_first; p;
1579              p = nfsd->nd_coalesce.lh_first) {
1580             LIST_REMOVE(p, nd_tq);
1581             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1582         }
1583 }
1584
1585 /*
1586  * nfs create service
1587  * now does a truncate to 0 length via. setattr if it already exists
1588  */
1589 int
1590 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1591              struct thread *td, struct mbuf **mrq)
1592 {
1593         struct sockaddr *nam = nfsd->nd_nam;
1594         struct ucred *cred = &nfsd->nd_cr;
1595         struct nfs_fattr *fp;
1596         struct vattr va, dirfor, diraft;
1597         struct vattr *vap = &va;
1598         struct nfsv2_sattr *sp;
1599         u_int32_t *tl;
1600         struct nlookupdata nd;
1601         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1602         udev_t rdev = NOUDEV;
1603         caddr_t cp;
1604         int how, exclusive_flag = 0;
1605         struct vnode *dirp;
1606         struct vnode *dvp;
1607         struct vnode *vp;
1608         struct mount *mp;
1609         nfsfh_t nfh;
1610         fhandle_t *fhp;
1611         u_quad_t tempsize;
1612         u_char cverf[NFSX_V3CREATEVERF];
1613         struct nfsm_info info;
1614
1615         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1616         nlookup_zero(&nd);
1617         dirp = NULL;
1618         dvp = NULL;
1619         vp = NULL;
1620
1621         info.mrep = nfsd->nd_mrep;
1622         info.mreq = NULL;
1623         info.md = nfsd->nd_md;
1624         info.dpos = nfsd->nd_dpos;
1625         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1626
1627         fhp = &nfh.fh_generic;
1628         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1629         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1630
1631         /*
1632          * Call namei and do initial cleanup to get a few things
1633          * out of the way.  If we get an initial error we cleanup
1634          * and return here to avoid special-casing the invalid nd
1635          * structure through the rest of the case.  dirp may be
1636          * set even if an error occurs, but the nd structure will not
1637          * be valid at all if an error occurs so we have to invalidate it
1638          * prior to calling nfsm_reply ( which might goto nfsmout ).
1639          */
1640         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1641                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1642                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1643         mp = vfs_getvfs(&fhp->fh_fsid);
1644
1645         if (dirp) {
1646                 if (info.v3) {
1647                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1648                 } else {
1649                         vrele(dirp);
1650                         dirp = NULL;
1651                 }
1652         }
1653         if (error) {
1654                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1655                                       NFSX_WCCDATA(info.v3), &error));
1656                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1657                                  diraft_ret, &diraft);
1658                 error = 0;
1659                 goto nfsmout;
1660         }
1661
1662         /*
1663          * No error.  Continue.  State:
1664          *
1665          *      dirp            may be valid
1666          *      vp              may be valid or NULL if the target does not
1667          *                      exist.
1668          *      dvp             is valid
1669          *
1670          * The error state is set through the code and we may also do some
1671          * opportunistic releasing of vnodes to avoid holding locks through
1672          * NFS I/O.  The cleanup at the end is a catch-all
1673          */
1674
1675         VATTR_NULL(vap);
1676         if (info.v3) {
1677                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1678                 how = fxdr_unsigned(int, *tl);
1679                 switch (how) {
1680                 case NFSV3CREATE_GUARDED:
1681                         if (vp) {
1682                                 error = EEXIST;
1683                                 break;
1684                         }
1685                         /* fall through */
1686                 case NFSV3CREATE_UNCHECKED:
1687                         ERROROUT(nfsm_srvsattr(&info, vap));
1688                         break;
1689                 case NFSV3CREATE_EXCLUSIVE:
1690                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1691                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
1692                         exclusive_flag = 1;
1693                         break;
1694                 };
1695                 vap->va_type = VREG;
1696         } else {
1697                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1698                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1699                 if (vap->va_type == VNON)
1700                         vap->va_type = VREG;
1701                 vap->va_mode = nfstov_mode(sp->sa_mode);
1702                 switch (vap->va_type) {
1703                 case VREG:
1704                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1705                         if (tsize != -1)
1706                                 vap->va_size = (u_quad_t)tsize;
1707                         break;
1708                 case VCHR:
1709                 case VBLK:
1710                 case VFIFO:
1711                         rdev = fxdr_unsigned(long, sp->sa_size);
1712                         break;
1713                 default:
1714                         break;
1715                 };
1716         }
1717
1718         /*
1719          * Iff doesn't exist, create it
1720          * otherwise just truncate to 0 length
1721          *   should I set the mode too ?
1722          *
1723          * The only possible error we can have at this point is EEXIST. 
1724          * nd.ni_vp will also be non-NULL in that case.
1725          */
1726         if (vp == NULL) {
1727                 if (vap->va_mode == (mode_t)VNOVAL)
1728                         vap->va_mode = 0;
1729                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1730                         vn_unlock(dvp);
1731                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1732                                             nd.nl_cred, vap);
1733                         vrele(dvp);
1734                         dvp = NULL;
1735                         if (error == 0) {
1736                                 if (exclusive_flag) {
1737                                         exclusive_flag = 0;
1738                                         VATTR_NULL(vap);
1739                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1740                                                 NFSX_V3CREATEVERF);
1741                                         error = VOP_SETATTR(vp, vap, cred);
1742                                 }
1743                         }
1744                 } else if (
1745                         vap->va_type == VCHR || 
1746                         vap->va_type == VBLK ||
1747                         vap->va_type == VFIFO
1748                 ) {
1749                         /*
1750                          * Handle SysV FIFO node special cases.  All other
1751                          * devices require super user to access.
1752                          */
1753                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1754                                 vap->va_type = VFIFO;
1755                         if (vap->va_type != VFIFO &&
1756                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1757                                 goto nfsmreply0;
1758                         }
1759                         vap->va_rmajor = umajor(rdev);
1760                         vap->va_rminor = uminor(rdev);
1761
1762                         vn_unlock(dvp);
1763                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1764                         vrele(dvp);
1765                         dvp = NULL;
1766                         if (error)
1767                                 goto nfsmreply0;
1768 #if 0
1769                         /*
1770                          * XXX what is this junk supposed to do ?
1771                          */
1772
1773                         vput(vp);
1774                         vp = NULL;
1775
1776                         /*
1777                          * release dvp prior to lookup
1778                          */
1779                         vput(dvp);
1780                         dvp = NULL;
1781
1782                         /*
1783                          * Setup for lookup. 
1784                          *
1785                          * Even though LOCKPARENT was cleared, ni_dvp may
1786                          * be garbage. 
1787                          */
1788                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1789                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1790                         nd.ni_cnd.cn_td = td;
1791                         nd.ni_cnd.cn_cred = cred;
1792
1793                         error = lookup(&nd);
1794                         nd.ni_dvp = NULL;
1795
1796                         if (error != 0) {
1797                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1798                                                       0, &error));
1799                                 /* fall through on certain errors */
1800                         }
1801                         nfsrv_object_create(nd.ni_vp);
1802                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1803                                 error = EINVAL;
1804                                 goto nfsmreply0;
1805                         }
1806 #endif
1807                 } else {
1808                         error = ENXIO;
1809                 }
1810         } else {
1811                 if (vap->va_size != -1) {
1812                         error = nfsrv_access(mp, vp, VWRITE, cred,
1813                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1814                         if (!error) {
1815                                 tempsize = vap->va_size;
1816                                 VATTR_NULL(vap);
1817                                 vap->va_size = tempsize;
1818                                 error = VOP_SETATTR(vp, vap, cred);
1819                         }
1820                 }
1821         }
1822
1823         if (!error) {
1824                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1825                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1826                 if (!error)
1827                         error = VOP_GETATTR(vp, vap);
1828         }
1829         if (info.v3) {
1830                 if (exclusive_flag && !error &&
1831                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1832                         error = EEXIST;
1833                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1834                 vrele(dirp);
1835                 dirp = NULL;
1836         }
1837         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1838                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1839                               NFSX_WCCDATA(info.v3),
1840                               &error));
1841         if (info.v3) {
1842                 if (!error) {
1843                         nfsm_srvpostop_fh(&info, fhp);
1844                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1845                 }
1846                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1847                                  diraft_ret, &diraft);
1848                 error = 0;
1849         } else {
1850                 nfsm_srvfhtom(&info, fhp);
1851                 fp = nfsm_build(&info, NFSX_V2FATTR);
1852                 nfsm_srvfattr(nfsd, vap, fp);
1853         }
1854         goto nfsmout;
1855
1856 nfsmreply0:
1857         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1858         error = 0;
1859         /* fall through */
1860
1861 nfsmout:
1862         *mrq = info.mreq;
1863         if (dirp)
1864                 vrele(dirp);
1865         nlookup_done(&nd);
1866         if (dvp) {
1867                 if (dvp == vp)
1868                         vrele(dvp);
1869                 else
1870                         vput(dvp);
1871         }
1872         if (vp)
1873                 vput(vp);
1874         return (error);
1875 }
1876
1877 /*
1878  * nfs v3 mknod service
1879  */
1880 int
1881 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1882             struct thread *td, struct mbuf **mrq)
1883 {
1884         struct sockaddr *nam = nfsd->nd_nam;
1885         struct ucred *cred = &nfsd->nd_cr;
1886         struct vattr va, dirfor, diraft;
1887         struct vattr *vap = &va;
1888         u_int32_t *tl;
1889         struct nlookupdata nd;
1890         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1891         enum vtype vtyp;
1892         struct vnode *dirp;
1893         struct vnode *dvp;
1894         struct vnode *vp;
1895         nfsfh_t nfh;
1896         fhandle_t *fhp;
1897         struct nfsm_info info;
1898
1899         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1900         nlookup_zero(&nd);
1901         dirp = NULL;
1902         dvp = NULL;
1903         vp = NULL;
1904
1905         info.mrep = nfsd->nd_mrep;
1906         info.mreq = NULL;
1907         info.md = nfsd->nd_md;
1908         info.dpos = nfsd->nd_dpos;
1909
1910         fhp = &nfh.fh_generic;
1911         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1912         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1913
1914         /*
1915          * Handle nfs_namei() call.  If an error occurs, the nd structure
1916          * is not valid.  However, nfsm_*() routines may still jump to
1917          * nfsmout.
1918          */
1919
1920         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1921                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1922                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1923         if (dirp)
1924                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1925         if (error) {
1926                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1927                            NFSX_WCCDATA(1), &error));
1928                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1929                                  diraft_ret, &diraft);
1930                 error = 0;
1931                 goto nfsmout;
1932         }
1933         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1934         vtyp = nfsv3tov_type(*tl);
1935         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1936                 error = NFSERR_BADTYPE;
1937                 goto out;
1938         }
1939         VATTR_NULL(vap);
1940         ERROROUT(nfsm_srvsattr(&info, vap));
1941         if (vtyp == VCHR || vtyp == VBLK) {
1942                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1943                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1944                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1945         }
1946
1947         /*
1948          * Iff doesn't exist, create it.
1949          */
1950         if (vp) {
1951                 error = EEXIST;
1952                 goto out;
1953         }
1954         vap->va_type = vtyp;
1955         if (vap->va_mode == (mode_t)VNOVAL)
1956                 vap->va_mode = 0;
1957         if (vtyp == VSOCK) {
1958                 vn_unlock(dvp);
1959                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1960                 vrele(dvp);
1961                 dvp = NULL;
1962         } else {
1963                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1964                         goto out;
1965
1966                 vn_unlock(dvp);
1967                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1968                 vrele(dvp);
1969                 dvp = NULL;
1970                 if (error)
1971                         goto out;
1972         }
1973
1974         /*
1975          * send response, cleanup, return.
1976          */
1977 out:
1978         nlookup_done(&nd);
1979         if (dvp) {
1980                 if (dvp == vp)
1981                         vrele(dvp);
1982                 else
1983                         vput(dvp);
1984                 dvp = NULL;
1985         }
1986         if (!error) {
1987                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1988                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1989                 if (!error)
1990                         error = VOP_GETATTR(vp, vap);
1991         }
1992         if (vp) {
1993                 vput(vp);
1994                 vp = NULL;
1995         }
1996         diraft_ret = VOP_GETATTR(dirp, &diraft);
1997         if (dirp) {
1998                 vrele(dirp);
1999                 dirp = NULL;
2000         }
2001         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2002                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
2003                               NFSX_WCCDATA(1), &error));
2004         if (!error) {
2005                 nfsm_srvpostop_fh(&info, fhp);
2006                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2007         }
2008         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2009                          diraft_ret, &diraft);
2010         *mrq = info.mreq;
2011         return (0);
2012 nfsmout:
2013         *mrq = info.mreq;
2014         if (dirp)
2015                 vrele(dirp);
2016         nlookup_done(&nd);
2017         if (dvp) {
2018                 if (dvp == vp)
2019                         vrele(dvp);
2020                 else
2021                         vput(dvp);
2022         }
2023         if (vp)
2024                 vput(vp);
2025         return (error);
2026 }
2027
2028 /*
2029  * nfs remove service
2030  */
2031 int
2032 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2033              struct thread *td, struct mbuf **mrq)
2034 {
2035         struct sockaddr *nam = nfsd->nd_nam;
2036         struct ucred *cred = &nfsd->nd_cr;
2037         struct nlookupdata nd;
2038         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2039         struct vnode *dirp;
2040         struct vnode *dvp;
2041         struct vnode *vp;
2042         struct vattr dirfor, diraft;
2043         nfsfh_t nfh;
2044         fhandle_t *fhp;
2045         struct nfsm_info info;
2046
2047         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2048         nlookup_zero(&nd);
2049         dirp = NULL;
2050         dvp = NULL;
2051         vp = NULL;
2052
2053         info.mrep = nfsd->nd_mrep;
2054         info.mreq = NULL;
2055         info.md = nfsd->nd_md;
2056         info.dpos = nfsd->nd_dpos;
2057         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2058
2059         fhp = &nfh.fh_generic;
2060         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2061         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2062
2063         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2064                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2065                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2066         if (dirp) {
2067                 if (info.v3)
2068                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2069         }
2070         if (error == 0) {
2071                 if (vp->v_type == VDIR) {
2072                         error = EPERM;          /* POSIX */
2073                         goto out;
2074                 }
2075                 /*
2076                  * The root of a mounted filesystem cannot be deleted.
2077                  */
2078                 if (vp->v_flag & VROOT) {
2079                         error = EBUSY;
2080                         goto out;
2081                 }
2082 out:
2083                 if (!error) {
2084                         if (dvp != vp)
2085                                 vn_unlock(dvp);
2086                         if (vp) {
2087                                 vput(vp);
2088                                 vp = NULL;
2089                         }
2090                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2091                         vrele(dvp);
2092                         dvp = NULL;
2093                 }
2094         }
2095         if (dirp && info.v3)
2096                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2097         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2098         if (info.v3) {
2099                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2100                                  diraft_ret, &diraft);
2101                 error = 0;
2102         }
2103 nfsmout:
2104         *mrq = info.mreq;
2105         nlookup_done(&nd);
2106         if (dirp)
2107                 vrele(dirp);
2108         if (dvp) {
2109                 if (dvp == vp)
2110                         vrele(dvp);
2111                 else
2112                         vput(dvp);
2113         }
2114         if (vp)
2115                 vput(vp);
2116         return(error);
2117 }
2118
2119 /*
2120  * nfs rename service
2121  */
2122 int
2123 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2124              struct thread *td, struct mbuf **mrq)
2125 {
2126         struct sockaddr *nam = nfsd->nd_nam;
2127         struct ucred *cred = &nfsd->nd_cr;
2128         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2129         int tdirfor_ret = 1, tdiraft_ret = 1;
2130         struct nlookupdata fromnd, tond;
2131         struct vnode *fvp, *fdirp, *fdvp;
2132         struct vnode *tvp, *tdirp, *tdvp;
2133         struct namecache *ncp;
2134         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2135         nfsfh_t fnfh, tnfh;
2136         fhandle_t *ffhp, *tfhp;
2137         uid_t saved_uid;
2138         struct nfsm_info info;
2139
2140         info.mrep = nfsd->nd_mrep;
2141         info.mreq = NULL;
2142         info.md = nfsd->nd_md;
2143         info.dpos = nfsd->nd_dpos;
2144         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2145
2146         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2147 #ifndef nolint
2148         fvp = NULL;
2149 #endif
2150         ffhp = &fnfh.fh_generic;
2151         tfhp = &tnfh.fh_generic;
2152
2153         /*
2154          * Clear fields incase goto nfsmout occurs from macro.
2155          */
2156
2157         nlookup_zero(&fromnd);
2158         nlookup_zero(&tond);
2159         fdirp = NULL;
2160         tdirp = NULL;
2161
2162         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2163         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2164
2165         /*
2166          * Remember our original uid so that we can reset cr_uid before
2167          * the second nfs_namei() call, in case it is remapped.
2168          */
2169         saved_uid = cred->cr_uid;
2170         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2171                           NULL, NULL,
2172                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2173                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2174         if (fdirp) {
2175                 if (info.v3)
2176                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2177         }
2178         if (error) {
2179                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2180                                       2 * NFSX_WCCDATA(info.v3), &error));
2181                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2182                                  fdiraft_ret, &fdiraft);
2183                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2184                                  tdiraft_ret, &tdiraft);
2185                 error = 0;
2186                 goto nfsmout;
2187         }
2188
2189         /*
2190          * We have to unlock the from ncp before we can safely lookup
2191          * the target ncp.
2192          */
2193         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2194         cache_unlock(&fromnd.nl_nch);
2195         fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2196         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2197         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2198         cred->cr_uid = saved_uid;
2199
2200         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2201                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2202                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2203         if (tdirp) {
2204                 if (info.v3)
2205                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2206         }
2207         if (error)
2208                 goto out1;
2209
2210         /*
2211          * relock the source
2212          */
2213         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2214                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2215         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2216                 cache_lock(&fromnd.nl_nch);
2217                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2218         } else {
2219                 cache_unlock(&tond.nl_nch);
2220                 cache_lock(&fromnd.nl_nch);
2221                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2222                 cache_lock(&tond.nl_nch);
2223                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2224         }
2225         fromnd.nl_flags |= NLC_NCPISLOCKED;
2226
2227         fvp = fromnd.nl_nch.ncp->nc_vp;
2228         tvp = tond.nl_nch.ncp->nc_vp;
2229
2230         /*
2231          * Set fdvp and tdvp.  We haven't done all the topology checks
2232          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2233          * point).  If we get through the checks these will be guarenteed
2234          * to be non-NULL.
2235          *
2236          * Holding the children ncp's should be sufficient to prevent
2237          * fdvp and tdvp ripouts.
2238          */
2239         if (fromnd.nl_nch.ncp->nc_parent)
2240                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2241         else
2242                 fdvp = NULL;
2243         if (tond.nl_nch.ncp->nc_parent)
2244                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2245         else
2246                 tdvp = NULL;
2247
2248         if (tvp != NULL) {
2249                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2250                         if (info.v3)
2251                                 error = EEXIST;
2252                         else
2253                                 error = EISDIR;
2254                         goto out;
2255                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2256                         if (info.v3)
2257                                 error = EEXIST;
2258                         else
2259                                 error = ENOTDIR;
2260                         goto out;
2261                 }
2262                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2263                         if (info.v3)
2264                                 error = EXDEV;
2265                         else
2266                                 error = ENOTEMPTY;
2267                         goto out;
2268                 }
2269         }
2270         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2271                 if (info.v3)
2272                         error = EXDEV;
2273                 else
2274                         error = ENOTEMPTY;
2275                 goto out;
2276         }
2277         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2278                 if (info.v3)
2279                         error = EXDEV;
2280                 else
2281                         error = ENOTEMPTY;
2282                 goto out;
2283         }
2284         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2285                 if (info.v3)
2286                         error = EINVAL;
2287                 else
2288                         error = ENOTEMPTY;
2289         }
2290
2291         /*
2292          * You cannot rename a source into itself or a subdirectory of itself.
2293          * We check this by travsering the target directory upwards looking
2294          * for a match against the source.
2295          */
2296         if (error == 0) {
2297                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2298                         if (fromnd.nl_nch.ncp == ncp) {
2299                                 error = EINVAL;
2300                                 break;
2301                         }
2302                 }
2303         }
2304
2305         /*
2306          * If source is the same as the destination (that is the
2307          * same vnode with the same name in the same directory),
2308          * then there is nothing to do.
2309          */
2310         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2311                 error = -1;
2312 out:
2313         if (!error) {
2314                 /*
2315                  * The VOP_NRENAME function releases all vnode references &
2316                  * locks prior to returning so we need to clear the pointers
2317                  * to bypass cleanup code later on.
2318                  */
2319                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2320                                     fdvp, tdvp, tond.nl_cred);
2321         } else {
2322                 if (error == -1)
2323                         error = 0;
2324         }
2325         /* fall through */
2326
2327 out1:
2328         if (fdirp)
2329                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2330         if (tdirp)
2331                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2332         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2333                               2 * NFSX_WCCDATA(info.v3), &error));
2334         if (info.v3) {
2335                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2336                                  fdiraft_ret, &fdiraft);
2337                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2338                                  tdiraft_ret, &tdiraft);
2339         }
2340         error = 0;
2341         /* fall through */
2342
2343 nfsmout:
2344         *mrq = info.mreq;
2345         if (tdirp)
2346                 vrele(tdirp);
2347         nlookup_done(&tond);
2348         if (fdirp)
2349                 vrele(fdirp);
2350         nlookup_done(&fromnd);
2351         return (error);
2352 }
2353
2354 /*
2355  * nfs link service
2356  */
2357 int
2358 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2359            struct thread *td, struct mbuf **mrq)
2360 {
2361         struct sockaddr *nam = nfsd->nd_nam;
2362         struct ucred *cred = &nfsd->nd_cr;
2363         struct nlookupdata nd;
2364         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2365         int getret = 1;
2366         struct vnode *dirp;
2367         struct vnode *dvp;
2368         struct vnode *vp;
2369         struct vnode *xp;
2370         struct mount *mp;
2371         struct mount *xmp;
2372         struct vattr dirfor, diraft, at;
2373         nfsfh_t nfh, dnfh;
2374         fhandle_t *fhp, *dfhp;
2375         struct nfsm_info info;
2376
2377         info.mrep = nfsd->nd_mrep;
2378         info.mreq = NULL;
2379         info.md = nfsd->nd_md;
2380         info.dpos = nfsd->nd_dpos;
2381         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2382
2383         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2384         nlookup_zero(&nd);
2385         dirp = dvp = vp = xp = NULL;
2386         mp = xmp = NULL;
2387
2388         fhp = &nfh.fh_generic;
2389         dfhp = &dnfh.fh_generic;
2390         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2391         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2392         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2393
2394         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2395                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2396         if (error) {
2397                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2398                                       NFSX_POSTOPATTR(info.v3) +
2399                                       NFSX_WCCDATA(info.v3),
2400                                       &error));
2401                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2402                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2403                                  diraft_ret, &diraft);
2404                 xp = NULL;
2405                 error = 0;
2406                 goto nfsmout;
2407         }
2408         if (xp->v_type == VDIR) {
2409                 error = EPERM;          /* POSIX */
2410                 goto out1;
2411         }
2412
2413         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2414                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2415                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2416         if (dirp) {
2417                 if (info.v3)
2418                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2419         }
2420         if (error)
2421                 goto out1;
2422
2423         if (vp != NULL) {
2424                 error = EEXIST;
2425                 goto out;
2426         }
2427         if (xp->v_mount != dvp->v_mount)
2428                 error = EXDEV;
2429 out:
2430         if (!error) {
2431                 vn_unlock(dvp);
2432                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2433                 vrele(dvp);
2434                 dvp = NULL;
2435         }
2436         /* fall through */
2437
2438 out1:
2439         if (info.v3)
2440                 getret = VOP_GETATTR(xp, &at);
2441         if (dirp)
2442                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2443         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2444                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2445                               &error));
2446         if (info.v3) {
2447                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2448                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2449                                  diraft_ret, &diraft);
2450                 error = 0;
2451         }
2452         /* fall through */
2453
2454 nfsmout:
2455         *mrq = info.mreq;
2456         nlookup_done(&nd);
2457         if (dirp)
2458                 vrele(dirp);
2459         if (xp)
2460                 vrele(xp);
2461         if (dvp) {
2462                 if (dvp == vp)
2463                         vrele(dvp);
2464                 else
2465                         vput(dvp);
2466         }
2467         if (vp)
2468                 vput(vp);
2469         return(error);
2470 }
2471
2472 /*
2473  * nfs symbolic link service
2474  */
2475 int
2476 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2477               struct thread *td, struct mbuf **mrq)
2478 {
2479         struct sockaddr *nam = nfsd->nd_nam;
2480         struct ucred *cred = &nfsd->nd_cr;
2481         struct vattr va, dirfor, diraft;
2482         struct nlookupdata nd;
2483         struct vattr *vap = &va;
2484         struct nfsv2_sattr *sp;
2485         char *pathcp = NULL;
2486         struct uio io;
2487         struct iovec iv;
2488         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2489         struct vnode *dirp;
2490         struct vnode *vp;
2491         struct vnode *dvp;
2492         nfsfh_t nfh;
2493         fhandle_t *fhp;
2494         struct nfsm_info info;
2495
2496         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2497         nlookup_zero(&nd);
2498         dirp = NULL;
2499         dvp = NULL;
2500         vp = NULL;
2501
2502         info.mrep = nfsd->nd_mrep;
2503         info.mreq =  NULL;
2504         info.md = nfsd->nd_md;
2505         info.dpos = nfsd->nd_dpos;
2506         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2507
2508         fhp = &nfh.fh_generic;
2509         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2510         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2511
2512         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2513                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2514                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2515         if (dirp) {
2516                 if (info.v3)
2517                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2518         }
2519         if (error)
2520                 goto out;
2521
2522         VATTR_NULL(vap);
2523         if (info.v3) {
2524                 ERROROUT(nfsm_srvsattr(&info, vap));
2525         }
2526         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2527         pathcp = kmalloc(len2 + 1, M_TEMP, M_WAITOK);
2528         iv.iov_base = pathcp;
2529         iv.iov_len = len2;
2530         io.uio_resid = len2;
2531         io.uio_offset = 0;
2532         io.uio_iov = &iv;
2533         io.uio_iovcnt = 1;
2534         io.uio_segflg = UIO_SYSSPACE;
2535         io.uio_rw = UIO_READ;
2536         io.uio_td = NULL;
2537         ERROROUT(nfsm_mtouio(&info, &io, len2));
2538         if (info.v3 == 0) {
2539                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2540                 vap->va_mode = nfstov_mode(sp->sa_mode);
2541         }
2542         *(pathcp + len2) = '\0';
2543         if (vp) {
2544                 error = EEXIST;
2545                 goto out;
2546         }
2547
2548         if (vap->va_mode == (mode_t)VNOVAL)
2549                 vap->va_mode = 0;
2550         if (dvp != vp)
2551                 vn_unlock(dvp);
2552         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2553         vrele(dvp);
2554         dvp = NULL;
2555         if (error == 0) {
2556                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2557                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2558                 if (!error)
2559                         error = VOP_GETATTR(vp, vap);
2560         }
2561
2562 out:
2563         if (dvp) {
2564                 if (dvp == vp)
2565                         vrele(dvp);
2566                 else
2567                         vput(dvp);
2568         }
2569         if (vp) {
2570                 vput(vp);
2571                 vp = NULL;
2572         }
2573         if (pathcp) {
2574                 kfree(pathcp, M_TEMP);
2575                 pathcp = NULL;
2576         }
2577         if (dirp) {
2578                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2579                 vrele(dirp);
2580                 dirp = NULL;
2581         }
2582         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2583                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2584                               NFSX_WCCDATA(info.v3),
2585                               &error));
2586         if (info.v3) {
2587                 if (!error) {
2588                         nfsm_srvpostop_fh(&info, fhp);
2589                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2590                 }
2591                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2592                                  diraft_ret, &diraft);
2593         }
2594         error = 0;
2595         /* fall through */
2596
2597 nfsmout:
2598         *mrq = info.mreq;
2599         nlookup_done(&nd);
2600         if (vp)
2601                 vput(vp);
2602         if (dirp)
2603                 vrele(dirp);
2604         if (pathcp)
2605                 kfree(pathcp, M_TEMP);
2606         return (error);
2607 }
2608
2609 /*
2610  * nfs mkdir service
      *
      * Decodes a directory file handle and a name from the request, looks the
      * name up with nfs_namei(NLC_CREATE) and creates the directory with
      * VOP_NMKDIR().  Fails with EEXIST when the lookup already produced a
      * vnode for the name.
      *
      * On success the new directory's file handle and attributes are placed
      * in the reply; NFSv3 replies additionally carry wcc data for the
      * parent directory.  The reply mbuf chain is handed back through *mrq.
2611  */
2612 int
2613 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2614             struct thread *td, struct mbuf **mrq)
2615 {
2616         struct sockaddr *nam = nfsd->nd_nam;
2617         struct ucred *cred = &nfsd->nd_cr;
2618         struct vattr va, dirfor, diraft;
2619         struct vattr *vap = &va;
2620         struct nfs_fattr *fp;
2621         struct nlookupdata nd;
2622         u_int32_t *tl;
2623         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2624         struct vnode *dirp;
2625         struct vnode *dvp;
2626         struct vnode *vp;
2627         nfsfh_t nfh;
2628         fhandle_t *fhp;
2629         struct nfsm_info info;
2630
2631         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2632         nlookup_zero(&nd);
2633         dirp = NULL;
2634         dvp = NULL;
2635         vp = NULL;
2636
2637         info.dpos = nfsd->nd_dpos;
2638         info.mrep = nfsd->nd_mrep;
2639         info.mreq =  NULL;
2640         info.md = nfsd->nd_md;
2641         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2642
             /* Request layout: directory file handle, then the name length. */
2643         fhp = &nfh.fh_generic;
2644         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2645         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2646
2647         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2648                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2649                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2650         if (dirp) {
                     /* Pre-op directory attributes are only gathered for v3. */
2651                 if (info.v3)
2652                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2653         }
2654         if (error) {
                     /* Lookup failed: reply with the error and wcc data only. */
2655                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2656                                       NFSX_WCCDATA(info.v3), &error));
2657                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2658                                  diraft_ret, &diraft);
2659                 error = 0;
2660                 goto nfsmout;
2661         }
             /* v3 sends a full sattr; v2 only supplies the mode word here. */
2662         VATTR_NULL(vap);
2663         if (info.v3) {
2664                 ERROROUT(nfsm_srvsattr(&info, vap));
2665         } else {
2666                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2667                 vap->va_mode = nfstov_mode(*tl++);
2668         }
2669
2670         /*
              * dvp and vp were filled in by nfs_namei().  The cleanup code at
              * nfsmout releases both with vput() (vrele() for dvp when
              * dvp == vp), and dvp is explicitly unlocked before VOP_NMKDIR()
              * below, so both are presumably returned referenced and locked.
              * NOTE(review): confirm against nfs_namei().
2673          */
2674
2675         vap->va_type = VDIR;
2676         if (vp != NULL) {
2677                 error = EEXIST;
2678                 goto out;
2679         }
2680
2681         /*
              * Issue the mkdir op.  A mode the client left unset (VNOVAL) is
              * forced to 0.  dvp is unlocked for the call and our reference
              * is dropped afterwards.  On success the new directory is
              * returned in vp; it is released with vput() at nfsmout, so it
              * is presumably returned referenced and locked.
2685          */
2686         if (vap->va_mode == (mode_t)VNOVAL)
2687                 vap->va_mode = 0;
2688         vn_unlock(dvp);
2689         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2690         vrele(dvp);
2691         dvp = NULL;
2692
2693         if (error == 0) {
                     /* Build the file handle and attributes for the reply. */
2694                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2695                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2696                 if (error == 0)
2697                         error = VOP_GETATTR(vp, vap);
2698         }
2699 out:
2700         if (dirp)
2701                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2702         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2703                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2704                               NFSX_WCCDATA(info.v3),
2705                               &error));
2706         if (info.v3) {
                     /* v3: handle + attributes only on success, wcc data always. */
2707                 if (!error) {
2708                         nfsm_srvpostop_fh(&info, fhp);
2709                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2710                 }
2711                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2712                                  diraft_ret, &diraft);
2713         } else {
                     /* v2: file handle followed by a v2 fattr structure. */
2714                 nfsm_srvfhtom(&info, fhp);
2715                 fp = nfsm_build(&info, NFSX_V2FATTR);
2716                 nfsm_srvfattr(nfsd, vap, fp);
2717         }
2718         error = 0;
2719         /* fall through */
2720
2721 nfsmout:
             /* Common exit: hand back the reply and release whatever is held. */
2722         *mrq = info.mreq;
2723         nlookup_done(&nd);
2724         if (dirp)
2725                 vrele(dirp);
2726         if (dvp) {
2727                 if (dvp == vp)
2728                         vrele(dvp);
2729                 else
2730                         vput(dvp);
2731         }
2732         if (vp)
2733                 vput(vp);
2734         return (error);
2735 }
2736
2737 /*
2738  * nfs rmdir service
      *
      * Decodes a directory file handle and a name from the request, looks the
      * name up with nfs_namei(NLC_DELETE) and removes the directory with
      * VOP_NRMDIR().  Fails with ENOTDIR when the target is not a directory
      * and with EBUSY when it is the root of a mounted filesystem (VROOT).
      *
      * NFSv3 replies carry wcc data for the parent directory.  The reply
      * mbuf chain is handed back through *mrq.
2739  */
2740 int
2741 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2742             struct thread *td, struct mbuf **mrq)
2743 {
2744         struct sockaddr *nam = nfsd->nd_nam;
2745         struct ucred *cred = &nfsd->nd_cr;
2746         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2747         struct vnode *dirp;
2748         struct vnode *dvp;
2749         struct vnode *vp;
2750         struct vattr dirfor, diraft;
2751         nfsfh_t nfh;
2752         fhandle_t *fhp;
2753         struct nlookupdata nd;
2754         struct nfsm_info info;
2755
2756         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2757         nlookup_zero(&nd);
2758         dirp = NULL;
2759         dvp = NULL;
2760         vp = NULL;
2761
2762         info.mrep = nfsd->nd_mrep;
2763         info.mreq = NULL;
2764         info.md = nfsd->nd_md;
2765         info.dpos = nfsd->nd_dpos;
2766         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2767
             /* Request layout: directory file handle, then the name length. */
2768         fhp = &nfh.fh_generic;
2769         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2770         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2771
2772         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2773                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2774                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2775         if (dirp) {
                     /* Pre-op directory attributes are only gathered for v3. */
2776                 if (info.v3)
2777                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2778         }
2779         if (error) {
                     /* Lookup failed: reply with the error and wcc data only. */
2780                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2781                                       NFSX_WCCDATA(info.v3), &error));
2782                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2783                                  diraft_ret, &diraft);
2784                 error = 0;
2785                 goto nfsmout;
2786         }
2787         if (vp->v_type != VDIR) {
2788                 error = ENOTDIR;
2789                 goto out;
2790         }
2791
2792         /*
2793          * The root of a mounted filesystem cannot be deleted.
2794          */
2795         if (vp->v_flag & VROOT)
2796                 error = EBUSY;
2797 out:
2798         /*
              * Issue the op only if the checks above passed.  The target
              * vnode is released first and dvp is unlocked (unless it is the
              * same vnode as vp) before VOP_NRMDIR() is called; our dvp
              * reference is dropped afterwards.  On error dvp and vp are
              * left for nfsmout to release.
2801          */
2802         if (!error) {
2803                 if (dvp != vp)
2804                         vn_unlock(dvp);
2805                 vput(vp);
2806                 vp = NULL;
2807                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2808                 vrele(dvp);
2809                 dvp = NULL;
2810         }
             /*
              * The lookup state is torn down here, before the post-op
              * attributes are fetched; nfsmout calls nlookup_done() again.
              * NOTE(review): presumably safe to call twice -- confirm.
              */
2811         nlookup_done(&nd);
2812
2813         if (dirp)
2814                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2815         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2816         if (info.v3) {
2817                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2818                                  diraft_ret, &diraft);
2819                 error = 0;
2820         }
2821         /* fall through */
2822
2823 nfsmout:
             /* Common exit: hand back the reply and release whatever is held. */
2824         *mrq = info.mreq;
2825         if (dvp) {
2826                 if (dvp == vp)
2827                         vrele(dvp);
2828                 else
2829                         vput(dvp);
2830         }
2831         nlookup_done(&nd);
2832         if (dirp)
2833                 vrele(dirp);
2834         if (vp)
2835                 vput(vp);
2836         return(error);
2837 }
2838
2839 /*
2840  * nfs readdir service
2841  * - mallocs what it thinks is enough to read
2842  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2843  * - calls VOP_READDIR()
2844  * - loops around building the reply
2845  *      if the output generated exceeds count break out of loop
2846  *      The nfsm_clget macro is used here so that the reply will be packed
2847  *      tightly in mbuf clusters.
2848  * - it only knows that it has encountered eof when the VOP_READDIR()
2849  *      reads nothing
2850  * - as such one readdir rpc will return eof false although you are there
2851  *      and then the next will return eof
2852  * - it trims out records with d_fileno == 0
2853  *      this doesn't matter for Unix clients, but such records might
2854  *      confuse clients running other operating systems.
2855  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2856  *      than requested, but this may not apply to all filesystems. For
2857  *      example, client NFS does not { although it is never remote mounted
2858  *      anyhow }
2859  *     The alternate call nfsrv_readdirplus() does lookups as well.
2860  * PS: The NFS protocol spec. does not clarify what the "count" byte
2861  *      argument is a count of.. just name strings and file id's or the
2862  *      entire reply rpc or ...
2863  *      I tried just file name and id sizes and it confused the Sun client,
2864  *      so I am using the full rpc size now. The "paranoia.." comment refers
2865  *      to including the status longwords that are not a part of the dir.
2866  *      "entry" structures, but are in the rpc.
2867  */
/*
 * Fixed-layout scratch record used by nfsrv_readdirplus(), which declares
 * one instance and uses fl_nfh as storage for a fhandle_t.  All members
 * are 32-bit words (or a 64-bit nfsuint64), sized from the NFSv3 XDR
 * constants.
 * NOTE(review): the per-field notes below are inferred from the member
 * names and sizes -- confirm against nfsrv_readdirplus().
 */
2868 struct flrep {
2869         nfsuint64       fl_off;         /* presumably the entry's cookie/offset */
2870         u_int32_t       fl_postopok;    /* presumably "post-op attributes follow" flag */
2871         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)]; /* NFSX_V3FATTR bytes of attribute data */
2872         u_int32_t       fl_fhok;        /* presumably "file handle follows" flag */
2873         u_int32_t       fl_fhsize;      /* presumably the file handle length */
2874         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)]; /* NFSX_V3FH bytes; cast to fhandle_t * by readdirplus */
2875 };
2876
2877 int
2878 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2879               struct thread *td, struct mbuf **mrq)
2880 {
2881         struct sockaddr *nam = nfsd->nd_nam;
2882         struct ucred *cred = &nfsd->nd_cr;
2883         char *bp, *be;
2884         struct dirent *dp;
2885         caddr_t cp;
2886         u_int32_t *tl;
2887         struct mbuf *mp1, *mp2;
2888         char *cpos, *cend, *rbuf;
2889         struct vnode *vp = NULL;
2890         struct mount *mp = NULL;
2891         struct vattr at;
2892         nfsfh_t nfh;
2893         fhandle_t *fhp;
2894         struct uio io;
2895         struct iovec iv;
2896         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2897         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2898         u_quad_t off, toff, verf;
2899         off_t *cookies = NULL, *cookiep;
2900         struct nfsm_info info;
2901
2902         info.mrep = nfsd->nd_mrep;
2903         info.mreq = NULL;
2904         info.md = nfsd->nd_md;
2905         info.dpos = nfsd->nd_dpos;
2906         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2907
2908         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2909         fhp = &nfh.fh_generic;
2910         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2911         if (info.v3) {
2912                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2913                 toff = fxdr_hyper(tl);
2914                 tl += 2;
2915                 verf = fxdr_hyper(tl);
2916                 tl += 2;
2917         } else {
2918                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2919                 toff = fxdr_unsigned(u_quad_t, *tl++);
2920                 verf = 0;       /* shut up gcc */
2921         }
2922         off = toff;
2923         cnt = fxdr_unsigned(int, *tl);
2924         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2925         xfer = NFS_SRVMAXDATA(nfsd);
2926         if ((unsigned)cnt > xfer)
2927                 cnt = xfer;
2928         if ((unsigned)siz > xfer)
2929                 siz = xfer;
2930         fullsiz = siz;
2931         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2932                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2933         if (!error && vp->v_type != VDIR) {
2934                 error = ENOTDIR;
2935                 vput(vp);
2936                 vp = NULL;
2937         }
2938         if (error) {
2939                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2940                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2941                 error = 0;
2942                 goto nfsmout;
2943         }
2944
2945         /*
2946          * Obtain lock on vnode for this section of the code
2947          */
2948
2949         if (info.v3) {
2950                 error = getret = VOP_GETATTR(vp, &at);
2951 #if 0
2952                 /*
2953                  * XXX This check may be too strict for Solaris 2.5 clients.
2954                  */
2955                 if (!error && toff && verf && verf != at.va_filerev)
2956                         error = NFSERR_BAD_COOKIE;
2957 #endif
2958         }
2959         if (!error)
2960                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2961         if (error) {
2962                 vput(vp);
2963                 vp = NULL;
2964                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2965                                       NFSX_POSTOPATTR(info.v3), &error));
2966                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2967                 error = 0;
2968                 goto nfsmout;
2969         }
2970         vn_unlock(vp);
2971
2972         /*
2973          * end section.  Allocate rbuf and continue
2974          */
2975         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
2976 again:
2977         iv.iov_base = rbuf;
2978         iv.iov_len = fullsiz;
2979         io.uio_iov = &iv;
2980         io.uio_iovcnt = 1;
2981         io.uio_offset = (off_t)off;
2982         io.uio_resid = fullsiz;
2983         io.uio_segflg = UIO_SYSSPACE;
2984         io.uio_rw = UIO_READ;
2985         io.uio_td = NULL;
2986         eofflag = 0;
2987         if (cookies) {
2988                 kfree((caddr_t)cookies, M_TEMP);
2989                 cookies = NULL;
2990         }
2991         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2992         off = (off_t)io.uio_offset;
2993         if (!cookies && !error)
2994                 error = NFSERR_PERM;
2995         if (info.v3) {
2996                 getret = VOP_GETATTR(vp, &at);
2997                 if (!error)
2998                         error = getret;
2999         }
3000         if (error) {
3001                 vrele(vp);
3002                 vp = NULL;
3003                 kfree((caddr_t)rbuf, M_TEMP);
3004                 if (cookies)
3005                         kfree((caddr_t)cookies, M_TEMP);
3006                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3007                                       NFSX_POSTOPATTR(info.v3), &error));
3008                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3009                 error = 0;
3010                 goto nfsmout;
3011         }
3012         if (io.uio_resid) {
3013                 siz -= io.uio_resid;
3014
3015                 /*
3016                  * If nothing read, return eof
3017                  * rpc reply
3018                  */
3019                 if (siz == 0) {
3020                         vrele(vp);
3021                         vp = NULL;
3022                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3023                                               NFSX_POSTOPATTR(info.v3) +
3024                                               NFSX_COOKIEVERF(info.v3) +
3025                                               2 * NFSX_UNSIGNED,
3026                                               &error));
3027                         if (info.v3) {
3028                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3029                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3030                                 txdr_hyper(at.va_filerev, tl);
3031                                 tl += 2;
3032                         } else
3033                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3034                         *tl++ = nfs_false;
3035                         *tl = nfs_true;
3036                         kfree((caddr_t)rbuf, M_TEMP);
3037                         kfree((caddr_t)cookies, M_TEMP);
3038                         error = 0;
3039                         goto nfsmout;
3040                 }
3041         }
3042
3043         /*
3044          * Check for degenerate cases of nothing useful read.
3045          * If so go try again
3046          */
3047         cpos = rbuf;
3048         cend = rbuf + siz;
3049         dp = (struct dirent *)cpos;
3050         cookiep = cookies;
3051         /*
3052          * For some reason FreeBSD's ufs_readdir() chooses to back the
3053          * directory offset up to a block boundary, so it is necessary to
3054          * skip over the records that preceed the requested offset. This
3055          * requires the assumption that file offset cookies monotonically
3056          * increase.
3057          */
3058         while (cpos < cend && ncookies > 0 &&
3059                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3060                  ((u_quad_t)(*cookiep)) <= toff)) {
3061                 dp = _DIRENT_NEXT(dp);
3062                 cpos = (char *)dp;
3063                 cookiep++;
3064                 ncookies--;
3065         }
3066         if (cpos >= cend || ncookies == 0) {
3067                 toff = off;
3068                 siz = fullsiz;
3069                 goto again;
3070         }
3071
3072         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3073         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3074                               NFSX_POSTOPATTR(info.v3) +
3075                               NFSX_COOKIEVERF(info.v3) + siz,
3076                               &error));
3077         if (info.v3) {
3078                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3079                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3080                 txdr_hyper(at.va_filerev, tl);
3081         }
3082         mp1 = mp2 = info.mb;
3083         bp = info.bpos;
3084         be = bp + M_TRAILINGSPACE(mp1);
3085
3086         /* Loop through the records and build reply */
3087         while (cpos < cend && ncookies > 0) {
3088                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3089                         nlen = dp->d_namlen;
3090                         rem = nfsm_rndup(nlen) - nlen;
3091                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3092                         if (info.v3)
3093                                 len += 2 * NFSX_UNSIGNED;
3094                         if (len > cnt) {
3095                                 eofflag = 0;
3096                                 break;
3097                         }
3098                         /*
3099                          * Build the directory record xdr from
3100                          * the dirent entry.
3101                          */
3102                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3103                         *tl = nfs_true;
3104                         bp += NFSX_UNSIGNED;
3105                         if (info.v3) {
3106                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3107                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3108                                 bp += NFSX_UNSIGNED;
3109                         }
3110                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3111                         *tl = txdr_unsigned(dp->d_ino);
3112                         bp += NFSX_UNSIGNED;
3113                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3114                         *tl = txdr_unsigned(nlen);
3115                         bp += NFSX_UNSIGNED;
3116
3117                         /* And loop around copying the name */
3118                         xfer = nlen;
3119                         cp = dp->d_name;
3120                         while (xfer > 0) {
3121                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3122                                 if ((bp+xfer) > be)
3123                                         tsiz = be-bp;
3124                                 else
3125                                         tsiz = xfer;
3126                                 bcopy(cp, bp, tsiz);
3127                                 bp += tsiz;
3128                                 xfer -= tsiz;
3129                                 if (xfer > 0)
3130                                         cp += tsiz;
3131                         }
3132                         /* And null pad to a int32_t boundary */
3133                         for (i = 0; i < rem; i++)
3134                                 *bp++ = '\0';
3135                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3136
3137                         /* Finish off the record */
3138                         if (info.v3) {
3139                                 *tl = txdr_unsigned(*cookiep >> 32);
3140                                 bp += NFSX_UNSIGNED;
3141                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3142                         }
3143                         *tl = txdr_unsigned(*cookiep);
3144                         bp += NFSX_UNSIGNED;
3145                 }
3146                 dp = _DIRENT_NEXT(dp);
3147                 cpos = (char *)dp;
3148                 cookiep++;
3149                 ncookies--;
3150         }
3151         vrele(vp);
3152         vp = NULL;
3153         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3154         *tl = nfs_false;
3155         bp += NFSX_UNSIGNED;
3156         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3157         if (eofflag)
3158                 *tl = nfs_true;
3159         else
3160                 *tl = nfs_false;
3161         bp += NFSX_UNSIGNED;
3162         if (mp1 != info.mb) {
3163                 if (bp < be)
3164                         mp1->m_len = bp - mtod(mp1, caddr_t);
3165         } else
3166                 mp1->m_len += bp - info.bpos;
3167         kfree((caddr_t)rbuf, M_TEMP);
3168         kfree((caddr_t)cookies, M_TEMP);
3169
3170 nfsmout:
3171         *mrq = info.mreq;
3172         if (vp)
3173                 vrele(vp);
3174         return(error);
3175 }
3176
3177 int
3178 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3179                   struct thread *td, struct mbuf **mrq)
3180 {
3181         struct sockaddr *nam = nfsd->nd_nam;
3182         struct ucred *cred = &nfsd->nd_cr;
3183         char *bp, *be;
3184         struct dirent *dp;
3185         caddr_t cp;
3186         u_int32_t *tl;
3187         struct mbuf *mp1, *mp2;
3188         char *cpos, *cend, *rbuf;
3189         struct vnode *vp = NULL, *nvp;
3190         struct mount *mp = NULL;
3191         struct flrep fl;
3192         nfsfh_t nfh;
3193         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3194         struct uio io;
3195         struct iovec iv;
3196         struct vattr va, at, *vap = &va;
3197         struct nfs_fattr *fp;
3198         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3199         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3200         u_quad_t off, toff, verf;
3201         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3202         struct nfsm_info info;
3203
3204         info.mrep = nfsd->nd_mrep;
3205         info.mreq = NULL;
3206         info.md = nfsd->nd_md;
3207         info.dpos = nfsd->nd_dpos;
3208         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3209
3210         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3211         fhp = &nfh.fh_generic;
3212         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3213         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3214         toff = fxdr_hyper(tl);
3215         tl += 2;
3216         verf = fxdr_hyper(tl);
3217         tl += 2;
3218         siz = fxdr_unsigned(int, *tl++);
3219         cnt = fxdr_unsigned(int, *tl);
3220         off = toff;
3221         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3222         xfer = NFS_SRVMAXDATA(nfsd);
3223         if ((unsigned)cnt > xfer)
3224                 cnt = xfer;
3225         if ((unsigned)siz > xfer)
3226                 siz = xfer;
3227         fullsiz = siz;
3228         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3229                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3230         if (!error && vp->v_type != VDIR) {
3231                 error = ENOTDIR;
3232                 vput(vp);
3233                 vp = NULL;
3234         }
3235         if (error) {
3236                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3237                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3238                 error = 0;
3239                 goto nfsmout;
3240         }
3241         error = getret = VOP_GETATTR(vp, &at);
3242 #if 0
3243         /*
3244          * XXX This check may be too strict for Solaris 2.5 clients.
3245          */
3246         if (!error && toff && verf && verf != at.va_filerev)
3247                 error = NFSERR_BAD_COOKIE;
3248 #endif
3249         if (!error) {
3250                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3251         }
3252         if (error) {
3253                 vput(vp);
3254                 vp = NULL;
3255                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3256                                       NFSX_V3POSTOPATTR, &error));
3257                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3258                 error = 0;
3259                 goto nfsmout;
3260         }
3261         vn_unlock(vp);
3262         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
3263 again:
3264         iv.iov_base = rbuf;
3265         iv.iov_len = fullsiz;
3266         io.uio_iov = &iv;
3267         io.uio_iovcnt = 1;
3268         io.uio_offset = (off_t)off;
3269         io.uio_resid = fullsiz;
3270         io.uio_segflg = UIO_SYSSPACE;
3271         io.uio_rw = UIO_READ;
3272         io.uio_td = NULL;
3273         eofflag = 0;
3274         if (cookies) {
3275                 kfree((caddr_t)cookies, M_TEMP);
3276                 cookies = NULL;
3277         }
3278         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3279         off = (u_quad_t)io.uio_offset;
3280         getret = VOP_GETATTR(vp, &at);
3281         if (!cookies && !error)
3282                 error = NFSERR_PERM;
3283         if (!error)
3284                 error = getret;
3285         if (error) {
3286                 vrele(vp);
3287                 vp = NULL;
3288                 if (cookies)
3289                         kfree((caddr_t)cookies, M_TEMP);
3290                 kfree((caddr_t)rbuf, M_TEMP);
3291                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3292                                       NFSX_V3POSTOPATTR, &error));
3293                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3294                 error = 0;
3295                 goto nfsmout;
3296         }
3297         if (io.uio_resid) {
3298                 siz -= io.uio_resid;
3299
3300                 /*
3301                  * If nothing read, return eof
3302                  * rpc reply
3303                  */
3304                 if (siz == 0) {
3305                         vrele(vp);
3306                         vp = NULL;
3307                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3308                                               NFSX_V3POSTOPATTR +
3309                                               NFSX_V3COOKIEVERF +
3310                                               2 * NFSX_UNSIGNED,
3311                                               &error));
3312                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3313                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3314                         txdr_hyper(at.va_filerev, tl);
3315                         tl += 2;
3316                         *tl++ = nfs_false;
3317                         *tl = nfs_true;
3318                         kfree((caddr_t)cookies, M_TEMP);
3319                         kfree((caddr_t)rbuf, M_TEMP);
3320                         error = 0;
3321                         goto nfsmout;
3322                 }
3323         }
3324
3325         /*
3326          * Check for degenerate cases of nothing useful read.
3327          * If so go try again
3328          */
3329         cpos = rbuf;
3330         cend = rbuf + siz;
3331         dp = (struct dirent *)cpos;
3332         cookiep = cookies;
3333         /*
3334          * For some reason FreeBSD's ufs_readdir() chooses to back the
3335          * directory offset up to a block boundary, so it is necessary to
3336          * skip over the records that preceed the requested offset. This
3337          * requires the assumption that file offset cookies monotonically
3338          * increase.
3339          */
3340         while (cpos < cend && ncookies > 0 &&
3341                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3342                  ((u_quad_t)(*cookiep)) <= toff)) {
3343                 dp = _DIRENT_NEXT(dp);
3344                 cpos = (char *)dp;
3345                 cookiep++;
3346                 ncookies--;
3347         }
3348         if (cpos >= cend || ncookies == 0) {
3349                 toff = off;
3350                 siz = fullsiz;
3351                 goto again;
3352         }
3353
3354         /*
3355          * Probe one of the directory entries to see if the filesystem
3356          * supports VGET.
3357          */
3358         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3359                 error = NFSERR_NOTSUPP;
3360                 vrele(vp);
3361                 vp = NULL;
3362                 kfree((caddr_t)cookies, M_TEMP);
3363                 kfree((caddr_t)rbuf, M_TEMP);
3364                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3365                                       NFSX_V3POSTOPATTR, &error));
3366                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3367                 error = 0;
3368                 goto nfsmout;
3369         }
3370         if (nvp) {
3371                 vput(nvp);
3372                 nvp = NULL;
3373         }
3374             
3375         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3376                         2 * NFSX_UNSIGNED;
3377         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3378         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3379         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3380         txdr_hyper(at.va_filerev, tl);
3381         mp1 = mp2 = info.mb;
3382         bp = info.bpos;
3383         be = bp + M_TRAILINGSPACE(mp1);
3384
3385         /* Loop through the records and build reply */
3386         while (cpos < cend && ncookies > 0) {
3387                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3388                         nlen = dp->d_namlen;
3389                         rem = nfsm_rndup(nlen) - nlen;
3390
3391                         /*
3392                          * For readdir_and_lookup get the vnode using
3393                          * the file number.
3394                          */
3395                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3396                                 goto invalid;
3397                         bzero((caddr_t)nfhp, NFSX_V3FH);
3398                         nfhp->fh_fsid = fhp->fh_fsid;
3399                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3400                                 vput(nvp);
3401                                 nvp = NULL;
3402                                 goto invalid;
3403                         }
3404                         if (VOP_GETATTR(nvp, vap)) {
3405                                 vput(nvp);
3406                                 nvp = NULL;
3407                                 goto invalid;
3408                         }
3409                         vput(nvp);
3410                         nvp = NULL;
3411
3412                         /*
3413                          * If either the dircount or maxcount will be
3414                          * exceeded, get out now. Both of these lengths
3415                          * are calculated conservatively, including all
3416                          * XDR overheads.
3417                          */
3418                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3419                                 NFSX_V3POSTOPATTR);
3420                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3421                         if (len > cnt || dirlen > fullsiz) {
3422                                 eofflag = 0;
3423                                 break;
3424                         }
3425
3426                         /*
3427                          * Build the directory record xdr from
3428                          * the dirent entry.
3429                          */
3430                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3431                         nfsm_srvfattr(nfsd, vap, fp);
3432                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3433                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3434                         fl.fl_postopok = nfs_true;
3435                         fl.fl_fhok = nfs_true;
3436                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3437
3438                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3439                         *tl = nfs_true;
3440                         bp += NFSX_UNSIGNED;
3441                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3442                         *tl = txdr_unsigned(dp->d_ino >> 32);
3443                         bp += NFSX_UNSIGNED;
3444                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3445                         *tl = txdr_unsigned(dp->d_ino);
3446                         bp += NFSX_UNSIGNED;
3447                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3448                         *tl = txdr_unsigned(nlen);
3449                         bp += NFSX_UNSIGNED;
3450
3451                         /* And loop around copying the name */
3452                         xfer = nlen;
3453                         cp = dp->d_name;
3454                         while (xfer > 0) {
3455                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3456                                 if ((bp + xfer) > be)
3457                                         tsiz = be - bp;
3458                                 else
3459                                         tsiz = xfer;
3460                                 bcopy(cp, bp, tsiz);
3461                                 bp += tsiz;
3462                                 xfer -= tsiz;
3463                                 cp += tsiz;
3464                         }
3465                         /* And null pad to a int32_t boundary */
3466                         for (i = 0; i < rem; i++)
3467                                 *bp++ = '\0';
3468         
3469                         /*
3470                          * Now copy the flrep structure out.
3471                          */
3472                         xfer = sizeof (struct flrep);
3473                         cp = (caddr_t)&fl;
3474                         while (xfer > 0) {
3475                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3476                                 if ((bp + xfer) > be)
3477                                         tsiz = be - bp;
3478                                 else
3479                                         tsiz = xfer;
3480                                 bcopy(cp, bp, tsiz);
3481                                 bp += tsiz;
3482                                 xfer -= tsiz;
3483                                 cp += tsiz;
3484                         }
3485                 }
3486 invalid:
3487                 dp = _DIRENT_NEXT(dp);
3488                 cpos = (char *)dp;
3489                 cookiep++;
3490                 ncookies--;
3491         }
3492         vrele(vp);
3493         vp = NULL;
3494         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3495         *tl = nfs_false;
3496         bp += NFSX_UNSIGNED;
3497         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3498         if (eofflag)
3499                 *tl = nfs_true;
3500         else
3501                 *tl = nfs_false;
3502         bp += NFSX_UNSIGNED;
3503         if (mp1 != info.mb) {
3504                 if (bp < be)
3505                         mp1->m_len = bp - mtod(mp1, caddr_t);
3506         } else
3507                 mp1->m_len += bp - info.bpos;
3508         kfree((caddr_t)cookies, M_TEMP);
3509         kfree((caddr_t)rbuf, M_TEMP);
3510 nfsmout:
3511         *mrq = info.mreq;
3512         if (vp)
3513                 vrele(vp);
3514         return(error);
3515 }
3516
3517 /*
3518  * nfs commit service
3519  */
3520 int
3521 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3522              struct thread *td, struct mbuf **mrq)
3523 {
3524         struct sockaddr *nam = nfsd->nd_nam;
3525         struct ucred *cred = &nfsd->nd_cr;
3526         struct vattr bfor, aft;
3527         struct vnode *vp = NULL;
3528         struct mount *mp = NULL;
3529         nfsfh_t nfh;
3530         fhandle_t *fhp;
3531         u_int32_t *tl;
3532         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3533         u_quad_t off;
3534         struct nfsm_info info;
3535
3536         info.mrep = nfsd->nd_mrep;
3537         info.mreq = NULL;
3538         info.md = nfsd->nd_md;
3539         info.dpos = nfsd->nd_dpos;
3540
3541         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3542         fhp = &nfh.fh_generic;
3543         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3544         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3545
3546         /*
3547          * XXX At this time VOP_FSYNC() does not accept offset and byte
3548          * count parameters, so these arguments are useless (someday maybe).
3549          */
3550         off = fxdr_hyper(tl);
3551         tl += 2;
3552         cnt = fxdr_unsigned(int, *tl);
3553         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3554                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3555         if (error) {
3556                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3557                                       2 * NFSX_UNSIGNED, &error));
3558                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3559                                  aft_ret, &aft);
3560                 error = 0;
3561                 goto nfsmout;
3562         }
3563         for_ret = VOP_GETATTR(vp, &bfor);
3564
3565         /*
3566          * RFC 1813 3.3.21: If count is 0, a flush from offset to the end of
3567          * file is done. At this time VOP_FSYNC does not accept offset and
3568          * byte count parameters, so call VOP_FSYNC the whole file for now.
3569          */
3570         if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
3571                 /*
3572                  * Give up and do the whole thing
3573                  */
3574                 if (vp->v_object &&
3575                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3576                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3577                 }
3578                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3579         } else {
3580                 /*
3581                  * Locate and synchronously write any buffers that fall
3582                  * into the requested range.  Note:  we are assuming that
3583                  * f_iosize is a power of 2.
3584                  */
3585                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3586                 int iomask = iosize - 1;
3587                 off_t loffset;
3588
3589                 /*
3590                  * Align to iosize boundry, super-align to page boundry.
3591                  */
3592                 if (off & iomask) {
3593                         cnt += off & iomask;
3594                         off &= ~(u_quad_t)iomask;
3595                 }
3596                 if (off & PAGE_MASK) {
3597                         cnt += off & PAGE_MASK;
3598                         off &= ~(u_quad_t)PAGE_MASK;
3599                 }
3600                 loffset = off;
3601
3602                 if (vp->v_object &&
3603                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3604                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3605                             (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3606                 }
3607
3608                 crit_enter();
3609                 while (cnt > 0) {
3610                         struct buf *bp;
3611
3612                         /*
3613                          * If we have a buffer and it is marked B_DELWRI we
3614                          * have to lock and write it.  Otherwise the prior
3615                          * write is assumed to have already been committed.
3616                          *
3617                          * WARNING: FINDBLK_TEST buffers represent stable
3618                          *          storage but not necessarily stable
3619                          *          content.  It is ok in this case.
3620                          */
3621                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3622                                 if (bp->b_flags & B_DELWRI)
3623                                         bp = findblk(vp, loffset, 0);
3624                                 else
3625                                         bp = NULL;
3626                         }
3627                         if (bp) {
3628                                 if (bp->b_flags & B_DELWRI) {
3629                                         bremfree(bp);
3630                                         bwrite(bp);
3631                                         ++nfs_commit_miss;
3632                                 } else {
3633                                         BUF_UNLOCK(bp);
3634                                 }
3635                         }
3636                         ++nfs_commit_blks;
3637                         if (cnt < iosize)
3638                                 break;
3639                         cnt -= iosize;
3640                         loffset += iosize;
3641                 }
3642                 crit_exit();
3643         }
3644
3645         aft_ret = VOP_GETATTR(vp, &aft);
3646         vput(vp);
3647         vp = NULL;
3648         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3649                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3650                               &error));
3651         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3652                          aft_ret, &aft);
3653         if (!error) {
3654                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3655                 if (nfsver.tv_sec == 0)
3656                         nfsver = boottime;
3657                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3658                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3659         } else {
3660                 error = 0;
3661         }
3662 nfsmout:
3663         *mrq = info.mreq;
3664         if (vp)
3665                 vput(vp);
3666         return(error);
3667 }
3668
3669 /*
3670  * nfs statfs service
3671  */
3672 int
3673 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3674              struct thread *td, struct mbuf **mrq)
3675 {
3676         struct sockaddr *nam = nfsd->nd_nam;
3677         struct ucred *cred = &nfsd->nd_cr;
3678         struct statfs *sf;
3679         struct nfs_statfs *sfp;
3680         int error = 0, rdonly, getret = 1;
3681         struct vnode *vp = NULL;
3682         struct mount *mp = NULL;
3683         struct vattr at;
3684         nfsfh_t nfh;
3685         fhandle_t *fhp;
3686         struct statfs statfs;
3687         u_quad_t tval;
3688         struct nfsm_info info;
3689
3690         info.mrep = nfsd->nd_mrep;
3691         info.mreq = NULL;
3692         info.md = nfsd->nd_md;
3693         info.dpos = nfsd->nd_dpos;
3694         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3695
3696         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3697         fhp = &nfh.fh_generic;
3698         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3699         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3700                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3701         if (error) {
3702                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3703                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3704                 error = 0;
3705                 goto nfsmout;
3706         }
3707         sf = &statfs;
3708         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3709         getret = VOP_GETATTR(vp, &at);
3710         vput(vp);
3711         vp = NULL;
3712         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3713                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3714                               &error));
3715         if (info.v3)
3716                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3717         if (error) {
3718                 error = 0;
3719                 goto nfsmout;
3720         }
3721         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3722         if (info.v3) {
3723                 tval = (u_quad_t)sf->f_blocks;
3724                 tval *= (u_quad_t)sf->f_bsize;
3725                 txdr_hyper(tval, &sfp->sf_tbytes);
3726                 tval = (u_quad_t)sf->f_bfree;
3727                 tval *= (u_quad_t)sf->f_bsize;
3728                 txdr_hyper(tval, &sfp->sf_fbytes);
3729                 tval = (u_quad_t)sf->f_bavail;
3730                 tval *= (u_quad_t)sf->f_bsize;
3731                 txdr_hyper(tval, &sfp->sf_abytes);
3732                 sfp->sf_tfiles.nfsuquad[0] = 0;
3733                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3734                 sfp->sf_ffiles.nfsuquad[0] = 0;
3735                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3736                 sfp->sf_afiles.nfsuquad[0] = 0;
3737                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3738                 sfp->sf_invarsec = 0;
3739         } else {
3740                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3741                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3742                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3743                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3744                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3745         }
3746 nfsmout:
3747         *mrq = info.mreq;
3748         if (vp)
3749                 vput(vp);
3750         return(error);
3751 }
3752
3753 /*
3754  * nfs fsinfo service
3755  */
3756 int
3757 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3758              struct thread *td, struct mbuf **mrq)
3759 {
3760         struct sockaddr *nam = nfsd->nd_nam;
3761         struct ucred *cred = &nfsd->nd_cr;
3762         struct nfsv3_fsinfo *sip;
3763         int error = 0, rdonly, getret = 1, pref;
3764         struct vnode *vp = NULL;
3765         struct mount *mp = NULL;
3766         struct vattr at;
3767         nfsfh_t nfh;
3768         fhandle_t *fhp;
3769         u_quad_t maxfsize;
3770         struct statfs sb;
3771         struct nfsm_info info;
3772
3773         info.mrep = nfsd->nd_mrep;
3774         info.mreq = NULL;
3775         info.md = nfsd->nd_md;
3776         info.dpos = nfsd->nd_dpos;
3777
3778         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3779         fhp = &nfh.fh_generic;
3780         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3781         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3782                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3783         if (error) {
3784                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3785                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3786                 error = 0;
3787                 goto nfsmout;
3788         }
3789
3790         /* XXX Try to make a guess on the max file size. */
3791         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3792         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3793
3794         getret = VOP_GETATTR(vp, &at);
3795         vput(vp);
3796         vp = NULL;
3797         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3798                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3799         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3800         sip = nfsm_build(&info, NFSX_V3FSINFO);
3801
3802         /*
3803          * XXX
3804          * There should be file system VFS OP(s) to get this information.
3805          * For now, assume ufs.
3806          */
3807         if (slp->ns_so->so_type == SOCK_DGRAM)
3808                 pref = NFS_MAXDGRAMDATA;
3809         else
3810                 pref = NFS_MAXDATA;
3811         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3812         sip->fs_rtpref = txdr_unsigned(pref);
3813         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3814         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3815         sip->fs_wtpref = txdr_unsigned(pref);
3816         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3817         sip->fs_dtpref = txdr_unsigned(pref);
3818         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3819         sip->fs_timedelta.nfsv3_sec = 0;
3820         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3821         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3822                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3823                 NFSV3FSINFO_CANSETTIME);
3824 nfsmout:
3825         *mrq = info.mreq;
3826         if (vp)
3827                 vput(vp);
3828         return(error);
3829 }
3830
3831 /*
3832  * nfs pathconf service
3833  */
3834 int
3835 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3836                struct thread *td, struct mbuf **mrq)
3837 {
3838         struct sockaddr *nam = nfsd->nd_nam;
3839         struct ucred *cred = &nfsd->nd_cr;
3840         struct nfsv3_pathconf *pc;
3841         int error = 0, rdonly, getret = 1;
3842         register_t linkmax, namemax, chownres, notrunc;
3843         struct vnode *vp = NULL;
3844         struct mount *mp = NULL;
3845         struct vattr at;
3846         nfsfh_t nfh;
3847         fhandle_t *fhp;
3848         struct nfsm_info info;
3849
3850         info.mrep = nfsd->nd_mrep;
3851         info.mreq = NULL;
3852         info.md = nfsd->nd_md;
3853         info.dpos = nfsd->nd_dpos;
3854
3855         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3856         fhp = &nfh.fh_generic;
3857         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3858         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3859                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3860         if (error) {
3861                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3862                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3863                 error = 0;
3864                 goto nfsmout;
3865         }
3866         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3867         if (!error)
3868                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3869         if (!error)
3870                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3871         if (!error)
3872                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3873         getret = VOP_GETATTR(vp, &at);
3874         vput(vp);
3875         vp = NULL;
3876         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3877                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3878                               &error));
3879         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3880         if (error) {
3881                 error = 0;
3882                 goto nfsmout;
3883         }
3884         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3885
3886         pc->pc_linkmax = txdr_unsigned(linkmax);
3887         pc->pc_namemax = txdr_unsigned(namemax);
3888         pc->pc_notrunc = txdr_unsigned(notrunc);
3889         pc->pc_chownrestricted = txdr_unsigned(chownres);
3890
3891         /*
3892          * These should probably be supported by VOP_PATHCONF(), but
3893          * until msdosfs is exportable (why would you want to?), the
3894          * Unix defaults should be ok.
3895          */
3896         pc->pc_caseinsensitive = nfs_false;
3897         pc->pc_casepreserving = nfs_true;
3898 nfsmout:
3899         *mrq = info.mreq;
3900         if (vp) 
3901                 vput(vp);
3902         return(error);
3903 }
3904
3905 /*
3906  * Null operation, used by clients to ping server
3907  */
3908 /* ARGSUSED */
3909 int
3910 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3911            struct thread *td, struct mbuf **mrq)
3912 {
3913         struct nfsm_info info;
3914         int error = NFSERR_RETVOID;
3915
3916         info.mrep = nfsd->nd_mrep;
3917         info.mreq = NULL;
3918
3919         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3920         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3921 nfsmout:
3922         *mrq = info.mreq;
3923         return (error);
3924 }
3925
3926 /*
3927  * No operation, used for obsolete procedures
3928  */
3929 /* ARGSUSED */
3930 int
3931 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3932            struct thread *td, struct mbuf **mrq)
3933 {
3934         struct nfsm_info info;
3935         int error;
3936
3937         info.mrep = nfsd->nd_mrep;
3938         info.mreq = NULL;
3939
3940         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3941         if (nfsd->nd_repstat)
3942                 error = nfsd->nd_repstat;
3943         else
3944                 error = EPROCUNAVAIL;
3945         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3946         error = 0;
3947 nfsmout:
3948         *mrq = info.mreq;
3949         return (error);
3950 }
3951
3952 /*
3953  * Perform access checking for vnodes obtained from file handles that would
3954  * refer to files already opened by a Unix client. You cannot just use
3955  * vn_writechk() and VOP_ACCESS() for two reasons.
3956  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3957  * 2 - The owner is to be given access irrespective of mode bits for some
3958  *     operations, so that processes that chmod after opening a file don't
3959  *     break. I don't like this because it opens a security hole, but since
3960  *     the nfs server opens a security hole the size of a barn door anyhow,
3961  *     what the heck.
3962  *
3963  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3964  * will return EPERM instead of EACCESS. EPERM is always an error.
3965  */
3966 static int
3967 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3968              int rdonly, struct thread *td, int override)
3969 {
3970         struct vattr vattr;
3971         int error;
3972
3973         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3974         if (flags & VWRITE) {
3975                 /* Just vn_writechk() changed to check rdonly */
3976                 /*
3977                  * Disallow write attempts on read-only file systems;
3978                  * unless the file is a socket or a block or character
3979                  * device resident on the file system.
3980                  */
3981                 if (rdonly || 
3982                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3983                         switch (vp->v_type) {
3984                         case VREG:
3985                         case VDIR:
3986                         case VLNK:
3987                                 return (EROFS);
3988                         default:
3989                                 break;
3990                         }
3991                 }
3992                 /*
3993                  * If there's shared text associated with
3994                  * the inode, we can't allow writing.
3995                  */
3996                 if (vp->v_flag & VTEXT)
3997                         return (ETXTBSY);
3998         }
3999         error = VOP_GETATTR(vp, &vattr);
4000         if (error)
4001                 return (error);
4002         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
4003         /*
4004          * Allow certain operations for the owner (reads and writes
4005          * on files that are already open).
4006          */
4007         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4008                 error = 0;
4009         return error;
4010 }
4011 #endif /* NFS_NOSERVER */
4012