Merge branch 'vendor/BINUTILS221'
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *      - do not mix the phases, since the nfsm_?? macros can return failures
50  *        on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *      error number iff error != 0 whereas
54  *      returning an error from the server function implies a fatal error
55  *      such as a badly constructed rpc request that should be dropped without
56  *      a reply.
57  *      For Version 3, nfsm_reply() does not return for the error case, since
58  *      most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *      Warning: always pay careful attention to resource cleanup on return
62  *      and note that nfsm_*() macros can terminate a procedure on certain
63  *      errors.
64  */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_object.h>
88
89 #include <sys/buf2.h>
90
91 #include <sys/thread2.h>
92
93 #include "nfsproto.h"
94 #include "rpcv2.h"
95 #include "nfs.h"
96 #include "xdr_subs.h"
97 #include "nfsm_subs.h"
98
/*
 * Debug tracing hook.  Callers pass a fully parenthesized kprintf()
 * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)).
 * The call expands to nothing unless NFSRV_DEBUG is defined.
 */
#ifndef NFSRV_DEBUG
#define nfsdbprintf(info)
#else
#define nfsdbprintf(info)	kprintf info
#endif

/* NOTE(review): upper bound for a commit request; users are not in view. */
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Read heuristic tuning.  NUM_HEURISTIC is both the size of the nfsheur[]
 * table and the modulus used when hashing a vnode pointer into it;
 * NHUSE_INIT is the use count given to a freshly claimed slot.
 * NOTE(review): NHUSE_INC / NHUSE_MAX are presumably the per-hit increment
 * and the cap of that use count -- confirm against nfsrv_read().
 */
#define NUM_HEURISTIC		1017
#define NHUSE_INIT		64
#define NHUSE_INC		16
#define NHUSE_MAX		2048
111
112 static struct nfsheur {
113     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
114     off_t nh_nextr;             /* next offset for sequential detection */
115     int nh_use;                 /* use count for selection */
116     int nh_seqcount;            /* heuristic */
117 } nfsheur[NUM_HEURISTIC];
118
119 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
120                       NFFIFO, NFNON };
121 #ifndef NFS_NOSERVER 
122 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
123                       NFCHR, NFNON };
124
125 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
126 int nfsrvw_procrastinate_v3 = 0;
127
128 static struct timespec  nfsver;
129
130 SYSCTL_DECL(_vfs_nfs);
131
132 int nfs_async;
133 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
134     "Enable unstable and fast writes");
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
138     "Number of committed blocks");
139 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
140     "Number of nfs blocks committed from dirty buffers");
141
/* Forward declarations of helpers private to this file. */
static int	nfsrv_access(struct mount *, struct vnode *, int,
		    struct ucred *, int, struct thread *, int);
static void	nfsrvw_coalesce(struct nfsrv_descript *,
		    struct nfsrv_descript *);
146
147 /*
148  * nfs v3 access service
149  */
150 int
151 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
152               struct thread *td, struct mbuf **mrq)
153 {
154         struct sockaddr *nam = nfsd->nd_nam;
155         struct ucred *cred = &nfsd->nd_cr;
156         struct vnode *vp = NULL;
157         struct mount *mp = NULL;
158         nfsfh_t nfh;
159         fhandle_t *fhp;
160         int error = 0, rdonly, getret;
161         struct vattr vattr, *vap = &vattr;
162         u_long testmode, nfsmode;
163         struct nfsm_info info;
164         u_int32_t *tl;
165
166         info.dpos = nfsd->nd_dpos;
167         info.md = nfsd->nd_md;
168         info.mrep = nfsd->nd_mrep;
169         info.mreq = NULL;
170
171         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
172         fhp = &nfh.fh_generic;
173         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
174         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
175         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
176             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
177         if (error) {
178                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
179                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
180                 error = 0;
181                 goto nfsmout;
182         }
183         nfsmode = fxdr_unsigned(u_int32_t, *tl);
184         if ((nfsmode & NFSV3ACCESS_READ) &&
185                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
186                 nfsmode &= ~NFSV3ACCESS_READ;
187         if (vp->v_type == VDIR)
188                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
189                         NFSV3ACCESS_DELETE);
190         else
191                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
192         if ((nfsmode & testmode) &&
193                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
194                 nfsmode &= ~testmode;
195         if (vp->v_type == VDIR)
196                 testmode = NFSV3ACCESS_LOOKUP;
197         else
198                 testmode = NFSV3ACCESS_EXECUTE;
199         if ((nfsmode & testmode) &&
200                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
201                 nfsmode &= ~testmode;
202         getret = VOP_GETATTR(vp, vap);
203         vput(vp);
204         vp = NULL;
205         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
206                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
207         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
208         tl = nfsm_build(&info, NFSX_UNSIGNED);
209         *tl = txdr_unsigned(nfsmode);
210 nfsmout:
211         *mrq = info.mreq;
212         if (vp)
213                 vput(vp);
214         return(error);
215 }
216
217 /*
218  * nfs getattr service
219  */
220 int
221 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
222               struct thread *td, struct mbuf **mrq)
223 {
224         struct sockaddr *nam = nfsd->nd_nam;
225         struct ucred *cred = &nfsd->nd_cr;
226         struct nfs_fattr *fp;
227         struct vattr va;
228         struct vattr *vap = &va;
229         struct vnode *vp = NULL;
230         struct mount *mp = NULL;
231         nfsfh_t nfh;
232         fhandle_t *fhp;
233         int error = 0, rdonly;
234         struct nfsm_info info;
235
236         info.mrep = nfsd->nd_mrep;
237         info.md = nfsd->nd_md;
238         info.dpos = nfsd->nd_dpos;
239         info.mreq = NULL;
240
241         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
242         fhp = &nfh.fh_generic;
243         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
244         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
245                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
246         if (error) {
247                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
248                 error = 0;
249                 goto nfsmout;
250         }
251         error = VOP_GETATTR(vp, vap);
252         vput(vp);
253         vp = NULL;
254         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
255                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
256         if (error) {
257                 error = 0;
258                 goto nfsmout;
259         }
260         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
261         nfsm_srvfattr(nfsd, vap, fp);
262         /* fall through */
263
264 nfsmout:
265         *mrq = info.mreq;
266         if (vp)
267                 vput(vp);
268         return(error);
269 }
270
271 /*
272  * nfs setattr service
273  */
274 int
275 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
276               struct thread *td, struct mbuf **mrq)
277 {
278         struct sockaddr *nam = nfsd->nd_nam;
279         struct ucred *cred = &nfsd->nd_cr;
280         struct vattr va, preat;
281         struct vattr *vap = &va;
282         struct nfsv2_sattr *sp;
283         struct nfs_fattr *fp;
284         struct vnode *vp = NULL;
285         struct mount *mp = NULL;
286         nfsfh_t nfh;
287         fhandle_t *fhp;
288         u_int32_t *tl;
289         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
290         int gcheck = 0;
291         struct timespec guard;
292         struct nfsm_info info;
293
294         info.mrep = nfsd->nd_mrep;
295         info.mreq = NULL;
296         info.md = nfsd->nd_md;
297         info.dpos = nfsd->nd_dpos;
298         info.v3 = (nfsd->nd_flag & ND_NFSV3);
299
300         guard.tv_sec = 0;       /* fix compiler warning */
301         guard.tv_nsec = 0;
302
303         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
304         fhp = &nfh.fh_generic;
305         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
306         VATTR_NULL(vap);
307         if (info.v3) {
308                 ERROROUT(nfsm_srvsattr(&info, vap));
309                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
310                 gcheck = fxdr_unsigned(int, *tl);
311                 if (gcheck) {
312                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
313                         fxdr_nfsv3time(tl, &guard);
314                 }
315         } else {
316                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
317                 /*
318                  * Nah nah nah nah na nah
319                  * There is a bug in the Sun client that puts 0xffff in the mode
320                  * field of sattr when it should put in 0xffffffff. The u_short
321                  * doesn't sign extend.
322                  * --> check the low order 2 bytes for 0xffff
323                  */
324                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
325                         vap->va_mode = nfstov_mode(sp->sa_mode);
326                 if (sp->sa_uid != nfs_xdrneg1)
327                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
328                 if (sp->sa_gid != nfs_xdrneg1)
329                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
330                 if (sp->sa_size != nfs_xdrneg1)
331                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
332                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
333 #ifdef notyet
334                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
335 #else
336                         vap->va_atime.tv_sec =
337                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
338                         vap->va_atime.tv_nsec = 0;
339 #endif
340                 }
341                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
342                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
343
344         }
345
346         /*
347          * Now that we have all the fields, lets do it.
348          */
349         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
350                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
351         if (error) {
352                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
353                                       2 * NFSX_UNSIGNED, &error));
354                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
355                                  postat_ret, vap);
356                 error = 0;
357                 goto nfsmout;
358         }
359
360         /*
361          * vp now an active resource, pay careful attention to cleanup
362          */
363
364         if (info.v3) {
365                 error = preat_ret = VOP_GETATTR(vp, &preat);
366                 if (!error && gcheck &&
367                         (preat.va_ctime.tv_sec != guard.tv_sec ||
368                          preat.va_ctime.tv_nsec != guard.tv_nsec))
369                         error = NFSERR_NOT_SYNC;
370                 if (error) {
371                         vput(vp);
372                         vp = NULL;
373                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
374                                               NFSX_WCCDATA(info.v3), &error));
375                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
376                                          postat_ret, vap);
377                         error = 0;
378                         goto nfsmout;
379                 }
380         }
381
382         /*
383          * If the size is being changed write acces is required, otherwise
384          * just check for a read only file system.
385          */
386         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
387                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
388                         error = EROFS;
389                         goto out;
390                 }
391         } else {
392                 if (vp->v_type == VDIR) {
393                         error = EISDIR;
394                         goto out;
395                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
396                             td, 0)) != 0){ 
397                         goto out;
398                 }
399         }
400         error = VOP_SETATTR(vp, vap, cred);
401         postat_ret = VOP_GETATTR(vp, vap);
402         if (!error)
403                 error = postat_ret;
404 out:
405         vput(vp);
406         vp = NULL;
407         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
408                    NFSX_WCCORFATTR(info.v3), &error));
409         if (info.v3) {
410                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
411                                  postat_ret, vap);
412                 error = 0;
413                 goto nfsmout;
414         } else {
415                 fp = nfsm_build(&info, NFSX_V2FATTR);
416                 nfsm_srvfattr(nfsd, vap, fp);
417         }
418         /* fall through */
419
420 nfsmout:
421         *mrq = info.mreq;
422         if (vp)
423                 vput(vp);
424         return(error);
425 }
426
427 /*
428  * nfs lookup rpc
429  */
430 int
431 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
432              struct thread *td, struct mbuf **mrq)
433 {
434         struct sockaddr *nam = nfsd->nd_nam;
435         struct ucred *cred = &nfsd->nd_cr;
436         struct nfs_fattr *fp;
437         struct nlookupdata nd;
438         struct vnode *vp;
439         struct vnode *dirp;
440         struct nchandle nch;
441         nfsfh_t nfh;
442         fhandle_t *fhp;
443         int error = 0, len, dirattr_ret = 1;
444         int pubflag;
445         struct vattr va, dirattr, *vap = &va;
446         struct nfsm_info info;
447
448         info.mrep = nfsd->nd_mrep;
449         info.mreq = NULL;
450         info.md = nfsd->nd_md;
451         info.dpos = nfsd->nd_dpos;
452         info.v3 = (nfsd->nd_flag & ND_NFSV3);
453
454         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
455         nlookup_zero(&nd);
456         dirp = NULL;
457         vp = NULL;
458
459         fhp = &nfh.fh_generic;
460         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
461         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
462
463         pubflag = nfs_ispublicfh(fhp);
464
465         error = nfs_namei(&nd, cred, 0, NULL, &vp,
466                 fhp, len, slp, nam, &info.md, &info.dpos,
467                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
468
469         /*
470          * namei failure, only dirp to cleanup.  Clear out garbarge from
471          * structure in case macros jump to nfsmout.
472          */
473
474         if (error) {
475                 if (dirp) {
476                         if (info.v3)
477                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
478                         vrele(dirp);
479                         dirp = NULL;
480                 }
481                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
482                                       NFSX_POSTOPATTR(info.v3), &error));
483                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
484                 error = 0;
485                 goto nfsmout;
486         }
487
488         /*
489          * Locate index file for public filehandle
490          *
491          * error is 0 on entry and 0 on exit from this block.
492          */
493
494         if (pubflag) {
495                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
496                         /*
497                          * Setup call to lookup() to see if we can find
498                          * the index file. Arguably, this doesn't belong
499                          * in a kernel.. Ugh.  If an error occurs, do not
500                          * try to install an index file and then clear the
501                          * error.
502                          *
503                          * When we replace nd with ind and redirect ndp,
504                          * maintenance of ni_startdir and ni_vp shift to
505                          * ind and we have to clean them up in the old nd.
506                          * However, the cnd resource continues to be maintained
507                          * via the original nd.  Confused?  You aren't alone!
508                          */
509                         vn_unlock(vp);
510                         cache_copy(&nd.nl_nch, &nch);
511                         nlookup_done(&nd);
512                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
513                                                 UIO_SYSSPACE, 0, cred, &nch);
514                         cache_drop(&nch);
515                         if (error == 0)
516                                 error = nlookup(&nd);
517
518                         if (error == 0) {
519                                 /*
520                                  * Found an index file. Get rid of
521                                  * the old references.  transfer vp and
522                                  * load up the new vp.  Fortunately we do
523                                  * not have to deal with dvp, that would be
524                                  * a huge mess.
525                                  */
526                                 if (dirp)       
527                                         vrele(dirp);
528                                 dirp = vp;
529                                 vp = NULL;
530                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
531                                                         LK_EXCLUSIVE, &vp);
532                                 KKASSERT(error == 0);
533                         }
534                         error = 0;
535                 }
536                 /*
537                  * If the public filehandle was used, check that this lookup
538                  * didn't result in a filehandle outside the publicly exported
539                  * filesystem.  We clear the poor vp here to avoid lockups due
540                  * to NFS I/O.
541                  */
542
543                 if (vp->v_mount != nfs_pub.np_mount) {
544                         vput(vp);
545                         vp = NULL;
546                         error = EPERM;
547                 }
548         }
549
550         if (dirp) {
551                 if (info.v3)
552                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
553                 vrele(dirp);
554                 dirp = NULL;
555         }
556
557         /*
558          * Resources at this point:
559          *      ndp->ni_vp      may not be NULL
560          *
561          */
562
563         if (error) {
564                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
565                                       NFSX_POSTOPATTR(info.v3), &error));
566                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
567                 error = 0;
568                 goto nfsmout;
569         }
570
571         /*
572          * Clear out some resources prior to potentially blocking.  This
573          * is not as critical as ni_dvp resources in other routines, but
574          * it helps.
575          */
576         nlookup_done(&nd);
577
578         /*
579          * Get underlying attribute, then release remaining resources ( for
580          * the same potential blocking reason ) and reply.
581          */
582         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
583         error = VFS_VPTOFH(vp, &fhp->fh_fid);
584         if (!error)
585                 error = VOP_GETATTR(vp, vap);
586
587         vput(vp);
588         vp = NULL;
589         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
590                               NFSX_SRVFH(info.v3) +
591                               NFSX_POSTOPORFATTR(info.v3) +
592                               NFSX_POSTOPATTR(info.v3),
593                               &error));
594         if (error) {
595                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
596                 error = 0;
597                 goto nfsmout;
598         }
599         nfsm_srvfhtom(&info, fhp);
600         if (info.v3) {
601                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
602                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
603         } else {
604                 fp = nfsm_build(&info, NFSX_V2FATTR);
605                 nfsm_srvfattr(nfsd, vap, fp);
606         }
607
608 nfsmout:
609         *mrq = info.mreq;
610         if (dirp)
611                 vrele(dirp);
612         nlookup_done(&nd);              /* may be called twice */
613         if (vp)
614                 vput(vp);
615         return (error);
616 }
617
618 /*
619  * nfs readlink service
620  */
621 int
622 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
623                struct thread *td, struct mbuf **mrq)
624 {
625         struct sockaddr *nam = nfsd->nd_nam;
626         struct ucred *cred = &nfsd->nd_cr;
627         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
628         struct iovec *ivp = iv;
629         u_int32_t *tl;
630         int error = 0, rdonly, i, tlen, len, getret;
631         struct mbuf *mp1, *mp2, *mp3;
632         struct vnode *vp = NULL;
633         struct mount *mp = NULL;
634         struct vattr attr;
635         nfsfh_t nfh;
636         fhandle_t *fhp;
637         struct uio io, *uiop = &io;
638         struct nfsm_info info;
639
640         info.mrep = nfsd->nd_mrep;
641         info.mreq = NULL;
642         info.md = nfsd->nd_md;
643         info.dpos = nfsd->nd_dpos;
644         info.v3 = (nfsd->nd_flag & ND_NFSV3);
645
646         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
647 #ifndef nolint
648         mp2 = NULL;
649 #endif
650         mp3 = NULL;
651         fhp = &nfh.fh_generic;
652         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
653         len = 0;
654         i = 0;
655         while (len < NFS_MAXPATHLEN) {
656                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
657                 mp1->m_len = MCLBYTES;
658                 if (len == 0)
659                         mp3 = mp2 = mp1;
660                 else {
661                         mp2->m_next = mp1;
662                         mp2 = mp1;
663                 }
664                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
665                         mp1->m_len = NFS_MAXPATHLEN-len;
666                         len = NFS_MAXPATHLEN;
667                 } else
668                         len += mp1->m_len;
669                 ivp->iov_base = mtod(mp1, caddr_t);
670                 ivp->iov_len = mp1->m_len;
671                 i++;
672                 ivp++;
673         }
674         uiop->uio_iov = iv;
675         uiop->uio_iovcnt = i;
676         uiop->uio_offset = 0;
677         uiop->uio_resid = len;
678         uiop->uio_rw = UIO_READ;
679         uiop->uio_segflg = UIO_SYSSPACE;
680         uiop->uio_td = NULL;
681         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
682                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
683         if (error) {
684                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
685                                       2 * NFSX_UNSIGNED, &error));
686                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
687                 error = 0;
688                 goto nfsmout;
689         }
690         if (vp->v_type != VLNK) {
691                 if (info.v3)
692                         error = EINVAL;
693                 else
694                         error = ENXIO;
695                 goto out;
696         }
697         error = VOP_READLINK(vp, uiop, cred);
698 out:
699         getret = VOP_GETATTR(vp, &attr);
700         vput(vp);
701         vp = NULL;
702         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
703                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
704                              &error));
705         if (info.v3) {
706                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
707                 if (error) {
708                         error = 0;
709                         goto nfsmout;
710                 }
711         }
712         if (uiop->uio_resid > 0) {
713                 len -= uiop->uio_resid;
714                 tlen = nfsm_rndup(len);
715                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
716         }
717         tl = nfsm_build(&info, NFSX_UNSIGNED);
718         *tl = txdr_unsigned(len);
719         info.mb->m_next = mp3;
720         mp3 = NULL;
721 nfsmout:
722         *mrq = info.mreq;
723         if (mp3)
724                 m_freem(mp3);
725         if (vp)
726                 vput(vp);
727         return(error);
728 }
729
730 /*
731  * nfs read service
732  */
733 int
734 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
735            struct thread *td, struct mbuf **mrq)
736 {
737         struct nfsm_info info;
738         struct sockaddr *nam = nfsd->nd_nam;
739         struct ucred *cred = &nfsd->nd_cr;
740         struct iovec *iv;
741         struct iovec *iv2;
742         struct mbuf *m;
743         struct nfs_fattr *fp;
744         u_int32_t *tl;
745         int i;
746         int reqlen;
747         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
748         struct mbuf *m2;
749         struct vnode *vp = NULL;
750         struct mount *mp = NULL;
751         nfsfh_t nfh;
752         fhandle_t *fhp;
753         struct uio io, *uiop = &io;
754         struct vattr va, *vap = &va;
755         struct nfsheur *nh;
756         off_t off;
757         int ioflag = 0;
758
759         info.mrep = nfsd->nd_mrep;
760         info.mreq = NULL;
761         info.md = nfsd->nd_md;
762         info.dpos = nfsd->nd_dpos;
763         info.v3 = (nfsd->nd_flag & ND_NFSV3);
764
765         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
766         fhp = &nfh.fh_generic;
767         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
768         if (info.v3) {
769                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
770                 off = fxdr_hyper(tl);
771         } else {
772                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
773                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
774         }
775         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
776                                             NFS_SRVMAXDATA(nfsd), &error));
777
778         /*
779          * Reference vp.  If an error occurs, vp will be invalid, but we
780          * have to NULL it just in case.  The macros might goto nfsmout
781          * as well.
782          */
783
784         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
785                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
786         if (error) {
787                 vp = NULL;
788                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
789                                       2 * NFSX_UNSIGNED, &error));
790                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
791                 error = 0;
792                 goto nfsmout;
793         }
794
795         if (vp->v_type != VREG) {
796                 if (info.v3)
797                         error = EINVAL;
798                 else
799                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
800         }
801         if (!error) {
802             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
803                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
804         }
805         getret = VOP_GETATTR(vp, vap);
806         if (!error)
807                 error = getret;
808         if (error) {
809                 vput(vp);
810                 vp = NULL;
811                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
812                                       NFSX_POSTOPATTR(info.v3), &error));
813                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
814                 error = 0;
815                 goto nfsmout;
816         }
817
818         /*
819          * Calculate byte count to read
820          */
821
822         if (off >= vap->va_size)
823                 cnt = 0;
824         else if ((off + reqlen) > vap->va_size)
825                 cnt = vap->va_size - off;
826         else
827                 cnt = reqlen;
828
829         /*
830          * Calculate seqcount for heuristic
831          */
832
833         {
834                 int hi;
835                 int try = 32;
836
837                 /*
838                  * Locate best candidate
839                  */
840
841                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
842                 nh = &nfsheur[hi];
843
844                 while (try--) {
845                         if (nfsheur[hi].nh_vp == vp) {
846                                 nh = &nfsheur[hi];
847                                 break;
848                         }
849                         if (nfsheur[hi].nh_use > 0)
850                                 --nfsheur[hi].nh_use;
851                         hi = (hi + 1) % NUM_HEURISTIC;
852                         if (nfsheur[hi].nh_use < nh->nh_use)
853                                 nh = &nfsheur[hi];
854                 }
855
856                 if (nh->nh_vp != vp) {
857                         nh->nh_vp = vp;
858                         nh->nh_nextr = off;
859                         nh->nh_use = NHUSE_INIT;
860                         if (off == 0)
861                                 nh->nh_seqcount = 4;
862                         else
863                                 nh->nh_seqcount = 1;
864                 }
865
866                 /*
867                  * Calculate heuristic
868                  */
869
870                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
871                         if (++nh->nh_seqcount > IO_SEQMAX)
872                                 nh->nh_seqcount = IO_SEQMAX;
873                 } else if (nh->nh_seqcount > 1) {
874                         nh->nh_seqcount = 1;
875                 } else {
876                         nh->nh_seqcount = 0;
877                 }
878                 nh->nh_use += NHUSE_INC;
879                 if (nh->nh_use > NHUSE_MAX)
880                         nh->nh_use = NHUSE_MAX;
881                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
882         }
883
884         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
885                               NFSX_POSTOPORFATTR(info.v3) +
886                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
887                               &error));
888         if (info.v3) {
889                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
890                 *tl++ = nfs_true;
891                 fp = (struct nfs_fattr *)tl;
892                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
893         } else {
894                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
895                 fp = (struct nfs_fattr *)tl;
896                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
897         }
898         len = left = nfsm_rndup(cnt);
899         if (cnt > 0) {
900                 /*
901                  * Generate the mbuf list with the uio_iov ref. to it.
902                  */
903                 i = 0;
904                 m = m2 = info.mb;
905                 while (left > 0) {
906                         siz = min(M_TRAILINGSPACE(m), left);
907                         if (siz > 0) {
908                                 left -= siz;
909                                 i++;
910                         }
911                         if (left > 0) {
912                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
913                                 m->m_len = 0;
914                                 m2->m_next = m;
915                                 m2 = m;
916                         }
917                 }
918                 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
919                        M_TEMP, M_WAITOK);
920                 uiop->uio_iov = iv2 = iv;
921                 m = info.mb;
922                 left = len;
923                 i = 0;
924                 while (left > 0) {
925                         if (m == NULL)
926                                 panic("nfsrv_read iov");
927                         siz = min(M_TRAILINGSPACE(m), left);
928                         if (siz > 0) {
929                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
930                                 iv->iov_len = siz;
931                                 m->m_len += siz;
932                                 left -= siz;
933                                 iv++;
934                                 i++;
935                         }
936                         m = m->m_next;
937                 }
938                 uiop->uio_iovcnt = i;
939                 uiop->uio_offset = off;
940                 uiop->uio_resid = len;
941                 uiop->uio_rw = UIO_READ;
942                 uiop->uio_segflg = UIO_SYSSPACE;
943                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
944                 off = uiop->uio_offset;
945                 nh->nh_nextr = off;
946                 FREE((caddr_t)iv2, M_TEMP);
947                 if (error || (getret = VOP_GETATTR(vp, vap))) {
948                         if (!error)
949                                 error = getret;
950                         m_freem(info.mreq);
951                         info.mreq = NULL;
952                         vput(vp);
953                         vp = NULL;
954                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
955                                               NFSX_POSTOPATTR(info.v3),
956                                               &error));
957                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
958                         error = 0;
959                         goto nfsmout;
960                 }
961         } else {
962                 uiop->uio_resid = 0;
963         }
964         vput(vp);
965         vp = NULL;
966         nfsm_srvfattr(nfsd, vap, fp);
967         tlen = len - uiop->uio_resid;
968         cnt = cnt < tlen ? cnt : tlen;
969         tlen = nfsm_rndup(cnt);
970         if (len != tlen || tlen != cnt)
971                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
972         if (info.v3) {
973                 *tl++ = txdr_unsigned(cnt);
974                 if (len < reqlen)
975                         *tl++ = nfs_true;
976                 else
977                         *tl++ = nfs_false;
978         }
979         *tl = txdr_unsigned(cnt);
980 nfsmout:
981         *mrq = info.mreq;
982         if (vp)
983                 vput(vp);
984         return(error);
985 }
986
987 /*
988  * nfs write service
989  */
990 int
991 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
992             struct thread *td, struct mbuf **mrq)
993 {
994         struct sockaddr *nam = nfsd->nd_nam;
995         struct ucred *cred = &nfsd->nd_cr;
996         struct iovec *ivp;
997         int i, cnt;
998         struct mbuf *mp1;
999         struct nfs_fattr *fp;
1000         struct iovec *iv;
1001         struct vattr va, forat;
1002         struct vattr *vap = &va;
1003         u_int32_t *tl;
1004         int error = 0, rdonly, len, forat_ret = 1;
1005         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1006         int stable = NFSV3WRITE_FILESYNC;
1007         struct vnode *vp = NULL;
1008         struct mount *mp = NULL;
1009         nfsfh_t nfh;
1010         fhandle_t *fhp;
1011         struct uio io, *uiop = &io;
1012         struct nfsm_info info;
1013         off_t off;
1014
1015         info.mrep = nfsd->nd_mrep;
1016         info.mreq = NULL;
1017         info.md = nfsd->nd_md;
1018         info.dpos = nfsd->nd_dpos;
1019         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1020
1021         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1022         if (info.mrep == NULL) {
1023                 error = 0;
1024                 goto nfsmout;
1025         }
1026         fhp = &nfh.fh_generic;
1027         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1028         if (info.v3) {
1029                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1030                 off = fxdr_hyper(tl);
1031                 tl += 3;
1032                 stable = fxdr_unsigned(int, *tl++);
1033         } else {
1034                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1035                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1036                 tl += 2;
1037                 if (nfs_async)
1038                         stable = NFSV3WRITE_UNSTABLE;
1039         }
1040         retlen = len = fxdr_unsigned(int32_t, *tl);
1041         cnt = i = 0;
1042
1043         /*
1044          * For NFS Version 2, it is not obvious what a write of zero length
1045          * should do, but I might as well be consistent with Version 3,
1046          * which is to return ok so long as there are no permission problems.
1047          */
1048         if (len > 0) {
1049             zeroing = 1;
1050             mp1 = info.mrep;
1051             while (mp1) {
1052                 if (mp1 == info.md) {
1053                         zeroing = 0;
1054                         adjust = info.dpos - mtod(mp1, caddr_t);
1055                         mp1->m_len -= adjust;
1056                         if (mp1->m_len > 0 && adjust > 0)
1057                                 mp1->m_data += adjust;
1058                 }
1059                 if (zeroing)
1060                         mp1->m_len = 0;
1061                 else if (mp1->m_len > 0) {
1062                         i += mp1->m_len;
1063                         if (i > len) {
1064                                 mp1->m_len -= (i - len);
1065                                 zeroing = 1;
1066                         }
1067                         if (mp1->m_len > 0)
1068                                 cnt++;
1069                 }
1070                 mp1 = mp1->m_next;
1071             }
1072         }
1073         if (len > NFS_MAXDATA || len < 0 || i < len) {
1074                 error = EIO;
1075                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1076                                       2 * NFSX_UNSIGNED, &error));
1077                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1078                                  aftat_ret, vap);
1079                 error = 0;
1080                 goto nfsmout;
1081         }
1082         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1083                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1084         if (error) {
1085                 vp = NULL;
1086                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1087                                       2 * NFSX_UNSIGNED, &error));
1088                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1089                                  aftat_ret, vap);
1090                 error = 0;
1091                 goto nfsmout;
1092         }
1093         if (info.v3)
1094                 forat_ret = VOP_GETATTR(vp, &forat);
1095         if (vp->v_type != VREG) {
1096                 if (info.v3)
1097                         error = EINVAL;
1098                 else
1099                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1100         }
1101         if (!error) {
1102                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1103         }
1104         if (error) {
1105                 vput(vp);
1106                 vp = NULL;
1107                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1108                                       NFSX_WCCDATA(info.v3), &error));
1109                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1110                                  aftat_ret, vap);
1111                 error = 0;
1112                 goto nfsmout;
1113         }
1114
1115         if (len > 0) {
1116             MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1117                 M_WAITOK);
1118             uiop->uio_iov = iv = ivp;
1119             uiop->uio_iovcnt = cnt;
1120             mp1 = info.mrep;
1121             while (mp1) {
1122                 if (mp1->m_len > 0) {
1123                         ivp->iov_base = mtod(mp1, caddr_t);
1124                         ivp->iov_len = mp1->m_len;
1125                         ivp++;
1126                 }
1127                 mp1 = mp1->m_next;
1128             }
1129
1130             /*
1131              * XXX
1132              * The IO_METASYNC flag indicates that all metadata (and not just
1133              * enough to ensure data integrity) mus be written to stable storage
1134              * synchronously.
1135              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1136              */
1137             if (stable == NFSV3WRITE_UNSTABLE)
1138                 ioflags = IO_NODELOCKED;
1139             else if (stable == NFSV3WRITE_DATASYNC)
1140                 ioflags = (IO_SYNC | IO_NODELOCKED);
1141             else
1142                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1143             uiop->uio_resid = len;
1144             uiop->uio_rw = UIO_WRITE;
1145             uiop->uio_segflg = UIO_SYSSPACE;
1146             uiop->uio_td = NULL;
1147             uiop->uio_offset = off;
1148             error = VOP_WRITE(vp, uiop, ioflags, cred);
1149             nfsstats.srvvop_writes++;
1150             FREE((caddr_t)iv, M_TEMP);
1151         }
1152         aftat_ret = VOP_GETATTR(vp, vap);
1153         vput(vp);
1154         vp = NULL;
1155         if (!error)
1156                 error = aftat_ret;
1157         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1158                               NFSX_PREOPATTR(info.v3) +
1159                               NFSX_POSTOPORFATTR(info.v3) +
1160                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1161                               &error));
1162         if (info.v3) {
1163                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1164                                  aftat_ret, vap);
1165                 if (error) {
1166                         error = 0;
1167                         goto nfsmout;
1168                 }
1169                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1170                 *tl++ = txdr_unsigned(retlen);
1171                 /*
1172                  * If nfs_async is set, then pretend the write was FILESYNC.
1173                  */
1174                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1175                         *tl++ = txdr_unsigned(stable);
1176                 else
1177                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1178                 /*
1179                  * Actually, there is no need to txdr these fields,
1180                  * but it may make the values more human readable,
1181                  * for debugging purposes.
1182                  */
1183                 if (nfsver.tv_sec == 0)
1184                         nfsver = boottime;
1185                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1186                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1187         } else {
1188                 fp = nfsm_build(&info, NFSX_V2FATTR);
1189                 nfsm_srvfattr(nfsd, vap, fp);
1190         }
1191 nfsmout:
1192         *mrq = info.mreq;
1193         if (vp)
1194                 vput(vp);
1195         return(error);
1196 }
1197
1198 /*
1199  * NFS write service with write gathering support. Called when
1200  * nfsrvw_procrastinate > 0.
1201  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1202  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1203  * Jan. 1994.
      *
      * The function works in three phases:
      *
      * 1. If *ndp is non-NULL, that request is decoded (file handle, offset,
      *    stability level, length), its mbuf chain is trimmed down to the
      *    write data, and it is queued on slp->ns_tq ordered by nd_time.
      *    Unless decoding failed it is also entered, ordered by offset, in
      *    the delay hash selected by NWDELAYHASH(), where it may be merged
      *    into a preceding request by nfsrvw_coalesce().
      * 2. Requests at the front of slp->ns_tq whose nd_time has been reached
      *    and which have no reply yet are written with VOP_WRITE().  A reply
      *    is built for the request and for every request on its nd_coalesce
      *    list, and all of them are put back at the head of slp->ns_tq.
      * 3. The first request on slp->ns_tq that has a reply (nd_mreq) is
      *    unlinked and returned through *ndp, with its reply in *mrq.  Both
      *    are set to NULL when no reply is ready.
      *
      * The function itself always returns 0; errors are reported inside the
      * generated replies.
1204  */
1205 int
1206 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1207                   struct thread *td, struct mbuf **mrq)
1208 {
1209         struct iovec *ivp;
1210         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1211         struct nfs_fattr *fp;
1212         int i;
1213         struct iovec *iov;
1214         struct nfsrvw_delayhash *wpp;
1215         struct ucred *cred;
1216         struct vattr va, forat;
1217         u_int32_t *tl;
1218         int error = 0, rdonly, len, forat_ret = 1;
1219         int ioflags, aftat_ret = 1, adjust, zeroing;
1220         struct mbuf *mp1;
1221         struct vnode *vp = NULL;
1222         struct mount *mp = NULL;
1223         struct uio io, *uiop = &io;
1224         u_quad_t cur_usec;
1225         struct nfsm_info info;
1226
1227         info.mreq = NULL;
1228
1229         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /*
              * NOTE(review): presumably these assignments exist only to keep
              * lint/the compiler from complaining about possibly
              * uninitialized use; both variables are assigned again before
              * they are read below.
              */
1230 #ifndef nolint
1231         i = 0;
1232         len = 0;
1233 #endif
             /*
              * Phase 1: take the new request (if any) from the caller and
              * queue it.  *ndp is cleared here and only set again at the
              * very end, when a finished request is returned.
              */
1234         if (*ndp) {
1235             nfsd = *ndp;
1236             *ndp = NULL;
1237             info.mrep = nfsd->nd_mrep;
1238             info.mreq = NULL;
1239             info.md = nfsd->nd_md;
1240             info.dpos = nfsd->nd_dpos;
1241             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1242             cred = &nfsd->nd_cr;
1243             LIST_INIT(&nfsd->nd_coalesce);
1244             nfsd->nd_mreq = NULL;
1245             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
                 /*
                  * nd_time is the time at which this write becomes due; the
                  * processing loop further down compares it against
                  * nfs_curusec().  The delay differs between protocol
                  * versions.
                  */
1246             cur_usec = nfs_curusec();
1247             nfsd->nd_time = cur_usec +
1248                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1249     
1250             /*
1251              * Now, get the write header..
                  *
                  * Version 3: five words, with the 64 bit offset in words 0-1,
                  * the stability level in word 3 and the data length in word 4.
                  * Version 2: four words, with the offset in word 1 and the
                  * data length in word 3; the stability level stays FILESYNC
                  * unless nfs_async is set.
1252              */
1253             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1254             if (info.v3) {
1255                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1256                 nfsd->nd_off = fxdr_hyper(tl);
1257                 tl += 3;
1258                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1259             } else {
1260                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1261                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1262                 tl += 2;
1263                 if (nfs_async)
1264                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1265             }
1266             len = fxdr_unsigned(int32_t, *tl);
1267             nfsd->nd_len = len;
1268             nfsd->nd_eoff = nfsd->nd_off + len;
1269     
1270             /*
1271              * Trim the header out of the mbuf list and trim off any trailing
1272              * junk so that the mbuf list has only the write data.
                  *
                  * Mbufs ahead of the parse position (info.md) are emptied, the
                  * mbuf at the parse position is advanced past the bytes already
                  * consumed, and everything beyond len data bytes is cut off.
                  * i accumulates the number of data bytes seen.
1273              */
1274             zeroing = 1;
1275             i = 0;
1276             mp1 = info.mrep;
1277             while (mp1) {
1278                 if (mp1 == info.md) {
1279                     zeroing = 0;
1280                     adjust = info.dpos - mtod(mp1, caddr_t);
1281                     mp1->m_len -= adjust;
1282                     if (mp1->m_len > 0 && adjust > 0)
1283                         mp1->m_data += adjust;
1284                 }
1285                 if (zeroing)
1286                     mp1->m_len = 0;
1287                 else {
1288                     i += mp1->m_len;
1289                     if (i > len) {
1290                         mp1->m_len -= (i - len);
1291                         zeroing = 1;
1292                     }
1293                 }
1294                 mp1 = mp1->m_next;
1295             }
                 /*
                  * Error exit for this request.  A negative or oversized
                  * length, or fewer data bytes than announced, falls into
                  * this block.  NOTE(review): the nfsmout label is presumably
                  * the jump target of the NEGREPLYOUT/NULLOUT macros used
                  * above; their definitions are not visible here.
                  *
                  * The request data is freed and an EIO reply is built.
                  * Clearing nd_mrep keeps the request out of the delay hash
                  * below, and zeroing nd_time makes the timer queue insertion
                  * below sort it to the front so its reply is found by the
                  * final search.
                  */
1296             if (len > NFS_MAXDATA || len < 0  || i < len) {
1297 nfsmout:
1298                 m_freem(info.mrep);
1299                 info.mrep = NULL;
1300                 error = EIO;
1301                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1302                 if (info.v3) {
1303                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1304                                      aftat_ret, &va);
1305                 }
1306                 nfsd->nd_mreq = info.mreq;
1307                 nfsd->nd_mrep = NULL;
1308                 nfsd->nd_time = 0;
1309             }
1310     
1311             /*
1312              * Add this entry to the hash and time queues.
                  *
                  * The timer queue is kept in ascending nd_time order: the
                  * request goes after the last entry with a smaller nd_time.
1313              */
1314             owp = NULL;
1315             wp = slp->ns_tq.lh_first;
1316             while (wp && wp->nd_time < nfsd->nd_time) {
1317                 owp = wp;
1318                 wp = wp->nd_tq.le_next;
1319             }
1320             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1321             if (owp) {
1322                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1323             } else {
1324                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1325             }
                 /*
                  * Only a request that still owns its data (nd_mrep) is
                  * hashed.  Within the hash chain, first skip entries for
                  * other file handles, then skip entries for the same file
                  * handle that start at a lower offset; owp ends up as the
                  * entry to insert after (NULL means insert at the head).
                  */
1326             if (nfsd->nd_mrep) {
1327                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1328                 owp = NULL;
1329                 wp = wpp->lh_first;
1330                 while (wp &&
1331                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1332                     owp = wp;
1333                     wp = wp->nd_hash.le_next;
1334                 }
1335                 while (wp && wp->nd_off < nfsd->nd_off &&
1336                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1337                     owp = wp;
1338                     wp = wp->nd_hash.le_next;
1339                 }
1340                 if (owp) {
1341                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1342
1343                     /*
1344                      * Search the hash list for overlapping entries and
1345                      * coalesce.
                          *
                          * Starting with the entry just inserted, walk forward
                          * for as long as NFSW_CONTIG() holds against owp.  The
                          * successor is fetched before the merge because
                          * nfsrvw_coalesce() unlinks nfsd from the hash chain.
                          * Entries for which NFSW_SAMECRED() fails are stepped
                          * over without being merged.
1346                      */
1347                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1348                         wp = nfsd->nd_hash.le_next;
1349                         if (NFSW_SAMECRED(owp, nfsd))
1350                             nfsrvw_coalesce(owp, nfsd);
1351                     }
1352                 } else {
1353                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1354                 }
1355             }
1356         }
1357     
1358         /*
1359          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1360          * and generate the associated reply mbuf list(s).
              *
              * The scan stops at the first entry that is not yet due and
              * skips entries that already have a reply.  After each request
              * that is processed the scan restarts from loop1, because the
              * timer queue has been modified.
1361          */
1362 loop1:
1363         cur_usec = nfs_curusec();
1364         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1365                 owp = nfsd->nd_tq.le_next;
1366                 if (nfsd->nd_time > cur_usec)
1367                     break;
1368                 if (nfsd->nd_mreq)
1369                     continue;
1370                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1371                 LIST_REMOVE(nfsd, nd_tq);
1372                 LIST_REMOVE(nfsd, nd_hash);
                     /*
                      * Take over the request's data chain; nd_mrep is cleared
                      * so that the panic check in the reply loop below holds.
                      */
1373                 info.mrep = nfsd->nd_mrep;
1374                 info.mreq = NULL;
1375                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1376                 nfsd->nd_mrep = NULL;
1377                 cred = &nfsd->nd_cr;
1378                 forat_ret = aftat_ret = 1;
1379                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1380                                      nfsd->nd_nam, &rdonly,
1381                                      (nfsd->nd_flag & ND_KERBAUTH), TRUE);
                     /*
                      * Only regular files may be written.  For version 3 the
                      * attributes before the write are fetched for the reply.
                      * vp is reset to NULL when the file handle could not be
                      * translated.
                      */
1382                 if (!error) {
1383                     if (info.v3)
1384                         forat_ret = VOP_GETATTR(vp, &forat);
1385                     if (vp->v_type != VREG) {
1386                         if (info.v3)
1387                             error = EINVAL;
1388                         else
1389                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1390                     }
1391                 } else {
1392                     vp = NULL;
1393                 }
1394                 if (!error) {
1395                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1396                 }
1397     
                     /*
                      * UNSTABLE writes get no sync flag, DATASYNC adds IO_SYNC
                      * and anything else additionally gets IO_METASYNC.
                      */
1398                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1399                     ioflags = IO_NODELOCKED;
1400                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1401                     ioflags = (IO_SYNC | IO_NODELOCKED);
1402                 else
1403                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
                     /*
                      * The write covers nd_off up to nd_eoff, which
                      * nfsrvw_coalesce() may have extended past this
                      * request's own length.
                      */
1404                 uiop->uio_rw = UIO_WRITE;
1405                 uiop->uio_segflg = UIO_SYSSPACE;
1406                 uiop->uio_td = NULL;
1407                 uiop->uio_offset = nfsd->nd_off;
1408                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
                     /*
                      * Build one iovec per mbuf that holds data.  The array is
                      * built even when an error is already pending; only the
                      * VOP_WRITE() itself is skipped in that case.
                      */
1409                 if (uiop->uio_resid > 0) {
1410                     mp1 = info.mrep;
1411                     i = 0;
1412                     while (mp1) {
1413                         if (mp1->m_len > 0)
1414                             i++;
1415                         mp1 = mp1->m_next;
1416                     }
1417                     uiop->uio_iovcnt = i;
1418                     MALLOC(iov, struct iovec *, i * sizeof (struct iovec), 
1419                         M_TEMP, M_WAITOK);
1420                     uiop->uio_iov = ivp = iov;
1421                     mp1 = info.mrep;
1422                     while (mp1) {
1423                         if (mp1->m_len > 0) {
1424                             ivp->iov_base = mtod(mp1, caddr_t);
1425                             ivp->iov_len = mp1->m_len;
1426                             ivp++;
1427                         }
1428                         mp1 = mp1->m_next;
1429                     }
1430                     if (!error) {
1431                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1432                         nfsstats.srvvop_writes++;
1433                     }
1434                     FREE((caddr_t)iov, M_TEMP);
1435                 }
                     /*
                      * The request data is no longer needed.  If a vnode was
                      * obtained, fetch the attributes after the write and
                      * release it.
                      */
1436                 m_freem(info.mrep);
1437                 info.mrep = NULL;
1438                 if (vp) {
1439                     aftat_ret = VOP_GETATTR(vp, &va);
1440                     vput(vp);
1441                     vp = NULL;
1442                 }
1443
1444                 /*
1445                  * Loop around generating replies for all write rpcs that have
1446                  * now been completed.
                      *
                      * swp is the request that was actually written.  nfsd
                      * starts out as swp and then takes each request from
                      * swp->nd_coalesce in turn.  All replies report the same
                      * error and, for version 3, swp's stability level, but
                      * each reports its own nd_len.
1447                  */
1448                 swp = nfsd;
1449                 do {
1450                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1451                     if (error) {
1452                         nfsm_writereply(&info, nfsd, slp, error,
1453                                         NFSX_WCCDATA(info.v3));
1454                         if (info.v3) {
1455                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1456                                              aftat_ret, &va);
1457                         }
1458                     } else {
1459                         nfsm_writereply(&info, nfsd, slp, error,
1460                                         NFSX_PREOPATTR(info.v3) +
1461                                         NFSX_POSTOPORFATTR(info.v3) +
1462                                         2 * NFSX_UNSIGNED +
1463                                         NFSX_WRITEVERF(info.v3));
1464                         if (info.v3) {
1465                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1466                                              aftat_ret, &va);
1467                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1468                             *tl++ = txdr_unsigned(nfsd->nd_len);
1469                             *tl++ = txdr_unsigned(swp->nd_stable);
1470                             /*
1471                              * Actually, there is no need to txdr these fields,
1472                              * but it may make the values more human readable,
1473                              * for debugging purposes.
                                  *
                                  * nfsver is taken from boottime the first time
                                  * it is needed.
1474                              */
1475                             if (nfsver.tv_sec == 0)
1476                                     nfsver = boottime;
1477                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1478                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1479                         } else {
1480                             fp = nfsm_build(&info, NFSX_V2FATTR);
1481                             nfsm_srvfattr(nfsd, &va, fp);
1482                         }
1483                     }
1484                     nfsd->nd_mreq = info.mreq;
1485                     if (nfsd->nd_mrep)
1486                         panic("nfsrv_write: nd_mrep not free");
1487
1488                     /*
1489                      * Done. Put it at the head of the timer queue so that
1490                      * the final phase can return the reply.
                          *
                          * Coalesced requests are re-queued here; swp itself is
                          * re-queued after the loop.  The next coalesced request
                          * is unlinked from swp->nd_coalesce through its nd_tq
                          * linkage, the same field nfsrvw_coalesce() used to
                          * put it there.
1491                      */
1492                     if (nfsd != swp) {
1493                         nfsd->nd_time = 0;
1494                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1495                     }
1496                     nfsd = swp->nd_coalesce.lh_first;
1497                     if (nfsd) {
1498                         LIST_REMOVE(nfsd, nd_tq);
1499                     }
1500                 } while (nfsd);
1501                 swp->nd_time = 0;
1502                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1503                 goto loop1;
1504         }
1505
1506         /*
1507          * Search for a reply to return.
              *
              * At most one finished request is handed back per call; any
              * others stay on the timer queue.
1508          */
1509         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1510                 if (nfsd->nd_mreq) {
1511                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1512                     LIST_REMOVE(nfsd, nd_tq);
1513                     break;
1514                 }
1515         }
1516         if (nfsd) {
1517                 *ndp = nfsd;
1518                 *mrq = nfsd->nd_mreq;
1519         } else {
1520                 *ndp = NULL;
1521                 *mrq = NULL;
1522         }
1523         return (0);
1524 }
1525
1526 /*
1527  * Coalesce the write request nfsd into owp. To do this we must:
1528  * - remove nfsd from the queues
1529  * - merge nfsd->nd_mrep into owp->nd_mrep
1530  * - update the nd_eoff and nd_stable for owp
1531  * - put nfsd on owp's nd_coalesce list
1532  * NB: Must be called at splsoftclock().
1533  */
1534 static void
1535 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1536 {
1537         int overlap;
1538         struct mbuf *mp1;
1539         struct nfsrv_descript *p;
1540
1541         NFS_DPF(WG, ("C%03x-%03x",
1542                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1543         LIST_REMOVE(nfsd, nd_hash);
1544         LIST_REMOVE(nfsd, nd_tq);
1545         if (owp->nd_eoff < nfsd->nd_eoff) {
1546             overlap = owp->nd_eoff - nfsd->nd_off;
1547             if (overlap < 0)
1548                 panic("nfsrv_coalesce: bad off");
1549             if (overlap > 0)
1550                 m_adj(nfsd->nd_mrep, overlap);
1551             mp1 = owp->nd_mrep;
1552             while (mp1->m_next)
1553                 mp1 = mp1->m_next;
1554             mp1->m_next = nfsd->nd_mrep;
1555             owp->nd_eoff = nfsd->nd_eoff;
1556         } else
1557             m_freem(nfsd->nd_mrep);
1558         nfsd->nd_mrep = NULL;
1559         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1560             owp->nd_stable = NFSV3WRITE_FILESYNC;
1561         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1562             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1563             owp->nd_stable = NFSV3WRITE_DATASYNC;
1564         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1565
1566         /*
1567          * If nfsd had anything else coalesced into it, transfer them
1568          * to owp, otherwise their replies will never get sent.
1569          */
1570         for (p = nfsd->nd_coalesce.lh_first; p;
1571              p = nfsd->nd_coalesce.lh_first) {
1572             LIST_REMOVE(p, nd_tq);
1573             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1574         }
1575 }
1576
1577 /*
1578  * nfs create service
1579  * now does a truncate to 0 length via setattr if it already exists
1580  */
1581 int
1582 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1583              struct thread *td, struct mbuf **mrq)
1584 {
1585         struct sockaddr *nam = nfsd->nd_nam;
1586         struct ucred *cred = &nfsd->nd_cr;
1587         struct nfs_fattr *fp;
1588         struct vattr va, dirfor, diraft;
1589         struct vattr *vap = &va;
1590         struct nfsv2_sattr *sp;
1591         u_int32_t *tl;
1592         struct nlookupdata nd;
1593         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1594         udev_t rdev = NOUDEV;
1595         caddr_t cp;
1596         int how, exclusive_flag = 0;
1597         struct vnode *dirp;
1598         struct vnode *dvp;
1599         struct vnode *vp;
1600         struct mount *mp;
1601         nfsfh_t nfh;
1602         fhandle_t *fhp;
1603         u_quad_t tempsize;
1604         u_char cverf[NFSX_V3CREATEVERF];
1605         struct nfsm_info info;
1606
1607         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1608         nlookup_zero(&nd);
1609         dirp = NULL;
1610         dvp = NULL;
1611         vp = NULL;
1612
1613         info.mrep = nfsd->nd_mrep;
1614         info.mreq = NULL;
1615         info.md = nfsd->nd_md;
1616         info.dpos = nfsd->nd_dpos;
1617         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1618
1619         fhp = &nfh.fh_generic;
1620         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1621         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1622
1623         /*
1624          * Call namei and do initial cleanup to get a few things
1625          * out of the way.  If we get an initial error we cleanup
1626          * and return here to avoid special-casing the invalid nd
1627          * structure through the rest of the case.  dirp may be
1628          * set even if an error occurs, but the nd structure will not
1629          * be valid at all if an error occurs so we have to invalidate it
1630          * prior to calling nfsm_reply ( which might goto nfsmout ).
1631          */
1632         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1633                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1634                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1635         mp = vfs_getvfs(&fhp->fh_fsid);
1636
1637         if (dirp) {
1638                 if (info.v3) {
1639                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1640                 } else {
1641                         vrele(dirp);
1642                         dirp = NULL;
1643                 }
1644         }
1645         if (error) {
1646                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1647                                       NFSX_WCCDATA(info.v3), &error));
1648                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1649                                  diraft_ret, &diraft);
1650                 error = 0;
1651                 goto nfsmout;
1652         }
1653
1654         /*
1655          * No error.  Continue.  State:
1656          *
1657          *      dirp            may be valid
1658          *      vp              may be valid or NULL if the target does not
1659          *                      exist.
1660          *      dvp             is valid
1661          *
1662          * The error state is set through the code and we may also do some
1663          * opportunistic releasing of vnodes to avoid holding locks through
1664          * NFS I/O.  The cleanup at the end is a catch-all
1665          */
1666
1667         VATTR_NULL(vap);
1668         if (info.v3) {
1669                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1670                 how = fxdr_unsigned(int, *tl);
1671                 switch (how) {
1672                 case NFSV3CREATE_GUARDED:
1673                         if (vp) {
1674                                 error = EEXIST;
1675                                 break;
1676                         }
1677                         /* fall through */
1678                 case NFSV3CREATE_UNCHECKED:
1679                         ERROROUT(nfsm_srvsattr(&info, vap));
1680                         break;
1681                 case NFSV3CREATE_EXCLUSIVE:
1682                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1683                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
1684                         exclusive_flag = 1;
1685                         break;
1686                 };
1687                 vap->va_type = VREG;
1688         } else {
1689                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1690                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1691                 if (vap->va_type == VNON)
1692                         vap->va_type = VREG;
1693                 vap->va_mode = nfstov_mode(sp->sa_mode);
1694                 switch (vap->va_type) {
1695                 case VREG:
1696                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1697                         if (tsize != -1)
1698                                 vap->va_size = (u_quad_t)tsize;
1699                         break;
1700                 case VCHR:
1701                 case VBLK:
1702                 case VFIFO:
1703                         rdev = fxdr_unsigned(long, sp->sa_size);
1704                         break;
1705                 default:
1706                         break;
1707                 };
1708         }
1709
1710         /*
1711          * Iff doesn't exist, create it
1712          * otherwise just truncate to 0 length
1713          *   should I set the mode too ?
1714          *
1715          * The only possible error we can have at this point is EEXIST. 
1716          * nd.ni_vp will also be non-NULL in that case.
1717          */
1718         if (vp == NULL) {
1719                 if (vap->va_mode == (mode_t)VNOVAL)
1720                         vap->va_mode = 0;
1721                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1722                         vn_unlock(dvp);
1723                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1724                                             nd.nl_cred, vap);
1725                         vrele(dvp);
1726                         dvp = NULL;
1727                         if (error == 0) {
1728                                 if (exclusive_flag) {
1729                                         exclusive_flag = 0;
1730                                         VATTR_NULL(vap);
1731                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1732                                                 NFSX_V3CREATEVERF);
1733                                         error = VOP_SETATTR(vp, vap, cred);
1734                                 }
1735                         }
1736                 } else if (
1737                         vap->va_type == VCHR || 
1738                         vap->va_type == VBLK ||
1739                         vap->va_type == VFIFO
1740                 ) {
1741                         /*
1742                          * Handle SysV FIFO node special cases.  All other
1743                          * devices require super user to access.
1744                          */
1745                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1746                                 vap->va_type = VFIFO;
1747                         if (vap->va_type != VFIFO &&
1748                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1749                                 goto nfsmreply0;
1750                         }
1751                         vap->va_rmajor = umajor(rdev);
1752                         vap->va_rminor = uminor(rdev);
1753
1754                         vn_unlock(dvp);
1755                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1756                         vrele(dvp);
1757                         dvp = NULL;
1758                         if (error)
1759                                 goto nfsmreply0;
1760 #if 0
1761                         /*
1762                          * XXX what is this junk supposed to do ?
1763                          */
1764
1765                         vput(vp);
1766                         vp = NULL;
1767
1768                         /*
1769                          * release dvp prior to lookup
1770                          */
1771                         vput(dvp);
1772                         dvp = NULL;
1773
1774                         /*
1775                          * Setup for lookup. 
1776                          *
1777                          * Even though LOCKPARENT was cleared, ni_dvp may
1778                          * be garbage. 
1779                          */
1780                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1781                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1782                         nd.ni_cnd.cn_td = td;
1783                         nd.ni_cnd.cn_cred = cred;
1784
1785                         error = lookup(&nd);
1786                         nd.ni_dvp = NULL;
1787
1788                         if (error != 0) {
1789                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1790                                                       0, &error));
1791                                 /* fall through on certain errors */
1792                         }
1793                         nfsrv_object_create(nd.ni_vp);
1794                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1795                                 error = EINVAL;
1796                                 goto nfsmreply0;
1797                         }
1798 #endif
1799                 } else {
1800                         error = ENXIO;
1801                 }
1802         } else {
1803                 if (vap->va_size != -1) {
1804                         error = nfsrv_access(mp, vp, VWRITE, cred,
1805                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1806                         if (!error) {
1807                                 tempsize = vap->va_size;
1808                                 VATTR_NULL(vap);
1809                                 vap->va_size = tempsize;
1810                                 error = VOP_SETATTR(vp, vap, cred);
1811                         }
1812                 }
1813         }
1814
1815         if (!error) {
1816                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1817                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1818                 if (!error)
1819                         error = VOP_GETATTR(vp, vap);
1820         }
1821         if (info.v3) {
1822                 if (exclusive_flag && !error &&
1823                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1824                         error = EEXIST;
1825                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1826                 vrele(dirp);
1827                 dirp = NULL;
1828         }
1829         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1830                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1831                               NFSX_WCCDATA(info.v3),
1832                               &error));
1833         if (info.v3) {
1834                 if (!error) {
1835                         nfsm_srvpostop_fh(&info, fhp);
1836                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1837                 }
1838                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1839                                  diraft_ret, &diraft);
1840                 error = 0;
1841         } else {
1842                 nfsm_srvfhtom(&info, fhp);
1843                 fp = nfsm_build(&info, NFSX_V2FATTR);
1844                 nfsm_srvfattr(nfsd, vap, fp);
1845         }
1846         goto nfsmout;
1847
1848 nfsmreply0:
1849         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1850         error = 0;
1851         /* fall through */
1852
1853 nfsmout:
1854         *mrq = info.mreq;
1855         if (dirp)
1856                 vrele(dirp);
1857         nlookup_done(&nd);
1858         if (dvp) {
1859                 if (dvp == vp)
1860                         vrele(dvp);
1861                 else
1862                         vput(dvp);
1863         }
1864         if (vp)
1865                 vput(vp);
1866         return (error);
1867 }
1868
1869 /*
1870  * nfs v3 mknod service
1871  */
1872 int
1873 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1874             struct thread *td, struct mbuf **mrq)
1875 {
1876         struct sockaddr *nam = nfsd->nd_nam;
1877         struct ucred *cred = &nfsd->nd_cr;
1878         struct vattr va, dirfor, diraft;
1879         struct vattr *vap = &va;
1880         u_int32_t *tl;
1881         struct nlookupdata nd;
1882         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1883         enum vtype vtyp;
1884         struct vnode *dirp;
1885         struct vnode *dvp;
1886         struct vnode *vp;
1887         nfsfh_t nfh;
1888         fhandle_t *fhp;
1889         struct nfsm_info info;
1890
1891         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1892         nlookup_zero(&nd);
1893         dirp = NULL;
1894         dvp = NULL;
1895         vp = NULL;
1896
1897         info.mrep = nfsd->nd_mrep;
1898         info.mreq = NULL;
1899         info.md = nfsd->nd_md;
1900         info.dpos = nfsd->nd_dpos;
1901
1902         fhp = &nfh.fh_generic;
1903         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1904         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1905
1906         /*
1907          * Handle nfs_namei() call.  If an error occurs, the nd structure
1908          * is not valid.  However, nfsm_*() routines may still jump to
1909          * nfsmout.
1910          */
1911
1912         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1913                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1914                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1915         if (dirp)
1916                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1917         if (error) {
1918                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1919                            NFSX_WCCDATA(1), &error));
1920                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1921                                  diraft_ret, &diraft);
1922                 error = 0;
1923                 goto nfsmout;
1924         }
1925         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1926         vtyp = nfsv3tov_type(*tl);
1927         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1928                 error = NFSERR_BADTYPE;
1929                 goto out;
1930         }
1931         VATTR_NULL(vap);
1932         ERROROUT(nfsm_srvsattr(&info, vap));
1933         if (vtyp == VCHR || vtyp == VBLK) {
1934                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1935                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1936                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1937         }
1938
1939         /*
1940          * Iff doesn't exist, create it.
1941          */
1942         if (vp) {
1943                 error = EEXIST;
1944                 goto out;
1945         }
1946         vap->va_type = vtyp;
1947         if (vap->va_mode == (mode_t)VNOVAL)
1948                 vap->va_mode = 0;
1949         if (vtyp == VSOCK) {
1950                 vn_unlock(dvp);
1951                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1952                 vrele(dvp);
1953                 dvp = NULL;
1954         } else {
1955                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1956                         goto out;
1957
1958                 vn_unlock(dvp);
1959                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1960                 vrele(dvp);
1961                 dvp = NULL;
1962                 if (error)
1963                         goto out;
1964         }
1965
1966         /*
1967          * send response, cleanup, return.
1968          */
1969 out:
1970         nlookup_done(&nd);
1971         if (dvp) {
1972                 if (dvp == vp)
1973                         vrele(dvp);
1974                 else
1975                         vput(dvp);
1976                 dvp = NULL;
1977         }
1978         if (!error) {
1979                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1980                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1981                 if (!error)
1982                         error = VOP_GETATTR(vp, vap);
1983         }
1984         if (vp) {
1985                 vput(vp);
1986                 vp = NULL;
1987         }
1988         diraft_ret = VOP_GETATTR(dirp, &diraft);
1989         if (dirp) {
1990                 vrele(dirp);
1991                 dirp = NULL;
1992         }
1993         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1994                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1995                               NFSX_WCCDATA(1), &error));
1996         if (!error) {
1997                 nfsm_srvpostop_fh(&info, fhp);
1998                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1999         }
2000         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2001                          diraft_ret, &diraft);
2002         *mrq = info.mreq;
2003         return (0);
2004 nfsmout:
2005         *mrq = info.mreq;
2006         if (dirp)
2007                 vrele(dirp);
2008         nlookup_done(&nd);
2009         if (dvp) {
2010                 if (dvp == vp)
2011                         vrele(dvp);
2012                 else
2013                         vput(dvp);
2014         }
2015         if (vp)
2016                 vput(vp);
2017         return (error);
2018 }
2019
2020 /*
2021  * nfs remove service
2022  */
2023 int
2024 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2025              struct thread *td, struct mbuf **mrq)
2026 {
2027         struct sockaddr *nam = nfsd->nd_nam;
2028         struct ucred *cred = &nfsd->nd_cr;
2029         struct nlookupdata nd;
2030         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2031         struct vnode *dirp;
2032         struct vnode *dvp;
2033         struct vnode *vp;
2034         struct vattr dirfor, diraft;
2035         nfsfh_t nfh;
2036         fhandle_t *fhp;
2037         struct nfsm_info info;
2038
2039         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2040         nlookup_zero(&nd);
2041         dirp = NULL;
2042         dvp = NULL;
2043         vp = NULL;
2044
2045         info.mrep = nfsd->nd_mrep;
2046         info.mreq = NULL;
2047         info.md = nfsd->nd_md;
2048         info.dpos = nfsd->nd_dpos;
2049         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2050
2051         fhp = &nfh.fh_generic;
2052         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2053         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2054
2055         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2056                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2057                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2058         if (dirp) {
2059                 if (info.v3)
2060                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2061         }
2062         if (error == 0) {
2063                 if (vp->v_type == VDIR) {
2064                         error = EPERM;          /* POSIX */
2065                         goto out;
2066                 }
2067                 /*
2068                  * The root of a mounted filesystem cannot be deleted.
2069                  */
2070                 if (vp->v_flag & VROOT) {
2071                         error = EBUSY;
2072                         goto out;
2073                 }
2074 out:
2075                 if (!error) {
2076                         if (dvp != vp)
2077                                 vn_unlock(dvp);
2078                         if (vp) {
2079                                 vput(vp);
2080                                 vp = NULL;
2081                         }
2082                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2083                         vrele(dvp);
2084                         dvp = NULL;
2085                 }
2086         }
2087         if (dirp && info.v3)
2088                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2089         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2090         if (info.v3) {
2091                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2092                                  diraft_ret, &diraft);
2093                 error = 0;
2094         }
2095 nfsmout:
2096         *mrq = info.mreq;
2097         nlookup_done(&nd);
2098         if (dirp)
2099                 vrele(dirp);
2100         if (dvp) {
2101                 if (dvp == vp)
2102                         vrele(dvp);
2103                 else
2104                         vput(dvp);
2105         }
2106         if (vp)
2107                 vput(vp);
2108         return(error);
2109 }
2110
2111 /*
2112  * nfs rename service
2113  */
2114 int
2115 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2116              struct thread *td, struct mbuf **mrq)
2117 {
2118         struct sockaddr *nam = nfsd->nd_nam;
2119         struct ucred *cred = &nfsd->nd_cr;
2120         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2121         int tdirfor_ret = 1, tdiraft_ret = 1;
2122         struct nlookupdata fromnd, tond;
2123         struct vnode *fvp, *fdirp, *fdvp;
2124         struct vnode *tvp, *tdirp, *tdvp;
2125         struct namecache *ncp;
2126         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2127         nfsfh_t fnfh, tnfh;
2128         fhandle_t *ffhp, *tfhp;
2129         uid_t saved_uid;
2130         struct nfsm_info info;
2131
2132         info.mrep = nfsd->nd_mrep;
2133         info.mreq = NULL;
2134         info.md = nfsd->nd_md;
2135         info.dpos = nfsd->nd_dpos;
2136         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2137
2138         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2139 #ifndef nolint
2140         fvp = NULL;
2141 #endif
2142         ffhp = &fnfh.fh_generic;
2143         tfhp = &tnfh.fh_generic;
2144
2145         /*
2146          * Clear fields incase goto nfsmout occurs from macro.
2147          */
2148
2149         nlookup_zero(&fromnd);
2150         nlookup_zero(&tond);
2151         fdirp = NULL;
2152         tdirp = NULL;
2153
2154         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2155         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2156
2157         /*
2158          * Remember our original uid so that we can reset cr_uid before
2159          * the second nfs_namei() call, in case it is remapped.
2160          */
2161         saved_uid = cred->cr_uid;
2162         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2163                           NULL, NULL,
2164                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2165                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2166         if (fdirp) {
2167                 if (info.v3)
2168                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2169         }
2170         if (error) {
2171                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2172                                       2 * NFSX_WCCDATA(info.v3), &error));
2173                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2174                                  fdiraft_ret, &fdiraft);
2175                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2176                                  tdiraft_ret, &tdiraft);
2177                 error = 0;
2178                 goto nfsmout;
2179         }
2180
2181         /*
2182          * We have to unlock the from ncp before we can safely lookup
2183          * the target ncp.
2184          */
2185         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2186         cache_unlock(&fromnd.nl_nch);
2187         fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2188         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2189         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2190         cred->cr_uid = saved_uid;
2191
2192         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2193                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2194                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2195         if (tdirp) {
2196                 if (info.v3)
2197                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2198         }
2199         if (error)
2200                 goto out1;
2201
2202         /*
2203          * relock the source
2204          */
2205         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2206                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2207         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2208                 cache_lock(&fromnd.nl_nch);
2209                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2210         } else {
2211                 cache_unlock(&tond.nl_nch);
2212                 cache_lock(&fromnd.nl_nch);
2213                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2214                 cache_lock(&tond.nl_nch);
2215                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2216         }
2217         fromnd.nl_flags |= NLC_NCPISLOCKED;
2218
2219         fvp = fromnd.nl_nch.ncp->nc_vp;
2220         tvp = tond.nl_nch.ncp->nc_vp;
2221
2222         /*
2223          * Set fdvp and tdvp.  We haven't done all the topology checks
2224          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2225          * point).  If we get through the checks these will be guarenteed
2226          * to be non-NULL.
2227          *
2228          * Holding the children ncp's should be sufficient to prevent
2229          * fdvp and tdvp ripouts.
2230          */
2231         if (fromnd.nl_nch.ncp->nc_parent)
2232                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2233         else
2234                 fdvp = NULL;
2235         if (tond.nl_nch.ncp->nc_parent)
2236                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2237         else
2238                 tdvp = NULL;
2239
2240         if (tvp != NULL) {
2241                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2242                         if (info.v3)
2243                                 error = EEXIST;
2244                         else
2245                                 error = EISDIR;
2246                         goto out;
2247                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2248                         if (info.v3)
2249                                 error = EEXIST;
2250                         else
2251                                 error = ENOTDIR;
2252                         goto out;
2253                 }
2254                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2255                         if (info.v3)
2256                                 error = EXDEV;
2257                         else
2258                                 error = ENOTEMPTY;
2259                         goto out;
2260                 }
2261         }
2262         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2263                 if (info.v3)
2264                         error = EXDEV;
2265                 else
2266                         error = ENOTEMPTY;
2267                 goto out;
2268         }
2269         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2270                 if (info.v3)
2271                         error = EXDEV;
2272                 else
2273                         error = ENOTEMPTY;
2274                 goto out;
2275         }
2276         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2277                 if (info.v3)
2278                         error = EINVAL;
2279                 else
2280                         error = ENOTEMPTY;
2281         }
2282
2283         /*
2284          * You cannot rename a source into itself or a subdirectory of itself.
2285          * We check this by travsering the target directory upwards looking
2286          * for a match against the source.
2287          */
2288         if (error == 0) {
2289                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2290                         if (fromnd.nl_nch.ncp == ncp) {
2291                                 error = EINVAL;
2292                                 break;
2293                         }
2294                 }
2295         }
2296
2297         /*
2298          * If source is the same as the destination (that is the
2299          * same vnode with the same name in the same directory),
2300          * then there is nothing to do.
2301          */
2302         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2303                 error = -1;
2304 out:
2305         if (!error) {
2306                 /*
2307                  * The VOP_NRENAME function releases all vnode references &
2308                  * locks prior to returning so we need to clear the pointers
2309                  * to bypass cleanup code later on.
2310                  */
2311                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2312                                     fdvp, tdvp, tond.nl_cred);
2313         } else {
2314                 if (error == -1)
2315                         error = 0;
2316         }
2317         /* fall through */
2318
2319 out1:
2320         if (fdirp)
2321                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2322         if (tdirp)
2323                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2324         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2325                               2 * NFSX_WCCDATA(info.v3), &error));
2326         if (info.v3) {
2327                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2328                                  fdiraft_ret, &fdiraft);
2329                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2330                                  tdiraft_ret, &tdiraft);
2331         }
2332         error = 0;
2333         /* fall through */
2334
2335 nfsmout:
2336         *mrq = info.mreq;
2337         if (tdirp)
2338                 vrele(tdirp);
2339         nlookup_done(&tond);
2340         if (fdirp)
2341                 vrele(fdirp);
2342         nlookup_done(&fromnd);
2343         return (error);
2344 }
2345
2346 /*
2347  * nfs link service
2348  */
2349 int
2350 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2351            struct thread *td, struct mbuf **mrq)
2352 {
2353         struct sockaddr *nam = nfsd->nd_nam;
2354         struct ucred *cred = &nfsd->nd_cr;
2355         struct nlookupdata nd;
2356         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2357         int getret = 1;
2358         struct vnode *dirp;
2359         struct vnode *dvp;
2360         struct vnode *vp;
2361         struct vnode *xp;
2362         struct mount *mp;
2363         struct mount *xmp;
2364         struct vattr dirfor, diraft, at;
2365         nfsfh_t nfh, dnfh;
2366         fhandle_t *fhp, *dfhp;
2367         struct nfsm_info info;
2368
2369         info.mrep = nfsd->nd_mrep;
2370         info.mreq = NULL;
2371         info.md = nfsd->nd_md;
2372         info.dpos = nfsd->nd_dpos;
2373         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2374
2375         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2376         nlookup_zero(&nd);
2377         dirp = dvp = vp = xp = NULL;
2378         mp = xmp = NULL;
2379
2380         fhp = &nfh.fh_generic;
2381         dfhp = &dnfh.fh_generic;
2382         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2383         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2384         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2385
2386         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2387                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2388         if (error) {
2389                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2390                                       NFSX_POSTOPATTR(info.v3) +
2391                                       NFSX_WCCDATA(info.v3),
2392                                       &error));
2393                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2394                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2395                                  diraft_ret, &diraft);
2396                 xp = NULL;
2397                 error = 0;
2398                 goto nfsmout;
2399         }
2400         if (xp->v_type == VDIR) {
2401                 error = EPERM;          /* POSIX */
2402                 goto out1;
2403         }
2404
2405         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2406                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2407                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2408         if (dirp) {
2409                 if (info.v3)
2410                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2411         }
2412         if (error)
2413                 goto out1;
2414
2415         if (vp != NULL) {
2416                 error = EEXIST;
2417                 goto out;
2418         }
2419         if (xp->v_mount != dvp->v_mount)
2420                 error = EXDEV;
2421 out:
2422         if (!error) {
2423                 vn_unlock(dvp);
2424                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2425                 vrele(dvp);
2426                 dvp = NULL;
2427         }
2428         /* fall through */
2429
2430 out1:
2431         if (info.v3)
2432                 getret = VOP_GETATTR(xp, &at);
2433         if (dirp)
2434                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2435         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2436                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2437                               &error));
2438         if (info.v3) {
2439                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2440                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2441                                  diraft_ret, &diraft);
2442                 error = 0;
2443         }
2444         /* fall through */
2445
2446 nfsmout:
2447         *mrq = info.mreq;
2448         nlookup_done(&nd);
2449         if (dirp)
2450                 vrele(dirp);
2451         if (xp)
2452                 vrele(xp);
2453         if (dvp) {
2454                 if (dvp == vp)
2455                         vrele(dvp);
2456                 else
2457                         vput(dvp);
2458         }
2459         if (vp)
2460                 vput(vp);
2461         return(error);
2462 }
2463
2464 /*
2465  * nfs symbolic link service
2466  */
2467 int
2468 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2469               struct thread *td, struct mbuf **mrq)
2470 {
2471         struct sockaddr *nam = nfsd->nd_nam;
2472         struct ucred *cred = &nfsd->nd_cr;
2473         struct vattr va, dirfor, diraft;
2474         struct nlookupdata nd;
2475         struct vattr *vap = &va;
2476         struct nfsv2_sattr *sp;
2477         char *pathcp = NULL;
2478         struct uio io;
2479         struct iovec iv;
2480         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2481         struct vnode *dirp;
2482         struct vnode *vp;
2483         struct vnode *dvp;
2484         nfsfh_t nfh;
2485         fhandle_t *fhp;
2486         struct nfsm_info info;
2487
2488         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2489         nlookup_zero(&nd);
2490         dirp = NULL;
2491         dvp = NULL;
2492         vp = NULL;
2493
2494         info.mrep = nfsd->nd_mrep;
2495         info.mreq =  NULL;
2496         info.md = nfsd->nd_md;
2497         info.dpos = nfsd->nd_dpos;
2498         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2499
2500         fhp = &nfh.fh_generic;
2501         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2502         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2503
2504         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2505                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2506                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2507         if (dirp) {
2508                 if (info.v3)
2509                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2510         }
2511         if (error)
2512                 goto out;
2513
2514         VATTR_NULL(vap);
2515         if (info.v3) {
2516                 ERROROUT(nfsm_srvsattr(&info, vap));
2517         }
2518         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2519         MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2520         iv.iov_base = pathcp;
2521         iv.iov_len = len2;
2522         io.uio_resid = len2;
2523         io.uio_offset = 0;
2524         io.uio_iov = &iv;
2525         io.uio_iovcnt = 1;
2526         io.uio_segflg = UIO_SYSSPACE;
2527         io.uio_rw = UIO_READ;
2528         io.uio_td = NULL;
2529         ERROROUT(nfsm_mtouio(&info, &io, len2));
2530         if (info.v3 == 0) {
2531                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2532                 vap->va_mode = nfstov_mode(sp->sa_mode);
2533         }
2534         *(pathcp + len2) = '\0';
2535         if (vp) {
2536                 error = EEXIST;
2537                 goto out;
2538         }
2539
2540         if (vap->va_mode == (mode_t)VNOVAL)
2541                 vap->va_mode = 0;
2542         if (dvp != vp)
2543                 vn_unlock(dvp);
2544         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2545         vrele(dvp);
2546         dvp = NULL;
2547         if (error == 0) {
2548                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2549                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2550                 if (!error)
2551                         error = VOP_GETATTR(vp, vap);
2552         }
2553
2554 out:
2555         if (dvp) {
2556                 if (dvp == vp)
2557                         vrele(dvp);
2558                 else
2559                         vput(dvp);
2560         }
2561         if (vp) {
2562                 vput(vp);
2563                 vp = NULL;
2564         }
2565         if (pathcp) {
2566                 FREE(pathcp, M_TEMP);
2567                 pathcp = NULL;
2568         }
2569         if (dirp) {
2570                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2571                 vrele(dirp);
2572                 dirp = NULL;
2573         }
2574         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2575                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2576                               NFSX_WCCDATA(info.v3),
2577                               &error));
2578         if (info.v3) {
2579                 if (!error) {
2580                         nfsm_srvpostop_fh(&info, fhp);
2581                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2582                 }
2583                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2584                                  diraft_ret, &diraft);
2585         }
2586         error = 0;
2587         /* fall through */
2588
2589 nfsmout:
2590         *mrq = info.mreq;
2591         nlookup_done(&nd);
2592         if (vp)
2593                 vput(vp);
2594         if (dirp)
2595                 vrele(dirp);
2596         if (pathcp)
2597                 FREE(pathcp, M_TEMP);
2598         return (error);
2599 }
2600
2601 /*
2602  * nfs mkdir service
2603  */
2604 int
2605 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2606             struct thread *td, struct mbuf **mrq)
2607 {
2608         struct sockaddr *nam = nfsd->nd_nam;
2609         struct ucred *cred = &nfsd->nd_cr;
2610         struct vattr va, dirfor, diraft;
2611         struct vattr *vap = &va;
2612         struct nfs_fattr *fp;
2613         struct nlookupdata nd;
2614         u_int32_t *tl;
2615         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2616         struct vnode *dirp;
2617         struct vnode *dvp;
2618         struct vnode *vp;
2619         nfsfh_t nfh;
2620         fhandle_t *fhp;
2621         struct nfsm_info info;
2622
2623         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2624         nlookup_zero(&nd);
2625         dirp = NULL;
2626         dvp = NULL;
2627         vp = NULL;
2628
2629         info.dpos = nfsd->nd_dpos;
2630         info.mrep = nfsd->nd_mrep;
2631         info.mreq =  NULL;
2632         info.md = nfsd->nd_md;
2633         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2634
2635         fhp = &nfh.fh_generic;
2636         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2637         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2638
2639         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2640                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2641                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2642         if (dirp) {
2643                 if (info.v3)
2644                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2645         }
2646         if (error) {
2647                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2648                                       NFSX_WCCDATA(info.v3), &error));
2649                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2650                                  diraft_ret, &diraft);
2651                 error = 0;
2652                 goto nfsmout;
2653         }
2654         VATTR_NULL(vap);
2655         if (info.v3) {
2656                 ERROROUT(nfsm_srvsattr(&info, vap));
2657         } else {
2658                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2659                 vap->va_mode = nfstov_mode(*tl++);
2660         }
2661
2662         /*
2663          * At this point nd.ni_dvp is referenced and exclusively locked and
2664          * nd.ni_vp, if it exists, is referenced but not locked.
2665          */
2666
2667         vap->va_type = VDIR;
2668         if (vp != NULL) {
2669                 error = EEXIST;
2670                 goto out;
2671         }
2672
2673         /*
2674          * Issue mkdir op.  Since SAVESTART is not set, the pathname 
2675          * component is freed by the VOP call.  This will fill-in
2676          * nd.ni_vp, reference, and exclusively lock it.
2677          */
2678         if (vap->va_mode == (mode_t)VNOVAL)
2679                 vap->va_mode = 0;
2680         vn_unlock(dvp);
2681         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2682         vrele(dvp);
2683         dvp = NULL;
2684
2685         if (error == 0) {
2686                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2687                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2688                 if (error == 0)
2689                         error = VOP_GETATTR(vp, vap);
2690         }
2691 out:
2692         if (dirp)
2693                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2694         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2695                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2696                               NFSX_WCCDATA(info.v3),
2697                               &error));
2698         if (info.v3) {
2699                 if (!error) {
2700                         nfsm_srvpostop_fh(&info, fhp);
2701                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2702                 }
2703                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2704                                  diraft_ret, &diraft);
2705         } else {
2706                 nfsm_srvfhtom(&info, fhp);
2707                 fp = nfsm_build(&info, NFSX_V2FATTR);
2708                 nfsm_srvfattr(nfsd, vap, fp);
2709         }
2710         error = 0;
2711         /* fall through */
2712
2713 nfsmout:
2714         *mrq = info.mreq;
2715         nlookup_done(&nd);
2716         if (dirp)
2717                 vrele(dirp);
2718         if (dvp) {
2719                 if (dvp == vp)
2720                         vrele(dvp);
2721                 else
2722                         vput(dvp);
2723         }
2724         if (vp)
2725                 vput(vp);
2726         return (error);
2727 }
2728
2729 /*
2730  * nfs rmdir service
2731  */
2732 int
2733 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2734             struct thread *td, struct mbuf **mrq)
2735 {
2736         struct sockaddr *nam = nfsd->nd_nam;
2737         struct ucred *cred = &nfsd->nd_cr;
2738         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2739         struct vnode *dirp;
2740         struct vnode *dvp;
2741         struct vnode *vp;
2742         struct vattr dirfor, diraft;
2743         nfsfh_t nfh;
2744         fhandle_t *fhp;
2745         struct nlookupdata nd;
2746         struct nfsm_info info;
2747
2748         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2749         nlookup_zero(&nd);
2750         dirp = NULL;
2751         dvp = NULL;
2752         vp = NULL;
2753
2754         info.mrep = nfsd->nd_mrep;
2755         info.mreq = NULL;
2756         info.md = nfsd->nd_md;
2757         info.dpos = nfsd->nd_dpos;
2758         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2759
2760         fhp = &nfh.fh_generic;
2761         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2762         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2763
2764         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2765                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2766                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2767         if (dirp) {
2768                 if (info.v3)
2769                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2770         }
2771         if (error) {
2772                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2773                                       NFSX_WCCDATA(info.v3), &error));
2774                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2775                                  diraft_ret, &diraft);
2776                 error = 0;
2777                 goto nfsmout;
2778         }
2779         if (vp->v_type != VDIR) {
2780                 error = ENOTDIR;
2781                 goto out;
2782         }
2783
2784         /*
2785          * The root of a mounted filesystem cannot be deleted.
2786          */
2787         if (vp->v_flag & VROOT)
2788                 error = EBUSY;
2789 out:
2790         /*
2791          * Issue or abort op.  Since SAVESTART is not set, path name
2792          * component is freed by the VOP after either.
2793          */
2794         if (!error) {
2795                 if (dvp != vp)
2796                         vn_unlock(dvp);
2797                 vput(vp);
2798                 vp = NULL;
2799                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2800                 vrele(dvp);
2801                 dvp = NULL;
2802         }
2803         nlookup_done(&nd);
2804
2805         if (dirp)
2806                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2807         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2808         if (info.v3) {
2809                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2810                                  diraft_ret, &diraft);
2811                 error = 0;
2812         }
2813         /* fall through */
2814
2815 nfsmout:
2816         *mrq = info.mreq;
2817         if (dvp) {
2818                 if (dvp == vp)
2819                         vrele(dvp);
2820                 else
2821                         vput(dvp);
2822         }
2823         nlookup_done(&nd);
2824         if (dirp)
2825                 vrele(dirp);
2826         if (vp)
2827                 vput(vp);
2828         return(error);
2829 }
2830
2831 /*
2832  * nfs readdir service
2833  * - mallocs what it thinks is enough to read
2834  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2835  * - calls VOP_READDIR()
2836  * - loops around building the reply
2837  *      if the output generated exceeds count break out of loop
2838  *      The nfsm_clget macro is used here so that the reply will be packed
2839  *      tightly in mbuf clusters.
2840  * - it only knows that it has encountered eof when the VOP_READDIR()
2841  *      reads nothing
2842  * - as such one readdir rpc will return eof false although you are there
2843  *      and then the next will return eof
 * - it trims out records with d_fileno == 0
 *      this doesn't matter for Unix clients, but such records might confuse
 *      clients on other operating systems.
2847  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2848  *      than requested, but this may not apply to all filesystems. For
2849  *      example, client NFS does not { although it is never remote mounted
2850  *      anyhow }
2851  *     The alternate call nfsrv_readdirplus() does lookups as well.
2852  * PS: The NFS protocol spec. does not clarify what the "count" byte
2853  *      argument is a count of.. just name strings and file id's or the
2854  *      entire reply rpc or ...
2855  *      I tried just file name and id sizes and it confused the Sun client,
2856  *      so I am using the full rpc size now. The "paranoia.." comment refers
2857  *      to including the status longwords that are not a part of the dir.
2858  *      "entry" structures, but are in the rpc.
2859  */
/*
 * Scratch record declared on the stack by nfsrv_readdirplus(), which views
 * fl_nfh as a fhandle_t.  The field names suggest this mirrors the
 * fixed-size per-entry tail of a v3 READDIRPLUS reply, so the field order
 * and sizes should be treated as layout-critical.
 * NOTE(review): confirm against the readdirplus body before changing.
 */
struct flrep {
        nfsuint64       fl_off;         /* presumably the entry's offset cookie */
        u_int32_t       fl_postopok;    /* presumably "attributes follow" flag */
        u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)]; /* NFSX_V3FATTR bytes */
        u_int32_t       fl_fhok;        /* presumably "file handle follows" flag */
        u_int32_t       fl_fhsize;      /* presumably the file handle length */
        u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)]; /* NFSX_V3FH bytes */
};
2868
2869 int
2870 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2871               struct thread *td, struct mbuf **mrq)
2872 {
2873         struct sockaddr *nam = nfsd->nd_nam;
2874         struct ucred *cred = &nfsd->nd_cr;
2875         char *bp, *be;
2876         struct dirent *dp;
2877         caddr_t cp;
2878         u_int32_t *tl;
2879         struct mbuf *mp1, *mp2;
2880         char *cpos, *cend, *rbuf;
2881         struct vnode *vp = NULL;
2882         struct mount *mp = NULL;
2883         struct vattr at;
2884         nfsfh_t nfh;
2885         fhandle_t *fhp;
2886         struct uio io;
2887         struct iovec iv;
2888         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2889         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2890         u_quad_t off, toff, verf;
2891         off_t *cookies = NULL, *cookiep;
2892         struct nfsm_info info;
2893
2894         info.mrep = nfsd->nd_mrep;
2895         info.mreq = NULL;
2896         info.md = nfsd->nd_md;
2897         info.dpos = nfsd->nd_dpos;
2898         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2899
2900         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2901         fhp = &nfh.fh_generic;
2902         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2903         if (info.v3) {
2904                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2905                 toff = fxdr_hyper(tl);
2906                 tl += 2;
2907                 verf = fxdr_hyper(tl);
2908                 tl += 2;
2909         } else {
2910                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2911                 toff = fxdr_unsigned(u_quad_t, *tl++);
2912                 verf = 0;       /* shut up gcc */
2913         }
2914         off = toff;
2915         cnt = fxdr_unsigned(int, *tl);
2916         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2917         xfer = NFS_SRVMAXDATA(nfsd);
2918         if ((unsigned)cnt > xfer)
2919                 cnt = xfer;
2920         if ((unsigned)siz > xfer)
2921                 siz = xfer;
2922         fullsiz = siz;
2923         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2924                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2925         if (!error && vp->v_type != VDIR) {
2926                 error = ENOTDIR;
2927                 vput(vp);
2928                 vp = NULL;
2929         }
2930         if (error) {
2931                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2932                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2933                 error = 0;
2934                 goto nfsmout;
2935         }
2936
2937         /*
2938          * Obtain lock on vnode for this section of the code
2939          */
2940
2941         if (info.v3) {
2942                 error = getret = VOP_GETATTR(vp, &at);
2943 #if 0
2944                 /*
2945                  * XXX This check may be too strict for Solaris 2.5 clients.
2946                  */
2947                 if (!error && toff && verf && verf != at.va_filerev)
2948                         error = NFSERR_BAD_COOKIE;
2949 #endif
2950         }
2951         if (!error)
2952                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2953         if (error) {
2954                 vput(vp);
2955                 vp = NULL;
2956                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2957                                       NFSX_POSTOPATTR(info.v3), &error));
2958                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2959                 error = 0;
2960                 goto nfsmout;
2961         }
2962         vn_unlock(vp);
2963
2964         /*
2965          * end section.  Allocate rbuf and continue
2966          */
2967         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2968 again:
2969         iv.iov_base = rbuf;
2970         iv.iov_len = fullsiz;
2971         io.uio_iov = &iv;
2972         io.uio_iovcnt = 1;
2973         io.uio_offset = (off_t)off;
2974         io.uio_resid = fullsiz;
2975         io.uio_segflg = UIO_SYSSPACE;
2976         io.uio_rw = UIO_READ;
2977         io.uio_td = NULL;
2978         eofflag = 0;
2979         if (cookies) {
2980                 kfree((caddr_t)cookies, M_TEMP);
2981                 cookies = NULL;
2982         }
2983         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2984         off = (off_t)io.uio_offset;
2985         if (!cookies && !error)
2986                 error = NFSERR_PERM;
2987         if (info.v3) {
2988                 getret = VOP_GETATTR(vp, &at);
2989                 if (!error)
2990                         error = getret;
2991         }
2992         if (error) {
2993                 vrele(vp);
2994                 vp = NULL;
2995                 kfree((caddr_t)rbuf, M_TEMP);
2996                 if (cookies)
2997                         kfree((caddr_t)cookies, M_TEMP);
2998                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2999                                       NFSX_POSTOPATTR(info.v3), &error));
3000                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3001                 error = 0;
3002                 goto nfsmout;
3003         }
3004         if (io.uio_resid) {
3005                 siz -= io.uio_resid;
3006
3007                 /*
3008                  * If nothing read, return eof
3009                  * rpc reply
3010                  */
3011                 if (siz == 0) {
3012                         vrele(vp);
3013                         vp = NULL;
3014                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3015                                               NFSX_POSTOPATTR(info.v3) +
3016                                               NFSX_COOKIEVERF(info.v3) +
3017                                               2 * NFSX_UNSIGNED,
3018                                               &error));
3019                         if (info.v3) {
3020                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3021                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3022                                 txdr_hyper(at.va_filerev, tl);
3023                                 tl += 2;
3024                         } else
3025                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3026                         *tl++ = nfs_false;
3027                         *tl = nfs_true;
3028                         FREE((caddr_t)rbuf, M_TEMP);
3029                         FREE((caddr_t)cookies, M_TEMP);
3030                         error = 0;
3031                         goto nfsmout;
3032                 }
3033         }
3034
3035         /*
3036          * Check for degenerate cases of nothing useful read.
3037          * If so go try again
3038          */
3039         cpos = rbuf;
3040         cend = rbuf + siz;
3041         dp = (struct dirent *)cpos;
3042         cookiep = cookies;
3043         /*
3044          * For some reason FreeBSD's ufs_readdir() chooses to back the
3045          * directory offset up to a block boundary, so it is necessary to
3046          * skip over the records that preceed the requested offset. This
3047          * requires the assumption that file offset cookies monotonically
3048          * increase.
3049          */
3050         while (cpos < cend && ncookies > 0 &&
3051                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3052                  ((u_quad_t)(*cookiep)) <= toff)) {
3053                 dp = _DIRENT_NEXT(dp);
3054                 cpos = (char *)dp;
3055                 cookiep++;
3056                 ncookies--;
3057         }
3058         if (cpos >= cend || ncookies == 0) {
3059                 toff = off;
3060                 siz = fullsiz;
3061                 goto again;
3062         }
3063
3064         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3065         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3066                               NFSX_POSTOPATTR(info.v3) +
3067                               NFSX_COOKIEVERF(info.v3) + siz,
3068                               &error));
3069         if (info.v3) {
3070                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3071                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3072                 txdr_hyper(at.va_filerev, tl);
3073         }
3074         mp1 = mp2 = info.mb;
3075         bp = info.bpos;
3076         be = bp + M_TRAILINGSPACE(mp1);
3077
3078         /* Loop through the records and build reply */
3079         while (cpos < cend && ncookies > 0) {
3080                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3081                         nlen = dp->d_namlen;
3082                         rem = nfsm_rndup(nlen) - nlen;
3083                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3084                         if (info.v3)
3085                                 len += 2 * NFSX_UNSIGNED;
3086                         if (len > cnt) {
3087                                 eofflag = 0;
3088                                 break;
3089                         }
3090                         /*
3091                          * Build the directory record xdr from
3092                          * the dirent entry.
3093                          */
3094                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3095                         *tl = nfs_true;
3096                         bp += NFSX_UNSIGNED;
3097                         if (info.v3) {
3098                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3099                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3100                                 bp += NFSX_UNSIGNED;
3101                         }
3102                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3103                         *tl = txdr_unsigned(dp->d_ino);
3104                         bp += NFSX_UNSIGNED;
3105                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3106                         *tl = txdr_unsigned(nlen);
3107                         bp += NFSX_UNSIGNED;
3108
3109                         /* And loop around copying the name */
3110                         xfer = nlen;
3111                         cp = dp->d_name;
3112                         while (xfer > 0) {
3113                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3114                                 if ((bp+xfer) > be)
3115                                         tsiz = be-bp;
3116                                 else
3117                                         tsiz = xfer;
3118                                 bcopy(cp, bp, tsiz);
3119                                 bp += tsiz;
3120                                 xfer -= tsiz;
3121                                 if (xfer > 0)
3122                                         cp += tsiz;
3123                         }
3124                         /* And null pad to a int32_t boundary */
3125                         for (i = 0; i < rem; i++)
3126                                 *bp++ = '\0';
3127                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3128
3129                         /* Finish off the record */
3130                         if (info.v3) {
3131                                 *tl = txdr_unsigned(*cookiep >> 32);
3132                                 bp += NFSX_UNSIGNED;
3133                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3134                         }
3135                         *tl = txdr_unsigned(*cookiep);
3136                         bp += NFSX_UNSIGNED;
3137                 }
3138                 dp = _DIRENT_NEXT(dp);
3139                 cpos = (char *)dp;
3140                 cookiep++;
3141                 ncookies--;
3142         }
3143         vrele(vp);
3144         vp = NULL;
3145         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3146         *tl = nfs_false;
3147         bp += NFSX_UNSIGNED;
3148         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3149         if (eofflag)
3150                 *tl = nfs_true;
3151         else
3152                 *tl = nfs_false;
3153         bp += NFSX_UNSIGNED;
3154         if (mp1 != info.mb) {
3155                 if (bp < be)
3156                         mp1->m_len = bp - mtod(mp1, caddr_t);
3157         } else
3158                 mp1->m_len += bp - info.bpos;
3159         FREE((caddr_t)rbuf, M_TEMP);
3160         FREE((caddr_t)cookies, M_TEMP);
3161
3162 nfsmout:
3163         *mrq = info.mreq;
3164         if (vp)
3165                 vrele(vp);
3166         return(error);
3167 }
3168
3169 int
3170 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3171                   struct thread *td, struct mbuf **mrq)
3172 {
3173         struct sockaddr *nam = nfsd->nd_nam;
3174         struct ucred *cred = &nfsd->nd_cr;
3175         char *bp, *be;
3176         struct dirent *dp;
3177         caddr_t cp;
3178         u_int32_t *tl;
3179         struct mbuf *mp1, *mp2;
3180         char *cpos, *cend, *rbuf;
3181         struct vnode *vp = NULL, *nvp;
3182         struct mount *mp = NULL;
3183         struct flrep fl;
3184         nfsfh_t nfh;
3185         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3186         struct uio io;
3187         struct iovec iv;
3188         struct vattr va, at, *vap = &va;
3189         struct nfs_fattr *fp;
3190         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3191         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3192         u_quad_t off, toff, verf;
3193         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3194         struct nfsm_info info;
3195
3196         info.mrep = nfsd->nd_mrep;
3197         info.mreq = NULL;
3198         info.md = nfsd->nd_md;
3199         info.dpos = nfsd->nd_dpos;
3200         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3201
3202         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3203         fhp = &nfh.fh_generic;
3204         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3205         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3206         toff = fxdr_hyper(tl);
3207         tl += 2;
3208         verf = fxdr_hyper(tl);
3209         tl += 2;
3210         siz = fxdr_unsigned(int, *tl++);
3211         cnt = fxdr_unsigned(int, *tl);
3212         off = toff;
3213         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3214         xfer = NFS_SRVMAXDATA(nfsd);
3215         if ((unsigned)cnt > xfer)
3216                 cnt = xfer;
3217         if ((unsigned)siz > xfer)
3218                 siz = xfer;
3219         fullsiz = siz;
3220         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3221                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3222         if (!error && vp->v_type != VDIR) {
3223                 error = ENOTDIR;
3224                 vput(vp);
3225                 vp = NULL;
3226         }
3227         if (error) {
3228                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3229                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3230                 error = 0;
3231                 goto nfsmout;
3232         }
3233         error = getret = VOP_GETATTR(vp, &at);
3234 #if 0
3235         /*
3236          * XXX This check may be too strict for Solaris 2.5 clients.
3237          */
3238         if (!error && toff && verf && verf != at.va_filerev)
3239                 error = NFSERR_BAD_COOKIE;
3240 #endif
3241         if (!error) {
3242                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3243         }
3244         if (error) {
3245                 vput(vp);
3246                 vp = NULL;
3247                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3248                                       NFSX_V3POSTOPATTR, &error));
3249                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3250                 error = 0;
3251                 goto nfsmout;
3252         }
3253         vn_unlock(vp);
3254         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3255 again:
3256         iv.iov_base = rbuf;
3257         iv.iov_len = fullsiz;
3258         io.uio_iov = &iv;
3259         io.uio_iovcnt = 1;
3260         io.uio_offset = (off_t)off;
3261         io.uio_resid = fullsiz;
3262         io.uio_segflg = UIO_SYSSPACE;
3263         io.uio_rw = UIO_READ;
3264         io.uio_td = NULL;
3265         eofflag = 0;
3266         if (cookies) {
3267                 kfree((caddr_t)cookies, M_TEMP);
3268                 cookies = NULL;
3269         }
3270         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3271         off = (u_quad_t)io.uio_offset;
3272         getret = VOP_GETATTR(vp, &at);
3273         if (!cookies && !error)
3274                 error = NFSERR_PERM;
3275         if (!error)
3276                 error = getret;
3277         if (error) {
3278                 vrele(vp);
3279                 vp = NULL;
3280                 if (cookies)
3281                         kfree((caddr_t)cookies, M_TEMP);
3282                 kfree((caddr_t)rbuf, M_TEMP);
3283                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3284                                       NFSX_V3POSTOPATTR, &error));
3285                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3286                 error = 0;
3287                 goto nfsmout;
3288         }
3289         if (io.uio_resid) {
3290                 siz -= io.uio_resid;
3291
3292                 /*
3293                  * If nothing read, return eof
3294                  * rpc reply
3295                  */
3296                 if (siz == 0) {
3297                         vrele(vp);
3298                         vp = NULL;
3299                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3300                                               NFSX_V3POSTOPATTR +
3301                                               NFSX_V3COOKIEVERF +
3302                                               2 * NFSX_UNSIGNED,
3303                                               &error));
3304                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3305                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3306                         txdr_hyper(at.va_filerev, tl);
3307                         tl += 2;
3308                         *tl++ = nfs_false;
3309                         *tl = nfs_true;
3310                         FREE((caddr_t)cookies, M_TEMP);
3311                         FREE((caddr_t)rbuf, M_TEMP);
3312                         error = 0;
3313                         goto nfsmout;
3314                 }
3315         }
3316
3317         /*
3318          * Check for degenerate cases of nothing useful read.
3319          * If so go try again
3320          */
3321         cpos = rbuf;
3322         cend = rbuf + siz;
3323         dp = (struct dirent *)cpos;
3324         cookiep = cookies;
3325         /*
3326          * For some reason FreeBSD's ufs_readdir() chooses to back the
3327          * directory offset up to a block boundary, so it is necessary to
3328          * skip over the records that preceed the requested offset. This
3329          * requires the assumption that file offset cookies monotonically
3330          * increase.
3331          */
3332         while (cpos < cend && ncookies > 0 &&
3333                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3334                  ((u_quad_t)(*cookiep)) <= toff)) {
3335                 dp = _DIRENT_NEXT(dp);
3336                 cpos = (char *)dp;
3337                 cookiep++;
3338                 ncookies--;
3339         }
3340         if (cpos >= cend || ncookies == 0) {
3341                 toff = off;
3342                 siz = fullsiz;
3343                 goto again;
3344         }
3345
3346         /*
3347          * Probe one of the directory entries to see if the filesystem
3348          * supports VGET.
3349          */
3350         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3351                 error = NFSERR_NOTSUPP;
3352                 vrele(vp);
3353                 vp = NULL;
3354                 kfree((caddr_t)cookies, M_TEMP);
3355                 kfree((caddr_t)rbuf, M_TEMP);
3356                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3357                                       NFSX_V3POSTOPATTR, &error));
3358                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3359                 error = 0;
3360                 goto nfsmout;
3361         }
3362         if (nvp) {
3363                 vput(nvp);
3364                 nvp = NULL;
3365         }
3366             
3367         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3368                         2 * NFSX_UNSIGNED;
3369         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3370         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3371         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3372         txdr_hyper(at.va_filerev, tl);
3373         mp1 = mp2 = info.mb;
3374         bp = info.bpos;
3375         be = bp + M_TRAILINGSPACE(mp1);
3376
3377         /* Loop through the records and build reply */
3378         while (cpos < cend && ncookies > 0) {
3379                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3380                         nlen = dp->d_namlen;
3381                         rem = nfsm_rndup(nlen) - nlen;
3382
3383                         /*
3384                          * For readdir_and_lookup get the vnode using
3385                          * the file number.
3386                          */
3387                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3388                                 goto invalid;
3389                         bzero((caddr_t)nfhp, NFSX_V3FH);
3390                         nfhp->fh_fsid = fhp->fh_fsid;
3391                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3392                                 vput(nvp);
3393                                 nvp = NULL;
3394                                 goto invalid;
3395                         }
3396                         if (VOP_GETATTR(nvp, vap)) {
3397                                 vput(nvp);
3398                                 nvp = NULL;
3399                                 goto invalid;
3400                         }
3401                         vput(nvp);
3402                         nvp = NULL;
3403
3404                         /*
3405                          * If either the dircount or maxcount will be
3406                          * exceeded, get out now. Both of these lengths
3407                          * are calculated conservatively, including all
3408                          * XDR overheads.
3409                          */
3410                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3411                                 NFSX_V3POSTOPATTR);
3412                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3413                         if (len > cnt || dirlen > fullsiz) {
3414                                 eofflag = 0;
3415                                 break;
3416                         }
3417
3418                         /*
3419                          * Build the directory record xdr from
3420                          * the dirent entry.
3421                          */
3422                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3423                         nfsm_srvfattr(nfsd, vap, fp);
3424                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3425                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3426                         fl.fl_postopok = nfs_true;
3427                         fl.fl_fhok = nfs_true;
3428                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3429
3430                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3431                         *tl = nfs_true;
3432                         bp += NFSX_UNSIGNED;
3433                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3434                         *tl = txdr_unsigned(dp->d_ino >> 32);
3435                         bp += NFSX_UNSIGNED;
3436                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3437                         *tl = txdr_unsigned(dp->d_ino);
3438                         bp += NFSX_UNSIGNED;
3439                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3440                         *tl = txdr_unsigned(nlen);
3441                         bp += NFSX_UNSIGNED;
3442
3443                         /* And loop around copying the name */
3444                         xfer = nlen;
3445                         cp = dp->d_name;
3446                         while (xfer > 0) {
3447                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3448                                 if ((bp + xfer) > be)
3449                                         tsiz = be - bp;
3450                                 else
3451                                         tsiz = xfer;
3452                                 bcopy(cp, bp, tsiz);
3453                                 bp += tsiz;
3454                                 xfer -= tsiz;
3455                                 cp += tsiz;
3456                         }
3457                         /* And null pad to a int32_t boundary */
3458                         for (i = 0; i < rem; i++)
3459                                 *bp++ = '\0';
3460         
3461                         /*
3462                          * Now copy the flrep structure out.
3463                          */
3464                         xfer = sizeof (struct flrep);
3465                         cp = (caddr_t)&fl;
3466                         while (xfer > 0) {
3467                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3468                                 if ((bp + xfer) > be)
3469                                         tsiz = be - bp;
3470                                 else
3471                                         tsiz = xfer;
3472                                 bcopy(cp, bp, tsiz);
3473                                 bp += tsiz;
3474                                 xfer -= tsiz;
3475                                 cp += tsiz;
3476                         }
3477                 }
3478 invalid:
3479                 dp = _DIRENT_NEXT(dp);
3480                 cpos = (char *)dp;
3481                 cookiep++;
3482                 ncookies--;
3483         }
3484         vrele(vp);
3485         vp = NULL;
3486         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3487         *tl = nfs_false;
3488         bp += NFSX_UNSIGNED;
3489         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3490         if (eofflag)
3491                 *tl = nfs_true;
3492         else
3493                 *tl = nfs_false;
3494         bp += NFSX_UNSIGNED;
3495         if (mp1 != info.mb) {
3496                 if (bp < be)
3497                         mp1->m_len = bp - mtod(mp1, caddr_t);
3498         } else
3499                 mp1->m_len += bp - info.bpos;
3500         FREE((caddr_t)cookies, M_TEMP);
3501         FREE((caddr_t)rbuf, M_TEMP);
3502 nfsmout:
3503         *mrq = info.mreq;
3504         if (vp)
3505                 vrele(vp);
3506         return(error);
3507 }
3508
3509 /*
3510  * nfs commit service
3511  */
3512 int
3513 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3514              struct thread *td, struct mbuf **mrq)
3515 {
3516         struct sockaddr *nam = nfsd->nd_nam;
3517         struct ucred *cred = &nfsd->nd_cr;
3518         struct vattr bfor, aft;
3519         struct vnode *vp = NULL;
3520         struct mount *mp = NULL;
3521         nfsfh_t nfh;
3522         fhandle_t *fhp;
3523         u_int32_t *tl;
3524         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3525         u_quad_t off;
3526         struct nfsm_info info;
3527
3528         info.mrep = nfsd->nd_mrep;
3529         info.mreq = NULL;
3530         info.md = nfsd->nd_md;
3531         info.dpos = nfsd->nd_dpos;
3532
3533         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3534         fhp = &nfh.fh_generic;
3535         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3536         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3537
3538         /*
3539          * XXX At this time VOP_FSYNC() does not accept offset and byte
3540          * count parameters, so these arguments are useless (someday maybe).
3541          */
3542         off = fxdr_hyper(tl);
3543         tl += 2;
3544         cnt = fxdr_unsigned(int, *tl);
3545         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3546                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3547         if (error) {
3548                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3549                                       2 * NFSX_UNSIGNED, &error));
3550                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3551                                  aft_ret, &aft);
3552                 error = 0;
3553                 goto nfsmout;
3554         }
3555         for_ret = VOP_GETATTR(vp, &bfor);
3556
3557         if (cnt > MAX_COMMIT_COUNT) {
3558                 /*
3559                  * Give up and do the whole thing
3560                  */
3561                 if (vp->v_object &&
3562                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3563                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3564                 }
3565                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3566         } else {
3567                 /*
3568                  * Locate and synchronously write any buffers that fall
3569                  * into the requested range.  Note:  we are assuming that
3570                  * f_iosize is a power of 2.
3571                  */
3572                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3573                 int iomask = iosize - 1;
3574                 off_t loffset;
3575
3576                 /*
3577                  * Align to iosize boundry, super-align to page boundry.
3578                  */
3579                 if (off & iomask) {
3580                         cnt += off & iomask;
3581                         off &= ~(u_quad_t)iomask;
3582                 }
3583                 if (off & PAGE_MASK) {
3584                         cnt += off & PAGE_MASK;
3585                         off &= ~(u_quad_t)PAGE_MASK;
3586                 }
3587                 loffset = off;
3588
3589                 if (vp->v_object &&
3590                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3591                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3592                             (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3593                 }
3594
3595                 crit_enter();
3596                 while (cnt > 0) {
3597                         struct buf *bp;
3598
3599                         /*
3600                          * If we have a buffer and it is marked B_DELWRI we
3601                          * have to lock and write it.  Otherwise the prior
3602                          * write is assumed to have already been committed.
3603                          *
3604                          * WARNING: FINDBLK_TEST buffers represent stable
3605                          *          storage but not necessarily stable
3606                          *          content.  It is ok in this case.
3607                          */
3608                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3609                                 if (bp->b_flags & B_DELWRI)
3610                                         bp = findblk(vp, loffset, 0);
3611                                 else
3612                                         bp = NULL;
3613                         }
3614                         if (bp) {
3615                                 if (bp->b_flags & B_DELWRI) {
3616                                         bremfree(bp);
3617                                         bwrite(bp);
3618                                         ++nfs_commit_miss;
3619                                 } else {
3620                                         BUF_UNLOCK(bp);
3621                                 }
3622                         }
3623                         ++nfs_commit_blks;
3624                         if (cnt < iosize)
3625                                 break;
3626                         cnt -= iosize;
3627                         loffset += iosize;
3628                 }
3629                 crit_exit();
3630         }
3631
3632         aft_ret = VOP_GETATTR(vp, &aft);
3633         vput(vp);
3634         vp = NULL;
3635         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3636                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3637                               &error));
3638         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3639                          aft_ret, &aft);
3640         if (!error) {
3641                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3642                 if (nfsver.tv_sec == 0)
3643                         nfsver = boottime;
3644                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3645                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3646         } else {
3647                 error = 0;
3648         }
3649 nfsmout:
3650         *mrq = info.mreq;
3651         if (vp)
3652                 vput(vp);
3653         return(error);
3654 }
3655
3656 /*
3657  * nfs statfs service
3658  */
3659 int
3660 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3661              struct thread *td, struct mbuf **mrq)
3662 {
3663         struct sockaddr *nam = nfsd->nd_nam;
3664         struct ucred *cred = &nfsd->nd_cr;
3665         struct statfs *sf;
3666         struct nfs_statfs *sfp;
3667         int error = 0, rdonly, getret = 1;
3668         struct vnode *vp = NULL;
3669         struct mount *mp = NULL;
3670         struct vattr at;
3671         nfsfh_t nfh;
3672         fhandle_t *fhp;
3673         struct statfs statfs;
3674         u_quad_t tval;
3675         struct nfsm_info info;
3676
3677         info.mrep = nfsd->nd_mrep;
3678         info.mreq = NULL;
3679         info.md = nfsd->nd_md;
3680         info.dpos = nfsd->nd_dpos;
3681         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3682
3683         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3684         fhp = &nfh.fh_generic;
3685         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3686         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3687                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3688         if (error) {
3689                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3690                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3691                 error = 0;
3692                 goto nfsmout;
3693         }
3694         sf = &statfs;
3695         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3696         getret = VOP_GETATTR(vp, &at);
3697         vput(vp);
3698         vp = NULL;
3699         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3700                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3701                               &error));
3702         if (info.v3)
3703                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3704         if (error) {
3705                 error = 0;
3706                 goto nfsmout;
3707         }
3708         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3709         if (info.v3) {
3710                 tval = (u_quad_t)sf->f_blocks;
3711                 tval *= (u_quad_t)sf->f_bsize;
3712                 txdr_hyper(tval, &sfp->sf_tbytes);
3713                 tval = (u_quad_t)sf->f_bfree;
3714                 tval *= (u_quad_t)sf->f_bsize;
3715                 txdr_hyper(tval, &sfp->sf_fbytes);
3716                 tval = (u_quad_t)sf->f_bavail;
3717                 tval *= (u_quad_t)sf->f_bsize;
3718                 txdr_hyper(tval, &sfp->sf_abytes);
3719                 sfp->sf_tfiles.nfsuquad[0] = 0;
3720                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3721                 sfp->sf_ffiles.nfsuquad[0] = 0;
3722                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3723                 sfp->sf_afiles.nfsuquad[0] = 0;
3724                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3725                 sfp->sf_invarsec = 0;
3726         } else {
3727                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3728                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3729                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3730                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3731                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3732         }
3733 nfsmout:
3734         *mrq = info.mreq;
3735         if (vp)
3736                 vput(vp);
3737         return(error);
3738 }
3739
3740 /*
3741  * nfs fsinfo service
3742  */
3743 int
3744 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3745              struct thread *td, struct mbuf **mrq)
3746 {
3747         struct sockaddr *nam = nfsd->nd_nam;
3748         struct ucred *cred = &nfsd->nd_cr;
3749         struct nfsv3_fsinfo *sip;
3750         int error = 0, rdonly, getret = 1, pref;
3751         struct vnode *vp = NULL;
3752         struct mount *mp = NULL;
3753         struct vattr at;
3754         nfsfh_t nfh;
3755         fhandle_t *fhp;
3756         u_quad_t maxfsize;
3757         struct statfs sb;
3758         struct nfsm_info info;
3759
3760         info.mrep = nfsd->nd_mrep;
3761         info.mreq = NULL;
3762         info.md = nfsd->nd_md;
3763         info.dpos = nfsd->nd_dpos;
3764
3765         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3766         fhp = &nfh.fh_generic;
3767         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3768         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3769                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3770         if (error) {
3771                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3772                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3773                 error = 0;
3774                 goto nfsmout;
3775         }
3776
3777         /* XXX Try to make a guess on the max file size. */
3778         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3779         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3780
3781         getret = VOP_GETATTR(vp, &at);
3782         vput(vp);
3783         vp = NULL;
3784         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3785                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3786         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3787         sip = nfsm_build(&info, NFSX_V3FSINFO);
3788
3789         /*
3790          * XXX
3791          * There should be file system VFS OP(s) to get this information.
3792          * For now, assume ufs.
3793          */
3794         if (slp->ns_so->so_type == SOCK_DGRAM)
3795                 pref = NFS_MAXDGRAMDATA;
3796         else
3797                 pref = NFS_MAXDATA;
3798         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3799         sip->fs_rtpref = txdr_unsigned(pref);
3800         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3801         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3802         sip->fs_wtpref = txdr_unsigned(pref);
3803         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3804         sip->fs_dtpref = txdr_unsigned(pref);
3805         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3806         sip->fs_timedelta.nfsv3_sec = 0;
3807         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3808         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3809                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3810                 NFSV3FSINFO_CANSETTIME);
3811 nfsmout:
3812         *mrq = info.mreq;
3813         if (vp)
3814                 vput(vp);
3815         return(error);
3816 }
3817
3818 /*
3819  * nfs pathconf service
3820  */
3821 int
3822 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3823                struct thread *td, struct mbuf **mrq)
3824 {
3825         struct sockaddr *nam = nfsd->nd_nam;
3826         struct ucred *cred = &nfsd->nd_cr;
3827         struct nfsv3_pathconf *pc;
3828         int error = 0, rdonly, getret = 1;
3829         register_t linkmax, namemax, chownres, notrunc;
3830         struct vnode *vp = NULL;
3831         struct mount *mp = NULL;
3832         struct vattr at;
3833         nfsfh_t nfh;
3834         fhandle_t *fhp;
3835         struct nfsm_info info;
3836
3837         info.mrep = nfsd->nd_mrep;
3838         info.mreq = NULL;
3839         info.md = nfsd->nd_md;
3840         info.dpos = nfsd->nd_dpos;
3841
3842         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3843         fhp = &nfh.fh_generic;
3844         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3845         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3846                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3847         if (error) {
3848                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3849                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3850                 error = 0;
3851                 goto nfsmout;
3852         }
3853         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3854         if (!error)
3855                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3856         if (!error)
3857                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3858         if (!error)
3859                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3860         getret = VOP_GETATTR(vp, &at);
3861         vput(vp);
3862         vp = NULL;
3863         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3864                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3865                               &error));
3866         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3867         if (error) {
3868                 error = 0;
3869                 goto nfsmout;
3870         }
3871         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3872
3873         pc->pc_linkmax = txdr_unsigned(linkmax);
3874         pc->pc_namemax = txdr_unsigned(namemax);
3875         pc->pc_notrunc = txdr_unsigned(notrunc);
3876         pc->pc_chownrestricted = txdr_unsigned(chownres);
3877
3878         /*
3879          * These should probably be supported by VOP_PATHCONF(), but
3880          * until msdosfs is exportable (why would you want to?), the
3881          * Unix defaults should be ok.
3882          */
3883         pc->pc_caseinsensitive = nfs_false;
3884         pc->pc_casepreserving = nfs_true;
3885 nfsmout:
3886         *mrq = info.mreq;
3887         if (vp) 
3888                 vput(vp);
3889         return(error);
3890 }
3891
3892 /*
3893  * Null operation, used by clients to ping server
3894  */
3895 /* ARGSUSED */
3896 int
3897 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3898            struct thread *td, struct mbuf **mrq)
3899 {
3900         struct nfsm_info info;
3901         int error = NFSERR_RETVOID;
3902
3903         info.mrep = nfsd->nd_mrep;
3904         info.mreq = NULL;
3905
3906         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3907         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3908 nfsmout:
3909         *mrq = info.mreq;
3910         return (error);
3911 }
3912
3913 /*
3914  * No operation, used for obsolete procedures
3915  */
3916 /* ARGSUSED */
3917 int
3918 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3919            struct thread *td, struct mbuf **mrq)
3920 {
3921         struct nfsm_info info;
3922         int error;
3923
3924         info.mrep = nfsd->nd_mrep;
3925         info.mreq = NULL;
3926
3927         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3928         if (nfsd->nd_repstat)
3929                 error = nfsd->nd_repstat;
3930         else
3931                 error = EPROCUNAVAIL;
3932         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3933         error = 0;
3934 nfsmout:
3935         *mrq = info.mreq;
3936         return (error);
3937 }
3938
3939 /*
3940  * Perform access checking for vnodes obtained from file handles that would
3941  * refer to files already opened by a Unix client. You cannot just use
3942  * vn_writechk() and VOP_ACCESS() for two reasons.
3943  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3944  * 2 - The owner is to be given access irrespective of mode bits for some
3945  *     operations, so that processes that chmod after opening a file don't
3946  *     break. I don't like this because it opens a security hole, but since
3947  *     the nfs server opens a security hole the size of a barn door anyhow,
3948  *     what the heck.
3949  *
3950  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3951  * will return EPERM instead of EACCESS. EPERM is always an error.
3952  */
3953 static int
3954 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3955              int rdonly, struct thread *td, int override)
3956 {
3957         struct vattr vattr;
3958         int error;
3959
3960         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3961         if (flags & VWRITE) {
3962                 /* Just vn_writechk() changed to check rdonly */
3963                 /*
3964                  * Disallow write attempts on read-only file systems;
3965                  * unless the file is a socket or a block or character
3966                  * device resident on the file system.
3967                  */
3968                 if (rdonly || 
3969                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3970                         switch (vp->v_type) {
3971                         case VREG:
3972                         case VDIR:
3973                         case VLNK:
3974                                 return (EROFS);
3975                         default:
3976                                 break;
3977                         }
3978                 }
3979                 /*
3980                  * If there's shared text associated with
3981                  * the inode, we can't allow writing.
3982                  */
3983                 if (vp->v_flag & VTEXT)
3984                         return (ETXTBSY);
3985         }
3986         error = VOP_GETATTR(vp, &vattr);
3987         if (error)
3988                 return (error);
3989         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
3990         /*
3991          * Allow certain operations for the owner (reads and writes
3992          * on files that are already open).
3993          */
3994         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3995                 error = 0;
3996         return error;
3997 }
3998 #endif /* NFS_NOSERVER */
3999