Merge branch 'vendor/GCC44'
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *      - do not mix the phases, since the nfsm_?? macros can return failures
50  *        on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *      error number iff error != 0 whereas
54  *      returning an error from the server function implies a fatal error
55  *      such as a badly constructed rpc request that should be dropped without
56  *      a reply.
57  *      For Version 3, nfsm_reply() does not return for the error case, since
58  *      most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *      Warning: always pay careful attention to resource cleanup on return
62  *      and note that nfsm_*() macros can terminate a procedure on certain
63  *      errors.
64  */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_zone.h>
88 #include <vm/vm_object.h>
89
90 #include <sys/buf2.h>
91
92 #include <sys/thread2.h>
93
94 #include "nfsproto.h"
95 #include "rpcv2.h"
96 #include "nfs.h"
97 #include "xdr_subs.h"
98 #include "nfsm_subs.h"
99
/*
 * Debug tracing.  nfsdbprintf() takes a single, parenthesized kprintf()
 * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)), and
 * expands to nothing unless NFSRV_DEBUG is defined.
 */
#ifndef NFSRV_DEBUG
#define nfsdbprintf(info)
#else
#define nfsdbprintf(info)	kprintf info
#endif

/* NOTE(review): not used in this part of the file; presumably bounds COMMIT. */
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Sequential-read heuristic tuning.
 *
 * NUM_HEURISTIC	number of slots in the nfsheur[] table
 * NHUSE_INIT		nh_use value given to a freshly claimed slot
 * NHUSE_INC		amount added to nh_use each time a slot is used
 * NHUSE_MAX		presumably the ceiling for nh_use (its use is not
 *			visible here -- confirm)
 */
#define NUM_HEURISTIC		1017
#define NHUSE_INIT		64
#define NHUSE_INC		16
#define NHUSE_MAX		2048
112
113 static struct nfsheur {
114     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
115     off_t nh_nextr;             /* next offset for sequential detection */
116     int nh_use;                 /* use count for selection */
117     int nh_seqcount;            /* heuristic */
118 } nfsheur[NUM_HEURISTIC];
119
120 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
121                       NFFIFO, NFNON };
122 #ifndef NFS_NOSERVER 
123 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
124                       NFCHR, NFNON };
125
126 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
127 int nfsrvw_procrastinate_v3 = 0;
128
129 static struct timespec  nfsver;
130
131 SYSCTL_DECL(_vfs_nfs);
132
133 int nfs_async;
134 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
139
140 static int nfsrv_access (struct mount *, struct vnode *, int,
141                         struct ucred *, int, struct thread *, int);
142 static void nfsrvw_coalesce (struct nfsrv_descript *,
143                 struct nfsrv_descript *);
144
145 /*
146  * nfs v3 access service
147  */
148 int
149 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
150               struct thread *td, struct mbuf **mrq)
151 {
152         struct sockaddr *nam = nfsd->nd_nam;
153         struct ucred *cred = &nfsd->nd_cr;
154         struct vnode *vp = NULL;
155         struct mount *mp = NULL;
156         nfsfh_t nfh;
157         fhandle_t *fhp;
158         int error = 0, rdonly, getret;
159         struct vattr vattr, *vap = &vattr;
160         u_long testmode, nfsmode;
161         struct nfsm_info info;
162         u_int32_t *tl;
163
164         info.dpos = nfsd->nd_dpos;
165         info.md = nfsd->nd_md;
166         info.mrep = nfsd->nd_mrep;
167         info.mreq = NULL;
168
169         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
170         fhp = &nfh.fh_generic;
171         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
172         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
173         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
174             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
175         if (error) {
176                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
177                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
178                 error = 0;
179                 goto nfsmout;
180         }
181         nfsmode = fxdr_unsigned(u_int32_t, *tl);
182         if ((nfsmode & NFSV3ACCESS_READ) &&
183                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
184                 nfsmode &= ~NFSV3ACCESS_READ;
185         if (vp->v_type == VDIR)
186                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
187                         NFSV3ACCESS_DELETE);
188         else
189                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
190         if ((nfsmode & testmode) &&
191                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
192                 nfsmode &= ~testmode;
193         if (vp->v_type == VDIR)
194                 testmode = NFSV3ACCESS_LOOKUP;
195         else
196                 testmode = NFSV3ACCESS_EXECUTE;
197         if ((nfsmode & testmode) &&
198                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
199                 nfsmode &= ~testmode;
200         getret = VOP_GETATTR(vp, vap);
201         vput(vp);
202         vp = NULL;
203         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
204                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
205         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
206         tl = nfsm_build(&info, NFSX_UNSIGNED);
207         *tl = txdr_unsigned(nfsmode);
208 nfsmout:
209         *mrq = info.mreq;
210         if (vp)
211                 vput(vp);
212         return(error);
213 }
214
215 /*
216  * nfs getattr service
217  */
218 int
219 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
220               struct thread *td, struct mbuf **mrq)
221 {
222         struct sockaddr *nam = nfsd->nd_nam;
223         struct ucred *cred = &nfsd->nd_cr;
224         struct nfs_fattr *fp;
225         struct vattr va;
226         struct vattr *vap = &va;
227         struct vnode *vp = NULL;
228         struct mount *mp = NULL;
229         nfsfh_t nfh;
230         fhandle_t *fhp;
231         int error = 0, rdonly;
232         struct nfsm_info info;
233
234         info.mrep = nfsd->nd_mrep;
235         info.md = nfsd->nd_md;
236         info.dpos = nfsd->nd_dpos;
237         info.mreq = NULL;
238
239         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
240         fhp = &nfh.fh_generic;
241         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
242         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
243                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
244         if (error) {
245                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
246                 error = 0;
247                 goto nfsmout;
248         }
249         error = VOP_GETATTR(vp, vap);
250         vput(vp);
251         vp = NULL;
252         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
253                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
254         if (error) {
255                 error = 0;
256                 goto nfsmout;
257         }
258         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
259         nfsm_srvfattr(nfsd, vap, fp);
260         /* fall through */
261
262 nfsmout:
263         *mrq = info.mreq;
264         if (vp)
265                 vput(vp);
266         return(error);
267 }
268
269 /*
270  * nfs setattr service
271  */
272 int
273 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
274               struct thread *td, struct mbuf **mrq)
275 {
276         struct sockaddr *nam = nfsd->nd_nam;
277         struct ucred *cred = &nfsd->nd_cr;
278         struct vattr va, preat;
279         struct vattr *vap = &va;
280         struct nfsv2_sattr *sp;
281         struct nfs_fattr *fp;
282         struct vnode *vp = NULL;
283         struct mount *mp = NULL;
284         nfsfh_t nfh;
285         fhandle_t *fhp;
286         u_int32_t *tl;
287         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
288         int gcheck = 0;
289         struct timespec guard;
290         struct nfsm_info info;
291
292         info.mrep = nfsd->nd_mrep;
293         info.mreq = NULL;
294         info.md = nfsd->nd_md;
295         info.dpos = nfsd->nd_dpos;
296         info.v3 = (nfsd->nd_flag & ND_NFSV3);
297
298         guard.tv_sec = 0;       /* fix compiler warning */
299         guard.tv_nsec = 0;
300
301         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
302         fhp = &nfh.fh_generic;
303         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
304         VATTR_NULL(vap);
305         if (info.v3) {
306                 ERROROUT(nfsm_srvsattr(&info, vap));
307                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
308                 gcheck = fxdr_unsigned(int, *tl);
309                 if (gcheck) {
310                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
311                         fxdr_nfsv3time(tl, &guard);
312                 }
313         } else {
314                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
315                 /*
316                  * Nah nah nah nah na nah
317                  * There is a bug in the Sun client that puts 0xffff in the mode
318                  * field of sattr when it should put in 0xffffffff. The u_short
319                  * doesn't sign extend.
320                  * --> check the low order 2 bytes for 0xffff
321                  */
322                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
323                         vap->va_mode = nfstov_mode(sp->sa_mode);
324                 if (sp->sa_uid != nfs_xdrneg1)
325                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
326                 if (sp->sa_gid != nfs_xdrneg1)
327                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
328                 if (sp->sa_size != nfs_xdrneg1)
329                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
330                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
331 #ifdef notyet
332                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
333 #else
334                         vap->va_atime.tv_sec =
335                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
336                         vap->va_atime.tv_nsec = 0;
337 #endif
338                 }
339                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
340                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
341
342         }
343
344         /*
345          * Now that we have all the fields, lets do it.
346          */
347         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
348                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
349         if (error) {
350                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
351                                       2 * NFSX_UNSIGNED, &error));
352                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
353                                  postat_ret, vap);
354                 error = 0;
355                 goto nfsmout;
356         }
357
358         /*
359          * vp now an active resource, pay careful attention to cleanup
360          */
361
362         if (info.v3) {
363                 error = preat_ret = VOP_GETATTR(vp, &preat);
364                 if (!error && gcheck &&
365                         (preat.va_ctime.tv_sec != guard.tv_sec ||
366                          preat.va_ctime.tv_nsec != guard.tv_nsec))
367                         error = NFSERR_NOT_SYNC;
368                 if (error) {
369                         vput(vp);
370                         vp = NULL;
371                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
372                                               NFSX_WCCDATA(info.v3), &error));
373                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
374                                          postat_ret, vap);
375                         error = 0;
376                         goto nfsmout;
377                 }
378         }
379
380         /*
381          * If the size is being changed write acces is required, otherwise
382          * just check for a read only file system.
383          */
384         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
385                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
386                         error = EROFS;
387                         goto out;
388                 }
389         } else {
390                 if (vp->v_type == VDIR) {
391                         error = EISDIR;
392                         goto out;
393                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
394                             td, 0)) != 0){ 
395                         goto out;
396                 }
397         }
398         error = VOP_SETATTR(vp, vap, cred);
399         postat_ret = VOP_GETATTR(vp, vap);
400         if (!error)
401                 error = postat_ret;
402 out:
403         vput(vp);
404         vp = NULL;
405         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
406                    NFSX_WCCORFATTR(info.v3), &error));
407         if (info.v3) {
408                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
409                                  postat_ret, vap);
410                 error = 0;
411                 goto nfsmout;
412         } else {
413                 fp = nfsm_build(&info, NFSX_V2FATTR);
414                 nfsm_srvfattr(nfsd, vap, fp);
415         }
416         /* fall through */
417
418 nfsmout:
419         *mrq = info.mreq;
420         if (vp)
421                 vput(vp);
422         return(error);
423 }
424
425 /*
426  * nfs lookup rpc
427  */
428 int
429 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
430              struct thread *td, struct mbuf **mrq)
431 {
432         struct sockaddr *nam = nfsd->nd_nam;
433         struct ucred *cred = &nfsd->nd_cr;
434         struct nfs_fattr *fp;
435         struct nlookupdata nd;
436         struct vnode *vp;
437         struct vnode *dirp;
438         struct nchandle nch;
439         nfsfh_t nfh;
440         fhandle_t *fhp;
441         int error = 0, len, dirattr_ret = 1;
442         int pubflag;
443         struct vattr va, dirattr, *vap = &va;
444         struct nfsm_info info;
445
446         info.mrep = nfsd->nd_mrep;
447         info.mreq = NULL;
448         info.md = nfsd->nd_md;
449         info.dpos = nfsd->nd_dpos;
450         info.v3 = (nfsd->nd_flag & ND_NFSV3);
451
452         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
453         nlookup_zero(&nd);
454         dirp = NULL;
455         vp = NULL;
456
457         fhp = &nfh.fh_generic;
458         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
459         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
460
461         pubflag = nfs_ispublicfh(fhp);
462
463         error = nfs_namei(&nd, cred, 0, NULL, &vp,
464                 fhp, len, slp, nam, &info.md, &info.dpos,
465                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
466
467         /*
468          * namei failure, only dirp to cleanup.  Clear out garbarge from
469          * structure in case macros jump to nfsmout.
470          */
471
472         if (error) {
473                 if (dirp) {
474                         if (info.v3)
475                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
476                         vrele(dirp);
477                         dirp = NULL;
478                 }
479                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
480                                       NFSX_POSTOPATTR(info.v3), &error));
481                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
482                 error = 0;
483                 goto nfsmout;
484         }
485
486         /*
487          * Locate index file for public filehandle
488          *
489          * error is 0 on entry and 0 on exit from this block.
490          */
491
492         if (pubflag) {
493                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
494                         /*
495                          * Setup call to lookup() to see if we can find
496                          * the index file. Arguably, this doesn't belong
497                          * in a kernel.. Ugh.  If an error occurs, do not
498                          * try to install an index file and then clear the
499                          * error.
500                          *
501                          * When we replace nd with ind and redirect ndp,
502                          * maintenance of ni_startdir and ni_vp shift to
503                          * ind and we have to clean them up in the old nd.
504                          * However, the cnd resource continues to be maintained
505                          * via the original nd.  Confused?  You aren't alone!
506                          */
507                         vn_unlock(vp);
508                         cache_copy(&nd.nl_nch, &nch);
509                         nlookup_done(&nd);
510                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
511                                                 UIO_SYSSPACE, 0, cred, &nch);
512                         cache_drop(&nch);
513                         if (error == 0)
514                                 error = nlookup(&nd);
515
516                         if (error == 0) {
517                                 /*
518                                  * Found an index file. Get rid of
519                                  * the old references.  transfer vp and
520                                  * load up the new vp.  Fortunately we do
521                                  * not have to deal with dvp, that would be
522                                  * a huge mess.
523                                  */
524                                 if (dirp)       
525                                         vrele(dirp);
526                                 dirp = vp;
527                                 vp = NULL;
528                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
529                                                         LK_EXCLUSIVE, &vp);
530                                 KKASSERT(error == 0);
531                         }
532                         error = 0;
533                 }
534                 /*
535                  * If the public filehandle was used, check that this lookup
536                  * didn't result in a filehandle outside the publicly exported
537                  * filesystem.  We clear the poor vp here to avoid lockups due
538                  * to NFS I/O.
539                  */
540
541                 if (vp->v_mount != nfs_pub.np_mount) {
542                         vput(vp);
543                         vp = NULL;
544                         error = EPERM;
545                 }
546         }
547
548         if (dirp) {
549                 if (info.v3)
550                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
551                 vrele(dirp);
552                 dirp = NULL;
553         }
554
555         /*
556          * Resources at this point:
557          *      ndp->ni_vp      may not be NULL
558          *
559          */
560
561         if (error) {
562                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
563                                       NFSX_POSTOPATTR(info.v3), &error));
564                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
565                 error = 0;
566                 goto nfsmout;
567         }
568
569         /*
570          * Clear out some resources prior to potentially blocking.  This
571          * is not as critical as ni_dvp resources in other routines, but
572          * it helps.
573          */
574         nlookup_done(&nd);
575
576         /*
577          * Get underlying attribute, then release remaining resources ( for
578          * the same potential blocking reason ) and reply.
579          */
580         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
581         error = VFS_VPTOFH(vp, &fhp->fh_fid);
582         if (!error)
583                 error = VOP_GETATTR(vp, vap);
584
585         vput(vp);
586         vp = NULL;
587         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
588                               NFSX_SRVFH(info.v3) +
589                               NFSX_POSTOPORFATTR(info.v3) +
590                               NFSX_POSTOPATTR(info.v3),
591                               &error));
592         if (error) {
593                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
594                 error = 0;
595                 goto nfsmout;
596         }
597         nfsm_srvfhtom(&info, fhp);
598         if (info.v3) {
599                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
600                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
601         } else {
602                 fp = nfsm_build(&info, NFSX_V2FATTR);
603                 nfsm_srvfattr(nfsd, vap, fp);
604         }
605
606 nfsmout:
607         *mrq = info.mreq;
608         if (dirp)
609                 vrele(dirp);
610         nlookup_done(&nd);              /* may be called twice */
611         if (vp)
612                 vput(vp);
613         return (error);
614 }
615
616 /*
617  * nfs readlink service
618  */
619 int
620 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
621                struct thread *td, struct mbuf **mrq)
622 {
623         struct sockaddr *nam = nfsd->nd_nam;
624         struct ucred *cred = &nfsd->nd_cr;
625         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
626         struct iovec *ivp = iv;
627         u_int32_t *tl;
628         int error = 0, rdonly, i, tlen, len, getret;
629         struct mbuf *mp1, *mp2, *mp3;
630         struct vnode *vp = NULL;
631         struct mount *mp = NULL;
632         struct vattr attr;
633         nfsfh_t nfh;
634         fhandle_t *fhp;
635         struct uio io, *uiop = &io;
636         struct nfsm_info info;
637
638         info.mrep = nfsd->nd_mrep;
639         info.mreq = NULL;
640         info.md = nfsd->nd_md;
641         info.dpos = nfsd->nd_dpos;
642         info.v3 = (nfsd->nd_flag & ND_NFSV3);
643
644         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
645 #ifndef nolint
646         mp2 = NULL;
647 #endif
648         mp3 = NULL;
649         fhp = &nfh.fh_generic;
650         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
651         len = 0;
652         i = 0;
653         while (len < NFS_MAXPATHLEN) {
654                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
655                 mp1->m_len = MCLBYTES;
656                 if (len == 0)
657                         mp3 = mp2 = mp1;
658                 else {
659                         mp2->m_next = mp1;
660                         mp2 = mp1;
661                 }
662                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
663                         mp1->m_len = NFS_MAXPATHLEN-len;
664                         len = NFS_MAXPATHLEN;
665                 } else
666                         len += mp1->m_len;
667                 ivp->iov_base = mtod(mp1, caddr_t);
668                 ivp->iov_len = mp1->m_len;
669                 i++;
670                 ivp++;
671         }
672         uiop->uio_iov = iv;
673         uiop->uio_iovcnt = i;
674         uiop->uio_offset = 0;
675         uiop->uio_resid = len;
676         uiop->uio_rw = UIO_READ;
677         uiop->uio_segflg = UIO_SYSSPACE;
678         uiop->uio_td = NULL;
679         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
680                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
681         if (error) {
682                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
683                                       2 * NFSX_UNSIGNED, &error));
684                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
685                 error = 0;
686                 goto nfsmout;
687         }
688         if (vp->v_type != VLNK) {
689                 if (info.v3)
690                         error = EINVAL;
691                 else
692                         error = ENXIO;
693                 goto out;
694         }
695         error = VOP_READLINK(vp, uiop, cred);
696 out:
697         getret = VOP_GETATTR(vp, &attr);
698         vput(vp);
699         vp = NULL;
700         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
701                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
702                              &error));
703         if (info.v3) {
704                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
705                 if (error) {
706                         error = 0;
707                         goto nfsmout;
708                 }
709         }
710         if (uiop->uio_resid > 0) {
711                 len -= uiop->uio_resid;
712                 tlen = nfsm_rndup(len);
713                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
714         }
715         tl = nfsm_build(&info, NFSX_UNSIGNED);
716         *tl = txdr_unsigned(len);
717         info.mb->m_next = mp3;
718         mp3 = NULL;
719 nfsmout:
720         *mrq = info.mreq;
721         if (mp3)
722                 m_freem(mp3);
723         if (vp)
724                 vput(vp);
725         return(error);
726 }
727
728 /*
729  * nfs read service
730  */
731 int
732 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
733            struct thread *td, struct mbuf **mrq)
734 {
735         struct nfsm_info info;
736         struct sockaddr *nam = nfsd->nd_nam;
737         struct ucred *cred = &nfsd->nd_cr;
738         struct iovec *iv;
739         struct iovec *iv2;
740         struct mbuf *m;
741         struct nfs_fattr *fp;
742         u_int32_t *tl;
743         int i;
744         int reqlen;
745         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
746         struct mbuf *m2;
747         struct vnode *vp = NULL;
748         struct mount *mp = NULL;
749         nfsfh_t nfh;
750         fhandle_t *fhp;
751         struct uio io, *uiop = &io;
752         struct vattr va, *vap = &va;
753         struct nfsheur *nh;
754         off_t off;
755         int ioflag = 0;
756
757         info.mrep = nfsd->nd_mrep;
758         info.mreq = NULL;
759         info.md = nfsd->nd_md;
760         info.dpos = nfsd->nd_dpos;
761         info.v3 = (nfsd->nd_flag & ND_NFSV3);
762
763         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
764         fhp = &nfh.fh_generic;
765         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
766         if (info.v3) {
767                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
768                 off = fxdr_hyper(tl);
769         } else {
770                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
771                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
772         }
773         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
774                                             NFS_SRVMAXDATA(nfsd), &error));
775
776         /*
777          * Reference vp.  If an error occurs, vp will be invalid, but we
778          * have to NULL it just in case.  The macros might goto nfsmout
779          * as well.
780          */
781
782         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
783                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
784         if (error) {
785                 vp = NULL;
786                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
787                                       2 * NFSX_UNSIGNED, &error));
788                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
789                 error = 0;
790                 goto nfsmout;
791         }
792
793         if (vp->v_type != VREG) {
794                 if (info.v3)
795                         error = EINVAL;
796                 else
797                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
798         }
799         if (!error) {
800             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
801                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
802         }
803         getret = VOP_GETATTR(vp, vap);
804         if (!error)
805                 error = getret;
806         if (error) {
807                 vput(vp);
808                 vp = NULL;
809                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
810                                       NFSX_POSTOPATTR(info.v3), &error));
811                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
812                 error = 0;
813                 goto nfsmout;
814         }
815
816         /*
817          * Calculate byte count to read
818          */
819
820         if (off >= vap->va_size)
821                 cnt = 0;
822         else if ((off + reqlen) > vap->va_size)
823                 cnt = vap->va_size - off;
824         else
825                 cnt = reqlen;
826
827         /*
828          * Calculate seqcount for heuristic
829          */
830
831         {
832                 int hi;
833                 int try = 32;
834
835                 /*
836                  * Locate best candidate
837                  */
838
839                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
840                 nh = &nfsheur[hi];
841
842                 while (try--) {
843                         if (nfsheur[hi].nh_vp == vp) {
844                                 nh = &nfsheur[hi];
845                                 break;
846                         }
847                         if (nfsheur[hi].nh_use > 0)
848                                 --nfsheur[hi].nh_use;
849                         hi = (hi + 1) % NUM_HEURISTIC;
850                         if (nfsheur[hi].nh_use < nh->nh_use)
851                                 nh = &nfsheur[hi];
852                 }
853
854                 if (nh->nh_vp != vp) {
855                         nh->nh_vp = vp;
856                         nh->nh_nextr = off;
857                         nh->nh_use = NHUSE_INIT;
858                         if (off == 0)
859                                 nh->nh_seqcount = 4;
860                         else
861                                 nh->nh_seqcount = 1;
862                 }
863
864                 /*
865                  * Calculate heuristic
866                  */
867
868                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
869                         if (++nh->nh_seqcount > IO_SEQMAX)
870                                 nh->nh_seqcount = IO_SEQMAX;
871                 } else if (nh->nh_seqcount > 1) {
872                         nh->nh_seqcount = 1;
873                 } else {
874                         nh->nh_seqcount = 0;
875                 }
876                 nh->nh_use += NHUSE_INC;
877                 if (nh->nh_use > NHUSE_MAX)
878                         nh->nh_use = NHUSE_MAX;
879                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
880         }
881
882         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
883                               NFSX_POSTOPORFATTR(info.v3) +
884                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
885                               &error));
886         if (info.v3) {
887                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
888                 *tl++ = nfs_true;
889                 fp = (struct nfs_fattr *)tl;
890                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
891         } else {
892                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
893                 fp = (struct nfs_fattr *)tl;
894                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
895         }
896         len = left = nfsm_rndup(cnt);
897         if (cnt > 0) {
898                 /*
899                  * Generate the mbuf list with the uio_iov ref. to it.
900                  */
901                 i = 0;
902                 m = m2 = info.mb;
903                 while (left > 0) {
904                         siz = min(M_TRAILINGSPACE(m), left);
905                         if (siz > 0) {
906                                 left -= siz;
907                                 i++;
908                         }
909                         if (left > 0) {
910                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
911                                 m->m_len = 0;
912                                 m2->m_next = m;
913                                 m2 = m;
914                         }
915                 }
916                 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
917                        M_TEMP, M_WAITOK);
918                 uiop->uio_iov = iv2 = iv;
919                 m = info.mb;
920                 left = len;
921                 i = 0;
922                 while (left > 0) {
923                         if (m == NULL)
924                                 panic("nfsrv_read iov");
925                         siz = min(M_TRAILINGSPACE(m), left);
926                         if (siz > 0) {
927                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
928                                 iv->iov_len = siz;
929                                 m->m_len += siz;
930                                 left -= siz;
931                                 iv++;
932                                 i++;
933                         }
934                         m = m->m_next;
935                 }
936                 uiop->uio_iovcnt = i;
937                 uiop->uio_offset = off;
938                 uiop->uio_resid = len;
939                 uiop->uio_rw = UIO_READ;
940                 uiop->uio_segflg = UIO_SYSSPACE;
941                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
942                 off = uiop->uio_offset;
943                 nh->nh_nextr = off;
944                 FREE((caddr_t)iv2, M_TEMP);
945                 if (error || (getret = VOP_GETATTR(vp, vap))) {
946                         if (!error)
947                                 error = getret;
948                         m_freem(info.mreq);
949                         info.mreq = NULL;
950                         vput(vp);
951                         vp = NULL;
952                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
953                                               NFSX_POSTOPATTR(info.v3),
954                                               &error));
955                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
956                         error = 0;
957                         goto nfsmout;
958                 }
959         } else {
960                 uiop->uio_resid = 0;
961         }
962         vput(vp);
963         vp = NULL;
964         nfsm_srvfattr(nfsd, vap, fp);
965         tlen = len - uiop->uio_resid;
966         cnt = cnt < tlen ? cnt : tlen;
967         tlen = nfsm_rndup(cnt);
968         if (len != tlen || tlen != cnt)
969                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
970         if (info.v3) {
971                 *tl++ = txdr_unsigned(cnt);
972                 if (len < reqlen)
973                         *tl++ = nfs_true;
974                 else
975                         *tl++ = nfs_false;
976         }
977         *tl = txdr_unsigned(cnt);
978 nfsmout:
979         *mrq = info.mreq;
980         if (vp)
981                 vput(vp);
982         return(error);
983 }
984
985 /*
986  * nfs write service
     *
     * Handles a single NFS WRITE request (v2 or v3) without write gathering.
     *
     * nfsd - request descriptor; supplies the request mbuf chain (nd_mrep),
     *        the current parse position (nd_md/nd_dpos), the credentials
     *        (nd_cr), the client address (nd_nam) and the protocol flags.
     * slp  - server socket the request arrived on; passed through to
     *        nfsrv_fhtovp() and nfsm_reply().
     * td   - thread passed to nfsrv_access().
     * mrq  - out: set to the reply mbuf chain (info.mreq) at nfsmout; this
     *        is NULL if no reply was built.
     *
     * Returns the value left in 'error'.  Paths that have already encoded a
     * failure status into the reply clear 'error' before jumping to nfsmout.
     *
     * As noted elsewhere in this file, the nfsm reply/dissect macros
     * (NEGREPLYOUT, NULLOUT, NEGKEEPOUT) might goto nfsmout, so vp is
     * kept NULL whenever it is not referenced; nfsmout vput()s a non-NULL vp.
987  */
988 int
989 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
990             struct thread *td, struct mbuf **mrq)
991 {
992         struct sockaddr *nam = nfsd->nd_nam;
993         struct ucred *cred = &nfsd->nd_cr;
994         struct iovec *ivp;
995         int i, cnt;
996         struct mbuf *mp1;
997         struct nfs_fattr *fp;
998         struct iovec *iv;
999         struct vattr va, forat;
1000         struct vattr *vap = &va;
1001         u_int32_t *tl;
1002         int error = 0, rdonly, len, forat_ret = 1;
1003         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1004         int stable = NFSV3WRITE_FILESYNC;
1005         struct vnode *vp = NULL;
1006         struct mount *mp = NULL;
1007         nfsfh_t nfh;
1008         fhandle_t *fhp;
1009         struct uio io, *uiop = &io;
1010         struct nfsm_info info;
1011         off_t off;
1012
             /* Seed the mbuf parsing state from the request descriptor. */
1013         info.mrep = nfsd->nd_mrep;
1014         info.mreq = NULL;
1015         info.md = nfsd->nd_md;
1016         info.dpos = nfsd->nd_dpos;
1017         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1018
1019         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /* No request data: return 0 with *mrq left NULL. */
1020         if (info.mrep == NULL) {
1021                 error = 0;
1022                 goto nfsmout;
1023         }
1024         fhp = &nfh.fh_generic;
1025         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
             /*
              * Decode the write arguments.  In both layouts tl ends up
              * pointing at the word holding the data length.
              *   v3: words 0-1 are the 64-bit offset, word 2 is skipped,
              *       word 3 is the stability level, word 4 is the length.
              *   v2: word 1 is the 32-bit offset, words 0 and 2 are skipped,
              *       word 3 is the length.  v2 carries no stability level,
              *       so the FILESYNC default is kept unless nfs_async is set.
              */
1026         if (info.v3) {
1027                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1028                 off = fxdr_hyper(tl);
1029                 tl += 3;
1030                 stable = fxdr_unsigned(int, *tl++);
1031         } else {
1032                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1033                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1034                 tl += 2;
1035                 if (nfs_async)
1036                         stable = NFSV3WRITE_UNSTABLE;
1037         }
             /* retlen keeps the requested length for the v3 reply count. */
1038         retlen = len = fxdr_unsigned(int32_t, *tl);
1039         cnt = i = 0;
1040
1041         /*
1042          * For NFS Version 2, it is not obvious what a write of zero length
1043          * should do, but I might as well be consistent with Version 3,
1044          * which is to return ok so long as there are no permission problems.
1045          */
1046         if (len > 0) {
                 /*
                  * Trim the request chain in place so that only the write
                  * data remains: mbufs before the parse position get
                  * m_len = 0, the mbuf at the parse position is advanced
                  * past the already-parsed bytes, and everything beyond
                  * 'len' data bytes is cut off.  On exit 'i' is the number
                  * of data bytes seen and 'cnt' the number of non-empty
                  * mbufs (one iovec is needed for each).
                  */
1047             zeroing = 1;
1048             mp1 = info.mrep;
1049             while (mp1) {
1050                 if (mp1 == info.md) {
1051                         zeroing = 0;
1052                         adjust = info.dpos - mtod(mp1, caddr_t);
1053                         mp1->m_len -= adjust;
1054                         if (mp1->m_len > 0 && adjust > 0)
1055                                 mp1->m_data += adjust;
1056                 }
1057                 if (zeroing)
1058                         mp1->m_len = 0;
1059                 else if (mp1->m_len > 0) {
1060                         i += mp1->m_len;
1061                         if (i > len) {
                                     /* trailing junk: shorten, zero the rest */
1062                                 mp1->m_len -= (i - len);
1063                                 zeroing = 1;
1064                         }
1065                         if (mp1->m_len > 0)
1066                                 cnt++;
1067                 }
1068                 mp1 = mp1->m_next;
1069             }
1070         }
             /*
              * Reject oversized or negative lengths and requests that carry
              * fewer data bytes than they claim.  The status is encoded in
              * the reply, so 'error' is cleared before returning.
              */
1071         if (len > NFS_MAXDATA || len < 0 || i < len) {
1072                 error = EIO;
1073                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1074                                       2 * NFSX_UNSIGNED, &error));
1075                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1076                                  aftat_ret, vap);
1077                 error = 0;
1078                 goto nfsmout;
1079         }
             /* Translate the file handle into a referenced vnode. */
1080         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1081                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1082         if (error) {
1083                 vp = NULL;
1084                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1085                                       2 * NFSX_UNSIGNED, &error));
1086                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1087                                  aftat_ret, vap);
1088                 error = 0;
1089                 goto nfsmout;
1090         }
             /* v3 replies carry pre-operation attributes; fetch them now. */
1091         if (info.v3)
1092                 forat_ret = VOP_GETATTR(vp, &forat);
             /* Only regular files may be written. */
1093         if (vp->v_type != VREG) {
1094                 if (info.v3)
1095                         error = EINVAL;
1096                 else
1097                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1098         }
1099         if (!error) {
1100                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1101         }
1102         if (error) {
1103                 vput(vp);
1104                 vp = NULL;
1105                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1106                                       NFSX_WCCDATA(info.v3), &error));
1107                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1108                                  aftat_ret, vap);
1109                 error = 0;
1110                 goto nfsmout;
1111         }
1112
1113         if (len > 0) {
                 /*
                  * Build one iovec per non-empty mbuf (cnt was counted in
                  * the trimming pass above) and write straight out of the
                  * request mbufs.  'iv' keeps the array base for FREE()
                  * because 'ivp' is advanced while filling it in.
                  */
1114             MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1115                 M_WAITOK);
1116             uiop->uio_iov = iv = ivp;
1117             uiop->uio_iovcnt = cnt;
1118             mp1 = info.mrep;
1119             while (mp1) {
1120                 if (mp1->m_len > 0) {
1121                         ivp->iov_base = mtod(mp1, caddr_t);
1122                         ivp->iov_len = mp1->m_len;
1123                         ivp++;
1124                 }
1125                 mp1 = mp1->m_next;
1126             }
1127
1128             /*
1129              * XXX
1130              * The IO_METASYNC flag indicates that all metadata (and not just
1131              * enough to ensure data integrity) must be written to stable storage
1132              * synchronously.
1133              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1134              */
1135             if (stable == NFSV3WRITE_UNSTABLE)
1136                 ioflags = IO_NODELOCKED;
1137             else if (stable == NFSV3WRITE_DATASYNC)
1138                 ioflags = (IO_SYNC | IO_NODELOCKED);
1139             else
1140                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1141             uiop->uio_resid = len;
1142             uiop->uio_rw = UIO_WRITE;
1143             uiop->uio_segflg = UIO_SYSSPACE;
1144             uiop->uio_td = NULL;
1145             uiop->uio_offset = off;
1146             error = VOP_WRITE(vp, uiop, ioflags, cred);
1147             nfsstats.srvvop_writes++;
1148             FREE((caddr_t)iv, M_TEMP);
1149         }
             /*
              * Fetch post-operation attributes and drop the vnode before
              * building the reply.  A getattr failure is reported only if
              * the write itself succeeded.
              */
1150         aftat_ret = VOP_GETATTR(vp, vap);
1151         vput(vp);
1152         vp = NULL;
1153         if (!error)
1154                 error = aftat_ret;
1155         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1156                               NFSX_PREOPATTR(info.v3) +
1157                               NFSX_POSTOPORFATTR(info.v3) +
1158                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1159                               &error));
1160         if (info.v3) {
                     /* v3: wcc data, then count, committed level, verifier. */
1161                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1162                                  aftat_ret, vap);
1163                 if (error) {
1164                         error = 0;
1165                         goto nfsmout;
1166                 }
1167                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1168                 *tl++ = txdr_unsigned(retlen);
1169                 /*
1170                  * If nfs_async is set, then pretend the write was FILESYNC.
1171                  */
1172                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1173                         *tl++ = txdr_unsigned(stable);
1174                 else
1175                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1176                 /*
1177                  * Actually, there is no need to txdr these fields,
1178                  * but it may make the values more human readable,
1179                  * for debugging purposes.
1180                  */
                     /* The write verifier is lazily seeded from boottime. */
1181                 if (nfsver.tv_sec == 0)
1182                         nfsver = boottime;
1183                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1184                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1185         } else {
                     /* v2: the reply body is just the file attributes. */
1186                 fp = nfsm_build(&info, NFSX_V2FATTR);
1187                 nfsm_srvfattr(nfsd, vap, fp);
1188         }
1189 nfsmout:
             /* Common exit: hand back the reply and drop any vnode still held. */
1190         *mrq = info.mreq;
1191         if (vp)
1192                 vput(vp);
1193         return(error);
1194 }
1195
1196 /*
1197  * NFS write service with write gathering support. Called when
1198  * nfsrvw_procrastinate > 0.
1199  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1200  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
1201  * Jan. 1994.
1202  */
1203 int
1204 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1205                   struct thread *td, struct mbuf **mrq)
1206 {
1207         struct iovec *ivp;
1208         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1209         struct nfs_fattr *fp;
1210         int i;
1211         struct iovec *iov;
1212         struct nfsrvw_delayhash *wpp;
1213         struct ucred *cred;
1214         struct vattr va, forat;
1215         u_int32_t *tl;
1216         int error = 0, rdonly, len, forat_ret = 1;
1217         int ioflags, aftat_ret = 1, adjust, zeroing;
1218         struct mbuf *mp1;
1219         struct vnode *vp = NULL;
1220         struct mount *mp = NULL;
1221         struct uio io, *uiop = &io;
1222         u_quad_t cur_usec;
1223         struct nfsm_info info;
1224
1225         info.mreq = NULL;
1226
1227         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1228 #ifndef nolint
1229         i = 0;
1230         len = 0;
1231 #endif
1232         *mrq = NULL;
1233         if (*ndp) {
1234             nfsd = *ndp;
1235             *ndp = NULL;
1236             info.mrep = nfsd->nd_mrep;
1237             info.mreq = NULL;
1238             info.md = nfsd->nd_md;
1239             info.dpos = nfsd->nd_dpos;
1240             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1241             cred = &nfsd->nd_cr;
1242             LIST_INIT(&nfsd->nd_coalesce);
1243             nfsd->nd_mreq = NULL;
1244             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1245             cur_usec = nfs_curusec();
1246             nfsd->nd_time = cur_usec +
1247                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1248     
1249             /*
1250              * Now, get the write header..
1251              */
1252             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1253             if (info.v3) {
1254                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1255                 nfsd->nd_off = fxdr_hyper(tl);
1256                 tl += 3;
1257                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1258             } else {
1259                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1260                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1261                 tl += 2;
1262                 if (nfs_async)
1263                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1264             }
1265             len = fxdr_unsigned(int32_t, *tl);
1266             nfsd->nd_len = len;
1267             nfsd->nd_eoff = nfsd->nd_off + len;
1268     
1269             /*
1270              * Trim the header out of the mbuf list and trim off any trailing
1271              * junk so that the mbuf list has only the write data.
1272              */
1273             zeroing = 1;
1274             i = 0;
1275             mp1 = info.mrep;
1276             while (mp1) {
1277                 if (mp1 == info.md) {
1278                     zeroing = 0;
1279                     adjust = info.dpos - mtod(mp1, caddr_t);
1280                     mp1->m_len -= adjust;
1281                     if (mp1->m_len > 0 && adjust > 0)
1282                         mp1->m_data += adjust;
1283                 }
1284                 if (zeroing)
1285                     mp1->m_len = 0;
1286                 else {
1287                     i += mp1->m_len;
1288                     if (i > len) {
1289                         mp1->m_len -= (i - len);
1290                         zeroing = 1;
1291                     }
1292                 }
1293                 mp1 = mp1->m_next;
1294             }
1295             if (len > NFS_MAXDATA || len < 0  || i < len) {
1296 nfsmout:
1297                 m_freem(info.mrep);
1298                 info.mrep = NULL;
1299                 error = EIO;
1300                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1301                 if (info.v3) {
1302                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1303                                      aftat_ret, &va);
1304                 }
1305                 nfsd->nd_mreq = info.mreq;
1306                 nfsd->nd_mrep = NULL;
1307                 nfsd->nd_time = 0;
1308             }
1309     
1310             /*
1311              * Add this entry to the hash and time queues.
1312              */
1313             crit_enter();
1314             owp = NULL;
1315             wp = slp->ns_tq.lh_first;
1316             while (wp && wp->nd_time < nfsd->nd_time) {
1317                 owp = wp;
1318                 wp = wp->nd_tq.le_next;
1319             }
1320             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1321             if (owp) {
1322                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1323             } else {
1324                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1325             }
1326             if (nfsd->nd_mrep) {
1327                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1328                 owp = NULL;
1329                 wp = wpp->lh_first;
1330                 while (wp &&
1331                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1332                     owp = wp;
1333                     wp = wp->nd_hash.le_next;
1334                 }
1335                 while (wp && wp->nd_off < nfsd->nd_off &&
1336                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1337                     owp = wp;
1338                     wp = wp->nd_hash.le_next;
1339                 }
1340                 if (owp) {
1341                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1342
1343                     /*
1344                      * Search the hash list for overlapping entries and
1345                      * coalesce.
1346                      */
1347                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1348                         wp = nfsd->nd_hash.le_next;
1349                         if (NFSW_SAMECRED(owp, nfsd))
1350                             nfsrvw_coalesce(owp, nfsd);
1351                     }
1352                 } else {
1353                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1354                 }
1355             }
1356             crit_exit();
1357         }
1358     
1359         /*
1360          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1361          * and generate the associated reply mbuf list(s).
1362          */
1363 loop1:
1364         cur_usec = nfs_curusec();
1365         crit_enter();
1366         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1367                 owp = nfsd->nd_tq.le_next;
1368                 if (nfsd->nd_time > cur_usec)
1369                     break;
1370                 if (nfsd->nd_mreq)
1371                     continue;
1372                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1373                 LIST_REMOVE(nfsd, nd_tq);
1374                 LIST_REMOVE(nfsd, nd_hash);
1375                 crit_exit();
1376                 info.mrep = nfsd->nd_mrep;
1377                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1378                 nfsd->nd_mrep = NULL;
1379                 cred = &nfsd->nd_cr;
1380                 forat_ret = aftat_ret = 1;
1381                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1382                     nfsd->nd_nam, &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1383                 if (!error) {
1384                     if (info.v3)
1385                         forat_ret = VOP_GETATTR(vp, &forat);
1386                     if (vp->v_type != VREG) {
1387                         if (info.v3)
1388                             error = EINVAL;
1389                         else
1390                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1391                     }
1392                 } else {
1393                     vp = NULL;
1394                 }
1395                 if (!error) {
1396                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1397                 }
1398     
1399                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1400                     ioflags = IO_NODELOCKED;
1401                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1402                     ioflags = (IO_SYNC | IO_NODELOCKED);
1403                 else
1404                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1405                 uiop->uio_rw = UIO_WRITE;
1406                 uiop->uio_segflg = UIO_SYSSPACE;
1407                 uiop->uio_td = NULL;
1408                 uiop->uio_offset = nfsd->nd_off;
1409                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1410                 if (uiop->uio_resid > 0) {
1411                     mp1 = info.mrep;
1412                     i = 0;
1413                     while (mp1) {
1414                         if (mp1->m_len > 0)
1415                             i++;
1416                         mp1 = mp1->m_next;
1417                     }
1418                     uiop->uio_iovcnt = i;
1419                     MALLOC(iov, struct iovec *, i * sizeof (struct iovec), 
1420                         M_TEMP, M_WAITOK);
1421                     uiop->uio_iov = ivp = iov;
1422                     mp1 = info.mrep;
1423                     while (mp1) {
1424                         if (mp1->m_len > 0) {
1425                             ivp->iov_base = mtod(mp1, caddr_t);
1426                             ivp->iov_len = mp1->m_len;
1427                             ivp++;
1428                         }
1429                         mp1 = mp1->m_next;
1430                     }
1431                     if (!error) {
1432                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1433                         nfsstats.srvvop_writes++;
1434                     }
1435                     FREE((caddr_t)iov, M_TEMP);
1436                 }
1437                 m_freem(info.mrep);
1438                 info.mrep = NULL;
1439                 if (vp) {
1440                     aftat_ret = VOP_GETATTR(vp, &va);
1441                     vput(vp);
1442                     vp = NULL;
1443                 }
1444
1445                 /*
1446                  * Loop around generating replies for all write rpcs that have
1447                  * now been completed.
1448                  */
1449                 swp = nfsd;
1450                 do {
1451                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1452                     if (error) {
1453                         nfsm_writereply(&info, nfsd, slp, error,
1454                                         NFSX_WCCDATA(info.v3));
1455                         if (info.v3) {
1456                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1457                                              aftat_ret, &va);
1458                         }
1459                     } else {
1460                         nfsm_writereply(&info, nfsd, slp, error,
1461                                         NFSX_PREOPATTR(info.v3) +
1462                                         NFSX_POSTOPORFATTR(info.v3) +
1463                                         2 * NFSX_UNSIGNED +
1464                                         NFSX_WRITEVERF(info.v3));
1465                         if (info.v3) {
1466                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1467                                              aftat_ret, &va);
1468                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1469                             *tl++ = txdr_unsigned(nfsd->nd_len);
1470                             *tl++ = txdr_unsigned(swp->nd_stable);
1471                             /*
1472                              * Actually, there is no need to txdr these fields,
1473                              * but it may make the values more human readable,
1474                              * for debugging purposes.
1475                              */
1476                             if (nfsver.tv_sec == 0)
1477                                     nfsver = boottime;
1478                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1479                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1480                         } else {
1481                             fp = nfsm_build(&info, NFSX_V2FATTR);
1482                             nfsm_srvfattr(nfsd, &va, fp);
1483                         }
1484                     }
1485                     nfsd->nd_mreq = info.mreq;
1486                     if (nfsd->nd_mrep)
1487                         panic("nfsrv_write: nd_mrep not free");
1488
1489                     /*
1490                      * Done. Put it at the head of the timer queue so that
1491                      * the final phase can return the reply.
1492                      */
1493                     crit_enter();
1494                     if (nfsd != swp) {
1495                         nfsd->nd_time = 0;
1496                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1497                     }
1498                     nfsd = swp->nd_coalesce.lh_first;
1499                     if (nfsd) {
1500                         LIST_REMOVE(nfsd, nd_tq);
1501                     }
1502                     crit_exit();
1503                 } while (nfsd);
1504                 crit_enter();
1505                 swp->nd_time = 0;
1506                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1507                 crit_exit();
1508                 goto loop1;
1509         }
1510         crit_exit();
1511
1512         /*
1513          * Search for a reply to return.
1514          */
1515         crit_enter();
1516         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next)
1517                 if (nfsd->nd_mreq) {
1518                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1519                     LIST_REMOVE(nfsd, nd_tq);
1520                     *mrq = nfsd->nd_mreq;
1521                     *ndp = nfsd;
1522                     break;
1523                 }
1524         crit_exit();
1525         *mrq = info.mreq;
1526         return (0);
1527 }
1528
1529 /*
1530  * Coalesce the write request nfsd into owp. To do this we must:
1531  * - remove nfsd from the queues
1532  * - merge nfsd->nd_mrep into owp->nd_mrep
1533  * - update the nd_eoff and nd_stable for owp
1534  * - put nfsd on owp's nd_coalesce list
1535  * NB: Must be called at splsoftclock().
1536  */
1537 static void
1538 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1539 {
1540         int overlap;
1541         struct mbuf *mp1;
1542         struct nfsrv_descript *p;
1543
1544         NFS_DPF(WG, ("C%03x-%03x",
1545                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1546         LIST_REMOVE(nfsd, nd_hash);
1547         LIST_REMOVE(nfsd, nd_tq);
1548         if (owp->nd_eoff < nfsd->nd_eoff) {
1549             overlap = owp->nd_eoff - nfsd->nd_off;
1550             if (overlap < 0)
1551                 panic("nfsrv_coalesce: bad off");
1552             if (overlap > 0)
1553                 m_adj(nfsd->nd_mrep, overlap);
1554             mp1 = owp->nd_mrep;
1555             while (mp1->m_next)
1556                 mp1 = mp1->m_next;
1557             mp1->m_next = nfsd->nd_mrep;
1558             owp->nd_eoff = nfsd->nd_eoff;
1559         } else
1560             m_freem(nfsd->nd_mrep);
1561         nfsd->nd_mrep = NULL;
1562         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1563             owp->nd_stable = NFSV3WRITE_FILESYNC;
1564         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1565             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1566             owp->nd_stable = NFSV3WRITE_DATASYNC;
1567         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1568
1569         /*
1570          * If nfsd had anything else coalesced into it, transfer them
1571          * to owp, otherwise their replies will never get sent.
1572          */
1573         for (p = nfsd->nd_coalesce.lh_first; p;
1574              p = nfsd->nd_coalesce.lh_first) {
1575             LIST_REMOVE(p, nd_tq);
1576             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1577         }
1578 }
1579
1580 /*
1581  * nfs create service
1582  * now does a truncate to 0 length via. setattr if it already exists
      *
      * Outline of what the code below does:
      *  - pulls the directory file handle and the name length out of the
      *    request, then resolves the name with nfs_namei(NLC_CREATE)
      *  - v3: reads the create mode word; GUARDED fails with EEXIST when
      *    the target exists, UNCHECKED/GUARDED parse the sattr, EXCLUSIVE
      *    saves the create verifier.  The type is always VREG.
      *  - v2: takes type and mode from sa_mode and either a size (VREG)
      *    or an rdev (VCHR/VBLK/VFIFO) from sa_size
      *  - target missing: VOP_NCREATE for VREG/VSOCK, VOP_NMKNOD for
      *    VCHR/VBLK/VFIFO, ENXIO for any other type
      *  - target present: if a size was supplied, checks write access and
      *    applies the size with VOP_SETATTR
      *  - builds the reply (file handle, attributes, and for v3 the
      *    directory wcc data) and hands it back through *mrq
      *
      * The return value is `error' as left by whichever path reached
      * nfsmout; most paths clear it to 0 once the reply has been built.
1583  */
1584 int
1585 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1586              struct thread *td, struct mbuf **mrq)
1587 {
1588         struct sockaddr *nam = nfsd->nd_nam;
1589         struct ucred *cred = &nfsd->nd_cr;
1590         struct nfs_fattr *fp;
1591         struct vattr va, dirfor, diraft;
1592         struct vattr *vap = &va;
1593         struct nfsv2_sattr *sp;
1594         u_int32_t *tl;
1595         struct nlookupdata nd;
1596         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1597         udev_t rdev = NOUDEV;
1598         caddr_t cp;
1599         int how, exclusive_flag = 0;
1600         struct vnode *dirp;
1601         struct vnode *dvp;
1602         struct vnode *vp;
1603         struct mount *mp;
1604         nfsfh_t nfh;
1605         fhandle_t *fhp;
1606         u_quad_t tempsize;
1607         u_char cverf[NFSX_V3CREATEVERF];
1608         struct nfsm_info info;
1609
1610         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /*
              * Clear everything the nfsmout cleanup looks at before any
              * macro gets a chance to jump there.
              */
1611         nlookup_zero(&nd);
1612         dirp = NULL;
1613         dvp = NULL;
1614         vp = NULL;
1615
1616         info.mrep = nfsd->nd_mrep;
1617         info.mreq = NULL;
1618         info.md = nfsd->nd_md;
1619         info.dpos = nfsd->nd_dpos;
1620         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1621
1622         fhp = &nfh.fh_generic;
1623         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1624         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1625
1626         /*
1627          * Call namei and do initial cleanup to get a few things
1628          * out of the way.  If we get an initial error we cleanup
1629          * and return here to avoid special-casing the invalid nd
1630          * structure through the rest of the case.  dirp may be
1631          * set even if an error occurs, but the nd structure will not
1632          * be valid at all if an error occurs so we have to invalidate it
1633          * prior to calling nfsm_reply ( which might goto nfsmout ).
1634          */
1635         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1636                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1637                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /*
              * mp is only consumed by the nfsrv_access() call on the
              * "target already exists" path further down.
              */
1638         mp = vfs_getvfs(&fhp->fh_fsid);
1639
             /*
              * v3 samples the directory attributes before the operation
              * (they are passed to nfsm_srvwcc_data() later).  v2 makes no
              * further use of dirp, so it is released right away.
              */
1640         if (dirp) {
1641                 if (info.v3) {
1642                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1643                 } else {
1644                         vrele(dirp);
1645                         dirp = NULL;
1646                 }
1647         }
1648         if (error) {
1649                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1650                                       NFSX_WCCDATA(info.v3), &error));
1651                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1652                                  diraft_ret, &diraft);
1653                 error = 0;
1654                 goto nfsmout;
1655         }
1656
1657         /*
1658          * No error.  Continue.  State:
1659          *
1660          *      dirp            may be valid
1661          *      vp              may be valid or NULL if the target does not
1662          *                      exist.
1663          *      dvp             is valid
1664          *
1665          * The error state is set through the code and we may also do some
1666          * opportunistic releasing of vnodes to avoid holding locks through
1667          * NFS I/O.  The cleanup at the end is a catch-all
1668          */
1669
1670         VATTR_NULL(vap);
1671         if (info.v3) {
1672                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1673                 how = fxdr_unsigned(int, *tl);
                     /*
                      * NOTE(review): there is no default case, so an
                      * unrecognised `how' value parses nothing further and
                      * carries on as a plain VREG create -- confirm this
                      * is intended.
                      */
1674                 switch (how) {
1675                 case NFSV3CREATE_GUARDED:
1676                         if (vp) {
1677                                 error = EEXIST;
1678                                 break;
1679                         }
1680                         /* fall through */
1681                 case NFSV3CREATE_UNCHECKED:
1682                         ERROROUT(nfsm_srvsattr(&info, vap));
1683                         break;
1684                 case NFSV3CREATE_EXCLUSIVE:
1685                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1686                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
1687                         exclusive_flag = 1;
1688                         break;
1689                 };
1690                 vap->va_type = VREG;
1691         } else {
1692                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1693                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1694                 if (vap->va_type == VNON)
1695                         vap->va_type = VREG;
1696                 vap->va_mode = nfstov_mode(sp->sa_mode);
                     /*
                      * The v2 sa_size field does double duty: a file size
                      * for regular files (-1 meaning "not supplied") and
                      * the device number for special files.
                      */
1697                 switch (vap->va_type) {
1698                 case VREG:
1699                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1700                         if (tsize != -1)
1701                                 vap->va_size = (u_quad_t)tsize;
1702                         break;
1703                 case VCHR:
1704                 case VBLK:
1705                 case VFIFO:
1706                         rdev = fxdr_unsigned(long, sp->sa_size);
1707                         break;
1708                 default:
1709                         break;
1710                 };
1711         }
1712
1713         /*
1714          * Iff doesn't exist, create it
1715          * otherwise just truncate to 0 length
1716          *   should I set the mode too ?
1717          *
1718          * The only possible error we can have at this point is EEXIST. 
1719          * nd.ni_vp will also be non-NULL in that case.
1720          */
1721         if (vp == NULL) {
1722                 if (vap->va_mode == (mode_t)VNOVAL)
1723                         vap->va_mode = 0;
1724                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
                             /*
                              * dvp is unlocked before the VOP and its
                              * reference dropped afterwards; clearing the
                              * pointer keeps the nfsmout cleanup from
                              * touching it again.
                              */
1725                         vn_unlock(dvp);
1726                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1727                                             nd.nl_cred, vap);
1728                         vrele(dvp);
1729                         dvp = NULL;
1730                         if (error == 0) {
1731                                 if (exclusive_flag) {
                                             /*
                                              * Record the client's verifier
                                              * in va_atime of the new file.
                                              * exclusive_flag is cleared
                                              * here, so the verifier compare
                                              * after the create/truncate
                                              * step only runs when the
                                              * target already existed.
                                              */
1732                                         exclusive_flag = 0;
1733                                         VATTR_NULL(vap);
1734                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1735                                                 NFSX_V3CREATEVERF);
1736                                         error = VOP_SETATTR(vp, vap, cred);
1737                                 }
1738                         }
1739                 } else if (
1740                         vap->va_type == VCHR || 
1741                         vap->va_type == VBLK ||
1742                         vap->va_type == VFIFO
1743                 ) {
1744                         /*
1745                          * Handle SysV FIFO node special cases.  All other
1746                          * devices require super user to access.
1747                          */
1748                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1749                                 vap->va_type = VFIFO;
1750                         if (vap->va_type != VFIFO &&
1751                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1752                                 goto nfsmreply0;
1753                         }
1754                         vap->va_rmajor = umajor(rdev);
1755                         vap->va_rminor = uminor(rdev);
1756
1757                         vn_unlock(dvp);
1758                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1759                         vrele(dvp);
1760                         dvp = NULL;
1761                         if (error)
1762                                 goto nfsmreply0;
1763 #if 0
1764                         /*
1765                          * XXX what is this junk supposed to do ?
1766                          */
1767
1768                         vput(vp);
1769                         vp = NULL;
1770
1771                         /*
1772                          * release dvp prior to lookup
1773                          */
1774                         vput(dvp);
1775                         dvp = NULL;
1776
1777                         /*
1778                          * Setup for lookup. 
1779                          *
1780                          * Even though LOCKPARENT was cleared, ni_dvp may
1781                          * be garbage. 
1782                          */
1783                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1784                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1785                         nd.ni_cnd.cn_td = td;
1786                         nd.ni_cnd.cn_cred = cred;
1787
1788                         error = lookup(&nd);
1789                         nd.ni_dvp = NULL;
1790
1791                         if (error != 0) {
1792                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1793                                                       0, &error));
1794                                 /* fall through on certain errors */
1795                         }
1796                         nfsrv_object_create(nd.ni_vp);
1797                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1798                                 error = EINVAL;
1799                                 goto nfsmreply0;
1800                         }
1801 #endif
1802                 } else {
1803                         error = ENXIO;
1804                 }
1805         } else {
                     /*
                      * Target already exists.  Only a supplied size is
                      * honoured: every other attribute is wiped by
                      * VATTR_NULL() before the VOP_SETATTR() call.
                      */
1806                 if (vap->va_size != -1) {
1807                         error = nfsrv_access(mp, vp, VWRITE, cred,
1808                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1809                         if (!error) {
1810                                 tempsize = vap->va_size;
1811                                 VATTR_NULL(vap);
1812                                 vap->va_size = tempsize;
1813                                 error = VOP_SETATTR(vp, vap, cred);
1814                         }
1815                 }
1816         }
1817
             /*
              * On success turn vp into a file handle and fetch its
              * attributes for the reply.
              */
1818         if (!error) {
1819                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1820                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1821                 if (!error)
1822                         error = VOP_GETATTR(vp, vap);
1823         }
1824         if (info.v3) {
                     /*
                      * EXCLUSIVE create against an existing file: succeed
                      * only when the stored va_atime bytes equal the
                      * verifier from this request, otherwise EEXIST.
                      */
1825                 if (exclusive_flag && !error &&
1826                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1827                         error = EEXIST;
                     /*
                      * NOTE(review): dirp is used here without a NULL
                      * check; this assumes nfs_namei() always hands back
                      * dirp when it succeeds -- TODO confirm.
                      */
1828                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1829                 vrele(dirp);
1830                 dirp = NULL;
1831         }
1832         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1833                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1834                               NFSX_WCCDATA(info.v3),
1835                               &error));
1836         if (info.v3) {
1837                 if (!error) {
1838                         nfsm_srvpostop_fh(&info, fhp);
1839                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1840                 }
1841                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1842                                  diraft_ret, &diraft);
1843                 error = 0;
1844         } else {
1845                 nfsm_srvfhtom(&info, fhp);
1846                 fp = nfsm_build(&info, NFSX_V2FATTR);
1847                 nfsm_srvfattr(nfsd, vap, fp);
1848         }
1849         goto nfsmout;
1850
             /*
              * Error reply with no payload.  It is only reached from the
              * VCHR/VBLK/VFIFO branch; the v3 request path always sets the
              * type to VREG, so in practice this serves v2 requests.
              */
1851 nfsmreply0:
1852         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1853         error = 0;
1854         /* fall through */
1855
             /*
              * Catch-all cleanup.  Every pointer below is NULL unless it
              * still needs releasing.  When dvp and vp are the same vnode,
              * dvp is only vrele()'d and the vput(vp) that follows does
              * the rest.
              */
1856 nfsmout:
1857         *mrq = info.mreq;
1858         if (dirp)
1859                 vrele(dirp);
1860         nlookup_done(&nd);
1861         if (dvp) {
1862                 if (dvp == vp)
1863                         vrele(dvp);
1864                 else
1865                         vput(dvp);
1866         }
1867         if (vp)
1868                 vput(vp);
1869         return (error);
1870 }
1871
1872 /*
1873  * nfs v3 mknod service
1874  */
1875 int
1876 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1877             struct thread *td, struct mbuf **mrq)
1878 {
1879         struct sockaddr *nam = nfsd->nd_nam;
1880         struct ucred *cred = &nfsd->nd_cr;
1881         struct vattr va, dirfor, diraft;
1882         struct vattr *vap = &va;
1883         u_int32_t *tl;
1884         struct nlookupdata nd;
1885         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1886         enum vtype vtyp;
1887         struct vnode *dirp;
1888         struct vnode *dvp;
1889         struct vnode *vp;
1890         nfsfh_t nfh;
1891         fhandle_t *fhp;
1892         struct nfsm_info info;
1893
1894         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1895         nlookup_zero(&nd);
1896         dirp = NULL;
1897         dvp = NULL;
1898         vp = NULL;
1899
1900         info.mrep = nfsd->nd_mrep;
1901         info.mreq = NULL;
1902         info.md = nfsd->nd_md;
1903         info.dpos = nfsd->nd_dpos;
1904
1905         fhp = &nfh.fh_generic;
1906         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1907         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1908
1909         /*
1910          * Handle nfs_namei() call.  If an error occurs, the nd structure
1911          * is not valid.  However, nfsm_*() routines may still jump to
1912          * nfsmout.
1913          */
1914
1915         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1916                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1917                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1918         if (dirp)
1919                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1920         if (error) {
1921                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1922                            NFSX_WCCDATA(1), &error));
1923                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1924                                  diraft_ret, &diraft);
1925                 error = 0;
1926                 goto nfsmout;
1927         }
1928         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1929         vtyp = nfsv3tov_type(*tl);
1930         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1931                 error = NFSERR_BADTYPE;
1932                 goto out;
1933         }
1934         VATTR_NULL(vap);
1935         ERROROUT(nfsm_srvsattr(&info, vap));
1936         if (vtyp == VCHR || vtyp == VBLK) {
1937                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1938                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1939                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1940         }
1941
1942         /*
1943          * Iff doesn't exist, create it.
1944          */
1945         if (vp) {
1946                 error = EEXIST;
1947                 goto out;
1948         }
1949         vap->va_type = vtyp;
1950         if (vap->va_mode == (mode_t)VNOVAL)
1951                 vap->va_mode = 0;
1952         if (vtyp == VSOCK) {
1953                 vn_unlock(dvp);
1954                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1955                 vrele(dvp);
1956                 dvp = NULL;
1957         } else {
1958                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1959                         goto out;
1960
1961                 vn_unlock(dvp);
1962                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1963                 vrele(dvp);
1964                 dvp = NULL;
1965                 if (error)
1966                         goto out;
1967         }
1968
1969         /*
1970          * send response, cleanup, return.
1971          */
1972 out:
1973         nlookup_done(&nd);
1974         if (dvp) {
1975                 if (dvp == vp)
1976                         vrele(dvp);
1977                 else
1978                         vput(dvp);
1979                 dvp = NULL;
1980         }
1981         if (!error) {
1982                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1983                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1984                 if (!error)
1985                         error = VOP_GETATTR(vp, vap);
1986         }
1987         if (vp) {
1988                 vput(vp);
1989                 vp = NULL;
1990         }
1991         diraft_ret = VOP_GETATTR(dirp, &diraft);
1992         if (dirp) {
1993                 vrele(dirp);
1994                 dirp = NULL;
1995         }
1996         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1997                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1998                               NFSX_WCCDATA(1), &error));
1999         if (!error) {
2000                 nfsm_srvpostop_fh(&info, fhp);
2001                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2002         }
2003         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2004                          diraft_ret, &diraft);
2005         *mrq = info.mreq;
2006         return (0);
2007 nfsmout:
2008         *mrq = info.mreq;
2009         if (dirp)
2010                 vrele(dirp);
2011         nlookup_done(&nd);
2012         if (dvp) {
2013                 if (dvp == vp)
2014                         vrele(dvp);
2015                 else
2016                         vput(dvp);
2017         }
2018         if (vp)
2019                 vput(vp);
2020         return (error);
2021 }
2022
2023 /*
2024  * nfs remove service
2025  */
2026 int
2027 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2028              struct thread *td, struct mbuf **mrq)
2029 {
2030         struct sockaddr *nam = nfsd->nd_nam;
2031         struct ucred *cred = &nfsd->nd_cr;
2032         struct nlookupdata nd;
2033         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2034         struct vnode *dirp;
2035         struct vnode *dvp;
2036         struct vnode *vp;
2037         struct vattr dirfor, diraft;
2038         nfsfh_t nfh;
2039         fhandle_t *fhp;
2040         struct nfsm_info info;
2041
2042         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2043         nlookup_zero(&nd);
2044         dirp = NULL;
2045         dvp = NULL;
2046         vp = NULL;
2047
2048         info.mrep = nfsd->nd_mrep;
2049         info.mreq = NULL;
2050         info.md = nfsd->nd_md;
2051         info.dpos = nfsd->nd_dpos;
2052         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2053
2054         fhp = &nfh.fh_generic;
2055         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2056         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2057
2058         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2059                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2060                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2061         if (dirp) {
2062                 if (info.v3)
2063                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2064         }
2065         if (error == 0) {
2066                 if (vp->v_type == VDIR) {
2067                         error = EPERM;          /* POSIX */
2068                         goto out;
2069                 }
2070                 /*
2071                  * The root of a mounted filesystem cannot be deleted.
2072                  */
2073                 if (vp->v_flag & VROOT) {
2074                         error = EBUSY;
2075                         goto out;
2076                 }
2077 out:
2078                 if (!error) {
2079                         if (dvp != vp)
2080                                 vn_unlock(dvp);
2081                         if (vp) {
2082                                 vput(vp);
2083                                 vp = NULL;
2084                         }
2085                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2086                         vrele(dvp);
2087                         dvp = NULL;
2088                 }
2089         }
2090         if (dirp && info.v3)
2091                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2092         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2093         if (info.v3) {
2094                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2095                                  diraft_ret, &diraft);
2096                 error = 0;
2097         }
2098 nfsmout:
2099         *mrq = info.mreq;
2100         nlookup_done(&nd);
2101         if (dirp)
2102                 vrele(dirp);
2103         if (dvp) {
2104                 if (dvp == vp)
2105                         vrele(dvp);
2106                 else
2107                         vput(dvp);
2108         }
2109         if (vp)
2110                 vput(vp);
2111         return(error);
2112 }
2113
2114 /*
2115  * nfs rename service
      *
      * Outline of what the code below does:
      *  - resolves the source name (NLC_RENAME_SRC), unlocks its namecache
      *    entry, then resolves the target name (NLC_RENAME_DST)
      *  - relocks the source entry and runs the type, mount-point,
      *    cross-mount and "into itself" checks
      *  - calls VOP_NRENAME() if every check passed; renaming an entry
      *    onto itself is reported as success without calling it
      *  - replies with wcc data for both directories (v3) and hands the
      *    reply back through *mrq
      *
      * `error' is cleared once the reply has been built, so a non-zero
      * return can only come from a macro that jumped straight to nfsmout.
2116  */
2117 int
2118 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2119              struct thread *td, struct mbuf **mrq)
2120 {
2121         struct sockaddr *nam = nfsd->nd_nam;
2122         struct ucred *cred = &nfsd->nd_cr;
2123         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2124         int tdirfor_ret = 1, tdiraft_ret = 1;
2125         struct nlookupdata fromnd, tond;
2126         struct vnode *fvp, *fdirp, *fdvp;
2127         struct vnode *tvp, *tdirp, *tdvp;
2128         struct namecache *ncp;
2129         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2130         nfsfh_t fnfh, tnfh;
2131         fhandle_t *ffhp, *tfhp;
2132         uid_t saved_uid;
2133         struct nfsm_info info;
2134
2135         info.mrep = nfsd->nd_mrep;
2136         info.mreq = NULL;
2137         info.md = nfsd->nd_md;
2138         info.dpos = nfsd->nd_dpos;
2139         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2140
2141         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2142 #ifndef nolint
2143         fvp = NULL;
2144 #endif
2145         ffhp = &fnfh.fh_generic;
2146         tfhp = &tnfh.fh_generic;
2147
2148         /*
2149          * Clear fields in case goto nfsmout occurs from macro.
2150          */
2151
2152         nlookup_zero(&fromnd);
2153         nlookup_zero(&tond);
2154         fdirp = NULL;
2155         tdirp = NULL;
2156
2157         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2158         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2159
2160         /*
2161          * Remember our original uid so that we can reset cr_uid before
2162          * the second nfs_namei() call, in case it is remapped.
2163          */
2164         saved_uid = cred->cr_uid;
2165         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2166                           NULL, NULL,
2167                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2168                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2169         if (fdirp) {
2170                 if (info.v3)
2171                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2172         }
2173         if (error) {
                     /*
                      * Source lookup failed.  The target directory has not
                      * been looked at yet, so its wcc data goes out with
                      * tdirfor_ret and tdiraft_ret still at their initial
                      * value of 1.
                      */
2174                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2175                                       2 * NFSX_WCCDATA(info.v3), &error));
2176                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2177                                  fdiraft_ret, &fdiraft);
2178                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2179                                  tdiraft_ret, &tdiraft);
2180                 error = 0;
2181                 goto nfsmout;
2182         }
2183
2184         /*
2185          * We have to unlock the from ncp before we can safely lookup
2186          * the target ncp.
2187          */
2188         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2189         cache_unlock(&fromnd.nl_nch);
2190         fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2191         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2192         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2193         cred->cr_uid = saved_uid;
2194
2195         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2196                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2197                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2198         if (tdirp) {
2199                 if (info.v3)
2200                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2201         }
2202         if (error)
2203                 goto out1;
2204
2205         /*
2206          * relock the source
              *
              * The non-blocking attempt is tried first.  If it fails we
              * either block on the source directly (source ncp address
              * above the target's) or drop the target lock and take
              * source then target, re-resolving each entry after it has
              * been locked.
              *
              * NOTE(review): this looks like an address-ordered locking
              * scheme to avoid deadlocking against another rename --
              * confirm against the namecache locking rules.
2207          */
2208         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2209                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2210         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2211                 cache_lock(&fromnd.nl_nch);
2212                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2213         } else {
2214                 cache_unlock(&tond.nl_nch);
2215                 cache_lock(&fromnd.nl_nch);
2216                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2217                 cache_lock(&tond.nl_nch);
2218                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2219         }
2220         fromnd.nl_flags |= NLC_NCPISLOCKED;
2221
             /*
              * NOTE(review): fvp is dereferenced in the checks below
              * without a NULL test, and the cache_resolve() results above
              * are ignored.  This assumes the source entry still has a
              * vnode after having been unlocked and relocked -- TODO
              * confirm.  tvp is NULL-checked (target may not exist).
              */
2222         fvp = fromnd.nl_nch.ncp->nc_vp;
2223         tvp = tond.nl_nch.ncp->nc_vp;
2224
2225         /*
2226          * Set fdvp and tdvp.  We haven't done all the topology checks
2227          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2228          * point).  If we get through the checks these will be guaranteed
2229          * to be non-NULL.
2230          *
2231          * Holding the children ncp's should be sufficient to prevent
2232          * fdvp and tdvp ripouts.
2233          */
2234         if (fromnd.nl_nch.ncp->nc_parent)
2235                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2236         else
2237                 fdvp = NULL;
2238         if (tond.nl_nch.ncp->nc_parent)
2239                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2240         else
2241                 tdvp = NULL;
2242
             /*
              * Sanity checks.  Each failure picks a different errno for
              * v3 and for v2.
              */
2243         if (tvp != NULL) {
2244                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2245                         if (info.v3)
2246                                 error = EEXIST;
2247                         else
2248                                 error = EISDIR;
2249                         goto out;
2250                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2251                         if (info.v3)
2252                                 error = EEXIST;
2253                         else
2254                                 error = ENOTDIR;
2255                         goto out;
2256                 }
2257                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2258                         if (info.v3)
2259                                 error = EXDEV;
2260                         else
2261                                 error = ENOTEMPTY;
2262                         goto out;
2263                 }
2264         }
2265         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2266                 if (info.v3)
2267                         error = EXDEV;
2268                 else
2269                         error = ENOTEMPTY;
2270                 goto out;
2271         }
2272         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2273                 if (info.v3)
2274                         error = EXDEV;
2275                 else
2276                         error = ENOTEMPTY;
2277                 goto out;
2278         }
             /*
              * Unlike the checks above this one does not jump to out; it
              * only records the error and lets the remaining checks run.
              */
2279         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2280                 if (info.v3)
2281                         error = EINVAL;
2282                 else
2283                         error = ENOTEMPTY;
2284         }
2285
2286         /*
2287          * You cannot rename a source into itself or a subdirectory of itself.
2288          * We check this by traversing the target directory upwards looking
2289          * for a match against the source.
2290          */
2291         if (error == 0) {
2292                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2293                         if (fromnd.nl_nch.ncp == ncp) {
2294                                 error = EINVAL;
2295                                 break;
2296                         }
2297                 }
2298         }
2299
2300         /*
2301          * If source is the same as the destination (that is the
2302          * same vnode with the same name in the same directory),
2303          * then there is nothing to do.
              *
              * -1 is an internal sentinel: it overrides any error set by
              * the checks just above (the upward walk starts at the target
              * itself, so it flags this case as EINVAL first) and is
              * turned into 0 at the out label.
2304          */
2305         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2306                 error = -1;
2307 out:
2308         if (!error) {
2309                 /*
2310                  * The VOP_NRENAME function releases all vnode references &
2311                  * locks prior to returning so we need to clear the pointers
2312                  * to bypass cleanup code later on.
2313                  */
2314                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2315                                     fdvp, tdvp, tond.nl_cred);
2316         } else {
2317                 if (error == -1)
2318                         error = 0;
2319         }
2320         /* fall through */
2321
             /*
              * Build the reply.  Post-op attributes are sampled for
              * whichever directory vnodes we hold; wcc data is only
              * appended for v3.
              */
2322 out1:
2323         if (fdirp)
2324                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2325         if (tdirp)
2326                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2327         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2328                               2 * NFSX_WCCDATA(info.v3), &error));
2329         if (info.v3) {
2330                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2331                                  fdiraft_ret, &fdiraft);
2332                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2333                                  tdiraft_ret, &tdiraft);
2334         }
2335         error = 0;
2336         /* fall through */
2337
             /*
              * Common exit: drop the directory references and tear down
              * both lookup contexts, target first.
              */
2338 nfsmout:
2339         *mrq = info.mreq;
2340         if (tdirp)
2341                 vrele(tdirp);
2342         nlookup_done(&tond);
2343         if (fdirp)
2344                 vrele(fdirp);
2345         nlookup_done(&fromnd);
2346         return (error);
2347 }
2348
2349 /*
2350  * nfs link service
      *
      * Decodes two file handles from the request (the existing file, then the
      * directory that will receive the new name) followed by the new name,
      * and issues VOP_NLINK() to create the link.  Linking a directory is
      * refused with EPERM, an already existing target name with EEXIST and a
      * link across mounts with EXDEV.
      *
      * The reply mbuf chain is handed back through *mrq.  The return value
      * is whatever is left in 'error' once the reply has been built.
2351  */
2352 int
2353 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2354            struct thread *td, struct mbuf **mrq)
2355 {
2356         struct sockaddr *nam = nfsd->nd_nam;
2357         struct ucred *cred = &nfsd->nd_cr;
2358         struct nlookupdata nd;
2359         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2360         int getret = 1;
2361         struct vnode *dirp;
2362         struct vnode *dvp;
2363         struct vnode *vp;
2364         struct vnode *xp;
2366         struct mount *xmp;
2367         struct vattr dirfor, diraft, at;
2368         nfsfh_t nfh, dnfh;
2369         fhandle_t *fhp, *dfhp;
2370         struct nfsm_info info;
2371
2372         info.mrep = nfsd->nd_mrep;
2373         info.mreq = NULL;
2374         info.md = nfsd->nd_md;
2375         info.dpos = nfsd->nd_dpos;
2376         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2377
2378         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2379         nlookup_zero(&nd);
2380         dirp = dvp = vp = xp = NULL;
2381         xmp = NULL;
2382
             /* fhp: file being linked to, dfhp: directory for the new name. */
2383         fhp = &nfh.fh_generic;
2384         dfhp = &dnfh.fh_generic;
2385         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2386         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2387         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2388
2389         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2390                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2391         if (error) {
                     /*
                      * Source handle could not be resolved: reply with the
                      * (still unset) attribute status and bail out.
                      */
2392                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2393                                       NFSX_POSTOPATTR(info.v3) +
2394                                       NFSX_WCCDATA(info.v3),
2395                                       &error));
2396                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2397                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2398                                  diraft_ret, &diraft);
2399                 xp = NULL;
2400                 error = 0;
2401                 goto nfsmout;
2402         }
2403         if (xp->v_type == VDIR) {
2404                 error = EPERM;          /* POSIX */
2405                 goto out1;
2406         }
2407
2408         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2409                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2410                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2411         if (dirp) {
                     /* Pre-operation directory attributes (NFSv3 wcc data). */
2412                 if (info.v3)
2413                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2414         }
2415         if (error)
2416                 goto out1;
2417
2418         if (vp != NULL) {
2419                 error = EEXIST;
2420                 goto out;
2421         }
2422         if (xp->v_mount != dvp->v_mount)
2423                 error = EXDEV;
2424 out:
2425         if (!error) {
                     /* dvp is unlocked before the VOP and released right after. */
2426                 vn_unlock(dvp);
2427                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2428                 vrele(dvp);
2429                 dvp = NULL;
2430         }
2431         /* fall through */
2432
2433 out1:
2434         if (info.v3)
2435                 getret = VOP_GETATTR(xp, &at);
2436         if (dirp)
2437                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2438         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2439                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2440                               &error));
2441         if (info.v3) {
2442                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2443                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2444                                  diraft_ret, &diraft);
2445                 error = 0;
2446         }
2447         /* fall through */
2448
2449 nfsmout:
             /* Common exit: publish the reply and drop whatever is still held. */
2450         *mrq = info.mreq;
2451         nlookup_done(&nd);
2452         if (dirp)
2453                 vrele(dirp);
2454         if (xp)
2455                 vrele(xp);
2456         if (dvp) {
2457                 if (dvp == vp)
2458                         vrele(dvp);
2459                 else
2460                         vput(dvp);
2461         }
2462         if (vp)
2463                 vput(vp);
2464         return(error);
2465 }
2466
2467 /*
2468  * nfs symbolic link service
      *
      * Looks up the new name with nfs_namei(NLC_CREATE), decodes the
      * attributes and the link target path from the request and creates the
      * symlink with VOP_NSYMLINK().  For NFSv3 a successful reply carries
      * the new file handle and attributes followed by the directory wcc
      * data.  The reply mbuf chain is handed back through *mrq.
      *
      * Request-decode failures leave for nfsmout without passing through
      * "out:", so nfsmout has to release everything that may still be held,
      * including dvp.
2469  */
2470 int
2471 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2472               struct thread *td, struct mbuf **mrq)
2473 {
2474         struct sockaddr *nam = nfsd->nd_nam;
2475         struct ucred *cred = &nfsd->nd_cr;
2476         struct vattr va, dirfor, diraft;
2477         struct nlookupdata nd;
2478         struct vattr *vap = &va;
2479         struct nfsv2_sattr *sp;
2480         char *pathcp = NULL;
2481         struct uio io;
2482         struct iovec iv;
2483         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2484         struct vnode *dirp;
2485         struct vnode *vp;
2486         struct vnode *dvp;
2487         nfsfh_t nfh;
2488         fhandle_t *fhp;
2489         struct nfsm_info info;
2490
2491         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2492         nlookup_zero(&nd);
2493         dirp = NULL;
2494         dvp = NULL;
2495         vp = NULL;
2496
2497         info.mrep = nfsd->nd_mrep;
2498         info.mreq =  NULL;
2499         info.md = nfsd->nd_md;
2500         info.dpos = nfsd->nd_dpos;
2501         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2502
2503         fhp = &nfh.fh_generic;
2504         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2505         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2506
2507         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2508                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2509                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2510         if (dirp) {
                     /* Pre-operation directory attributes (NFSv3 wcc data). */
2511                 if (info.v3)
2512                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2513         }
2514         if (error)
2515                 goto out;
2516
2517         VATTR_NULL(vap);
2518         if (info.v3) {
2519                 ERROROUT(nfsm_srvsattr(&info, vap));
2520         }
             /* Copy the link target out of the request into a NUL-terminated buffer. */
2521         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2522         MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2523         iv.iov_base = pathcp;
2524         iv.iov_len = len2;
2525         io.uio_resid = len2;
2526         io.uio_offset = 0;
2527         io.uio_iov = &iv;
2528         io.uio_iovcnt = 1;
2529         io.uio_segflg = UIO_SYSSPACE;
2530         io.uio_rw = UIO_READ;
2531         io.uio_td = NULL;
2532         ERROROUT(nfsm_mtouio(&info, &io, len2));
2533         if (info.v3 == 0) {
                     /* NFSv2 carries the attributes after the path. */
2534                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2535                 vap->va_mode = nfstov_mode(sp->sa_mode);
2536         }
2537         *(pathcp + len2) = '\0';
2538         if (vp) {
2539                 error = EEXIST;
2540                 goto out;
2541         }
2542
2543         if (vap->va_mode == (mode_t)VNOVAL)
2544                 vap->va_mode = 0;
2545         if (dvp != vp)
2546                 vn_unlock(dvp);
2547         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2548         vrele(dvp);
2549         dvp = NULL;
2550         if (error == 0) {
                     /* Build the file handle and attributes for the reply. */
2551                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2552                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2553                 if (!error)
2554                         error = VOP_GETATTR(vp, vap);
2555         }
2556
2557 out:
2558         if (dvp) {
2559                 if (dvp == vp)
2560                         vrele(dvp);
2561                 else
2562                         vput(dvp);
                     /* Released here; keep nfsmout from releasing it again. */
                     dvp = NULL;
2563         }
2564         if (vp) {
2565                 vput(vp);
2566                 vp = NULL;
2567         }
2568         if (pathcp) {
2569                 FREE(pathcp, M_TEMP);
2570                 pathcp = NULL;
2571         }
2572         if (dirp) {
2573                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2574                 vrele(dirp);
2575                 dirp = NULL;
2576         }
2577         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2578                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2579                               NFSX_WCCDATA(info.v3),
2580                               &error));
2581         if (info.v3) {
2582                 if (!error) {
2583                         nfsm_srvpostop_fh(&info, fhp);
2584                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2585                 }
2586                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2587                                  diraft_ret, &diraft);
2588         }
2589         error = 0;
2590         /* fall through */
2591
2592 nfsmout:
2593         *mrq = info.mreq;
2594         nlookup_done(&nd);
             /*
              * The decode macros used between nfs_namei() and "out:" jump
              * straight here on failure, so dvp may still be held at this
              * point.  Release it the same way nfsrv_link() and
              * nfsrv_mkdir() do at their nfsmout labels.
              */
             if (dvp) {
                     if (dvp == vp)
                             vrele(dvp);
                     else
                             vput(dvp);
             }
2595         if (vp)
2596                 vput(vp);
2597         if (dirp)
2598                 vrele(dirp);
2599         if (pathcp)
2600                 FREE(pathcp, M_TEMP);
2601         return (error);
2602 }
2603
2604 /*
2605  * nfs mkdir service
      *
      * Looks up the new name with nfs_namei(NLC_CREATE), decodes the
      * requested attributes (full sattr for NFSv3, just the mode word for
      * NFSv2) and creates the directory with VOP_NMKDIR().  On success the
      * reply carries the new file handle and attributes; NFSv3 replies also
      * carry the parent directory wcc data.  The reply mbuf chain is handed
      * back through *mrq.
2606  */
2607 int
2608 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2609             struct thread *td, struct mbuf **mrq)
2610 {
2611         struct sockaddr *nam = nfsd->nd_nam;
2612         struct ucred *cred = &nfsd->nd_cr;
2613         struct vattr va, dirfor, diraft;
2614         struct vattr *vap = &va;
2615         struct nfs_fattr *fp;
2616         struct nlookupdata nd;
2617         u_int32_t *tl;
2618         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2619         struct vnode *dirp;
2620         struct vnode *dvp;
2621         struct vnode *vp;
2622         nfsfh_t nfh;
2623         fhandle_t *fhp;
2624         struct nfsm_info info;
2625
2626         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2627         nlookup_zero(&nd);
2628         dirp = NULL;
2629         dvp = NULL;
2630         vp = NULL;
2631
2632         info.dpos = nfsd->nd_dpos;
2633         info.mrep = nfsd->nd_mrep;
2634         info.mreq =  NULL;
2635         info.md = nfsd->nd_md;
2636         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2637
2638         fhp = &nfh.fh_generic;
2639         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2640         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2641
2642         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2643                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2644                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2645         if (dirp) {
                     /* Pre-operation directory attributes (NFSv3 wcc data). */
2646                 if (info.v3)
2647                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2648         }
2649         if (error) {
                     /* Lookup failed: reply with the wcc data only. */
2650                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2651                                       NFSX_WCCDATA(info.v3), &error));
2652                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2653                                  diraft_ret, &diraft);
2654                 error = 0;
2655                 goto nfsmout;
2656         }
2657         VATTR_NULL(vap);
2658         if (info.v3) {
2659                 ERROROUT(nfsm_srvsattr(&info, vap));
2660         } else {
2661                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2662                 vap->va_mode = nfstov_mode(*tl++);
2663         }
2664
2665         /*
2666          * At this point nd.ni_dvp is referenced and exclusively locked and
2667          * nd.ni_vp, if it exists, is referenced but not locked.
              *
              * NOTE(review): this comment appears to predate the nlookup
              * conversion; the vnodes it talks about are the locals dvp and
              * vp here.  The cleanup at nfsmout uses vput(vp), so the
              * "not locked" statement should be confirmed.
2668          */
2669
2670         vap->va_type = VDIR;
2671         if (vp != NULL) {
2672                 error = EEXIST;
2673                 goto out;
2674         }
2675
2676         /*
2677          * Issue mkdir op.  Since SAVESTART is not set, the pathname 
2678          * component is freed by the VOP call.  This will fill-in
2679          * nd.ni_vp, reference, and exclusively lock it.
              *
              * NOTE(review): SAVESTART and nd.ni_vp do not appear in this
              * function; the new vnode is returned through the local vp.
2680          */
2681         if (vap->va_mode == (mode_t)VNOVAL)
2682                 vap->va_mode = 0;
2683         vn_unlock(dvp);
2684         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2685         vrele(dvp);
2686         dvp = NULL;
2687
2688         if (error == 0) {
                     /* Build the file handle and attributes for the reply. */
2689                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2690                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2691                 if (error == 0)
2692                         error = VOP_GETATTR(vp, vap);
2693         }
2694 out:
2695         if (dirp)
2696                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2697         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2698                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2699                               NFSX_WCCDATA(info.v3),
2700                               &error));
2701         if (info.v3) {
2702                 if (!error) {
2703                         nfsm_srvpostop_fh(&info, fhp);
2704                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2705                 }
2706                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2707                                  diraft_ret, &diraft);
2708         } else {
                     /* NFSv2 reply body: file handle followed by a v2 fattr. */
2709                 nfsm_srvfhtom(&info, fhp);
2710                 fp = nfsm_build(&info, NFSX_V2FATTR);
2711                 nfsm_srvfattr(nfsd, vap, fp);
2712         }
2713         error = 0;
2714         /* fall through */
2715
2716 nfsmout:
             /* Common exit: publish the reply and drop whatever is still held. */
2717         *mrq = info.mreq;
2718         nlookup_done(&nd);
2719         if (dirp)
2720                 vrele(dirp);
2721         if (dvp) {
2722                 if (dvp == vp)
2723                         vrele(dvp);
2724                 else
2725                         vput(dvp);
2726         }
2727         if (vp)
2728                 vput(vp);
2729         return (error);
2730 }
2731
2732 /*
2733  * nfs rmdir service
      *
      * Looks up the name with nfs_namei(NLC_DELETE) and removes it with
      * VOP_NRMDIR().  A target that is not a directory is refused with
      * ENOTDIR and the root of a mounted filesystem (VROOT) with EBUSY.
      * NFSv3 replies carry the parent directory wcc data.  The reply mbuf
      * chain is handed back through *mrq.
2734  */
2735 int
2736 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2737             struct thread *td, struct mbuf **mrq)
2738 {
2739         struct sockaddr *nam = nfsd->nd_nam;
2740         struct ucred *cred = &nfsd->nd_cr;
2741         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2742         struct vnode *dirp;
2743         struct vnode *dvp;
2744         struct vnode *vp;
2745         struct vattr dirfor, diraft;
2746         nfsfh_t nfh;
2747         fhandle_t *fhp;
2748         struct nlookupdata nd;
2749         struct nfsm_info info;
2750
2751         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2752         nlookup_zero(&nd);
2753         dirp = NULL;
2754         dvp = NULL;
2755         vp = NULL;
2756
2757         info.mrep = nfsd->nd_mrep;
2758         info.mreq = NULL;
2759         info.md = nfsd->nd_md;
2760         info.dpos = nfsd->nd_dpos;
2761         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2762
2763         fhp = &nfh.fh_generic;
2764         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2765         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2766
2767         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2768                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2769                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2770         if (dirp) {
                     /* Pre-operation directory attributes (NFSv3 wcc data). */
2771                 if (info.v3)
2772                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2773         }
2774         if (error) {
                     /* Lookup failed: reply with the wcc data only. */
2775                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2776                                       NFSX_WCCDATA(info.v3), &error));
2777                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2778                                  diraft_ret, &diraft);
2779                 error = 0;
2780                 goto nfsmout;
2781         }
2782         if (vp->v_type != VDIR) {
2783                 error = ENOTDIR;
2784                 goto out;
2785         }
2786
2787         /*
2788          * The root of a mounted filesystem cannot be deleted.
2789          */
2790         if (vp->v_flag & VROOT)
2791                 error = EBUSY;
2792 out:
2793         /*
2794          * Issue or abort op.  Since SAVESTART is not set, path name
2795          * component is freed by the VOP after either.
              *
              * NOTE(review): SAVESTART does not appear in this function; the
              * sentence looks inherited from an older namei-based version.
2796          */
2797         if (!error) {
                     /*
                      * vp is released before the VOP is issued; dvp is
                      * released right after it.
                      */
2798                 if (dvp != vp)
2799                         vn_unlock(dvp);
2800                 vput(vp);
2801                 vp = NULL;
2802                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2803                 vrele(dvp);
2804                 dvp = NULL;
2805         }
             /* nlookup_done() is called again at nfsmout for the early exits. */
2806         nlookup_done(&nd);
2807
2808         if (dirp)
2809                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2810         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2811         if (info.v3) {
2812                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2813                                  diraft_ret, &diraft);
2814                 error = 0;
2815         }
2816         /* fall through */
2817
2818 nfsmout:
             /* Common exit: publish the reply and drop whatever is still held. */
2819         *mrq = info.mreq;
2820         if (dvp) {
2821                 if (dvp == vp)
2822                         vrele(dvp);
2823                 else
2824                         vput(dvp);
2825         }
2826         nlookup_done(&nd);
2827         if (dirp)
2828                 vrele(dirp);
2829         if (vp)
2830                 vput(vp);
2831         return(error);
2832 }
2833
2834 /*
2835  * nfs readdir service
2836  * - mallocs what it thinks is enough to read
2837  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2838  * - calls VOP_READDIR()
2839  * - loops around building the reply
2840  *      if the output generated exceeds count break out of loop
2841  *      The nfsm_clget macro is used here so that the reply will be packed
2842  *      tightly in mbuf clusters.
2843  * - it only knows that it has encountered eof when the VOP_READDIR()
2844  *      reads nothing
2845  * - as such one readdir rpc will return eof false although you are there
2846  *      and then the next will return eof
2847  * - it trims out records with d_fileno == 0
2848  *      this doesn't matter for Unix clients, but such records might
2849  *      confuse clients of other operating systems.
2850  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2851  *      than requested, but this may not apply to all filesystems. For
2852  *      example, client NFS does not { although it is never remote mounted
2853  *      anyhow }
2854  *     The alternate call nfsrv_readdirplus() does lookups as well.
2855  * PS: The NFS protocol spec. does not clarify what the "count" byte
2856  *      argument is a count of.. just name strings and file id's or the
2857  *      entire reply rpc or ...
2858  *      I tried just file name and id sizes and it confused the Sun client,
2859  *      so I am using the full rpc size now. The "paranoia.." comment refers
2860  *      to including the status longwords that are not a part of the dir.
2861  *      "entry" structures, but are in the rpc.
2862  */
2863 struct flrep {
             /*
              * nfsrv_readdirplus() declares one of these and treats fl_nfh
              * as a fhandle_t.
              * NOTE(review): presumably the per-entry trailer (cookie, post-op
              * attributes, file handle) of a READDIRPLUS reply - confirm
              * against the rest of nfsrv_readdirplus().
              */
2864         nfsuint64       fl_off;
2865         u_int32_t       fl_postopok;
             /* NFSX_V3FATTR bytes, held as 32-bit words */
2866         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2867         u_int32_t       fl_fhok;
2868         u_int32_t       fl_fhsize;
             /* NFSX_V3FH bytes, held as 32-bit words */
2869         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2870 };
2871
2872 int
2873 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2874               struct thread *td, struct mbuf **mrq)
2875 {
             /*
              * Decodes the directory file handle, the starting cookie (toff),
              * the cookie verifier (NFSv3 only) and the byte count from the
              * request, reads entries with VOP_READDIR() into a temporary
              * buffer and encodes them into the reply until the next entry
              * would push the reply past 'cnt' bytes.  The reply mbuf chain
              * is handed back through *mrq.
              */
2876         struct sockaddr *nam = nfsd->nd_nam;
2877         struct ucred *cred = &nfsd->nd_cr;
2878         char *bp, *be;
2879         struct dirent *dp;
2880         caddr_t cp;
2881         u_int32_t *tl;
2882         struct mbuf *mp1, *mp2;
2883         char *cpos, *cend, *rbuf;
2884         struct vnode *vp = NULL;
2885         struct mount *mp = NULL;
2886         struct vattr at;
2887         nfsfh_t nfh;
2888         fhandle_t *fhp;
2889         struct uio io;
2890         struct iovec iv;
2891         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2892         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2893         u_quad_t off, toff, verf;
2894         off_t *cookies = NULL, *cookiep;
2895         struct nfsm_info info;
2896
2897         info.mrep = nfsd->nd_mrep;
2898         info.mreq = NULL;
2899         info.md = nfsd->nd_md;
2900         info.dpos = nfsd->nd_dpos;
2901         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2902
2903         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2904         fhp = &nfh.fh_generic;
2905         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2906         if (info.v3) {
                     /* v3: 64-bit cookie, 64-bit verifier, then the count. */
2907                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2908                 toff = fxdr_hyper(tl);
2909                 tl += 2;
2910                 verf = fxdr_hyper(tl);
2911                 tl += 2;
2912         } else {
                     /* v2: 32-bit cookie, then the count. */
2913                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2914                 toff = fxdr_unsigned(u_quad_t, *tl++);
2915                 verf = 0;       /* shut up gcc */
2916         }
2917         off = toff;
2918         cnt = fxdr_unsigned(int, *tl);
             /*
              * siz is the count rounded up to a multiple of DIRBLKSIZ; both
              * values are then clamped to NFS_SRVMAXDATA(nfsd).  The unsigned
              * casts make a negative count compare as very large.
              *
              * NOTE(review): a request with cnt == 0 yields siz == fullsiz
              * == 0.  Confirm that VOP_READDIR() rejects a zero-length read;
              * otherwise the "again" loop below may not terminate.
              */
2919         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2920         xfer = NFS_SRVMAXDATA(nfsd);
2921         if ((unsigned)cnt > xfer)
2922                 cnt = xfer;
2923         if ((unsigned)siz > xfer)
2924                 siz = xfer;
2925         fullsiz = siz;
2926         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2927                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2928         if (!error && vp->v_type != VDIR) {
2929                 error = ENOTDIR;
2930                 vput(vp);
2931                 vp = NULL;
2932         }
2933         if (error) {
2934                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2935                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2936                 error = 0;
2937                 goto nfsmout;
2938         }
2939
2940         /*
2941          * Obtain lock on vnode for this section of the code
2942          */
2943
2944         if (info.v3) {
2945                 error = getret = VOP_GETATTR(vp, &at);
2946 #if 0
2947                 /*
2948                  * XXX This check may be too strict for Solaris 2.5 clients.
2949                  */
2950                 if (!error && toff && verf && verf != at.va_filerev)
2951                         error = NFSERR_BAD_COOKIE;
2952 #endif
2953         }
2954         if (!error)
2955                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2956         if (error) {
2957                 vput(vp);
2958                 vp = NULL;
2959                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2960                                       NFSX_POSTOPATTR(info.v3), &error));
2961                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2962                 error = 0;
2963                 goto nfsmout;
2964         }
             /* From here on vp is only referenced; later releases use vrele(). */
2965         vn_unlock(vp);
2966
2967         /*
2968          * end section.  Allocate rbuf and continue
2969          */
2970         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2971 again:
             /* (Re)read a full-sized chunk of the directory starting at 'off'. */
2972         iv.iov_base = rbuf;
2973         iv.iov_len = fullsiz;
2974         io.uio_iov = &iv;
2975         io.uio_iovcnt = 1;
2976         io.uio_offset = (off_t)off;
2977         io.uio_resid = fullsiz;
2978         io.uio_segflg = UIO_SYSSPACE;
2979         io.uio_rw = UIO_READ;
2980         io.uio_td = NULL;
2981         eofflag = 0;
2982         if (cookies) {
                     /* Drop the cookie array left over from a previous pass. */
2983                 kfree((caddr_t)cookies, M_TEMP);
2984                 cookies = NULL;
2985         }
2986         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2987         off = (off_t)io.uio_offset;
             /* A successful read that returned no cookie array is reported as NFSERR_PERM. */
2988         if (!cookies && !error)
2989                 error = NFSERR_PERM;
2990         if (info.v3) {
2991                 getret = VOP_GETATTR(vp, &at);
2992                 if (!error)
2993                         error = getret;
2994         }
2995         if (error) {
2996                 vrele(vp);
2997                 vp = NULL;
2998                 kfree((caddr_t)rbuf, M_TEMP);
2999                 if (cookies)
3000                         kfree((caddr_t)cookies, M_TEMP);
3001                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3002                                       NFSX_POSTOPATTR(info.v3), &error));
3003                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3004                 error = 0;
3005                 goto nfsmout;
3006         }
3007         if (io.uio_resid) {
                     /* Short read: siz becomes the number of bytes actually read. */
3008                 siz -= io.uio_resid;
3009
3010                 /*
3011                  * If nothing read, return eof
3012                  * rpc reply
3013                  */
3014                 if (siz == 0) {
3015                         vrele(vp);
3016                         vp = NULL;
3017                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3018                                               NFSX_POSTOPATTR(info.v3) +
3019                                               NFSX_COOKIEVERF(info.v3) +
3020                                               2 * NFSX_UNSIGNED,
3021                                               &error));
3022                         if (info.v3) {
3023                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3024                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3025                                 txdr_hyper(at.va_filerev, tl);
3026                                 tl += 2;
3027                         } else
3028                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
                             /* "no more entries" followed by "eof". */
3029                         *tl++ = nfs_false;
3030                         *tl = nfs_true;
3031                         FREE((caddr_t)rbuf, M_TEMP);
3032                         FREE((caddr_t)cookies, M_TEMP);
3033                         error = 0;
3034                         goto nfsmout;
3035                 }
3036         }
3037
3038         /*
3039          * Check for degenerate cases of nothing useful read.
3040          * If so go try again
3041          */
3042         cpos = rbuf;
3043         cend = rbuf + siz;
3044         dp = (struct dirent *)cpos;
3045         cookiep = cookies;
3046         /*
3047          * For some reason FreeBSD's ufs_readdir() chooses to back the
3048          * directory offset up to a block boundary, so it is necessary to
3049          * skip over the records that precede the requested offset. This
3050          * requires the assumption that file offset cookies monotonically
3051          * increase.
3052          */
3053         while (cpos < cend && ncookies > 0 &&
3054                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3055                  ((u_quad_t)(*cookiep)) <= toff)) {
3056                 dp = _DIRENT_NEXT(dp);
3057                 cpos = (char *)dp;
3058                 cookiep++;
3059                 ncookies--;
3060         }
3061         if (cpos >= cend || ncookies == 0) {
                     /* Nothing usable in this chunk: continue from where the read stopped. */
3062                 toff = off;
3063                 siz = fullsiz;
3064                 goto again;
3065         }
3066
3067         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3068         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3069                               NFSX_POSTOPATTR(info.v3) +
3070                               NFSX_COOKIEVERF(info.v3) + siz,
3071                               &error));
3072         if (info.v3) {
3073                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3074                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3075                 txdr_hyper(at.va_filerev, tl);
3076         }
             /*
              * bp/be track the write position and end of space in the current
              * reply mbuf; every nfsm_clget() below is given mp1, mp2, bp and
              * be so the output cursor can move on as the reply grows.
              */
3077         mp1 = mp2 = info.mb;
3078         bp = info.bpos;
3079         be = bp + M_TRAILINGSPACE(mp1);
3080
3081         /* Loop through the records and build reply */
3082         while (cpos < cend && ncookies > 0) {
3083                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3084                         nlen = dp->d_namlen;
3085                         rem = nfsm_rndup(nlen) - nlen;
                             /* Running reply size; stop before exceeding the client's count. */
3086                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3087                         if (info.v3)
3088                                 len += 2 * NFSX_UNSIGNED;
3089                         if (len > cnt) {
3090                                 eofflag = 0;
3091                                 break;
3092                         }
3093                         /*
3094                          * Build the directory record xdr from
3095                          * the dirent entry.
3096                          */
3097                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3098                         *tl = nfs_true;
3099                         bp += NFSX_UNSIGNED;
3100                         if (info.v3) {
                                     /* v3 file ids are 64 bits: high word first. */
3101                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3102                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3103                                 bp += NFSX_UNSIGNED;
3104                         }
3105                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3106                         *tl = txdr_unsigned(dp->d_ino);
3107                         bp += NFSX_UNSIGNED;
3108                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3109                         *tl = txdr_unsigned(nlen);
3110                         bp += NFSX_UNSIGNED;
3111
3112                         /* And loop around copying the name */
3113                         xfer = nlen;
3114                         cp = dp->d_name;
3115                         while (xfer > 0) {
3116                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3117                                 if ((bp+xfer) > be)
3118                                         tsiz = be-bp;
3119                                 else
3120                                         tsiz = xfer;
3121                                 bcopy(cp, bp, tsiz);
3122                                 bp += tsiz;
3123                                 xfer -= tsiz;
3124                                 if (xfer > 0)
3125                                         cp += tsiz;
3126                         }
3127                         /* And null pad to a int32_t boundary */
3128                         for (i = 0; i < rem; i++)
3129                                 *bp++ = '\0';
3130                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3131
3132                         /* Finish off the record */
3133                         if (info.v3) {
                                     /* v3 cookies are 64 bits: high word first. */
3134                                 *tl = txdr_unsigned(*cookiep >> 32);
3135                                 bp += NFSX_UNSIGNED;
3136                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3137                         }
3138                         *tl = txdr_unsigned(*cookiep);
3139                         bp += NFSX_UNSIGNED;
3140                 }
3141                 dp = _DIRENT_NEXT(dp);
3142                 cpos = (char *)dp;
3143                 cookiep++;
3144                 ncookies--;
3145         }
3146         vrele(vp);
3147         vp = NULL;
             /* Terminate the entry list, then append the eof flag. */
3148         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3149         *tl = nfs_false;
3150         bp += NFSX_UNSIGNED;
3151         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3152         if (eofflag)
3153                 *tl = nfs_true;
3154         else
3155                 *tl = nfs_false;
3156         bp += NFSX_UNSIGNED;
             /* Fix up the length of the mbuf that was written last. */
3157         if (mp1 != info.mb) {
3158                 if (bp < be)
3159                         mp1->m_len = bp - mtod(mp1, caddr_t);
3160         } else
3161                 mp1->m_len += bp - info.bpos;
3162         FREE((caddr_t)rbuf, M_TEMP);
3163         FREE((caddr_t)cookies, M_TEMP);
3164
3165 nfsmout:
3166         *mrq = info.mreq;
3167         if (vp)
3168                 vrele(vp);
3169         return(error);
3170 }
3171
3172 int
3173 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3174                   struct thread *td, struct mbuf **mrq)
3175 {
3176         struct sockaddr *nam = nfsd->nd_nam;
3177         struct ucred *cred = &nfsd->nd_cr;
3178         char *bp, *be;
3179         struct dirent *dp;
3180         caddr_t cp;
3181         u_int32_t *tl;
3182         struct mbuf *mp1, *mp2;
3183         char *cpos, *cend, *rbuf;
3184         struct vnode *vp = NULL, *nvp;
3185         struct mount *mp = NULL;
3186         struct flrep fl;
3187         nfsfh_t nfh;
3188         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3189         struct uio io;
3190         struct iovec iv;
3191         struct vattr va, at, *vap = &va;
3192         struct nfs_fattr *fp;
3193         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3194         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3195         u_quad_t off, toff, verf;
3196         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3197         struct nfsm_info info;
3198
3199         info.mrep = nfsd->nd_mrep;
3200         info.mreq = NULL;
3201         info.md = nfsd->nd_md;
3202         info.dpos = nfsd->nd_dpos;
3203         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3204
3205         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3206         fhp = &nfh.fh_generic;
3207         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3208         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3209         toff = fxdr_hyper(tl);
3210         tl += 2;
3211         verf = fxdr_hyper(tl);
3212         tl += 2;
3213         siz = fxdr_unsigned(int, *tl++);
3214         cnt = fxdr_unsigned(int, *tl);
3215         off = toff;
3216         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3217         xfer = NFS_SRVMAXDATA(nfsd);
3218         if ((unsigned)cnt > xfer)
3219                 cnt = xfer;
3220         if ((unsigned)siz > xfer)
3221                 siz = xfer;
3222         fullsiz = siz;
3223         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3224                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3225         if (!error && vp->v_type != VDIR) {
3226                 error = ENOTDIR;
3227                 vput(vp);
3228                 vp = NULL;
3229         }
3230         if (error) {
3231                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3232                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3233                 error = 0;
3234                 goto nfsmout;
3235         }
3236         error = getret = VOP_GETATTR(vp, &at);
3237 #if 0
3238         /*
3239          * XXX This check may be too strict for Solaris 2.5 clients.
3240          */
3241         if (!error && toff && verf && verf != at.va_filerev)
3242                 error = NFSERR_BAD_COOKIE;
3243 #endif
3244         if (!error) {
3245                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3246         }
3247         if (error) {
3248                 vput(vp);
3249                 vp = NULL;
3250                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3251                                       NFSX_V3POSTOPATTR, &error));
3252                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3253                 error = 0;
3254                 goto nfsmout;
3255         }
3256         vn_unlock(vp);
3257         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3258 again:
3259         iv.iov_base = rbuf;
3260         iv.iov_len = fullsiz;
3261         io.uio_iov = &iv;
3262         io.uio_iovcnt = 1;
3263         io.uio_offset = (off_t)off;
3264         io.uio_resid = fullsiz;
3265         io.uio_segflg = UIO_SYSSPACE;
3266         io.uio_rw = UIO_READ;
3267         io.uio_td = NULL;
3268         eofflag = 0;
3269         if (cookies) {
3270                 kfree((caddr_t)cookies, M_TEMP);
3271                 cookies = NULL;
3272         }
3273         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3274         off = (u_quad_t)io.uio_offset;
3275         getret = VOP_GETATTR(vp, &at);
3276         if (!cookies && !error)
3277                 error = NFSERR_PERM;
3278         if (!error)
3279                 error = getret;
3280         if (error) {
3281                 vrele(vp);
3282                 vp = NULL;
3283                 if (cookies)
3284                         kfree((caddr_t)cookies, M_TEMP);
3285                 kfree((caddr_t)rbuf, M_TEMP);
3286                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3287                                       NFSX_V3POSTOPATTR, &error));
3288                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3289                 error = 0;
3290                 goto nfsmout;
3291         }
3292         if (io.uio_resid) {
3293                 siz -= io.uio_resid;
3294
3295                 /*
3296                  * If nothing read, return eof
3297                  * rpc reply
3298                  */
3299                 if (siz == 0) {
3300                         vrele(vp);
3301                         vp = NULL;
3302                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3303                                               NFSX_V3POSTOPATTR +
3304                                               NFSX_V3COOKIEVERF +
3305                                               2 * NFSX_UNSIGNED,
3306                                               &error));
3307                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3308                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3309                         txdr_hyper(at.va_filerev, tl);
3310                         tl += 2;
3311                         *tl++ = nfs_false;
3312                         *tl = nfs_true;
3313                         FREE((caddr_t)cookies, M_TEMP);
3314                         FREE((caddr_t)rbuf, M_TEMP);
3315                         error = 0;
3316                         goto nfsmout;
3317                 }
3318         }
3319
3320         /*
3321          * Check for degenerate cases of nothing useful read.
3322          * If so go try again
3323          */
3324         cpos = rbuf;
3325         cend = rbuf + siz;
3326         dp = (struct dirent *)cpos;
3327         cookiep = cookies;
3328         /*
3329          * For some reason FreeBSD's ufs_readdir() chooses to back the
3330          * directory offset up to a block boundary, so it is necessary to
3331          * skip over the records that preceed the requested offset. This
3332          * requires the assumption that file offset cookies monotonically
3333          * increase.
3334          */
3335         while (cpos < cend && ncookies > 0 &&
3336                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3337                  ((u_quad_t)(*cookiep)) <= toff)) {
3338                 dp = _DIRENT_NEXT(dp);
3339                 cpos = (char *)dp;
3340                 cookiep++;
3341                 ncookies--;
3342         }
3343         if (cpos >= cend || ncookies == 0) {
3344                 toff = off;
3345                 siz = fullsiz;
3346                 goto again;
3347         }
3348
3349         /*
3350          * Probe one of the directory entries to see if the filesystem
3351          * supports VGET.
3352          */
3353         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3354                 error = NFSERR_NOTSUPP;
3355                 vrele(vp);
3356                 vp = NULL;
3357                 kfree((caddr_t)cookies, M_TEMP);
3358                 kfree((caddr_t)rbuf, M_TEMP);
3359                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3360                                       NFSX_V3POSTOPATTR, &error));
3361                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3362                 error = 0;
3363                 goto nfsmout;
3364         }
3365         if (nvp) {
3366                 vput(nvp);
3367                 nvp = NULL;
3368         }
3369             
3370         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3371                         2 * NFSX_UNSIGNED;
3372         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3373         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3374         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3375         txdr_hyper(at.va_filerev, tl);
3376         mp1 = mp2 = info.mb;
3377         bp = info.bpos;
3378         be = bp + M_TRAILINGSPACE(mp1);
3379
3380         /* Loop through the records and build reply */
3381         while (cpos < cend && ncookies > 0) {
3382                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3383                         nlen = dp->d_namlen;
3384                         rem = nfsm_rndup(nlen) - nlen;
3385
3386                         /*
3387                          * For readdir_and_lookup get the vnode using
3388                          * the file number.
3389                          */
3390                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3391                                 goto invalid;
3392                         bzero((caddr_t)nfhp, NFSX_V3FH);
3393                         nfhp->fh_fsid = fhp->fh_fsid;
3394                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3395                                 vput(nvp);
3396                                 nvp = NULL;
3397                                 goto invalid;
3398                         }
3399                         if (VOP_GETATTR(nvp, vap)) {
3400                                 vput(nvp);
3401                                 nvp = NULL;
3402                                 goto invalid;
3403                         }
3404                         vput(nvp);
3405                         nvp = NULL;
3406
3407                         /*
3408                          * If either the dircount or maxcount will be
3409                          * exceeded, get out now. Both of these lengths
3410                          * are calculated conservatively, including all
3411                          * XDR overheads.
3412                          */
3413                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3414                                 NFSX_V3POSTOPATTR);
3415                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3416                         if (len > cnt || dirlen > fullsiz) {
3417                                 eofflag = 0;
3418                                 break;
3419                         }
3420
3421                         /*
3422                          * Build the directory record xdr from
3423                          * the dirent entry.
3424                          */
3425                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3426                         nfsm_srvfattr(nfsd, vap, fp);
3427                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3428                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3429                         fl.fl_postopok = nfs_true;
3430                         fl.fl_fhok = nfs_true;
3431                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3432
3433                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3434                         *tl = nfs_true;
3435                         bp += NFSX_UNSIGNED;
3436                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3437                         *tl = txdr_unsigned(dp->d_ino >> 32);
3438                         bp += NFSX_UNSIGNED;
3439                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3440                         *tl = txdr_unsigned(dp->d_ino);
3441                         bp += NFSX_UNSIGNED;
3442                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3443                         *tl = txdr_unsigned(nlen);
3444                         bp += NFSX_UNSIGNED;
3445
3446                         /* And loop around copying the name */
3447                         xfer = nlen;
3448                         cp = dp->d_name;
3449                         while (xfer > 0) {
3450                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3451                                 if ((bp + xfer) > be)
3452                                         tsiz = be - bp;
3453                                 else
3454                                         tsiz = xfer;
3455                                 bcopy(cp, bp, tsiz);
3456                                 bp += tsiz;
3457                                 xfer -= tsiz;
3458                                 cp += tsiz;
3459                         }
3460                         /* And null pad to a int32_t boundary */
3461                         for (i = 0; i < rem; i++)
3462                                 *bp++ = '\0';
3463         
3464                         /*
3465                          * Now copy the flrep structure out.
3466                          */
3467                         xfer = sizeof (struct flrep);
3468                         cp = (caddr_t)&fl;
3469                         while (xfer > 0) {
3470                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3471                                 if ((bp + xfer) > be)
3472                                         tsiz = be - bp;
3473                                 else
3474                                         tsiz = xfer;
3475                                 bcopy(cp, bp, tsiz);
3476                                 bp += tsiz;
3477                                 xfer -= tsiz;
3478                                 cp += tsiz;
3479                         }
3480                 }
3481 invalid:
3482                 dp = _DIRENT_NEXT(dp);
3483                 cpos = (char *)dp;
3484                 cookiep++;
3485                 ncookies--;
3486         }
3487         vrele(vp);
3488         vp = NULL;
3489         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3490         *tl = nfs_false;
3491         bp += NFSX_UNSIGNED;
3492         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3493         if (eofflag)
3494                 *tl = nfs_true;
3495         else
3496                 *tl = nfs_false;
3497         bp += NFSX_UNSIGNED;
3498         if (mp1 != info.mb) {
3499                 if (bp < be)
3500                         mp1->m_len = bp - mtod(mp1, caddr_t);
3501         } else
3502                 mp1->m_len += bp - info.bpos;
3503         FREE((caddr_t)cookies, M_TEMP);
3504         FREE((caddr_t)rbuf, M_TEMP);
3505 nfsmout:
3506         *mrq = info.mreq;
3507         if (vp)
3508                 vrele(vp);
3509         return(error);
3510 }
3511
3512 /*
3513  * nfs commit service
3514  */
3515 int
3516 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3517              struct thread *td, struct mbuf **mrq)
3518 {
3519         struct sockaddr *nam = nfsd->nd_nam;
3520         struct ucred *cred = &nfsd->nd_cr;
3521         struct vattr bfor, aft;
3522         struct vnode *vp = NULL;
3523         struct mount *mp = NULL;
3524         nfsfh_t nfh;
3525         fhandle_t *fhp;
3526         u_int32_t *tl;
3527         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3528         u_quad_t off;
3529         struct nfsm_info info;
3530
3531         info.mrep = nfsd->nd_mrep;
3532         info.mreq = NULL;
3533         info.md = nfsd->nd_md;
3534         info.dpos = nfsd->nd_dpos;
3535
3536         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3537         fhp = &nfh.fh_generic;
3538         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3539         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3540
3541         /*
3542          * XXX At this time VOP_FSYNC() does not accept offset and byte
3543          * count parameters, so these arguments are useless (someday maybe).
3544          */
3545         off = fxdr_hyper(tl);
3546         tl += 2;
3547         cnt = fxdr_unsigned(int, *tl);
3548         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3549                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3550         if (error) {
3551                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3552                                       2 * NFSX_UNSIGNED, &error));
3553                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3554                                  aft_ret, &aft);
3555                 error = 0;
3556                 goto nfsmout;
3557         }
3558         for_ret = VOP_GETATTR(vp, &bfor);
3559
3560         if (cnt > MAX_COMMIT_COUNT) {
3561                 /*
3562                  * Give up and do the whole thing
3563                  */
3564                 if (vp->v_object &&
3565                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3566                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3567                 }
3568                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3569         } else {
3570                 /*
3571                  * Locate and synchronously write any buffers that fall
3572                  * into the requested range.  Note:  we are assuming that
3573                  * f_iosize is a power of 2.
3574                  */
3575                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3576                 int iomask = iosize - 1;
3577                 off_t loffset;
3578
3579                 /*
3580                  * Align to iosize boundry, super-align to page boundry.
3581                  */
3582                 if (off & iomask) {
3583                         cnt += off & iomask;
3584                         off &= ~(u_quad_t)iomask;
3585                 }
3586                 if (off & PAGE_MASK) {
3587                         cnt += off & PAGE_MASK;
3588                         off &= ~(u_quad_t)PAGE_MASK;
3589                 }
3590                 loffset = off;
3591
3592                 if (vp->v_object &&
3593                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3594                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3595                 }
3596
3597                 crit_enter();
3598                 while (cnt > 0) {
3599                         struct buf *bp;
3600
3601                         /*
3602                          * If we have a buffer and it is marked B_DELWRI we
3603                          * have to lock and write it.  Otherwise the prior
3604                          * write is assumed to have already been committed.
3605                          */
3606                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3607                                 if (bp->b_flags & B_DELWRI)
3608                                         bp = findblk(vp, loffset, 0);
3609                                 else
3610                                         bp = NULL;
3611                         }
3612                         if (bp) {
3613                                 if (bp->b_flags & B_DELWRI) {
3614                                         bremfree(bp);
3615                                         bwrite(bp);
3616                                         ++nfs_commit_miss;
3617                                 } else {
3618                                         BUF_UNLOCK(bp);
3619                                 }
3620                         }
3621                         ++nfs_commit_blks;
3622                         if (cnt < iosize)
3623                                 break;
3624                         cnt -= iosize;
3625                         loffset += iosize;
3626                 }
3627                 crit_exit();
3628         }
3629
3630         aft_ret = VOP_GETATTR(vp, &aft);
3631         vput(vp);
3632         vp = NULL;
3633         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3634                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3635                               &error));
3636         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3637                          aft_ret, &aft);
3638         if (!error) {
3639                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3640                 if (nfsver.tv_sec == 0)
3641                         nfsver = boottime;
3642                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3643                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3644         } else {
3645                 error = 0;
3646         }
3647 nfsmout:
3648         *mrq = info.mreq;
3649         if (vp)
3650                 vput(vp);
3651         return(error);
3652 }
3653
3654 /*
3655  * nfs statfs service
3656  */
3657 int
3658 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3659              struct thread *td, struct mbuf **mrq)
3660 {
3661         struct sockaddr *nam = nfsd->nd_nam;
3662         struct ucred *cred = &nfsd->nd_cr;
3663         struct statfs *sf;
3664         struct nfs_statfs *sfp;
3665         int error = 0, rdonly, getret = 1;
3666         struct vnode *vp = NULL;
3667         struct mount *mp = NULL;
3668         struct vattr at;
3669         nfsfh_t nfh;
3670         fhandle_t *fhp;
3671         struct statfs statfs;
3672         u_quad_t tval;
3673         struct nfsm_info info;
3674
3675         info.mrep = nfsd->nd_mrep;
3676         info.mreq = NULL;
3677         info.md = nfsd->nd_md;
3678         info.dpos = nfsd->nd_dpos;
3679         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3680
3681         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3682         fhp = &nfh.fh_generic;
3683         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3684         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3685                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3686         if (error) {
3687                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3688                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3689                 error = 0;
3690                 goto nfsmout;
3691         }
3692         sf = &statfs;
3693         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3694         getret = VOP_GETATTR(vp, &at);
3695         vput(vp);
3696         vp = NULL;
3697         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3698                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3699                               &error));
3700         if (info.v3)
3701                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3702         if (error) {
3703                 error = 0;
3704                 goto nfsmout;
3705         }
3706         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3707         if (info.v3) {
3708                 tval = (u_quad_t)sf->f_blocks;
3709                 tval *= (u_quad_t)sf->f_bsize;
3710                 txdr_hyper(tval, &sfp->sf_tbytes);
3711                 tval = (u_quad_t)sf->f_bfree;
3712                 tval *= (u_quad_t)sf->f_bsize;
3713                 txdr_hyper(tval, &sfp->sf_fbytes);
3714                 tval = (u_quad_t)sf->f_bavail;
3715                 tval *= (u_quad_t)sf->f_bsize;
3716                 txdr_hyper(tval, &sfp->sf_abytes);
3717                 sfp->sf_tfiles.nfsuquad[0] = 0;
3718                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3719                 sfp->sf_ffiles.nfsuquad[0] = 0;
3720                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3721                 sfp->sf_afiles.nfsuquad[0] = 0;
3722                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3723                 sfp->sf_invarsec = 0;
3724         } else {
3725                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3726                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3727                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3728                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3729                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3730         }
3731 nfsmout:
3732         *mrq = info.mreq;
3733         if (vp)
3734                 vput(vp);
3735         return(error);
3736 }
3737
3738 /*
3739  * nfs fsinfo service
3740  */
3741 int
3742 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3743              struct thread *td, struct mbuf **mrq)
3744 {
3745         struct sockaddr *nam = nfsd->nd_nam;
3746         struct ucred *cred = &nfsd->nd_cr;
3747         struct nfsv3_fsinfo *sip;
3748         int error = 0, rdonly, getret = 1, pref;
3749         struct vnode *vp = NULL;
3750         struct mount *mp = NULL;
3751         struct vattr at;
3752         nfsfh_t nfh;
3753         fhandle_t *fhp;
3754         u_quad_t maxfsize;
3755         struct statfs sb;
3756         struct nfsm_info info;
3757
3758         info.mrep = nfsd->nd_mrep;
3759         info.mreq = NULL;
3760         info.md = nfsd->nd_md;
3761         info.dpos = nfsd->nd_dpos;
3762
3763         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3764         fhp = &nfh.fh_generic;
3765         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3766         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3767                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3768         if (error) {
3769                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3770                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3771                 error = 0;
3772                 goto nfsmout;
3773         }
3774
3775         /* XXX Try to make a guess on the max file size. */
3776         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3777         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3778
3779         getret = VOP_GETATTR(vp, &at);
3780         vput(vp);
3781         vp = NULL;
3782         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3783                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3784         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3785         sip = nfsm_build(&info, NFSX_V3FSINFO);
3786
3787         /*
3788          * XXX
3789          * There should be file system VFS OP(s) to get this information.
3790          * For now, assume ufs.
3791          */
3792         if (slp->ns_so->so_type == SOCK_DGRAM)
3793                 pref = NFS_MAXDGRAMDATA;
3794         else
3795                 pref = NFS_MAXDATA;
3796         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3797         sip->fs_rtpref = txdr_unsigned(pref);
3798         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3799         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3800         sip->fs_wtpref = txdr_unsigned(pref);
3801         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3802         sip->fs_dtpref = txdr_unsigned(pref);
3803         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3804         sip->fs_timedelta.nfsv3_sec = 0;
3805         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3806         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3807                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3808                 NFSV3FSINFO_CANSETTIME);
3809 nfsmout:
3810         *mrq = info.mreq;
3811         if (vp)
3812                 vput(vp);
3813         return(error);
3814 }
3815
3816 /*
3817  * nfs pathconf service
3818  */
3819 int
3820 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3821                struct thread *td, struct mbuf **mrq)
3822 {
3823         struct sockaddr *nam = nfsd->nd_nam;
3824         struct ucred *cred = &nfsd->nd_cr;
3825         struct nfsv3_pathconf *pc;
3826         int error = 0, rdonly, getret = 1;
3827         register_t linkmax, namemax, chownres, notrunc;
3828         struct vnode *vp = NULL;
3829         struct mount *mp = NULL;
3830         struct vattr at;
3831         nfsfh_t nfh;
3832         fhandle_t *fhp;
3833         struct nfsm_info info;
3834
3835         info.mrep = nfsd->nd_mrep;
3836         info.mreq = NULL;
3837         info.md = nfsd->nd_md;
3838         info.dpos = nfsd->nd_dpos;
3839
3840         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3841         fhp = &nfh.fh_generic;
3842         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3843         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3844                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3845         if (error) {
3846                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3847                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3848                 error = 0;
3849                 goto nfsmout;
3850         }
3851         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3852         if (!error)
3853                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3854         if (!error)
3855                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3856         if (!error)
3857                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3858         getret = VOP_GETATTR(vp, &at);
3859         vput(vp);
3860         vp = NULL;
3861         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3862                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3863                               &error));
3864         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3865         if (error) {
3866                 error = 0;
3867                 goto nfsmout;
3868         }
3869         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3870
3871         pc->pc_linkmax = txdr_unsigned(linkmax);
3872         pc->pc_namemax = txdr_unsigned(namemax);
3873         pc->pc_notrunc = txdr_unsigned(notrunc);
3874         pc->pc_chownrestricted = txdr_unsigned(chownres);
3875
3876         /*
3877          * These should probably be supported by VOP_PATHCONF(), but
3878          * until msdosfs is exportable (why would you want to?), the
3879          * Unix defaults should be ok.
3880          */
3881         pc->pc_caseinsensitive = nfs_false;
3882         pc->pc_casepreserving = nfs_true;
3883 nfsmout:
3884         *mrq = info.mreq;
3885         if (vp) 
3886                 vput(vp);
3887         return(error);
3888 }
3889
3890 /*
3891  * Null operation, used by clients to ping server
3892  */
3893 /* ARGSUSED */
3894 int
3895 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3896            struct thread *td, struct mbuf **mrq)
3897 {
3898         struct nfsm_info info;
3899         int error = NFSERR_RETVOID;
3900
3901         info.mrep = nfsd->nd_mrep;
3902         info.mreq = NULL;
3903
3904         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3905         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3906 nfsmout:
3907         *mrq = info.mreq;
3908         return (error);
3909 }
3910
3911 /*
3912  * No operation, used for obsolete procedures
3913  */
3914 /* ARGSUSED */
3915 int
3916 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3917            struct thread *td, struct mbuf **mrq)
3918 {
3919         struct nfsm_info info;
3920         int error;
3921
3922         info.mrep = nfsd->nd_mrep;
3923         info.mreq = NULL;
3924
3925         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3926         if (nfsd->nd_repstat)
3927                 error = nfsd->nd_repstat;
3928         else
3929                 error = EPROCUNAVAIL;
3930         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3931         error = 0;
3932 nfsmout:
3933         *mrq = info.mreq;
3934         return (error);
3935 }
3936
3937 /*
3938  * Perform access checking for vnodes obtained from file handles that would
3939  * refer to files already opened by a Unix client. You cannot just use
3940  * vn_writechk() and VOP_ACCESS() for two reasons.
3941  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3942  * 2 - The owner is to be given access irrespective of mode bits for some
3943  *     operations, so that processes that chmod after opening a file don't
3944  *     break. I don't like this because it opens a security hole, but since
3945  *     the nfs server opens a security hole the size of a barn door anyhow,
3946  *     what the heck.
3947  *
3948  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3949  * will return EPERM instead of EACCESS. EPERM is always an error.
3950  */
3951 static int
3952 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3953              int rdonly, struct thread *td, int override)
3954 {
3955         struct vattr vattr;
3956         int error;
3957
3958         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3959         if (flags & VWRITE) {
3960                 /* Just vn_writechk() changed to check rdonly */
3961                 /*
3962                  * Disallow write attempts on read-only file systems;
3963                  * unless the file is a socket or a block or character
3964                  * device resident on the file system.
3965                  */
3966                 if (rdonly || 
3967                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3968                         switch (vp->v_type) {
3969                         case VREG:
3970                         case VDIR:
3971                         case VLNK:
3972                                 return (EROFS);
3973                         default:
3974                                 break;
3975                         }
3976                 }
3977                 /*
3978                  * If there's shared text associated with
3979                  * the inode, we can't allow writing.
3980                  */
3981                 if (vp->v_flag & VTEXT)
3982                         return (ETXTBSY);
3983         }
3984         error = VOP_GETATTR(vp, &vattr);
3985         if (error)
3986                 return (error);
3987         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
3988         /*
3989          * Allow certain operations for the owner (reads and writes
3990          * on files that are already open).
3991          */
3992         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3993                 error = 0;
3994         return error;
3995 }
3996 #endif /* NFS_NOSERVER */
3997