Merge branch 'apic_io'
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  * $DragonFly: src/sys/vfs/nfs/nfs_serv.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
39  */
40
41 /*
42  * nfs version 2 and 3 server calls to vnode ops
43  * - these routines generally have 3 phases
44  *   1 - break down and validate rpc request in mbuf list
45  *   2 - do the vnode ops for the request
46  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
47  *   3 - build the rpc reply in an mbuf list
48  *   nb:
49  *      - do not mix the phases, since the nfsm_?? macros can return failures
50  *        on a bad rpc or similar and do not do any vrele() or vput()'s
51  *
52  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
53  *      error number iff error != 0 whereas
54  *      returning an error from the server function implies a fatal error
55  *      such as a badly constructed rpc request that should be dropped without
56  *      a reply.
57  *      For Version 3, nfsm_reply() does not return for the error case, since
58  *      most version 3 rpcs return more than the status for error cases.
59  *
60  * Other notes:
61  *      Warning: always pay careful attention to resource cleanup on return
62  *      and note that nfsm_*() macros can terminate a procedure on certain
63  *      errors.
64  */
65
66 #include <sys/param.h>
67 #include <sys/systm.h>
68 #include <sys/proc.h>
69 #include <sys/priv.h>
70 #include <sys/nlookup.h>
71 #include <sys/namei.h>
72 #include <sys/unistd.h>
73 #include <sys/vnode.h>
74 #include <sys/mount.h>
75 #include <sys/socket.h>
76 #include <sys/socketvar.h>
77 #include <sys/malloc.h>
78 #include <sys/mbuf.h>
79 #include <sys/dirent.h>
80 #include <sys/stat.h>
81 #include <sys/kernel.h>
82 #include <sys/sysctl.h>
83 #include <sys/buf.h>
84
85 #include <vm/vm.h>
86 #include <vm/vm_extern.h>
87 #include <vm/vm_zone.h>
88 #include <vm/vm_object.h>
89
90 #include <sys/buf2.h>
91
92 #include <sys/thread2.h>
93
94 #include "nfsproto.h"
95 #include "rpcv2.h"
96 #include "nfs.h"
97 #include "xdr_subs.h"
98 #include "nfsm_subs.h"
99
100 #ifdef NFSRV_DEBUG
101 #define nfsdbprintf(info)       kprintf info
102 #else
103 #define nfsdbprintf(info)
104 #endif
105
106 #define MAX_COMMIT_COUNT        (1024 * 1024)
107
108 #define NUM_HEURISTIC           1017
109 #define NHUSE_INIT              64
110 #define NHUSE_INC               16
111 #define NHUSE_MAX               2048
112
113 static struct nfsheur {
114     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
115     off_t nh_nextr;             /* next offset for sequential detection */
116     int nh_use;                 /* use count for selection */
117     int nh_seqcount;            /* heuristic */
118 } nfsheur[NUM_HEURISTIC];
119
120 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
121                       NFFIFO, NFNON };
122 #ifndef NFS_NOSERVER 
123 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
124                       NFCHR, NFNON };
125
126 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
127 int nfsrvw_procrastinate_v3 = 0;
128
129 static struct timespec  nfsver;
130
131 SYSCTL_DECL(_vfs_nfs);
132
133 int nfs_async;
134 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0, "");
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0, "");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0, "");
139
140 static int nfsrv_access (struct mount *, struct vnode *, int,
141                         struct ucred *, int, struct thread *, int);
142 static void nfsrvw_coalesce (struct nfsrv_descript *,
143                 struct nfsrv_descript *);
144
145 /*
146  * nfs v3 access service
147  */
148 int
149 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
150               struct thread *td, struct mbuf **mrq)
151 {
152         struct sockaddr *nam = nfsd->nd_nam;
153         struct ucred *cred = &nfsd->nd_cr;
154         struct vnode *vp = NULL;
155         struct mount *mp = NULL;
156         nfsfh_t nfh;
157         fhandle_t *fhp;
158         int error = 0, rdonly, getret;
159         struct vattr vattr, *vap = &vattr;
160         u_long testmode, nfsmode;
161         struct nfsm_info info;
162         u_int32_t *tl;
163
164         info.dpos = nfsd->nd_dpos;
165         info.md = nfsd->nd_md;
166         info.mrep = nfsd->nd_mrep;
167         info.mreq = NULL;
168
169         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
170         fhp = &nfh.fh_generic;
171         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
172         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
173         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
174             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
175         if (error) {
176                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
177                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
178                 error = 0;
179                 goto nfsmout;
180         }
181         nfsmode = fxdr_unsigned(u_int32_t, *tl);
182         if ((nfsmode & NFSV3ACCESS_READ) &&
183                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
184                 nfsmode &= ~NFSV3ACCESS_READ;
185         if (vp->v_type == VDIR)
186                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
187                         NFSV3ACCESS_DELETE);
188         else
189                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
190         if ((nfsmode & testmode) &&
191                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
192                 nfsmode &= ~testmode;
193         if (vp->v_type == VDIR)
194                 testmode = NFSV3ACCESS_LOOKUP;
195         else
196                 testmode = NFSV3ACCESS_EXECUTE;
197         if ((nfsmode & testmode) &&
198                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
199                 nfsmode &= ~testmode;
200         getret = VOP_GETATTR(vp, vap);
201         vput(vp);
202         vp = NULL;
203         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
204                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
205         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
206         tl = nfsm_build(&info, NFSX_UNSIGNED);
207         *tl = txdr_unsigned(nfsmode);
208 nfsmout:
209         *mrq = info.mreq;
210         if (vp)
211                 vput(vp);
212         return(error);
213 }
214
215 /*
216  * nfs getattr service
217  */
218 int
219 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
220               struct thread *td, struct mbuf **mrq)
221 {
222         struct sockaddr *nam = nfsd->nd_nam;
223         struct ucred *cred = &nfsd->nd_cr;
224         struct nfs_fattr *fp;
225         struct vattr va;
226         struct vattr *vap = &va;
227         struct vnode *vp = NULL;
228         struct mount *mp = NULL;
229         nfsfh_t nfh;
230         fhandle_t *fhp;
231         int error = 0, rdonly;
232         struct nfsm_info info;
233
234         info.mrep = nfsd->nd_mrep;
235         info.md = nfsd->nd_md;
236         info.dpos = nfsd->nd_dpos;
237         info.mreq = NULL;
238
239         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
240         fhp = &nfh.fh_generic;
241         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
242         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
243                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
244         if (error) {
245                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
246                 error = 0;
247                 goto nfsmout;
248         }
249         error = VOP_GETATTR(vp, vap);
250         vput(vp);
251         vp = NULL;
252         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
253                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
254         if (error) {
255                 error = 0;
256                 goto nfsmout;
257         }
258         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
259         nfsm_srvfattr(nfsd, vap, fp);
260         /* fall through */
261
262 nfsmout:
263         *mrq = info.mreq;
264         if (vp)
265                 vput(vp);
266         return(error);
267 }
268
269 /*
270  * nfs setattr service
271  */
272 int
273 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
274               struct thread *td, struct mbuf **mrq)
275 {
276         struct sockaddr *nam = nfsd->nd_nam;
277         struct ucred *cred = &nfsd->nd_cr;
278         struct vattr va, preat;
279         struct vattr *vap = &va;
280         struct nfsv2_sattr *sp;
281         struct nfs_fattr *fp;
282         struct vnode *vp = NULL;
283         struct mount *mp = NULL;
284         nfsfh_t nfh;
285         fhandle_t *fhp;
286         u_int32_t *tl;
287         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
288         int gcheck = 0;
289         struct timespec guard;
290         struct nfsm_info info;
291
292         info.mrep = nfsd->nd_mrep;
293         info.mreq = NULL;
294         info.md = nfsd->nd_md;
295         info.dpos = nfsd->nd_dpos;
296         info.v3 = (nfsd->nd_flag & ND_NFSV3);
297
298         guard.tv_sec = 0;       /* fix compiler warning */
299         guard.tv_nsec = 0;
300
301         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
302         fhp = &nfh.fh_generic;
303         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
304         VATTR_NULL(vap);
305         if (info.v3) {
306                 ERROROUT(nfsm_srvsattr(&info, vap));
307                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
308                 gcheck = fxdr_unsigned(int, *tl);
309                 if (gcheck) {
310                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
311                         fxdr_nfsv3time(tl, &guard);
312                 }
313         } else {
314                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
315                 /*
316                  * Nah nah nah nah na nah
317                  * There is a bug in the Sun client that puts 0xffff in the mode
318                  * field of sattr when it should put in 0xffffffff. The u_short
319                  * doesn't sign extend.
320                  * --> check the low order 2 bytes for 0xffff
321                  */
322                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
323                         vap->va_mode = nfstov_mode(sp->sa_mode);
324                 if (sp->sa_uid != nfs_xdrneg1)
325                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
326                 if (sp->sa_gid != nfs_xdrneg1)
327                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
328                 if (sp->sa_size != nfs_xdrneg1)
329                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
330                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
331 #ifdef notyet
332                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
333 #else
334                         vap->va_atime.tv_sec =
335                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
336                         vap->va_atime.tv_nsec = 0;
337 #endif
338                 }
339                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
340                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
341
342         }
343
344         /*
345          * Now that we have all the fields, lets do it.
346          */
347         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
348                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
349         if (error) {
350                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
351                                       2 * NFSX_UNSIGNED, &error));
352                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
353                                  postat_ret, vap);
354                 error = 0;
355                 goto nfsmout;
356         }
357
358         /*
359          * vp now an active resource, pay careful attention to cleanup
360          */
361
362         if (info.v3) {
363                 error = preat_ret = VOP_GETATTR(vp, &preat);
364                 if (!error && gcheck &&
365                         (preat.va_ctime.tv_sec != guard.tv_sec ||
366                          preat.va_ctime.tv_nsec != guard.tv_nsec))
367                         error = NFSERR_NOT_SYNC;
368                 if (error) {
369                         vput(vp);
370                         vp = NULL;
371                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
372                                               NFSX_WCCDATA(info.v3), &error));
373                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
374                                          postat_ret, vap);
375                         error = 0;
376                         goto nfsmout;
377                 }
378         }
379
380         /*
381          * If the size is being changed write acces is required, otherwise
382          * just check for a read only file system.
383          */
384         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
385                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
386                         error = EROFS;
387                         goto out;
388                 }
389         } else {
390                 if (vp->v_type == VDIR) {
391                         error = EISDIR;
392                         goto out;
393                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
394                             td, 0)) != 0){ 
395                         goto out;
396                 }
397         }
398         error = VOP_SETATTR(vp, vap, cred);
399         postat_ret = VOP_GETATTR(vp, vap);
400         if (!error)
401                 error = postat_ret;
402 out:
403         vput(vp);
404         vp = NULL;
405         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
406                    NFSX_WCCORFATTR(info.v3), &error));
407         if (info.v3) {
408                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
409                                  postat_ret, vap);
410                 error = 0;
411                 goto nfsmout;
412         } else {
413                 fp = nfsm_build(&info, NFSX_V2FATTR);
414                 nfsm_srvfattr(nfsd, vap, fp);
415         }
416         /* fall through */
417
418 nfsmout:
419         *mrq = info.mreq;
420         if (vp)
421                 vput(vp);
422         return(error);
423 }
424
425 /*
426  * nfs lookup rpc
427  */
428 int
429 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
430              struct thread *td, struct mbuf **mrq)
431 {
432         struct sockaddr *nam = nfsd->nd_nam;
433         struct ucred *cred = &nfsd->nd_cr;
434         struct nfs_fattr *fp;
435         struct nlookupdata nd;
436         struct vnode *vp;
437         struct vnode *dirp;
438         struct nchandle nch;
439         nfsfh_t nfh;
440         fhandle_t *fhp;
441         int error = 0, len, dirattr_ret = 1;
442         int pubflag;
443         struct vattr va, dirattr, *vap = &va;
444         struct nfsm_info info;
445
446         info.mrep = nfsd->nd_mrep;
447         info.mreq = NULL;
448         info.md = nfsd->nd_md;
449         info.dpos = nfsd->nd_dpos;
450         info.v3 = (nfsd->nd_flag & ND_NFSV3);
451
452         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
453         nlookup_zero(&nd);
454         dirp = NULL;
455         vp = NULL;
456
457         fhp = &nfh.fh_generic;
458         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
459         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
460
461         pubflag = nfs_ispublicfh(fhp);
462
463         error = nfs_namei(&nd, cred, 0, NULL, &vp,
464                 fhp, len, slp, nam, &info.md, &info.dpos,
465                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
466
467         /*
468          * namei failure, only dirp to cleanup.  Clear out garbarge from
469          * structure in case macros jump to nfsmout.
470          */
471
472         if (error) {
473                 if (dirp) {
474                         if (info.v3)
475                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
476                         vrele(dirp);
477                         dirp = NULL;
478                 }
479                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
480                                       NFSX_POSTOPATTR(info.v3), &error));
481                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
482                 error = 0;
483                 goto nfsmout;
484         }
485
486         /*
487          * Locate index file for public filehandle
488          *
489          * error is 0 on entry and 0 on exit from this block.
490          */
491
492         if (pubflag) {
493                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
494                         /*
495                          * Setup call to lookup() to see if we can find
496                          * the index file. Arguably, this doesn't belong
497                          * in a kernel.. Ugh.  If an error occurs, do not
498                          * try to install an index file and then clear the
499                          * error.
500                          *
501                          * When we replace nd with ind and redirect ndp,
502                          * maintenance of ni_startdir and ni_vp shift to
503                          * ind and we have to clean them up in the old nd.
504                          * However, the cnd resource continues to be maintained
505                          * via the original nd.  Confused?  You aren't alone!
506                          */
507                         vn_unlock(vp);
508                         cache_copy(&nd.nl_nch, &nch);
509                         nlookup_done(&nd);
510                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
511                                                 UIO_SYSSPACE, 0, cred, &nch);
512                         cache_drop(&nch);
513                         if (error == 0)
514                                 error = nlookup(&nd);
515
516                         if (error == 0) {
517                                 /*
518                                  * Found an index file. Get rid of
519                                  * the old references.  transfer vp and
520                                  * load up the new vp.  Fortunately we do
521                                  * not have to deal with dvp, that would be
522                                  * a huge mess.
523                                  */
524                                 if (dirp)       
525                                         vrele(dirp);
526                                 dirp = vp;
527                                 vp = NULL;
528                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
529                                                         LK_EXCLUSIVE, &vp);
530                                 KKASSERT(error == 0);
531                         }
532                         error = 0;
533                 }
534                 /*
535                  * If the public filehandle was used, check that this lookup
536                  * didn't result in a filehandle outside the publicly exported
537                  * filesystem.  We clear the poor vp here to avoid lockups due
538                  * to NFS I/O.
539                  */
540
541                 if (vp->v_mount != nfs_pub.np_mount) {
542                         vput(vp);
543                         vp = NULL;
544                         error = EPERM;
545                 }
546         }
547
548         if (dirp) {
549                 if (info.v3)
550                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
551                 vrele(dirp);
552                 dirp = NULL;
553         }
554
555         /*
556          * Resources at this point:
557          *      ndp->ni_vp      may not be NULL
558          *
559          */
560
561         if (error) {
562                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
563                                       NFSX_POSTOPATTR(info.v3), &error));
564                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
565                 error = 0;
566                 goto nfsmout;
567         }
568
569         /*
570          * Clear out some resources prior to potentially blocking.  This
571          * is not as critical as ni_dvp resources in other routines, but
572          * it helps.
573          */
574         nlookup_done(&nd);
575
576         /*
577          * Get underlying attribute, then release remaining resources ( for
578          * the same potential blocking reason ) and reply.
579          */
580         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
581         error = VFS_VPTOFH(vp, &fhp->fh_fid);
582         if (!error)
583                 error = VOP_GETATTR(vp, vap);
584
585         vput(vp);
586         vp = NULL;
587         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
588                               NFSX_SRVFH(info.v3) +
589                               NFSX_POSTOPORFATTR(info.v3) +
590                               NFSX_POSTOPATTR(info.v3),
591                               &error));
592         if (error) {
593                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
594                 error = 0;
595                 goto nfsmout;
596         }
597         nfsm_srvfhtom(&info, fhp);
598         if (info.v3) {
599                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
600                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
601         } else {
602                 fp = nfsm_build(&info, NFSX_V2FATTR);
603                 nfsm_srvfattr(nfsd, vap, fp);
604         }
605
606 nfsmout:
607         *mrq = info.mreq;
608         if (dirp)
609                 vrele(dirp);
610         nlookup_done(&nd);              /* may be called twice */
611         if (vp)
612                 vput(vp);
613         return (error);
614 }
615
616 /*
617  * nfs readlink service
618  */
619 int
620 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
621                struct thread *td, struct mbuf **mrq)
622 {
623         struct sockaddr *nam = nfsd->nd_nam;
624         struct ucred *cred = &nfsd->nd_cr;
625         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
626         struct iovec *ivp = iv;
627         u_int32_t *tl;
628         int error = 0, rdonly, i, tlen, len, getret;
629         struct mbuf *mp1, *mp2, *mp3;
630         struct vnode *vp = NULL;
631         struct mount *mp = NULL;
632         struct vattr attr;
633         nfsfh_t nfh;
634         fhandle_t *fhp;
635         struct uio io, *uiop = &io;
636         struct nfsm_info info;
637
638         info.mrep = nfsd->nd_mrep;
639         info.mreq = NULL;
640         info.md = nfsd->nd_md;
641         info.dpos = nfsd->nd_dpos;
642         info.v3 = (nfsd->nd_flag & ND_NFSV3);
643
644         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
645 #ifndef nolint
646         mp2 = NULL;
647 #endif
648         mp3 = NULL;
649         fhp = &nfh.fh_generic;
650         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
651         len = 0;
652         i = 0;
653         while (len < NFS_MAXPATHLEN) {
654                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
655                 mp1->m_len = MCLBYTES;
656                 if (len == 0)
657                         mp3 = mp2 = mp1;
658                 else {
659                         mp2->m_next = mp1;
660                         mp2 = mp1;
661                 }
662                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
663                         mp1->m_len = NFS_MAXPATHLEN-len;
664                         len = NFS_MAXPATHLEN;
665                 } else
666                         len += mp1->m_len;
667                 ivp->iov_base = mtod(mp1, caddr_t);
668                 ivp->iov_len = mp1->m_len;
669                 i++;
670                 ivp++;
671         }
672         uiop->uio_iov = iv;
673         uiop->uio_iovcnt = i;
674         uiop->uio_offset = 0;
675         uiop->uio_resid = len;
676         uiop->uio_rw = UIO_READ;
677         uiop->uio_segflg = UIO_SYSSPACE;
678         uiop->uio_td = NULL;
679         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
680                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
681         if (error) {
682                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
683                                       2 * NFSX_UNSIGNED, &error));
684                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
685                 error = 0;
686                 goto nfsmout;
687         }
688         if (vp->v_type != VLNK) {
689                 if (info.v3)
690                         error = EINVAL;
691                 else
692                         error = ENXIO;
693                 goto out;
694         }
695         error = VOP_READLINK(vp, uiop, cred);
696 out:
697         getret = VOP_GETATTR(vp, &attr);
698         vput(vp);
699         vp = NULL;
700         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
701                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
702                              &error));
703         if (info.v3) {
704                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
705                 if (error) {
706                         error = 0;
707                         goto nfsmout;
708                 }
709         }
710         if (uiop->uio_resid > 0) {
711                 len -= uiop->uio_resid;
712                 tlen = nfsm_rndup(len);
713                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
714         }
715         tl = nfsm_build(&info, NFSX_UNSIGNED);
716         *tl = txdr_unsigned(len);
717         info.mb->m_next = mp3;
718         mp3 = NULL;
719 nfsmout:
720         *mrq = info.mreq;
721         if (mp3)
722                 m_freem(mp3);
723         if (vp)
724                 vput(vp);
725         return(error);
726 }
727
728 /*
729  * nfs read service
730  */
731 int
732 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
733            struct thread *td, struct mbuf **mrq)
734 {
735         struct nfsm_info info;
736         struct sockaddr *nam = nfsd->nd_nam;
737         struct ucred *cred = &nfsd->nd_cr;
738         struct iovec *iv;
739         struct iovec *iv2;
740         struct mbuf *m;
741         struct nfs_fattr *fp;
742         u_int32_t *tl;
743         int i;
744         int reqlen;
745         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
746         struct mbuf *m2;
747         struct vnode *vp = NULL;
748         struct mount *mp = NULL;
749         nfsfh_t nfh;
750         fhandle_t *fhp;
751         struct uio io, *uiop = &io;
752         struct vattr va, *vap = &va;
753         struct nfsheur *nh;
754         off_t off;
755         int ioflag = 0;
756
757         info.mrep = nfsd->nd_mrep;
758         info.mreq = NULL;
759         info.md = nfsd->nd_md;
760         info.dpos = nfsd->nd_dpos;
761         info.v3 = (nfsd->nd_flag & ND_NFSV3);
762
763         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
764         fhp = &nfh.fh_generic;
765         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
766         if (info.v3) {
767                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
768                 off = fxdr_hyper(tl);
769         } else {
770                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
771                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
772         }
773         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
774                                             NFS_SRVMAXDATA(nfsd), &error));
775
776         /*
777          * Reference vp.  If an error occurs, vp will be invalid, but we
778          * have to NULL it just in case.  The macros might goto nfsmout
779          * as well.
780          */
781
782         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
783                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
784         if (error) {
785                 vp = NULL;
786                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
787                                       2 * NFSX_UNSIGNED, &error));
788                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
789                 error = 0;
790                 goto nfsmout;
791         }
792
793         if (vp->v_type != VREG) {
794                 if (info.v3)
795                         error = EINVAL;
796                 else
797                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
798         }
799         if (!error) {
800             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
801                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
802         }
803         getret = VOP_GETATTR(vp, vap);
804         if (!error)
805                 error = getret;
806         if (error) {
807                 vput(vp);
808                 vp = NULL;
809                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
810                                       NFSX_POSTOPATTR(info.v3), &error));
811                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
812                 error = 0;
813                 goto nfsmout;
814         }
815
816         /*
817          * Calculate byte count to read
818          */
819
820         if (off >= vap->va_size)
821                 cnt = 0;
822         else if ((off + reqlen) > vap->va_size)
823                 cnt = vap->va_size - off;
824         else
825                 cnt = reqlen;
826
827         /*
828          * Calculate seqcount for heuristic
829          */
830
831         {
832                 int hi;
833                 int try = 32;
834
835                 /*
836                  * Locate best candidate
837                  */
838
839                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
840                 nh = &nfsheur[hi];
841
842                 while (try--) {
843                         if (nfsheur[hi].nh_vp == vp) {
844                                 nh = &nfsheur[hi];
845                                 break;
846                         }
847                         if (nfsheur[hi].nh_use > 0)
848                                 --nfsheur[hi].nh_use;
849                         hi = (hi + 1) % NUM_HEURISTIC;
850                         if (nfsheur[hi].nh_use < nh->nh_use)
851                                 nh = &nfsheur[hi];
852                 }
853
854                 if (nh->nh_vp != vp) {
855                         nh->nh_vp = vp;
856                         nh->nh_nextr = off;
857                         nh->nh_use = NHUSE_INIT;
858                         if (off == 0)
859                                 nh->nh_seqcount = 4;
860                         else
861                                 nh->nh_seqcount = 1;
862                 }
863
864                 /*
865                  * Calculate heuristic
866                  */
867
868                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
869                         if (++nh->nh_seqcount > IO_SEQMAX)
870                                 nh->nh_seqcount = IO_SEQMAX;
871                 } else if (nh->nh_seqcount > 1) {
872                         nh->nh_seqcount = 1;
873                 } else {
874                         nh->nh_seqcount = 0;
875                 }
876                 nh->nh_use += NHUSE_INC;
877                 if (nh->nh_use > NHUSE_MAX)
878                         nh->nh_use = NHUSE_MAX;
879                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
880         }
881
882         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
883                               NFSX_POSTOPORFATTR(info.v3) +
884                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
885                               &error));
886         if (info.v3) {
887                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
888                 *tl++ = nfs_true;
889                 fp = (struct nfs_fattr *)tl;
890                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
891         } else {
892                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
893                 fp = (struct nfs_fattr *)tl;
894                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
895         }
896         len = left = nfsm_rndup(cnt);
897         if (cnt > 0) {
898                 /*
899                  * Generate the mbuf list with the uio_iov ref. to it.
900                  */
901                 i = 0;
902                 m = m2 = info.mb;
903                 while (left > 0) {
904                         siz = min(M_TRAILINGSPACE(m), left);
905                         if (siz > 0) {
906                                 left -= siz;
907                                 i++;
908                         }
909                         if (left > 0) {
910                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
911                                 m->m_len = 0;
912                                 m2->m_next = m;
913                                 m2 = m;
914                         }
915                 }
916                 MALLOC(iv, struct iovec *, i * sizeof (struct iovec),
917                        M_TEMP, M_WAITOK);
918                 uiop->uio_iov = iv2 = iv;
919                 m = info.mb;
920                 left = len;
921                 i = 0;
922                 while (left > 0) {
923                         if (m == NULL)
924                                 panic("nfsrv_read iov");
925                         siz = min(M_TRAILINGSPACE(m), left);
926                         if (siz > 0) {
927                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
928                                 iv->iov_len = siz;
929                                 m->m_len += siz;
930                                 left -= siz;
931                                 iv++;
932                                 i++;
933                         }
934                         m = m->m_next;
935                 }
936                 uiop->uio_iovcnt = i;
937                 uiop->uio_offset = off;
938                 uiop->uio_resid = len;
939                 uiop->uio_rw = UIO_READ;
940                 uiop->uio_segflg = UIO_SYSSPACE;
941                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
942                 off = uiop->uio_offset;
943                 nh->nh_nextr = off;
944                 FREE((caddr_t)iv2, M_TEMP);
945                 if (error || (getret = VOP_GETATTR(vp, vap))) {
946                         if (!error)
947                                 error = getret;
948                         m_freem(info.mreq);
949                         info.mreq = NULL;
950                         vput(vp);
951                         vp = NULL;
952                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
953                                               NFSX_POSTOPATTR(info.v3),
954                                               &error));
955                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
956                         error = 0;
957                         goto nfsmout;
958                 }
959         } else {
960                 uiop->uio_resid = 0;
961         }
962         vput(vp);
963         vp = NULL;
964         nfsm_srvfattr(nfsd, vap, fp);
965         tlen = len - uiop->uio_resid;
966         cnt = cnt < tlen ? cnt : tlen;
967         tlen = nfsm_rndup(cnt);
968         if (len != tlen || tlen != cnt)
969                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
970         if (info.v3) {
971                 *tl++ = txdr_unsigned(cnt);
972                 if (len < reqlen)
973                         *tl++ = nfs_true;
974                 else
975                         *tl++ = nfs_false;
976         }
977         *tl = txdr_unsigned(cnt);
978 nfsmout:
979         *mrq = info.mreq;
980         if (vp)
981                 vput(vp);
982         return(error);
983 }
984
985 /*
986  * nfs write service
     *
     * Services a single write RPC:
     *  - dissects the file handle, offset, stability level (v3 only) and
     *    byte count from the request,
     *  - trims the request mbuf chain in place so that only the write
     *    payload keeps a non-zero m_len,
     *  - maps the file handle to a vnode with nfsrv_fhtovp(), rejects
     *    anything that is not VREG and checks VWRITE access,
     *  - points an iovec array at the payload mbufs and calls VOP_WRITE(),
     *  - builds the reply: wcc data, count, commit level and write verifier
     *    for v3, the file attributes for v2.
     *
     * nfsd - request descriptor; nd_mrep/nd_md/nd_dpos/nd_flag/nd_nam/nd_cr
     *        are read here.
     * slp  - handed on to nfsrv_fhtovp() and nfsm_reply().
     * td   - handed on to nfsrv_access().
     * mrq  - *mrq receives info.mreq at nfsmout (it starts out NULL;
     *        presumably nfsm_reply() is what fills it in).
     *
     * Returns error.  Each error branch below that has built a reply clears
     * error to 0 before jumping to nfsmout.
     * NOTE(review): NEGREPLYOUT/NULLOUT/NEGKEEPOUT are defined outside this
     * function and appear to branch to nfsmout on failure; confirm what they
     * leave in error.
987  */
988 int
989 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
990             struct thread *td, struct mbuf **mrq)
991 {
992         struct sockaddr *nam = nfsd->nd_nam;
993         struct ucred *cred = &nfsd->nd_cr;
994         struct iovec *ivp;
995         int i, cnt;
996         struct mbuf *mp1;
997         struct nfs_fattr *fp;
998         struct iovec *iv;
999         struct vattr va, forat;
1000         struct vattr *vap = &va;
1001         u_int32_t *tl;
1002         int error = 0, rdonly, len, forat_ret = 1;
1003         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1004         int stable = NFSV3WRITE_FILESYNC;
1005         struct vnode *vp = NULL;
1006         struct mount *mp = NULL;
1007         nfsfh_t nfh;
1008         fhandle_t *fhp;
1009         struct uio io, *uiop = &io;
1010         struct nfsm_info info;
1011         off_t off;
1012
1013         info.mrep = nfsd->nd_mrep;
1014         info.mreq = NULL;
1015         info.md = nfsd->nd_md;
1016         info.dpos = nfsd->nd_dpos;
1017         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1018
1019         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /* No request to process: return 0 with *mrq left NULL. */
1020         if (info.mrep == NULL) {
1021                 error = 0;
1022                 goto nfsmout;
1023         }
1024         fhp = &nfh.fh_generic;
1025         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
             /*
              * v3: words 0-1 hold the 64-bit offset, word 2 is skipped, word 3
              * is the stability level and word 4 (left in *tl) the byte count.
              * v2: words 0 and 2 are skipped, word 1 is the 32-bit offset and
              * word 3 (left in *tl) the byte count; with nfs_async set the
              * write is treated as UNSTABLE.
              */
1026         if (info.v3) {
1027                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1028                 off = fxdr_hyper(tl);
1029                 tl += 3;
1030                 stable = fxdr_unsigned(int, *tl++);
1031         } else {
1032                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1033                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1034                 tl += 2;
1035                 if (nfs_async)
1036                         stable = NFSV3WRITE_UNSTABLE;
1037         }
             /* retlen keeps the requested count for the v3 reply. */
1038         retlen = len = fxdr_unsigned(int32_t, *tl);
1039         cnt = i = 0;
1040
1041         /*
1042          * For NFS Version 2, it is not obvious what a write of zero length
1043          * should do, but I might as well be consistent with Version 3,
1044          * which is to return ok so long as there are no permission problems.
1045          */
1046         if (len > 0) {
                 /*
                  * Walk the whole request chain.  Mbufs ahead of info.md get
                  * m_len 0; info.md itself is shortened by the bytes already
                  * dissected (and m_data advanced if anything remains).  From
                  * there on i accumulates payload bytes and cnt counts the
                  * mbufs that still hold payload.  Once i passes len the
                  * excess is cut from the current mbuf and every later mbuf
                  * is zeroed again.
                  */
1047             zeroing = 1;
1048             mp1 = info.mrep;
1049             while (mp1) {
1050                 if (mp1 == info.md) {
1051                         zeroing = 0;
1052                         adjust = info.dpos - mtod(mp1, caddr_t);
1053                         mp1->m_len -= adjust;
1054                         if (mp1->m_len > 0 && adjust > 0)
1055                                 mp1->m_data += adjust;
1056                 }
1057                 if (zeroing)
1058                         mp1->m_len = 0;
1059                 else if (mp1->m_len > 0) {
1060                         i += mp1->m_len;
1061                         if (i > len) {
1062                                 mp1->m_len -= (i - len);
1063                                 zeroing = 1;
1064                         }
1065                         if (mp1->m_len > 0)
1066                                 cnt++;
1067                 }
1068                 mp1 = mp1->m_next;
1069             }
1070         }
             /*
              * Reject oversized or negative counts, and requests that carried
              * fewer payload bytes (i) than they claimed (len).
              */
1071         if (len > NFS_MAXDATA || len < 0 || i < len) {
1072                 error = EIO;
1073                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1074                                       2 * NFSX_UNSIGNED, &error));
1075                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1076                                  aftat_ret, vap);
1077                 error = 0;
1078                 goto nfsmout;
1079         }
1080         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1081                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1082         if (error) {
                     /* vp is forced to NULL so nfsmout does not vput() it. */
1083                 vp = NULL;
1084                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1085                                       2 * NFSX_UNSIGNED, &error));
1086                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1087                                  aftat_ret, vap);
1088                 error = 0;
1089                 goto nfsmout;
1090         }
             /* Pre-operation attributes are only fetched for v3. */
1091         if (info.v3)
1092                 forat_ret = VOP_GETATTR(vp, &forat);
1093         if (vp->v_type != VREG) {
1094                 if (info.v3)
1095                         error = EINVAL;
1096                 else
1097                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1098         }
1099         if (!error) {
1100                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1101         }
1102         if (error) {
1103                 vput(vp);
1104                 vp = NULL;
1105                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1106                                       NFSX_WCCDATA(info.v3), &error));
1107                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1108                                  aftat_ret, vap);
1109                 error = 0;
1110                 goto nfsmout;
1111         }
1112
1113         if (len > 0) {
                 /*
                  * One iovec per mbuf that still has payload.  The array is
                  * sized with cnt from the trimming loop above, which counted
                  * exactly the mbufs that pass the m_len > 0 test here.
                  */
1114             MALLOC(ivp, struct iovec *, cnt * sizeof (struct iovec), M_TEMP,
1115                 M_WAITOK);
1116             uiop->uio_iov = iv = ivp;
1117             uiop->uio_iovcnt = cnt;
1118             mp1 = info.mrep;
1119             while (mp1) {
1120                 if (mp1->m_len > 0) {
1121                         ivp->iov_base = mtod(mp1, caddr_t);
1122                         ivp->iov_len = mp1->m_len;
1123                         ivp++;
1124                 }
1125                 mp1 = mp1->m_next;
1126             }
1127
1128             /*
1129              * XXX
1130              * The IO_METASYNC flag indicates that all metadata (and not just
1131              * enough to ensure data integrity) must be written to stable storage
1132              * synchronously.
1133              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1134              */
1135             if (stable == NFSV3WRITE_UNSTABLE)
1136                 ioflags = IO_NODELOCKED;
1137             else if (stable == NFSV3WRITE_DATASYNC)
1138                 ioflags = (IO_SYNC | IO_NODELOCKED);
1139             else
1140                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1141             uiop->uio_resid = len;
1142             uiop->uio_rw = UIO_WRITE;
1143             uiop->uio_segflg = UIO_SYSSPACE;
1144             uiop->uio_td = NULL;
1145             uiop->uio_offset = off;
1146             error = VOP_WRITE(vp, uiop, ioflags, cred);
1147             nfsstats.srvvop_writes++;
                 /* iv still points at the array start; ivp was advanced above. */
1148             FREE((caddr_t)iv, M_TEMP);
1149         }
             /*
              * Post-operation attributes are fetched whether or not the write
              * succeeded; a getattr failure only becomes the reported error
              * when the write itself returned 0.
              */
1150         aftat_ret = VOP_GETATTR(vp, vap);
1151         vput(vp);
1152         vp = NULL;
1153         if (!error)
1154                 error = aftat_ret;
1155         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1156                               NFSX_PREOPATTR(info.v3) +
1157                               NFSX_POSTOPORFATTR(info.v3) +
1158                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1159                               &error));
1160         if (info.v3) {
1161                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1162                                  aftat_ret, vap);
1163                 if (error) {
1164                         error = 0;
1165                         goto nfsmout;
1166                 }
                     /* Four words: count, commit level, verifier sec, verifier usec. */
1167                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1168                 *tl++ = txdr_unsigned(retlen);
1169                 /*
1170                  * If nfs_async is set, then pretend the write was FILESYNC.
                      * Note that a DATASYNC request also takes the else branch
                      * and is therefore reported back as FILESYNC.
1171                  */
1172                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1173                         *tl++ = txdr_unsigned(stable);
1174                 else
1175                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1176                 /*
1177                  * Actually, there is no need to txdr these fields,
1178                  * but it may make the values more human readable,
1179                  * for debugging purposes.
                      *
                      * nfsver is seeded from boottime the first time it is
                      * found with tv_sec == 0.
1180                  */
1181                 if (nfsver.tv_sec == 0)
1182                         nfsver = boottime;
1183                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1184                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1185         } else {
1186                 fp = nfsm_build(&info, NFSX_V2FATTR);
1187                 nfsm_srvfattr(nfsd, vap, fp);
1188         }
1189 nfsmout:
             /* Common exit: hand back the reply and drop vp if still held. */
1190         *mrq = info.mreq;
1191         if (vp)
1192                 vput(vp);
1193         return(error);
1194 }
1195
1196 /*
1197  * NFS write service with write gathering support. Called when
1198  * nfsrvw_procrastinate > 0.
1199  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1200  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1201  * Jan. 1994.
      *
      * The function works in three phases:
      *  1. If *ndp is non-NULL it is a new write request.  Its header is
      *     parsed into nd_off/nd_len/nd_eoff/nd_stable, its mbuf chain is
      *     trimmed to the write payload, and it is queued on slp->ns_tq
      *     (ordered by nd_time) and on a per-file-handle hash chain (ordered
      *     by offset), coalescing with contiguous neighbours.  A malformed
      *     request gets an EIO reply built immediately instead.
      *  2. Every queued descriptor whose nd_time has been reached and that
      *     has no reply yet is written with VOP_WRITE(), and replies are
      *     generated for it and for everything coalesced into it.
      *  3. The first descriptor on slp->ns_tq that has a reply is unlinked
      *     and returned.
      *
      * ndp - in: request to queue, or pointer to NULL to only run phases 2/3.
      *       out: descriptor whose reply is being returned, or NULL.
      * slp - socket state holding the time queue and the delay hash.
      * td  - handed on to nfsrv_access().
      * mrq - out: nd_mreq of the returned descriptor, or NULL.
      *
      * Always returns 0; per-request errors are encoded in the replies.
1202  */
1203 int
1204 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1205                   struct thread *td, struct mbuf **mrq)
1206 {
1207         struct iovec *ivp;
1208         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1209         struct nfs_fattr *fp;
1210         int i;
1211         struct iovec *iov;
1212         struct nfsrvw_delayhash *wpp;
1213         struct ucred *cred;
1214         struct vattr va, forat;
1215         u_int32_t *tl;
1216         int error = 0, rdonly, len, forat_ret = 1;
1217         int ioflags, aftat_ret = 1, adjust, zeroing;
1218         struct mbuf *mp1;
1219         struct vnode *vp = NULL;
1220         struct mount *mp = NULL;
1221         struct uio io, *uiop = &io;
1222         u_quad_t cur_usec;
1223         struct nfsm_info info;
1224
1225         info.mreq = NULL;
1226
1227         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1228 #ifndef nolint
1229         i = 0;
1230         len = 0;
1231 #endif
              /* Phase 1: take ownership of the new request, if there is one. */
1232         if (*ndp) {
1233             nfsd = *ndp;
1234             *ndp = NULL;
1235             info.mrep = nfsd->nd_mrep;
1236             info.mreq = NULL;
1237             info.md = nfsd->nd_md;
1238             info.dpos = nfsd->nd_dpos;
1239             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1240             cred = &nfsd->nd_cr;
1241             LIST_INIT(&nfsd->nd_coalesce);
1242             nfsd->nd_mreq = NULL;
1243             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
                  /* nd_time is the time at which this write becomes due. */
1244             cur_usec = nfs_curusec();
1245             nfsd->nd_time = cur_usec +
1246                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1247     
1248             /*
1249              * Now, get the write header..
                   *
                   * v3: words 0-1 are the 64-bit offset, word 2 is skipped,
                   * word 3 is the stability level, word 4 the byte count.
                   * v2: words 0 and 2 are skipped, word 1 is the 32-bit offset,
                   * word 3 the byte count; nfs_async downgrades to UNSTABLE.
1250              */
1251             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1252             if (info.v3) {
1253                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1254                 nfsd->nd_off = fxdr_hyper(tl);
1255                 tl += 3;
1256                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1257             } else {
1258                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1259                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1260                 tl += 2;
1261                 if (nfs_async)
1262                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1263             }
1264             len = fxdr_unsigned(int32_t, *tl);
1265             nfsd->nd_len = len;
1266             nfsd->nd_eoff = nfsd->nd_off + len;
1267     
1268             /*
1269              * Trim the header out of the mbuf list and trim off any trailing
1270              * junk so that the mbuf list has only the write data.
                   *
                   * Mbufs ahead of info.md get m_len 0; info.md is shortened by
                   * the bytes already dissected; i then accumulates payload
                   * bytes, and once it passes len the excess is cut and all
                   * later mbufs are zeroed.
1271              */
1272             zeroing = 1;
1273             i = 0;
1274             mp1 = info.mrep;
1275             while (mp1) {
1276                 if (mp1 == info.md) {
1277                     zeroing = 0;
1278                     adjust = info.dpos - mtod(mp1, caddr_t);
1279                     mp1->m_len -= adjust;
1280                     if (mp1->m_len > 0 && adjust > 0)
1281                         mp1->m_data += adjust;
1282                 }
1283                 if (zeroing)
1284                     mp1->m_len = 0;
1285                 else {
1286                     i += mp1->m_len;
1287                     if (i > len) {
1288                         mp1->m_len -= (i - len);
1289                         zeroing = 1;
1290                     }
1291                 }
1292                 mp1 = mp1->m_next;
1293             }
                  /*
                   * Malformed request: bad count, or fewer payload bytes than
                   * claimed.  The nfsmout label sits inside this body.
                   * NOTE(review): the NEGREPLYOUT/NULLOUT macros above are
                   * defined elsewhere and appear to jump here on failure.
                   * Whatever error was set before is replaced by EIO.  The
                   * descriptor ends up with a ready nd_mreq, no nd_mrep and
                   * nd_time 0, and still falls through to the time-queue
                   * insertion below so the final phase can return it.
                   */
1294             if (len > NFS_MAXDATA || len < 0  || i < len) {
1295 nfsmout:
1296                 m_freem(info.mrep);
1297                 info.mrep = NULL;
1298                 error = EIO;
1299                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1300                 if (info.v3) {
1301                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1302                                      aftat_ret, &va);
1303                 }
1304                 nfsd->nd_mreq = info.mreq;
1305                 nfsd->nd_mrep = NULL;
1306                 nfsd->nd_time = 0;
1307             }
1308     
1309             /*
1310              * Add this entry to the hash and time queues.
                   *
                   * Time queue: insert after the last entry whose nd_time is
                   * smaller than ours, keeping the list ordered by nd_time.
1311              */
1312             owp = NULL;
1313             wp = slp->ns_tq.lh_first;
1314             while (wp && wp->nd_time < nfsd->nd_time) {
1315                 owp = wp;
1316                 wp = wp->nd_tq.le_next;
1317             }
1318             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1319             if (owp) {
1320                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1321             } else {
1322                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1323             }
                  /* Only requests that still carry data go on the hash chain. */
1324             if (nfsd->nd_mrep) {
1325                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1326                 owp = NULL;
1327                 wp = wpp->lh_first;
                      /* Skip entries for other file handles ... */
1328                 while (wp &&
1329                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1330                     owp = wp;
1331                     wp = wp->nd_hash.le_next;
1332                 }
                      /* ... then same-handle entries with a lower offset. */
1333                 while (wp && wp->nd_off < nfsd->nd_off &&
1334                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1335                     owp = wp;
1336                     wp = wp->nd_hash.le_next;
1337                 }
1338                 if (owp) {
1339                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1340
1341                     /*
1342                      * Search the hash list for overlapping entries and
1343                      * coalesce.
                           *
                           * Starts with the entry just inserted.  The successor
                           * is saved in wp before nfsrvw_coalesce() runs because
                           * that call unlinks nfsd from the hash chain.
1344                      */
1345                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1346                         wp = nfsd->nd_hash.le_next;
1347                         if (NFSW_SAMECRED(owp, nfsd))
1348                             nfsrvw_coalesce(owp, nfsd);
1349                     }
1350                 } else {
1351                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1352                 }
1353             }
1354         }
1355     
1356         /*
1357          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1358          * and generate the associated reply mbuf list(s).
1359          */
1360 loop1:
1361         cur_usec = nfs_curusec();
1362         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1363                 owp = nfsd->nd_tq.le_next;
                      /* Stop at the first entry that is not yet due. */
1364                 if (nfsd->nd_time > cur_usec)
1365                     break;
                      /* Already has a reply; nothing left to write. */
1366                 if (nfsd->nd_mreq)
1367                     continue;
1368                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1369                 LIST_REMOVE(nfsd, nd_tq);
1370                 LIST_REMOVE(nfsd, nd_hash);
1371                 info.mrep = nfsd->nd_mrep;
1372                 info.mreq = NULL;
1373                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1374                 nfsd->nd_mrep = NULL;
1375                 cred = &nfsd->nd_cr;
1376                 forat_ret = aftat_ret = 1;
1377                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1378                                      nfsd->nd_nam, &rdonly,
1379                                      (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1380                 if (!error) {
1381                     if (info.v3)
1382                         forat_ret = VOP_GETATTR(vp, &forat);
1383                     if (vp->v_type != VREG) {
1384                         if (info.v3)
1385                             error = EINVAL;
1386                         else
1387                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1388                     }
1389                 } else {
1390                     vp = NULL;
1391                 }
1392                 if (!error) {
1393                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1394                 }
1395     
1396                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1397                     ioflags = IO_NODELOCKED;
1398                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1399                     ioflags = (IO_SYNC | IO_NODELOCKED);
1400                 else
1401                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1402                 uiop->uio_rw = UIO_WRITE;
1403                 uiop->uio_segflg = UIO_SYSSPACE;
1404                 uiop->uio_td = NULL;
1405                 uiop->uio_offset = nfsd->nd_off;
                      /* nd_eoff may have been extended by coalescing. */
1406                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1407                 if (uiop->uio_resid > 0) {
                          /*
                           * Count the mbufs holding payload, then point one
                           * iovec at each.  The array is built even when an
                           * earlier step failed; only VOP_WRITE() is skipped.
                           */
1408                     mp1 = info.mrep;
1409                     i = 0;
1410                     while (mp1) {
1411                         if (mp1->m_len > 0)
1412                             i++;
1413                         mp1 = mp1->m_next;
1414                     }
1415                     uiop->uio_iovcnt = i;
1416                     MALLOC(iov, struct iovec *, i * sizeof (struct iovec), 
1417                         M_TEMP, M_WAITOK);
1418                     uiop->uio_iov = ivp = iov;
1419                     mp1 = info.mrep;
1420                     while (mp1) {
1421                         if (mp1->m_len > 0) {
1422                             ivp->iov_base = mtod(mp1, caddr_t);
1423                             ivp->iov_len = mp1->m_len;
1424                             ivp++;
1425                         }
1426                         mp1 = mp1->m_next;
1427                     }
1428                     if (!error) {
1429                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1430                         nfsstats.srvvop_writes++;
1431                     }
1432                     FREE((caddr_t)iov, M_TEMP);
1433                 }
1434                 m_freem(info.mrep);
1435                 info.mrep = NULL;
1436                 if (vp) {
1437                     aftat_ret = VOP_GETATTR(vp, &va);
1438                     vput(vp);
1439                     vp = NULL;
1440                 }
1441
1442                 /*
1443                  * Loop around generating replies for all write rpcs that have
1444                  * now been completed.
                       *
                       * swp is the descriptor that was written; nfsd walks swp
                       * first and then each entry taken off swp->nd_coalesce
                       * (that list is linked through nd_tq, hence the
                       * LIST_REMOVE(nfsd, nd_tq) at the bottom of the loop).
                       * The same error and the same attributes are used for
                       * every reply, and the v3 commit level comes from swp.
1445                  */
1446                 swp = nfsd;
1447                 do {
1448                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1449                     if (error) {
1450                         nfsm_writereply(&info, nfsd, slp, error,
1451                                         NFSX_WCCDATA(info.v3));
1452                         if (info.v3) {
1453                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1454                                              aftat_ret, &va);
1455                         }
1456                     } else {
1457                         nfsm_writereply(&info, nfsd, slp, error,
1458                                         NFSX_PREOPATTR(info.v3) +
1459                                         NFSX_POSTOPORFATTR(info.v3) +
1460                                         2 * NFSX_UNSIGNED +
1461                                         NFSX_WRITEVERF(info.v3));
1462                         if (info.v3) {
1463                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1464                                              aftat_ret, &va);
1465                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1466                             *tl++ = txdr_unsigned(nfsd->nd_len);
1467                             *tl++ = txdr_unsigned(swp->nd_stable);
1468                             /*
1469                              * Actually, there is no need to txdr these fields,
1470                              * but it may make the values more human readable,
1471                              * for debugging purposes.
1472                              */
1473                             if (nfsver.tv_sec == 0)
1474                                     nfsver = boottime;
1475                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1476                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1477                         } else {
1478                             fp = nfsm_build(&info, NFSX_V2FATTR);
1479                             nfsm_srvfattr(nfsd, &va, fp);
1480                         }
1481                     }
1482                     nfsd->nd_mreq = info.mreq;
1483                     if (nfsd->nd_mrep)
1484                         panic("nfsrv_write: nd_mrep not free");
1485
1486                     /*
1487                      * Done. Put it at the head of the timer queue so that
1488                      * the final phase can return the reply.
                           *
                           * swp itself is re-queued after the loop, because its
                           * nd_coalesce list is still being walked here.
1489                      */
1490                     if (nfsd != swp) {
1491                         nfsd->nd_time = 0;
1492                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1493                     }
1494                     nfsd = swp->nd_coalesce.lh_first;
1495                     if (nfsd) {
1496                         LIST_REMOVE(nfsd, nd_tq);
1497                     }
1498                 } while (nfsd);
1499                 swp->nd_time = 0;
1500                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
                      /* The queue was modified; rescan it from the start. */
1501                 goto loop1;
1502         }
1503
1504         /*
1505          * Search for a reply to return.
1506          */
1507         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1508                 if (nfsd->nd_mreq) {
1509                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1510                     LIST_REMOVE(nfsd, nd_tq);
1511                     break;
1512                 }
1513         }
1514         if (nfsd) {
1515                 *ndp = nfsd;
1516                 *mrq = nfsd->nd_mreq;
1517         } else {
1518                 *ndp = NULL;
1519                 *mrq = NULL;
1520         }
1521         return (0);
1522 }
1523
1524 /*
1525  * Coalesce the write request nfsd into owp. To do this we must:
1526  * - remove nfsd from the queues
1527  * - merge nfsd->nd_mrep into owp->nd_mrep
1528  * - update the nd_eoff and nd_stable for owp
1529  * - put nfsd on owp's nd_coalesce list
1530  * NB: Must be called at splsoftclock().
      *
      * owp  - the earlier request that absorbs the data and will be written.
      * nfsd - the request being absorbed; on return its nd_mrep is NULL and it
      *        sits on owp->nd_coalesce (linked through nd_tq).
      *
      * Panics if nfsd starts beyond owp's current end offset.
1531  */
1532 static void
1533 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1534 {
1535         int overlap;
1536         struct mbuf *mp1;
1537         struct nfsrv_descript *p;
1538
1539         NFS_DPF(WG, ("C%03x-%03x",
1540                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1541         LIST_REMOVE(nfsd, nd_hash);
1542         LIST_REMOVE(nfsd, nd_tq);
              /*
               * If nfsd reaches past owp's end, drop the leading bytes of nfsd
               * that owp already covers, append the rest to the tail of owp's
               * mbuf chain and extend owp's end offset.  Otherwise nfsd lies
               * entirely inside owp's range and its data is simply freed.
               */
1543         if (owp->nd_eoff < nfsd->nd_eoff) {
1544             overlap = owp->nd_eoff - nfsd->nd_off;
1545             if (overlap < 0)
1546                 panic("nfsrv_coalesce: bad off");
1547             if (overlap > 0)
1548                 m_adj(nfsd->nd_mrep, overlap);
1549             mp1 = owp->nd_mrep;
1550             while (mp1->m_next)
1551                 mp1 = mp1->m_next;
1552             mp1->m_next = nfsd->nd_mrep;
1553             owp->nd_eoff = nfsd->nd_eoff;
1554         } else
1555             m_freem(nfsd->nd_mrep);
1556         nfsd->nd_mrep = NULL;
              /*
               * owp is raised to nfsd's stability level when nfsd asks for
               * more: FILESYNC always wins, DATASYNC upgrades UNSTABLE.
               */
1557         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1558             owp->nd_stable = NFSV3WRITE_FILESYNC;
1559         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1560             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1561             owp->nd_stable = NFSV3WRITE_DATASYNC;
              /* nd_tq was unlinked above and is reused as the coalesce link. */
1562         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1563
1564         /*
1565          * If nfsd had anything else coalesced into it, transfer them
1566          * to owp, otherwise their replies will never get sent.
               *
               * The loop re-reads the list head each time because every pass
               * removes the current head.
1567          */
1568         for (p = nfsd->nd_coalesce.lh_first; p;
1569              p = nfsd->nd_coalesce.lh_first) {
1570             LIST_REMOVE(p, nd_tq);
1571             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1572         }
1573 }
1574
1575 /*
1576  * nfs create service
1577  * now does a truncate to 0 length via setattr if it already exists
1578  */
1579 int
1580 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1581              struct thread *td, struct mbuf **mrq)
1582 {
1583         struct sockaddr *nam = nfsd->nd_nam;
1584         struct ucred *cred = &nfsd->nd_cr;
1585         struct nfs_fattr *fp;
1586         struct vattr va, dirfor, diraft;
1587         struct vattr *vap = &va;
1588         struct nfsv2_sattr *sp;
1589         u_int32_t *tl;
1590         struct nlookupdata nd;
1591         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1592         udev_t rdev = NOUDEV;
1593         caddr_t cp;
1594         int how, exclusive_flag = 0;
1595         struct vnode *dirp;
1596         struct vnode *dvp;
1597         struct vnode *vp;
1598         struct mount *mp;
1599         nfsfh_t nfh;
1600         fhandle_t *fhp;
1601         u_quad_t tempsize;
1602         u_char cverf[NFSX_V3CREATEVERF];
1603         struct nfsm_info info;
1604
1605         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1606         nlookup_zero(&nd);
1607         dirp = NULL;
1608         dvp = NULL;
1609         vp = NULL;
1610
1611         info.mrep = nfsd->nd_mrep;
1612         info.mreq = NULL;
1613         info.md = nfsd->nd_md;
1614         info.dpos = nfsd->nd_dpos;
1615         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1616
1617         fhp = &nfh.fh_generic;
1618         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1619         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1620
1621         /*
1622          * Call namei and do initial cleanup to get a few things
1623          * out of the way.  If we get an initial error we cleanup
1624          * and return here to avoid special-casing the invalid nd
1625          * structure through the rest of the case.  dirp may be
1626          * set even if an error occurs, but the nd structure will not
1627          * be valid at all if an error occurs so we have to invalidate it
1628          * prior to calling nfsm_reply ( which might goto nfsmout ).
1629          */
1630         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1631                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1632                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1633         mp = vfs_getvfs(&fhp->fh_fsid);
1634
1635         if (dirp) {
1636                 if (info.v3) {
1637                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1638                 } else {
1639                         vrele(dirp);
1640                         dirp = NULL;
1641                 }
1642         }
1643         if (error) {
1644                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1645                                       NFSX_WCCDATA(info.v3), &error));
1646                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1647                                  diraft_ret, &diraft);
1648                 error = 0;
1649                 goto nfsmout;
1650         }
1651
1652         /*
1653          * No error.  Continue.  State:
1654          *
1655          *      dirp            may be valid
1656          *      vp              may be valid or NULL if the target does not
1657          *                      exist.
1658          *      dvp             is valid
1659          *
1660          * The error state is set through the code and we may also do some
1661          * opportunistic releasing of vnodes to avoid holding locks through
1662          * NFS I/O.  The cleanup at the end is a catch-all
1663          */
1664
1665         VATTR_NULL(vap);
1666         if (info.v3) {
1667                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1668                 how = fxdr_unsigned(int, *tl);
1669                 switch (how) {
1670                 case NFSV3CREATE_GUARDED:
1671                         if (vp) {
1672                                 error = EEXIST;
1673                                 break;
1674                         }
1675                         /* fall through */
1676                 case NFSV3CREATE_UNCHECKED:
1677                         ERROROUT(nfsm_srvsattr(&info, vap));
1678                         break;
1679                 case NFSV3CREATE_EXCLUSIVE:
1680                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1681                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
1682                         exclusive_flag = 1;
1683                         break;
1684                 };
1685                 vap->va_type = VREG;
1686         } else {
1687                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1688                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1689                 if (vap->va_type == VNON)
1690                         vap->va_type = VREG;
1691                 vap->va_mode = nfstov_mode(sp->sa_mode);
1692                 switch (vap->va_type) {
1693                 case VREG:
1694                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1695                         if (tsize != -1)
1696                                 vap->va_size = (u_quad_t)tsize;
1697                         break;
1698                 case VCHR:
1699                 case VBLK:
1700                 case VFIFO:
1701                         rdev = fxdr_unsigned(long, sp->sa_size);
1702                         break;
1703                 default:
1704                         break;
1705                 };
1706         }
1707
1708         /*
1709          * Iff doesn't exist, create it
1710          * otherwise just truncate to 0 length
1711          *   should I set the mode too ?
1712          *
1713          * The only possible error we can have at this point is EEXIST. 
1714          * nd.ni_vp will also be non-NULL in that case.
1715          */
1716         if (vp == NULL) {
1717                 if (vap->va_mode == (mode_t)VNOVAL)
1718                         vap->va_mode = 0;
1719                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1720                         vn_unlock(dvp);
1721                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1722                                             nd.nl_cred, vap);
1723                         vrele(dvp);
1724                         dvp = NULL;
1725                         if (error == 0) {
1726                                 if (exclusive_flag) {
1727                                         exclusive_flag = 0;
1728                                         VATTR_NULL(vap);
1729                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1730                                                 NFSX_V3CREATEVERF);
1731                                         error = VOP_SETATTR(vp, vap, cred);
1732                                 }
1733                         }
1734                 } else if (
1735                         vap->va_type == VCHR || 
1736                         vap->va_type == VBLK ||
1737                         vap->va_type == VFIFO
1738                 ) {
1739                         /*
1740                          * Handle SysV FIFO node special cases.  All other
1741                          * devices require super user to access.
1742                          */
1743                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1744                                 vap->va_type = VFIFO;
1745                         if (vap->va_type != VFIFO &&
1746                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1747                                 goto nfsmreply0;
1748                         }
1749                         vap->va_rmajor = umajor(rdev);
1750                         vap->va_rminor = uminor(rdev);
1751
1752                         vn_unlock(dvp);
1753                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1754                         vrele(dvp);
1755                         dvp = NULL;
1756                         if (error)
1757                                 goto nfsmreply0;
1758 #if 0
1759                         /*
1760                          * XXX what is this junk supposed to do ?
1761                          */
1762
1763                         vput(vp);
1764                         vp = NULL;
1765
1766                         /*
1767                          * release dvp prior to lookup
1768                          */
1769                         vput(dvp);
1770                         dvp = NULL;
1771
1772                         /*
1773                          * Setup for lookup. 
1774                          *
1775                          * Even though LOCKPARENT was cleared, ni_dvp may
1776                          * be garbage. 
1777                          */
1778                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1779                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1780                         nd.ni_cnd.cn_td = td;
1781                         nd.ni_cnd.cn_cred = cred;
1782
1783                         error = lookup(&nd);
1784                         nd.ni_dvp = NULL;
1785
1786                         if (error != 0) {
1787                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1788                                                       0, &error));
1789                                 /* fall through on certain errors */
1790                         }
1791                         nfsrv_object_create(nd.ni_vp);
1792                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1793                                 error = EINVAL;
1794                                 goto nfsmreply0;
1795                         }
1796 #endif
1797                 } else {
1798                         error = ENXIO;
1799                 }
1800         } else {
1801                 if (vap->va_size != -1) {
1802                         error = nfsrv_access(mp, vp, VWRITE, cred,
1803                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1804                         if (!error) {
1805                                 tempsize = vap->va_size;
1806                                 VATTR_NULL(vap);
1807                                 vap->va_size = tempsize;
1808                                 error = VOP_SETATTR(vp, vap, cred);
1809                         }
1810                 }
1811         }
1812
1813         if (!error) {
1814                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1815                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1816                 if (!error)
1817                         error = VOP_GETATTR(vp, vap);
1818         }
1819         if (info.v3) {
1820                 if (exclusive_flag && !error &&
1821                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1822                         error = EEXIST;
1823                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1824                 vrele(dirp);
1825                 dirp = NULL;
1826         }
1827         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1828                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1829                               NFSX_WCCDATA(info.v3),
1830                               &error));
1831         if (info.v3) {
1832                 if (!error) {
1833                         nfsm_srvpostop_fh(&info, fhp);
1834                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1835                 }
1836                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1837                                  diraft_ret, &diraft);
1838                 error = 0;
1839         } else {
1840                 nfsm_srvfhtom(&info, fhp);
1841                 fp = nfsm_build(&info, NFSX_V2FATTR);
1842                 nfsm_srvfattr(nfsd, vap, fp);
1843         }
1844         goto nfsmout;
1845
1846 nfsmreply0:
1847         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1848         error = 0;
1849         /* fall through */
1850
1851 nfsmout:
1852         *mrq = info.mreq;
1853         if (dirp)
1854                 vrele(dirp);
1855         nlookup_done(&nd);
1856         if (dvp) {
1857                 if (dvp == vp)
1858                         vrele(dvp);
1859                 else
1860                         vput(dvp);
1861         }
1862         if (vp)
1863                 vput(vp);
1864         return (error);
1865 }
1866
1867 /*
1868  * nfs v3 mknod service
1869  */
1870 int
1871 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1872             struct thread *td, struct mbuf **mrq)
1873 {
1874         struct sockaddr *nam = nfsd->nd_nam;
1875         struct ucred *cred = &nfsd->nd_cr;
1876         struct vattr va, dirfor, diraft;
1877         struct vattr *vap = &va;
1878         u_int32_t *tl;
1879         struct nlookupdata nd;
1880         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1881         enum vtype vtyp;
1882         struct vnode *dirp;
1883         struct vnode *dvp;
1884         struct vnode *vp;
1885         nfsfh_t nfh;
1886         fhandle_t *fhp;
1887         struct nfsm_info info;
1888
1889         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1890         nlookup_zero(&nd);
1891         dirp = NULL;
1892         dvp = NULL;
1893         vp = NULL;
1894
1895         info.mrep = nfsd->nd_mrep;
1896         info.mreq = NULL;
1897         info.md = nfsd->nd_md;
1898         info.dpos = nfsd->nd_dpos;
1899
1900         fhp = &nfh.fh_generic;
1901         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1902         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1903
1904         /*
1905          * Handle nfs_namei() call.  If an error occurs, the nd structure
1906          * is not valid.  However, nfsm_*() routines may still jump to
1907          * nfsmout.
1908          */
1909
1910         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1911                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1912                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1913         if (dirp)
1914                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1915         if (error) {
1916                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1917                            NFSX_WCCDATA(1), &error));
1918                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1919                                  diraft_ret, &diraft);
1920                 error = 0;
1921                 goto nfsmout;
1922         }
1923         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1924         vtyp = nfsv3tov_type(*tl);
1925         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1926                 error = NFSERR_BADTYPE;
1927                 goto out;
1928         }
1929         VATTR_NULL(vap);
1930         ERROROUT(nfsm_srvsattr(&info, vap));
1931         if (vtyp == VCHR || vtyp == VBLK) {
1932                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1933                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1934                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1935         }
1936
1937         /*
1938          * Iff doesn't exist, create it.
1939          */
1940         if (vp) {
1941                 error = EEXIST;
1942                 goto out;
1943         }
1944         vap->va_type = vtyp;
1945         if (vap->va_mode == (mode_t)VNOVAL)
1946                 vap->va_mode = 0;
1947         if (vtyp == VSOCK) {
1948                 vn_unlock(dvp);
1949                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1950                 vrele(dvp);
1951                 dvp = NULL;
1952         } else {
1953                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1954                         goto out;
1955
1956                 vn_unlock(dvp);
1957                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1958                 vrele(dvp);
1959                 dvp = NULL;
1960                 if (error)
1961                         goto out;
1962         }
1963
1964         /*
1965          * send response, cleanup, return.
1966          */
1967 out:
1968         nlookup_done(&nd);
1969         if (dvp) {
1970                 if (dvp == vp)
1971                         vrele(dvp);
1972                 else
1973                         vput(dvp);
1974                 dvp = NULL;
1975         }
1976         if (!error) {
1977                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1978                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1979                 if (!error)
1980                         error = VOP_GETATTR(vp, vap);
1981         }
1982         if (vp) {
1983                 vput(vp);
1984                 vp = NULL;
1985         }
1986         diraft_ret = VOP_GETATTR(dirp, &diraft);
1987         if (dirp) {
1988                 vrele(dirp);
1989                 dirp = NULL;
1990         }
1991         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1992                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1993                               NFSX_WCCDATA(1), &error));
1994         if (!error) {
1995                 nfsm_srvpostop_fh(&info, fhp);
1996                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1997         }
1998         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1999                          diraft_ret, &diraft);
2000         *mrq = info.mreq;
2001         return (0);
2002 nfsmout:
2003         *mrq = info.mreq;
2004         if (dirp)
2005                 vrele(dirp);
2006         nlookup_done(&nd);
2007         if (dvp) {
2008                 if (dvp == vp)
2009                         vrele(dvp);
2010                 else
2011                         vput(dvp);
2012         }
2013         if (vp)
2014                 vput(vp);
2015         return (error);
2016 }
2017
2018 /*
2019  * nfs remove service
2020  */
2021 int
2022 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2023              struct thread *td, struct mbuf **mrq)
2024 {
2025         struct sockaddr *nam = nfsd->nd_nam;
2026         struct ucred *cred = &nfsd->nd_cr;
2027         struct nlookupdata nd;
2028         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2029         struct vnode *dirp;
2030         struct vnode *dvp;
2031         struct vnode *vp;
2032         struct vattr dirfor, diraft;
2033         nfsfh_t nfh;
2034         fhandle_t *fhp;
2035         struct nfsm_info info;
2036
2037         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2038         nlookup_zero(&nd);
2039         dirp = NULL;
2040         dvp = NULL;
2041         vp = NULL;
2042
2043         info.mrep = nfsd->nd_mrep;
2044         info.mreq = NULL;
2045         info.md = nfsd->nd_md;
2046         info.dpos = nfsd->nd_dpos;
2047         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2048
2049         fhp = &nfh.fh_generic;
2050         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2051         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2052
2053         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2054                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2055                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2056         if (dirp) {
2057                 if (info.v3)
2058                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2059         }
2060         if (error == 0) {
2061                 if (vp->v_type == VDIR) {
2062                         error = EPERM;          /* POSIX */
2063                         goto out;
2064                 }
2065                 /*
2066                  * The root of a mounted filesystem cannot be deleted.
2067                  */
2068                 if (vp->v_flag & VROOT) {
2069                         error = EBUSY;
2070                         goto out;
2071                 }
2072 out:
2073                 if (!error) {
2074                         if (dvp != vp)
2075                                 vn_unlock(dvp);
2076                         if (vp) {
2077                                 vput(vp);
2078                                 vp = NULL;
2079                         }
2080                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2081                         vrele(dvp);
2082                         dvp = NULL;
2083                 }
2084         }
2085         if (dirp && info.v3)
2086                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2087         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2088         if (info.v3) {
2089                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2090                                  diraft_ret, &diraft);
2091                 error = 0;
2092         }
2093 nfsmout:
2094         *mrq = info.mreq;
2095         nlookup_done(&nd);
2096         if (dirp)
2097                 vrele(dirp);
2098         if (dvp) {
2099                 if (dvp == vp)
2100                         vrele(dvp);
2101                 else
2102                         vput(dvp);
2103         }
2104         if (vp)
2105                 vput(vp);
2106         return(error);
2107 }
2108
2109 /*
2110  * nfs rename service
2111  */
2112 int
2113 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2114              struct thread *td, struct mbuf **mrq)
2115 {
2116         struct sockaddr *nam = nfsd->nd_nam;
2117         struct ucred *cred = &nfsd->nd_cr;
2118         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2119         int tdirfor_ret = 1, tdiraft_ret = 1;
2120         struct nlookupdata fromnd, tond;
2121         struct vnode *fvp, *fdirp, *fdvp;
2122         struct vnode *tvp, *tdirp, *tdvp;
2123         struct namecache *ncp;
2124         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2125         nfsfh_t fnfh, tnfh;
2126         fhandle_t *ffhp, *tfhp;
2127         uid_t saved_uid;
2128         struct nfsm_info info;
2129
2130         info.mrep = nfsd->nd_mrep;
2131         info.mreq = NULL;
2132         info.md = nfsd->nd_md;
2133         info.dpos = nfsd->nd_dpos;
2134         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2135
2136         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2137 #ifndef nolint
2138         fvp = NULL;
2139 #endif
2140         ffhp = &fnfh.fh_generic;
2141         tfhp = &tnfh.fh_generic;
2142
2143         /*
2144          * Clear fields incase goto nfsmout occurs from macro.
2145          */
2146
2147         nlookup_zero(&fromnd);
2148         nlookup_zero(&tond);
2149         fdirp = NULL;
2150         tdirp = NULL;
2151
2152         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2153         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2154
2155         /*
2156          * Remember our original uid so that we can reset cr_uid before
2157          * the second nfs_namei() call, in case it is remapped.
2158          */
2159         saved_uid = cred->cr_uid;
2160         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2161                           NULL, NULL,
2162                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2163                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2164         if (fdirp) {
2165                 if (info.v3)
2166                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2167         }
2168         if (error) {
2169                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2170                                       2 * NFSX_WCCDATA(info.v3), &error));
2171                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2172                                  fdiraft_ret, &fdiraft);
2173                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2174                                  tdiraft_ret, &tdiraft);
2175                 error = 0;
2176                 goto nfsmout;
2177         }
2178
2179         /*
2180          * We have to unlock the from ncp before we can safely lookup
2181          * the target ncp.
2182          */
2183         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2184         cache_unlock(&fromnd.nl_nch);
2185         fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2186         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2187         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2188         cred->cr_uid = saved_uid;
2189
2190         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2191                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2192                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2193         if (tdirp) {
2194                 if (info.v3)
2195                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2196         }
2197         if (error)
2198                 goto out1;
2199
2200         /*
2201          * relock the source
2202          */
2203         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2204                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2205         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2206                 cache_lock(&fromnd.nl_nch);
2207                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2208         } else {
2209                 cache_unlock(&tond.nl_nch);
2210                 cache_lock(&fromnd.nl_nch);
2211                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2212                 cache_lock(&tond.nl_nch);
2213                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2214         }
2215         fromnd.nl_flags |= NLC_NCPISLOCKED;
2216
2217         fvp = fromnd.nl_nch.ncp->nc_vp;
2218         tvp = tond.nl_nch.ncp->nc_vp;
2219
2220         /*
2221          * Set fdvp and tdvp.  We haven't done all the topology checks
2222          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2223          * point).  If we get through the checks these will be guarenteed
2224          * to be non-NULL.
2225          *
2226          * Holding the children ncp's should be sufficient to prevent
2227          * fdvp and tdvp ripouts.
2228          */
2229         if (fromnd.nl_nch.ncp->nc_parent)
2230                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2231         else
2232                 fdvp = NULL;
2233         if (tond.nl_nch.ncp->nc_parent)
2234                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2235         else
2236                 tdvp = NULL;
2237
2238         if (tvp != NULL) {
2239                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2240                         if (info.v3)
2241                                 error = EEXIST;
2242                         else
2243                                 error = EISDIR;
2244                         goto out;
2245                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2246                         if (info.v3)
2247                                 error = EEXIST;
2248                         else
2249                                 error = ENOTDIR;
2250                         goto out;
2251                 }
2252                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2253                         if (info.v3)
2254                                 error = EXDEV;
2255                         else
2256                                 error = ENOTEMPTY;
2257                         goto out;
2258                 }
2259         }
2260         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2261                 if (info.v3)
2262                         error = EXDEV;
2263                 else
2264                         error = ENOTEMPTY;
2265                 goto out;
2266         }
2267         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2268                 if (info.v3)
2269                         error = EXDEV;
2270                 else
2271                         error = ENOTEMPTY;
2272                 goto out;
2273         }
2274         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2275                 if (info.v3)
2276                         error = EINVAL;
2277                 else
2278                         error = ENOTEMPTY;
2279         }
2280
2281         /*
2282          * You cannot rename a source into itself or a subdirectory of itself.
2283          * We check this by travsering the target directory upwards looking
2284          * for a match against the source.
2285          */
2286         if (error == 0) {
2287                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2288                         if (fromnd.nl_nch.ncp == ncp) {
2289                                 error = EINVAL;
2290                                 break;
2291                         }
2292                 }
2293         }
2294
2295         /*
2296          * If source is the same as the destination (that is the
2297          * same vnode with the same name in the same directory),
2298          * then there is nothing to do.
2299          */
2300         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2301                 error = -1;
2302 out:
2303         if (!error) {
2304                 /*
2305                  * The VOP_NRENAME function releases all vnode references &
2306                  * locks prior to returning so we need to clear the pointers
2307                  * to bypass cleanup code later on.
2308                  */
2309                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2310                                     fdvp, tdvp, tond.nl_cred);
2311         } else {
2312                 if (error == -1)
2313                         error = 0;
2314         }
2315         /* fall through */
2316
2317 out1:
2318         if (fdirp)
2319                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2320         if (tdirp)
2321                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2322         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2323                               2 * NFSX_WCCDATA(info.v3), &error));
2324         if (info.v3) {
2325                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2326                                  fdiraft_ret, &fdiraft);
2327                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2328                                  tdiraft_ret, &tdiraft);
2329         }
2330         error = 0;
2331         /* fall through */
2332
2333 nfsmout:
2334         *mrq = info.mreq;
2335         if (tdirp)
2336                 vrele(tdirp);
2337         nlookup_done(&tond);
2338         if (fdirp)
2339                 vrele(fdirp);
2340         nlookup_done(&fromnd);
2341         return (error);
2342 }
2343
2344 /*
2345  * nfs link service
2346  */
2347 int
2348 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2349            struct thread *td, struct mbuf **mrq)
2350 {
2351         struct sockaddr *nam = nfsd->nd_nam;
2352         struct ucred *cred = &nfsd->nd_cr;
2353         struct nlookupdata nd;
2354         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2355         int getret = 1;
2356         struct vnode *dirp;
2357         struct vnode *dvp;
2358         struct vnode *vp;
2359         struct vnode *xp;
2360         struct mount *mp;
2361         struct mount *xmp;
2362         struct vattr dirfor, diraft, at;
2363         nfsfh_t nfh, dnfh;
2364         fhandle_t *fhp, *dfhp;
2365         struct nfsm_info info;
2366
2367         info.mrep = nfsd->nd_mrep;
2368         info.mreq = NULL;
2369         info.md = nfsd->nd_md;
2370         info.dpos = nfsd->nd_dpos;
2371         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2372
2373         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2374         nlookup_zero(&nd);
2375         dirp = dvp = vp = xp = NULL;
2376         mp = xmp = NULL;
2377
2378         fhp = &nfh.fh_generic;
2379         dfhp = &dnfh.fh_generic;
2380         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2381         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2382         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2383
2384         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2385                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2386         if (error) {
2387                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2388                                       NFSX_POSTOPATTR(info.v3) +
2389                                       NFSX_WCCDATA(info.v3),
2390                                       &error));
2391                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2392                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2393                                  diraft_ret, &diraft);
2394                 xp = NULL;
2395                 error = 0;
2396                 goto nfsmout;
2397         }
2398         if (xp->v_type == VDIR) {
2399                 error = EPERM;          /* POSIX */
2400                 goto out1;
2401         }
2402
2403         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2404                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2405                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2406         if (dirp) {
2407                 if (info.v3)
2408                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2409         }
2410         if (error)
2411                 goto out1;
2412
2413         if (vp != NULL) {
2414                 error = EEXIST;
2415                 goto out;
2416         }
2417         if (xp->v_mount != dvp->v_mount)
2418                 error = EXDEV;
2419 out:
2420         if (!error) {
2421                 vn_unlock(dvp);
2422                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2423                 vrele(dvp);
2424                 dvp = NULL;
2425         }
2426         /* fall through */
2427
2428 out1:
2429         if (info.v3)
2430                 getret = VOP_GETATTR(xp, &at);
2431         if (dirp)
2432                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2433         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2434                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2435                               &error));
2436         if (info.v3) {
2437                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2438                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2439                                  diraft_ret, &diraft);
2440                 error = 0;
2441         }
2442         /* fall through */
2443
2444 nfsmout:
2445         *mrq = info.mreq;
2446         nlookup_done(&nd);
2447         if (dirp)
2448                 vrele(dirp);
2449         if (xp)
2450                 vrele(xp);
2451         if (dvp) {
2452                 if (dvp == vp)
2453                         vrele(dvp);
2454                 else
2455                         vput(dvp);
2456         }
2457         if (vp)
2458                 vput(vp);
2459         return(error);
2460 }
2461
2462 /*
2463  * nfs symbolic link service
2464  */
2465 int
2466 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2467               struct thread *td, struct mbuf **mrq)
2468 {
2469         struct sockaddr *nam = nfsd->nd_nam;
2470         struct ucred *cred = &nfsd->nd_cr;
2471         struct vattr va, dirfor, diraft;
2472         struct nlookupdata nd;
2473         struct vattr *vap = &va;
2474         struct nfsv2_sattr *sp;
2475         char *pathcp = NULL;
2476         struct uio io;
2477         struct iovec iv;
2478         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2479         struct vnode *dirp;
2480         struct vnode *vp;
2481         struct vnode *dvp;
2482         nfsfh_t nfh;
2483         fhandle_t *fhp;
2484         struct nfsm_info info;
2485
2486         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2487         nlookup_zero(&nd);
2488         dirp = NULL;
2489         dvp = NULL;
2490         vp = NULL;
2491
2492         info.mrep = nfsd->nd_mrep;
2493         info.mreq =  NULL;
2494         info.md = nfsd->nd_md;
2495         info.dpos = nfsd->nd_dpos;
2496         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2497
2498         fhp = &nfh.fh_generic;
2499         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2500         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2501
2502         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2503                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2504                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2505         if (dirp) {
2506                 if (info.v3)
2507                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2508         }
2509         if (error)
2510                 goto out;
2511
2512         VATTR_NULL(vap);
2513         if (info.v3) {
2514                 ERROROUT(nfsm_srvsattr(&info, vap));
2515         }
2516         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2517         MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK);
2518         iv.iov_base = pathcp;
2519         iv.iov_len = len2;
2520         io.uio_resid = len2;
2521         io.uio_offset = 0;
2522         io.uio_iov = &iv;
2523         io.uio_iovcnt = 1;
2524         io.uio_segflg = UIO_SYSSPACE;
2525         io.uio_rw = UIO_READ;
2526         io.uio_td = NULL;
2527         ERROROUT(nfsm_mtouio(&info, &io, len2));
2528         if (info.v3 == 0) {
2529                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2530                 vap->va_mode = nfstov_mode(sp->sa_mode);
2531         }
2532         *(pathcp + len2) = '\0';
2533         if (vp) {
2534                 error = EEXIST;
2535                 goto out;
2536         }
2537
2538         if (vap->va_mode == (mode_t)VNOVAL)
2539                 vap->va_mode = 0;
2540         if (dvp != vp)
2541                 vn_unlock(dvp);
2542         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2543         vrele(dvp);
2544         dvp = NULL;
2545         if (error == 0) {
2546                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2547                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2548                 if (!error)
2549                         error = VOP_GETATTR(vp, vap);
2550         }
2551
2552 out:
2553         if (dvp) {
2554                 if (dvp == vp)
2555                         vrele(dvp);
2556                 else
2557                         vput(dvp);
2558         }
2559         if (vp) {
2560                 vput(vp);
2561                 vp = NULL;
2562         }
2563         if (pathcp) {
2564                 FREE(pathcp, M_TEMP);
2565                 pathcp = NULL;
2566         }
2567         if (dirp) {
2568                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2569                 vrele(dirp);
2570                 dirp = NULL;
2571         }
2572         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2573                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2574                               NFSX_WCCDATA(info.v3),
2575                               &error));
2576         if (info.v3) {
2577                 if (!error) {
2578                         nfsm_srvpostop_fh(&info, fhp);
2579                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2580                 }
2581                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2582                                  diraft_ret, &diraft);
2583         }
2584         error = 0;
2585         /* fall through */
2586
2587 nfsmout:
2588         *mrq = info.mreq;
2589         nlookup_done(&nd);
2590         if (vp)
2591                 vput(vp);
2592         if (dirp)
2593                 vrele(dirp);
2594         if (pathcp)
2595                 FREE(pathcp, M_TEMP);
2596         return (error);
2597 }
2598
2599 /*
2600  * nfs mkdir service
2601  */
2602 int
2603 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2604             struct thread *td, struct mbuf **mrq)
2605 {
2606         struct sockaddr *nam = nfsd->nd_nam;
2607         struct ucred *cred = &nfsd->nd_cr;
2608         struct vattr va, dirfor, diraft;
2609         struct vattr *vap = &va;
2610         struct nfs_fattr *fp;
2611         struct nlookupdata nd;
2612         u_int32_t *tl;
2613         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2614         struct vnode *dirp;
2615         struct vnode *dvp;
2616         struct vnode *vp;
2617         nfsfh_t nfh;
2618         fhandle_t *fhp;
2619         struct nfsm_info info;
2620
2621         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2622         nlookup_zero(&nd);
2623         dirp = NULL;
2624         dvp = NULL;
2625         vp = NULL;
2626
2627         info.dpos = nfsd->nd_dpos;
2628         info.mrep = nfsd->nd_mrep;
2629         info.mreq =  NULL;
2630         info.md = nfsd->nd_md;
2631         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2632
2633         fhp = &nfh.fh_generic;
2634         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2635         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2636
2637         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2638                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2639                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2640         if (dirp) {
2641                 if (info.v3)
2642                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2643         }
2644         if (error) {
2645                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2646                                       NFSX_WCCDATA(info.v3), &error));
2647                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2648                                  diraft_ret, &diraft);
2649                 error = 0;
2650                 goto nfsmout;
2651         }
2652         VATTR_NULL(vap);
2653         if (info.v3) {
2654                 ERROROUT(nfsm_srvsattr(&info, vap));
2655         } else {
2656                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2657                 vap->va_mode = nfstov_mode(*tl++);
2658         }
2659
2660         /*
2661          * At this point nd.ni_dvp is referenced and exclusively locked and
2662          * nd.ni_vp, if it exists, is referenced but not locked.
2663          */
2664
2665         vap->va_type = VDIR;
2666         if (vp != NULL) {
2667                 error = EEXIST;
2668                 goto out;
2669         }
2670
2671         /*
2672          * Issue mkdir op.  Since SAVESTART is not set, the pathname 
2673          * component is freed by the VOP call.  This will fill-in
2674          * nd.ni_vp, reference, and exclusively lock it.
2675          */
2676         if (vap->va_mode == (mode_t)VNOVAL)
2677                 vap->va_mode = 0;
2678         vn_unlock(dvp);
2679         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2680         vrele(dvp);
2681         dvp = NULL;
2682
2683         if (error == 0) {
2684                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2685                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2686                 if (error == 0)
2687                         error = VOP_GETATTR(vp, vap);
2688         }
2689 out:
2690         if (dirp)
2691                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2692         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2693                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2694                               NFSX_WCCDATA(info.v3),
2695                               &error));
2696         if (info.v3) {
2697                 if (!error) {
2698                         nfsm_srvpostop_fh(&info, fhp);
2699                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2700                 }
2701                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2702                                  diraft_ret, &diraft);
2703         } else {
2704                 nfsm_srvfhtom(&info, fhp);
2705                 fp = nfsm_build(&info, NFSX_V2FATTR);
2706                 nfsm_srvfattr(nfsd, vap, fp);
2707         }
2708         error = 0;
2709         /* fall through */
2710
2711 nfsmout:
2712         *mrq = info.mreq;
2713         nlookup_done(&nd);
2714         if (dirp)
2715                 vrele(dirp);
2716         if (dvp) {
2717                 if (dvp == vp)
2718                         vrele(dvp);
2719                 else
2720                         vput(dvp);
2721         }
2722         if (vp)
2723                 vput(vp);
2724         return (error);
2725 }
2726
2727 /*
2728  * nfs rmdir service
2729  */
2730 int
2731 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2732             struct thread *td, struct mbuf **mrq)
2733 {
2734         struct sockaddr *nam = nfsd->nd_nam;
2735         struct ucred *cred = &nfsd->nd_cr;
2736         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2737         struct vnode *dirp;
2738         struct vnode *dvp;
2739         struct vnode *vp;
2740         struct vattr dirfor, diraft;
2741         nfsfh_t nfh;
2742         fhandle_t *fhp;
2743         struct nlookupdata nd;
2744         struct nfsm_info info;
2745
2746         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2747         nlookup_zero(&nd);
2748         dirp = NULL;
2749         dvp = NULL;
2750         vp = NULL;
2751
2752         info.mrep = nfsd->nd_mrep;
2753         info.mreq = NULL;
2754         info.md = nfsd->nd_md;
2755         info.dpos = nfsd->nd_dpos;
2756         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2757
2758         fhp = &nfh.fh_generic;
2759         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2760         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2761
2762         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2763                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2764                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2765         if (dirp) {
2766                 if (info.v3)
2767                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2768         }
2769         if (error) {
2770                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2771                                       NFSX_WCCDATA(info.v3), &error));
2772                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2773                                  diraft_ret, &diraft);
2774                 error = 0;
2775                 goto nfsmout;
2776         }
2777         if (vp->v_type != VDIR) {
2778                 error = ENOTDIR;
2779                 goto out;
2780         }
2781
2782         /*
2783          * The root of a mounted filesystem cannot be deleted.
2784          */
2785         if (vp->v_flag & VROOT)
2786                 error = EBUSY;
2787 out:
2788         /*
2789          * Issue or abort op.  Since SAVESTART is not set, path name
2790          * component is freed by the VOP after either.
2791          */
2792         if (!error) {
2793                 if (dvp != vp)
2794                         vn_unlock(dvp);
2795                 vput(vp);
2796                 vp = NULL;
2797                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2798                 vrele(dvp);
2799                 dvp = NULL;
2800         }
2801         nlookup_done(&nd);
2802
2803         if (dirp)
2804                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2805         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2806         if (info.v3) {
2807                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2808                                  diraft_ret, &diraft);
2809                 error = 0;
2810         }
2811         /* fall through */
2812
2813 nfsmout:
2814         *mrq = info.mreq;
2815         if (dvp) {
2816                 if (dvp == vp)
2817                         vrele(dvp);
2818                 else
2819                         vput(dvp);
2820         }
2821         nlookup_done(&nd);
2822         if (dirp)
2823                 vrele(dirp);
2824         if (vp)
2825                 vput(vp);
2826         return(error);
2827 }
2828
2829 /*
2830  * nfs readdir service
2831  * - mallocs what it thinks is enough to read
2832  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2833  * - calls VOP_READDIR()
2834  * - loops around building the reply
2835  *      if the output generated exceeds count break out of loop
2836  *      The nfsm_clget macro is used here so that the reply will be packed
2837  *      tightly in mbuf clusters.
2838  * - it only knows that it has encountered eof when the VOP_READDIR()
2839  *      reads nothing
2840  * - as such one readdir rpc will return eof false although you are there
2841  *      and then the next will return eof
2842  * - it trims out records with d_fileno == 0
2843  *      this doesn't matter for Unix clients, but they might confuse clients
2844  *      for other os'.
2845  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2846  *      than requested, but this may not apply to all filesystems. For
2847  *      example, client NFS does not { although it is never remote mounted
2848  *      anyhow }
2849  *     The alternate call nfsrv_readdirplus() does lookups as well.
2850  * PS: The NFS protocol spec. does not clarify what the "count" byte
2851  *      argument is a count of.. just name strings and file id's or the
2852  *      entire reply rpc or ...
2853  *      I tried just file name and id sizes and it confused the Sun client,
2854  *      so I am using the full rpc size now. The "paranoia.." comment refers
2855  *      to including the status longwords that are not a part of the dir.
2856  *      "entry" structures, but are in the rpc.
2857  */
2858 struct flrep {
2859         nfsuint64       fl_off;
2860         u_int32_t       fl_postopok;
2861         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2862         u_int32_t       fl_fhok;
2863         u_int32_t       fl_fhsize;
2864         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2865 };
2866
2867 int
2868 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2869               struct thread *td, struct mbuf **mrq)
2870 {
2871         struct sockaddr *nam = nfsd->nd_nam;
2872         struct ucred *cred = &nfsd->nd_cr;
2873         char *bp, *be;
2874         struct dirent *dp;
2875         caddr_t cp;
2876         u_int32_t *tl;
2877         struct mbuf *mp1, *mp2;
2878         char *cpos, *cend, *rbuf;
2879         struct vnode *vp = NULL;
2880         struct mount *mp = NULL;
2881         struct vattr at;
2882         nfsfh_t nfh;
2883         fhandle_t *fhp;
2884         struct uio io;
2885         struct iovec iv;
2886         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2887         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2888         u_quad_t off, toff, verf;
2889         off_t *cookies = NULL, *cookiep;
2890         struct nfsm_info info;
2891
2892         info.mrep = nfsd->nd_mrep;
2893         info.mreq = NULL;
2894         info.md = nfsd->nd_md;
2895         info.dpos = nfsd->nd_dpos;
2896         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2897
2898         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2899         fhp = &nfh.fh_generic;
2900         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2901         if (info.v3) {
2902                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2903                 toff = fxdr_hyper(tl);
2904                 tl += 2;
2905                 verf = fxdr_hyper(tl);
2906                 tl += 2;
2907         } else {
2908                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2909                 toff = fxdr_unsigned(u_quad_t, *tl++);
2910                 verf = 0;       /* shut up gcc */
2911         }
2912         off = toff;
2913         cnt = fxdr_unsigned(int, *tl);
2914         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2915         xfer = NFS_SRVMAXDATA(nfsd);
2916         if ((unsigned)cnt > xfer)
2917                 cnt = xfer;
2918         if ((unsigned)siz > xfer)
2919                 siz = xfer;
2920         fullsiz = siz;
2921         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2922                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2923         if (!error && vp->v_type != VDIR) {
2924                 error = ENOTDIR;
2925                 vput(vp);
2926                 vp = NULL;
2927         }
2928         if (error) {
2929                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2930                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2931                 error = 0;
2932                 goto nfsmout;
2933         }
2934
2935         /*
2936          * Obtain lock on vnode for this section of the code
2937          */
2938
2939         if (info.v3) {
2940                 error = getret = VOP_GETATTR(vp, &at);
2941 #if 0
2942                 /*
2943                  * XXX This check may be too strict for Solaris 2.5 clients.
2944                  */
2945                 if (!error && toff && verf && verf != at.va_filerev)
2946                         error = NFSERR_BAD_COOKIE;
2947 #endif
2948         }
2949         if (!error)
2950                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2951         if (error) {
2952                 vput(vp);
2953                 vp = NULL;
2954                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2955                                       NFSX_POSTOPATTR(info.v3), &error));
2956                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2957                 error = 0;
2958                 goto nfsmout;
2959         }
2960         vn_unlock(vp);
2961
2962         /*
2963          * end section.  Allocate rbuf and continue
2964          */
2965         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
2966 again:
2967         iv.iov_base = rbuf;
2968         iv.iov_len = fullsiz;
2969         io.uio_iov = &iv;
2970         io.uio_iovcnt = 1;
2971         io.uio_offset = (off_t)off;
2972         io.uio_resid = fullsiz;
2973         io.uio_segflg = UIO_SYSSPACE;
2974         io.uio_rw = UIO_READ;
2975         io.uio_td = NULL;
2976         eofflag = 0;
2977         if (cookies) {
2978                 kfree((caddr_t)cookies, M_TEMP);
2979                 cookies = NULL;
2980         }
2981         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2982         off = (off_t)io.uio_offset;
2983         if (!cookies && !error)
2984                 error = NFSERR_PERM;
2985         if (info.v3) {
2986                 getret = VOP_GETATTR(vp, &at);
2987                 if (!error)
2988                         error = getret;
2989         }
2990         if (error) {
2991                 vrele(vp);
2992                 vp = NULL;
2993                 kfree((caddr_t)rbuf, M_TEMP);
2994                 if (cookies)
2995                         kfree((caddr_t)cookies, M_TEMP);
2996                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2997                                       NFSX_POSTOPATTR(info.v3), &error));
2998                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2999                 error = 0;
3000                 goto nfsmout;
3001         }
3002         if (io.uio_resid) {
3003                 siz -= io.uio_resid;
3004
3005                 /*
3006                  * If nothing read, return eof
3007                  * rpc reply
3008                  */
3009                 if (siz == 0) {
3010                         vrele(vp);
3011                         vp = NULL;
3012                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3013                                               NFSX_POSTOPATTR(info.v3) +
3014                                               NFSX_COOKIEVERF(info.v3) +
3015                                               2 * NFSX_UNSIGNED,
3016                                               &error));
3017                         if (info.v3) {
3018                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3019                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3020                                 txdr_hyper(at.va_filerev, tl);
3021                                 tl += 2;
3022                         } else
3023                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3024                         *tl++ = nfs_false;
3025                         *tl = nfs_true;
3026                         FREE((caddr_t)rbuf, M_TEMP);
3027                         FREE((caddr_t)cookies, M_TEMP);
3028                         error = 0;
3029                         goto nfsmout;
3030                 }
3031         }
3032
3033         /*
3034          * Check for degenerate cases of nothing useful read.
3035          * If so go try again
3036          */
3037         cpos = rbuf;
3038         cend = rbuf + siz;
3039         dp = (struct dirent *)cpos;
3040         cookiep = cookies;
3041         /*
3042          * For some reason FreeBSD's ufs_readdir() chooses to back the
3043          * directory offset up to a block boundary, so it is necessary to
3044          * skip over the records that preceed the requested offset. This
3045          * requires the assumption that file offset cookies monotonically
3046          * increase.
3047          */
3048         while (cpos < cend && ncookies > 0 &&
3049                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3050                  ((u_quad_t)(*cookiep)) <= toff)) {
3051                 dp = _DIRENT_NEXT(dp);
3052                 cpos = (char *)dp;
3053                 cookiep++;
3054                 ncookies--;
3055         }
3056         if (cpos >= cend || ncookies == 0) {
3057                 toff = off;
3058                 siz = fullsiz;
3059                 goto again;
3060         }
3061
3062         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3063         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3064                               NFSX_POSTOPATTR(info.v3) +
3065                               NFSX_COOKIEVERF(info.v3) + siz,
3066                               &error));
3067         if (info.v3) {
3068                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3069                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3070                 txdr_hyper(at.va_filerev, tl);
3071         }
3072         mp1 = mp2 = info.mb;
3073         bp = info.bpos;
3074         be = bp + M_TRAILINGSPACE(mp1);
3075
3076         /* Loop through the records and build reply */
3077         while (cpos < cend && ncookies > 0) {
3078                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3079                         nlen = dp->d_namlen;
3080                         rem = nfsm_rndup(nlen) - nlen;
3081                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3082                         if (info.v3)
3083                                 len += 2 * NFSX_UNSIGNED;
3084                         if (len > cnt) {
3085                                 eofflag = 0;
3086                                 break;
3087                         }
3088                         /*
3089                          * Build the directory record xdr from
3090                          * the dirent entry.
3091                          */
3092                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3093                         *tl = nfs_true;
3094                         bp += NFSX_UNSIGNED;
3095                         if (info.v3) {
3096                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3097                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3098                                 bp += NFSX_UNSIGNED;
3099                         }
3100                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3101                         *tl = txdr_unsigned(dp->d_ino);
3102                         bp += NFSX_UNSIGNED;
3103                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3104                         *tl = txdr_unsigned(nlen);
3105                         bp += NFSX_UNSIGNED;
3106
3107                         /* And loop around copying the name */
3108                         xfer = nlen;
3109                         cp = dp->d_name;
3110                         while (xfer > 0) {
3111                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3112                                 if ((bp+xfer) > be)
3113                                         tsiz = be-bp;
3114                                 else
3115                                         tsiz = xfer;
3116                                 bcopy(cp, bp, tsiz);
3117                                 bp += tsiz;
3118                                 xfer -= tsiz;
3119                                 if (xfer > 0)
3120                                         cp += tsiz;
3121                         }
3122                         /* And null pad to a int32_t boundary */
3123                         for (i = 0; i < rem; i++)
3124                                 *bp++ = '\0';
3125                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3126
3127                         /* Finish off the record */
3128                         if (info.v3) {
3129                                 *tl = txdr_unsigned(*cookiep >> 32);
3130                                 bp += NFSX_UNSIGNED;
3131                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3132                         }
3133                         *tl = txdr_unsigned(*cookiep);
3134                         bp += NFSX_UNSIGNED;
3135                 }
3136                 dp = _DIRENT_NEXT(dp);
3137                 cpos = (char *)dp;
3138                 cookiep++;
3139                 ncookies--;
3140         }
3141         vrele(vp);
3142         vp = NULL;
3143         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3144         *tl = nfs_false;
3145         bp += NFSX_UNSIGNED;
3146         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3147         if (eofflag)
3148                 *tl = nfs_true;
3149         else
3150                 *tl = nfs_false;
3151         bp += NFSX_UNSIGNED;
3152         if (mp1 != info.mb) {
3153                 if (bp < be)
3154                         mp1->m_len = bp - mtod(mp1, caddr_t);
3155         } else
3156                 mp1->m_len += bp - info.bpos;
3157         FREE((caddr_t)rbuf, M_TEMP);
3158         FREE((caddr_t)cookies, M_TEMP);
3159
3160 nfsmout:
3161         *mrq = info.mreq;
3162         if (vp)
3163                 vrele(vp);
3164         return(error);
3165 }
3166
3167 int
3168 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3169                   struct thread *td, struct mbuf **mrq)
3170 {
3171         struct sockaddr *nam = nfsd->nd_nam;
3172         struct ucred *cred = &nfsd->nd_cr;
3173         char *bp, *be;
3174         struct dirent *dp;
3175         caddr_t cp;
3176         u_int32_t *tl;
3177         struct mbuf *mp1, *mp2;
3178         char *cpos, *cend, *rbuf;
3179         struct vnode *vp = NULL, *nvp;
3180         struct mount *mp = NULL;
3181         struct flrep fl;
3182         nfsfh_t nfh;
3183         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3184         struct uio io;
3185         struct iovec iv;
3186         struct vattr va, at, *vap = &va;
3187         struct nfs_fattr *fp;
3188         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3189         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3190         u_quad_t off, toff, verf;
3191         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3192         struct nfsm_info info;
3193
3194         info.mrep = nfsd->nd_mrep;
3195         info.mreq = NULL;
3196         info.md = nfsd->nd_md;
3197         info.dpos = nfsd->nd_dpos;
3198         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3199
3200         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3201         fhp = &nfh.fh_generic;
3202         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3203         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3204         toff = fxdr_hyper(tl);
3205         tl += 2;
3206         verf = fxdr_hyper(tl);
3207         tl += 2;
3208         siz = fxdr_unsigned(int, *tl++);
3209         cnt = fxdr_unsigned(int, *tl);
3210         off = toff;
3211         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3212         xfer = NFS_SRVMAXDATA(nfsd);
3213         if ((unsigned)cnt > xfer)
3214                 cnt = xfer;
3215         if ((unsigned)siz > xfer)
3216                 siz = xfer;
3217         fullsiz = siz;
3218         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3219                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3220         if (!error && vp->v_type != VDIR) {
3221                 error = ENOTDIR;
3222                 vput(vp);
3223                 vp = NULL;
3224         }
3225         if (error) {
3226                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3227                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3228                 error = 0;
3229                 goto nfsmout;
3230         }
3231         error = getret = VOP_GETATTR(vp, &at);
3232 #if 0
3233         /*
3234          * XXX This check may be too strict for Solaris 2.5 clients.
3235          */
3236         if (!error && toff && verf && verf != at.va_filerev)
3237                 error = NFSERR_BAD_COOKIE;
3238 #endif
3239         if (!error) {
3240                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3241         }
3242         if (error) {
3243                 vput(vp);
3244                 vp = NULL;
3245                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3246                                       NFSX_V3POSTOPATTR, &error));
3247                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3248                 error = 0;
3249                 goto nfsmout;
3250         }
3251         vn_unlock(vp);
3252         MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
3253 again:
3254         iv.iov_base = rbuf;
3255         iv.iov_len = fullsiz;
3256         io.uio_iov = &iv;
3257         io.uio_iovcnt = 1;
3258         io.uio_offset = (off_t)off;
3259         io.uio_resid = fullsiz;
3260         io.uio_segflg = UIO_SYSSPACE;
3261         io.uio_rw = UIO_READ;
3262         io.uio_td = NULL;
3263         eofflag = 0;
3264         if (cookies) {
3265                 kfree((caddr_t)cookies, M_TEMP);
3266                 cookies = NULL;
3267         }
3268         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3269         off = (u_quad_t)io.uio_offset;
3270         getret = VOP_GETATTR(vp, &at);
3271         if (!cookies && !error)
3272                 error = NFSERR_PERM;
3273         if (!error)
3274                 error = getret;
3275         if (error) {
3276                 vrele(vp);
3277                 vp = NULL;
3278                 if (cookies)
3279                         kfree((caddr_t)cookies, M_TEMP);
3280                 kfree((caddr_t)rbuf, M_TEMP);
3281                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3282                                       NFSX_V3POSTOPATTR, &error));
3283                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3284                 error = 0;
3285                 goto nfsmout;
3286         }
3287         if (io.uio_resid) {
3288                 siz -= io.uio_resid;
3289
3290                 /*
3291                  * If nothing read, return eof
3292                  * rpc reply
3293                  */
3294                 if (siz == 0) {
3295                         vrele(vp);
3296                         vp = NULL;
3297                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3298                                               NFSX_V3POSTOPATTR +
3299                                               NFSX_V3COOKIEVERF +
3300                                               2 * NFSX_UNSIGNED,
3301                                               &error));
3302                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3303                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3304                         txdr_hyper(at.va_filerev, tl);
3305                         tl += 2;
3306                         *tl++ = nfs_false;
3307                         *tl = nfs_true;
3308                         FREE((caddr_t)cookies, M_TEMP);
3309                         FREE((caddr_t)rbuf, M_TEMP);
3310                         error = 0;
3311                         goto nfsmout;
3312                 }
3313         }
3314
3315         /*
3316          * Check for degenerate cases of nothing useful read.
3317          * If so go try again
3318          */
3319         cpos = rbuf;
3320         cend = rbuf + siz;
3321         dp = (struct dirent *)cpos;
3322         cookiep = cookies;
3323         /*
3324          * For some reason FreeBSD's ufs_readdir() chooses to back the
3325          * directory offset up to a block boundary, so it is necessary to
3326          * skip over the records that preceed the requested offset. This
3327          * requires the assumption that file offset cookies monotonically
3328          * increase.
3329          */
3330         while (cpos < cend && ncookies > 0 &&
3331                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3332                  ((u_quad_t)(*cookiep)) <= toff)) {
3333                 dp = _DIRENT_NEXT(dp);
3334                 cpos = (char *)dp;
3335                 cookiep++;
3336                 ncookies--;
3337         }
3338         if (cpos >= cend || ncookies == 0) {
3339                 toff = off;
3340                 siz = fullsiz;
3341                 goto again;
3342         }
3343
3344         /*
3345          * Probe one of the directory entries to see if the filesystem
3346          * supports VGET.
3347          */
3348         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3349                 error = NFSERR_NOTSUPP;
3350                 vrele(vp);
3351                 vp = NULL;
3352                 kfree((caddr_t)cookies, M_TEMP);
3353                 kfree((caddr_t)rbuf, M_TEMP);
3354                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3355                                       NFSX_V3POSTOPATTR, &error));
3356                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3357                 error = 0;
3358                 goto nfsmout;
3359         }
3360         if (nvp) {
3361                 vput(nvp);
3362                 nvp = NULL;
3363         }
3364             
3365         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3366                         2 * NFSX_UNSIGNED;
3367         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3368         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3369         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3370         txdr_hyper(at.va_filerev, tl);
3371         mp1 = mp2 = info.mb;
3372         bp = info.bpos;
3373         be = bp + M_TRAILINGSPACE(mp1);
3374
3375         /* Loop through the records and build reply */
3376         while (cpos < cend && ncookies > 0) {
3377                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3378                         nlen = dp->d_namlen;
3379                         rem = nfsm_rndup(nlen) - nlen;
3380
3381                         /*
3382                          * For readdir_and_lookup get the vnode using
3383                          * the file number.
3384                          */
3385                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3386                                 goto invalid;
3387                         bzero((caddr_t)nfhp, NFSX_V3FH);
3388                         nfhp->fh_fsid = fhp->fh_fsid;
3389                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3390                                 vput(nvp);
3391                                 nvp = NULL;
3392                                 goto invalid;
3393                         }
3394                         if (VOP_GETATTR(nvp, vap)) {
3395                                 vput(nvp);
3396                                 nvp = NULL;
3397                                 goto invalid;
3398                         }
3399                         vput(nvp);
3400                         nvp = NULL;
3401
3402                         /*
3403                          * If either the dircount or maxcount will be
3404                          * exceeded, get out now. Both of these lengths
3405                          * are calculated conservatively, including all
3406                          * XDR overheads.
3407                          */
3408                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3409                                 NFSX_V3POSTOPATTR);
3410                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3411                         if (len > cnt || dirlen > fullsiz) {
3412                                 eofflag = 0;
3413                                 break;
3414                         }
3415
3416                         /*
3417                          * Build the directory record xdr from
3418                          * the dirent entry.
3419                          */
3420                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3421                         nfsm_srvfattr(nfsd, vap, fp);
3422                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3423                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3424                         fl.fl_postopok = nfs_true;
3425                         fl.fl_fhok = nfs_true;
3426                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3427
3428                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3429                         *tl = nfs_true;
3430                         bp += NFSX_UNSIGNED;
3431                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3432                         *tl = txdr_unsigned(dp->d_ino >> 32);
3433                         bp += NFSX_UNSIGNED;
3434                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3435                         *tl = txdr_unsigned(dp->d_ino);
3436                         bp += NFSX_UNSIGNED;
3437                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3438                         *tl = txdr_unsigned(nlen);
3439                         bp += NFSX_UNSIGNED;
3440
3441                         /* And loop around copying the name */
3442                         xfer = nlen;
3443                         cp = dp->d_name;
3444                         while (xfer > 0) {
3445                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3446                                 if ((bp + xfer) > be)
3447                                         tsiz = be - bp;
3448                                 else
3449                                         tsiz = xfer;
3450                                 bcopy(cp, bp, tsiz);
3451                                 bp += tsiz;
3452                                 xfer -= tsiz;
3453                                 cp += tsiz;
3454                         }
3455                         /* And null pad to a int32_t boundary */
3456                         for (i = 0; i < rem; i++)
3457                                 *bp++ = '\0';
3458         
3459                         /*
3460                          * Now copy the flrep structure out.
3461                          */
3462                         xfer = sizeof (struct flrep);
3463                         cp = (caddr_t)&fl;
3464                         while (xfer > 0) {
3465                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3466                                 if ((bp + xfer) > be)
3467                                         tsiz = be - bp;
3468                                 else
3469                                         tsiz = xfer;
3470                                 bcopy(cp, bp, tsiz);
3471                                 bp += tsiz;
3472                                 xfer -= tsiz;
3473                                 cp += tsiz;
3474                         }
3475                 }
3476 invalid:
3477                 dp = _DIRENT_NEXT(dp);
3478                 cpos = (char *)dp;
3479                 cookiep++;
3480                 ncookies--;
3481         }
3482         vrele(vp);
3483         vp = NULL;
3484         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3485         *tl = nfs_false;
3486         bp += NFSX_UNSIGNED;
3487         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3488         if (eofflag)
3489                 *tl = nfs_true;
3490         else
3491                 *tl = nfs_false;
3492         bp += NFSX_UNSIGNED;
3493         if (mp1 != info.mb) {
3494                 if (bp < be)
3495                         mp1->m_len = bp - mtod(mp1, caddr_t);
3496         } else
3497                 mp1->m_len += bp - info.bpos;
3498         FREE((caddr_t)cookies, M_TEMP);
3499         FREE((caddr_t)rbuf, M_TEMP);
3500 nfsmout:
3501         *mrq = info.mreq;
3502         if (vp)
3503                 vrele(vp);
3504         return(error);
3505 }
3506
3507 /*
3508  * nfs commit service
3509  */
3510 int
3511 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3512              struct thread *td, struct mbuf **mrq)
3513 {
3514         struct sockaddr *nam = nfsd->nd_nam;
3515         struct ucred *cred = &nfsd->nd_cr;
3516         struct vattr bfor, aft;
3517         struct vnode *vp = NULL;
3518         struct mount *mp = NULL;
3519         nfsfh_t nfh;
3520         fhandle_t *fhp;
3521         u_int32_t *tl;
3522         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3523         u_quad_t off;
3524         struct nfsm_info info;
3525
3526         info.mrep = nfsd->nd_mrep;
3527         info.mreq = NULL;
3528         info.md = nfsd->nd_md;
3529         info.dpos = nfsd->nd_dpos;
3530
3531         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3532         fhp = &nfh.fh_generic;
3533         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3534         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3535
3536         /*
3537          * XXX At this time VOP_FSYNC() does not accept offset and byte
3538          * count parameters, so these arguments are useless (someday maybe).
3539          */
3540         off = fxdr_hyper(tl);
3541         tl += 2;
3542         cnt = fxdr_unsigned(int, *tl);
3543         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3544                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3545         if (error) {
3546                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3547                                       2 * NFSX_UNSIGNED, &error));
3548                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3549                                  aft_ret, &aft);
3550                 error = 0;
3551                 goto nfsmout;
3552         }
3553         for_ret = VOP_GETATTR(vp, &bfor);
3554
3555         if (cnt > MAX_COMMIT_COUNT) {
3556                 /*
3557                  * Give up and do the whole thing
3558                  */
3559                 if (vp->v_object &&
3560                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3561                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3562                 }
3563                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3564         } else {
3565                 /*
3566                  * Locate and synchronously write any buffers that fall
3567                  * into the requested range.  Note:  we are assuming that
3568                  * f_iosize is a power of 2.
3569                  */
3570                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3571                 int iomask = iosize - 1;
3572                 off_t loffset;
3573
3574                 /*
3575                  * Align to iosize boundry, super-align to page boundry.
3576                  */
3577                 if (off & iomask) {
3578                         cnt += off & iomask;
3579                         off &= ~(u_quad_t)iomask;
3580                 }
3581                 if (off & PAGE_MASK) {
3582                         cnt += off & PAGE_MASK;
3583                         off &= ~(u_quad_t)PAGE_MASK;
3584                 }
3585                 loffset = off;
3586
3587                 if (vp->v_object &&
3588                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3589                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE, (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3590                 }
3591
3592                 crit_enter();
3593                 while (cnt > 0) {
3594                         struct buf *bp;
3595
3596                         /*
3597                          * If we have a buffer and it is marked B_DELWRI we
3598                          * have to lock and write it.  Otherwise the prior
3599                          * write is assumed to have already been committed.
3600                          *
3601                          * WARNING: FINDBLK_TEST buffers represent stable
3602                          *          storage but not necessarily stable
3603                          *          content.  It is ok in this case.
3604                          */
3605                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3606                                 if (bp->b_flags & B_DELWRI)
3607                                         bp = findblk(vp, loffset, 0);
3608                                 else
3609                                         bp = NULL;
3610                         }
3611                         if (bp) {
3612                                 if (bp->b_flags & B_DELWRI) {
3613                                         bremfree(bp);
3614                                         bwrite(bp);
3615                                         ++nfs_commit_miss;
3616                                 } else {
3617                                         BUF_UNLOCK(bp);
3618                                 }
3619                         }
3620                         ++nfs_commit_blks;
3621                         if (cnt < iosize)
3622                                 break;
3623                         cnt -= iosize;
3624                         loffset += iosize;
3625                 }
3626                 crit_exit();
3627         }
3628
3629         aft_ret = VOP_GETATTR(vp, &aft);
3630         vput(vp);
3631         vp = NULL;
3632         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3633                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3634                               &error));
3635         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3636                          aft_ret, &aft);
3637         if (!error) {
3638                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3639                 if (nfsver.tv_sec == 0)
3640                         nfsver = boottime;
3641                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3642                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3643         } else {
3644                 error = 0;
3645         }
3646 nfsmout:
3647         *mrq = info.mreq;
3648         if (vp)
3649                 vput(vp);
3650         return(error);
3651 }
3652
3653 /*
3654  * nfs statfs service
3655  */
3656 int
3657 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3658              struct thread *td, struct mbuf **mrq)
3659 {
3660         struct sockaddr *nam = nfsd->nd_nam;
3661         struct ucred *cred = &nfsd->nd_cr;
3662         struct statfs *sf;
3663         struct nfs_statfs *sfp;
3664         int error = 0, rdonly, getret = 1;
3665         struct vnode *vp = NULL;
3666         struct mount *mp = NULL;
3667         struct vattr at;
3668         nfsfh_t nfh;
3669         fhandle_t *fhp;
3670         struct statfs statfs;
3671         u_quad_t tval;
3672         struct nfsm_info info;
3673
3674         info.mrep = nfsd->nd_mrep;
3675         info.mreq = NULL;
3676         info.md = nfsd->nd_md;
3677         info.dpos = nfsd->nd_dpos;
3678         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3679
3680         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3681         fhp = &nfh.fh_generic;
3682         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3683         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3684                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3685         if (error) {
3686                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3687                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3688                 error = 0;
3689                 goto nfsmout;
3690         }
3691         sf = &statfs;
3692         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3693         getret = VOP_GETATTR(vp, &at);
3694         vput(vp);
3695         vp = NULL;
3696         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3697                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3698                               &error));
3699         if (info.v3)
3700                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3701         if (error) {
3702                 error = 0;
3703                 goto nfsmout;
3704         }
3705         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3706         if (info.v3) {
3707                 tval = (u_quad_t)sf->f_blocks;
3708                 tval *= (u_quad_t)sf->f_bsize;
3709                 txdr_hyper(tval, &sfp->sf_tbytes);
3710                 tval = (u_quad_t)sf->f_bfree;
3711                 tval *= (u_quad_t)sf->f_bsize;
3712                 txdr_hyper(tval, &sfp->sf_fbytes);
3713                 tval = (u_quad_t)sf->f_bavail;
3714                 tval *= (u_quad_t)sf->f_bsize;
3715                 txdr_hyper(tval, &sfp->sf_abytes);
3716                 sfp->sf_tfiles.nfsuquad[0] = 0;
3717                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3718                 sfp->sf_ffiles.nfsuquad[0] = 0;
3719                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3720                 sfp->sf_afiles.nfsuquad[0] = 0;
3721                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3722                 sfp->sf_invarsec = 0;
3723         } else {
3724                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3725                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3726                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3727                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3728                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3729         }
3730 nfsmout:
3731         *mrq = info.mreq;
3732         if (vp)
3733                 vput(vp);
3734         return(error);
3735 }
3736
3737 /*
3738  * nfs fsinfo service
3739  */
3740 int
3741 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3742              struct thread *td, struct mbuf **mrq)
3743 {
3744         struct sockaddr *nam = nfsd->nd_nam;
3745         struct ucred *cred = &nfsd->nd_cr;
3746         struct nfsv3_fsinfo *sip;
3747         int error = 0, rdonly, getret = 1, pref;
3748         struct vnode *vp = NULL;
3749         struct mount *mp = NULL;
3750         struct vattr at;
3751         nfsfh_t nfh;
3752         fhandle_t *fhp;
3753         u_quad_t maxfsize;
3754         struct statfs sb;
3755         struct nfsm_info info;
3756
3757         info.mrep = nfsd->nd_mrep;
3758         info.mreq = NULL;
3759         info.md = nfsd->nd_md;
3760         info.dpos = nfsd->nd_dpos;
3761
3762         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3763         fhp = &nfh.fh_generic;
3764         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3765         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3766                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3767         if (error) {
3768                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3769                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3770                 error = 0;
3771                 goto nfsmout;
3772         }
3773
3774         /* XXX Try to make a guess on the max file size. */
3775         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3776         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3777
3778         getret = VOP_GETATTR(vp, &at);
3779         vput(vp);
3780         vp = NULL;
3781         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3782                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3783         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3784         sip = nfsm_build(&info, NFSX_V3FSINFO);
3785
3786         /*
3787          * XXX
3788          * There should be file system VFS OP(s) to get this information.
3789          * For now, assume ufs.
3790          */
3791         if (slp->ns_so->so_type == SOCK_DGRAM)
3792                 pref = NFS_MAXDGRAMDATA;
3793         else
3794                 pref = NFS_MAXDATA;
3795         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3796         sip->fs_rtpref = txdr_unsigned(pref);
3797         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3798         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3799         sip->fs_wtpref = txdr_unsigned(pref);
3800         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3801         sip->fs_dtpref = txdr_unsigned(pref);
3802         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3803         sip->fs_timedelta.nfsv3_sec = 0;
3804         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3805         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3806                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3807                 NFSV3FSINFO_CANSETTIME);
3808 nfsmout:
3809         *mrq = info.mreq;
3810         if (vp)
3811                 vput(vp);
3812         return(error);
3813 }
3814
3815 /*
3816  * nfs pathconf service
3817  */
3818 int
3819 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3820                struct thread *td, struct mbuf **mrq)
3821 {
3822         struct sockaddr *nam = nfsd->nd_nam;
3823         struct ucred *cred = &nfsd->nd_cr;
3824         struct nfsv3_pathconf *pc;
3825         int error = 0, rdonly, getret = 1;
3826         register_t linkmax, namemax, chownres, notrunc;
3827         struct vnode *vp = NULL;
3828         struct mount *mp = NULL;
3829         struct vattr at;
3830         nfsfh_t nfh;
3831         fhandle_t *fhp;
3832         struct nfsm_info info;
3833
3834         info.mrep = nfsd->nd_mrep;
3835         info.mreq = NULL;
3836         info.md = nfsd->nd_md;
3837         info.dpos = nfsd->nd_dpos;
3838
3839         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3840         fhp = &nfh.fh_generic;
3841         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3842         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3843                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3844         if (error) {
3845                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3846                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3847                 error = 0;
3848                 goto nfsmout;
3849         }
3850         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3851         if (!error)
3852                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3853         if (!error)
3854                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3855         if (!error)
3856                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3857         getret = VOP_GETATTR(vp, &at);
3858         vput(vp);
3859         vp = NULL;
3860         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3861                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3862                               &error));
3863         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3864         if (error) {
3865                 error = 0;
3866                 goto nfsmout;
3867         }
3868         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3869
3870         pc->pc_linkmax = txdr_unsigned(linkmax);
3871         pc->pc_namemax = txdr_unsigned(namemax);
3872         pc->pc_notrunc = txdr_unsigned(notrunc);
3873         pc->pc_chownrestricted = txdr_unsigned(chownres);
3874
3875         /*
3876          * These should probably be supported by VOP_PATHCONF(), but
3877          * until msdosfs is exportable (why would you want to?), the
3878          * Unix defaults should be ok.
3879          */
3880         pc->pc_caseinsensitive = nfs_false;
3881         pc->pc_casepreserving = nfs_true;
3882 nfsmout:
3883         *mrq = info.mreq;
3884         if (vp) 
3885                 vput(vp);
3886         return(error);
3887 }
3888
3889 /*
3890  * Null operation, used by clients to ping server
3891  */
3892 /* ARGSUSED */
3893 int
3894 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3895            struct thread *td, struct mbuf **mrq)
3896 {
3897         struct nfsm_info info;
3898         int error = NFSERR_RETVOID;
3899
3900         info.mrep = nfsd->nd_mrep;
3901         info.mreq = NULL;
3902
3903         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3904         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3905 nfsmout:
3906         *mrq = info.mreq;
3907         return (error);
3908 }
3909
3910 /*
3911  * No operation, used for obsolete procedures
3912  */
3913 /* ARGSUSED */
3914 int
3915 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3916            struct thread *td, struct mbuf **mrq)
3917 {
3918         struct nfsm_info info;
3919         int error;
3920
3921         info.mrep = nfsd->nd_mrep;
3922         info.mreq = NULL;
3923
3924         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3925         if (nfsd->nd_repstat)
3926                 error = nfsd->nd_repstat;
3927         else
3928                 error = EPROCUNAVAIL;
3929         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3930         error = 0;
3931 nfsmout:
3932         *mrq = info.mreq;
3933         return (error);
3934 }
3935
3936 /*
3937  * Perform access checking for vnodes obtained from file handles that would
3938  * refer to files already opened by a Unix client. You cannot just use
3939  * vn_writechk() and VOP_ACCESS() for two reasons.
3940  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3941  * 2 - The owner is to be given access irrespective of mode bits for some
3942  *     operations, so that processes that chmod after opening a file don't
3943  *     break. I don't like this because it opens a security hole, but since
3944  *     the nfs server opens a security hole the size of a barn door anyhow,
3945  *     what the heck.
3946  *
3947  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3948  * will return EPERM instead of EACCESS. EPERM is always an error.
3949  */
3950 static int
3951 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3952              int rdonly, struct thread *td, int override)
3953 {
3954         struct vattr vattr;
3955         int error;
3956
3957         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3958         if (flags & VWRITE) {
3959                 /* Just vn_writechk() changed to check rdonly */
3960                 /*
3961                  * Disallow write attempts on read-only file systems;
3962                  * unless the file is a socket or a block or character
3963                  * device resident on the file system.
3964                  */
3965                 if (rdonly || 
3966                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3967                         switch (vp->v_type) {
3968                         case VREG:
3969                         case VDIR:
3970                         case VLNK:
3971                                 return (EROFS);
3972                         default:
3973                                 break;
3974                         }
3975                 }
3976                 /*
3977                  * If there's shared text associated with
3978                  * the inode, we can't allow writing.
3979                  */
3980                 if (vp->v_flag & VTEXT)
3981                         return (ETXTBSY);
3982         }
3983         error = VOP_GETATTR(vp, &vattr);
3984         if (error)
3985                 return (error);
3986         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
3987         /*
3988          * Allow certain operations for the owner (reads and writes
3989          * on files that are already open).
3990          */
3991         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3992                 error = 0;
3993         return error;
3994 }
3995 #endif /* NFS_NOSERVER */
3996