NFS server: Record bwrite() error in nfsrv_commit.
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  */
39
40 /*
41  * nfs version 2 and 3 server calls to vnode ops
42  * - these routines generally have 3 phases
43  *   1 - break down and validate rpc request in mbuf list
44  *   2 - do the vnode ops for the request
45  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
46  *   3 - build the rpc reply in an mbuf list
47  *   nb:
48  *      - do not mix the phases, since the nfsm_?? macros can return failures
49  *        on a bad rpc or similar and do not do any vrele() or vput()'s
50  *
51  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
52  *      error number iff error != 0 whereas
53  *      returning an error from the server function implies a fatal error
54  *      such as a badly constructed rpc request that should be dropped without
55  *      a reply.
56  *      For Version 3, nfsm_reply() does not return for the error case, since
57  *      most version 3 rpcs return more than the status for error cases.
58  *
59  * Other notes:
60  *      Warning: always pay careful attention to resource cleanup on return
61  *      and note that nfsm_*() macros can terminate a procedure on certain
62  *      errors.
63  */
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/proc.h>
68 #include <sys/priv.h>
69 #include <sys/nlookup.h>
70 #include <sys/namei.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 #include <sys/mount.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/dirent.h>
79 #include <sys/stat.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82 #include <sys/buf.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87
88 #include <sys/buf2.h>
89
90 #include <sys/thread2.h>
91
92 #include "nfsproto.h"
93 #include "rpcv2.h"
94 #include "nfs.h"
95 #include "xdr_subs.h"
96 #include "nfsm_subs.h"
97
/*
 * Debug trace hook.  With NFSRV_DEBUG defined the argument is handed to
 * kprintf; otherwise the macro expands to nothing.  The argument must carry
 * its own parentheses, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)).
 */
#ifndef NFSRV_DEBUG
#define nfsdbprintf(info)
#else
#define nfsdbprintf(info)	kprintf info
#endif
103
104 #define MAX_COMMIT_COUNT        (1024 * 1024)
105
106 #define NUM_HEURISTIC           1031
107 #define NHUSE_INIT              64
108 #define NHUSE_INC               16
109 #define NHUSE_MAX               2048
110
111 static struct nfsheur {
112     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
113     off_t nh_nextoff;           /* next offset for sequential detection */
114     int nh_use;                 /* use count for selection */
115     int nh_seqcount;            /* heuristic */
116 } nfsheur[NUM_HEURISTIC];
117
118 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
119                       NFFIFO, NFNON };
120 #ifndef NFS_NOSERVER 
121 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
122                       NFCHR, NFNON };
123
124 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
125 int nfsrvw_procrastinate_v3 = 0;
126
127 static struct timespec  nfsver;
128
129 SYSCTL_DECL(_vfs_nfs);
130
131 int nfs_async;
132 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
133     "Enable unstable and fast writes");
134 static int nfs_commit_blks;
135 static int nfs_commit_miss;
136 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
137     "Number of committed blocks");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
139     "Number of nfs blocks committed from dirty buffers");
140
141 static int nfsrv_access (struct mount *, struct vnode *, int,
142                         struct ucred *, int, struct thread *, int);
143 static void nfsrvw_coalesce (struct nfsrv_descript *,
144                 struct nfsrv_descript *);
145
146 /*
147  * Heuristic to detect sequential operation.
148  */
149 static struct nfsheur *
150 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp, int writeop)
151 {
152         struct nfsheur *nh;
153         int hi, try;
154
155         /* Locate best candidate */
156         try = 32;
157         hi = ((int)(vm_offset_t) vp / sizeof(struct vnode)) % NUM_HEURISTIC;
158         nh = &nfsheur[hi];
159
160         while (try--) {
161                 if (nfsheur[hi].nh_vp == vp) {
162                         nh = &nfsheur[hi];
163                         break;
164                 }
165                 if (nfsheur[hi].nh_use > 0)
166                         --nfsheur[hi].nh_use;
167                 hi = (hi + 1) % NUM_HEURISTIC;
168                 if (nfsheur[hi].nh_use < nh->nh_use)
169                         nh = &nfsheur[hi];
170         }
171
172         /* Initialize hint if this is a new file */
173         if (nh->nh_vp != vp) {
174                 nh->nh_vp = vp;
175                 nh->nh_nextoff = uio->uio_offset;
176                 nh->nh_use = NHUSE_INIT;
177                 if (uio->uio_offset == 0)
178                         nh->nh_seqcount = 4;
179                 else
180                         nh->nh_seqcount = 1;
181         }
182
183         /*
184          * Calculate heuristic
185          *
186          * See vfs_vnops.c:sequential_heuristic().
187          */
188         if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
189             uio->uio_offset == nh->nh_nextoff) {
190                 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
191                 if (nh->nh_seqcount > IO_SEQMAX)
192                         nh->nh_seqcount = IO_SEQMAX;
193         } else if (nh->nh_seqcount > 1) {
194                 nh->nh_seqcount = 1;
195         } else {
196                 nh->nh_seqcount = 0;
197         }
198         nh->nh_use += NHUSE_INC;
199         if (nh->nh_use > NHUSE_MAX)
200                 nh->nh_use = NHUSE_MAX;
201         return (nh);
202 }
203
204 /*
205  * nfs v3 access service
206  */
207 int
208 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
209               struct thread *td, struct mbuf **mrq)
210 {
211         struct sockaddr *nam = nfsd->nd_nam;
212         struct ucred *cred = &nfsd->nd_cr;
213         struct vnode *vp = NULL;
214         struct mount *mp = NULL;
215         nfsfh_t nfh;
216         fhandle_t *fhp;
217         int error = 0, rdonly, getret;
218         struct vattr vattr, *vap = &vattr;
219         u_long testmode, nfsmode;
220         struct nfsm_info info;
221         u_int32_t *tl;
222
223         info.dpos = nfsd->nd_dpos;
224         info.md = nfsd->nd_md;
225         info.mrep = nfsd->nd_mrep;
226         info.mreq = NULL;
227
228         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
229         fhp = &nfh.fh_generic;
230         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
231         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
232         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
233             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
234         if (error) {
235                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
236                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
237                 error = 0;
238                 goto nfsmout;
239         }
240         nfsmode = fxdr_unsigned(u_int32_t, *tl);
241         if ((nfsmode & NFSV3ACCESS_READ) &&
242                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
243                 nfsmode &= ~NFSV3ACCESS_READ;
244         if (vp->v_type == VDIR)
245                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
246                         NFSV3ACCESS_DELETE);
247         else
248                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
249         if ((nfsmode & testmode) &&
250                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
251                 nfsmode &= ~testmode;
252         if (vp->v_type == VDIR)
253                 testmode = NFSV3ACCESS_LOOKUP;
254         else
255                 testmode = NFSV3ACCESS_EXECUTE;
256         if ((nfsmode & testmode) &&
257                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
258                 nfsmode &= ~testmode;
259         getret = VOP_GETATTR(vp, vap);
260         vput(vp);
261         vp = NULL;
262         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
263                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
264         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
265         tl = nfsm_build(&info, NFSX_UNSIGNED);
266         *tl = txdr_unsigned(nfsmode);
267 nfsmout:
268         *mrq = info.mreq;
269         if (vp)
270                 vput(vp);
271         return(error);
272 }
273
274 /*
275  * nfs getattr service
276  */
277 int
278 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
279               struct thread *td, struct mbuf **mrq)
280 {
281         struct sockaddr *nam = nfsd->nd_nam;
282         struct ucred *cred = &nfsd->nd_cr;
283         struct nfs_fattr *fp;
284         struct vattr va;
285         struct vattr *vap = &va;
286         struct vnode *vp = NULL;
287         struct mount *mp = NULL;
288         nfsfh_t nfh;
289         fhandle_t *fhp;
290         int error = 0, rdonly;
291         struct nfsm_info info;
292
293         info.mrep = nfsd->nd_mrep;
294         info.md = nfsd->nd_md;
295         info.dpos = nfsd->nd_dpos;
296         info.mreq = NULL;
297
298         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
299         fhp = &nfh.fh_generic;
300         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
301         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
302                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
303         if (error) {
304                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
305                 error = 0;
306                 goto nfsmout;
307         }
308         error = VOP_GETATTR(vp, vap);
309         vput(vp);
310         vp = NULL;
311         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
312                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
313         if (error) {
314                 error = 0;
315                 goto nfsmout;
316         }
317         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
318         nfsm_srvfattr(nfsd, vap, fp);
319         /* fall through */
320
321 nfsmout:
322         *mrq = info.mreq;
323         if (vp)
324                 vput(vp);
325         return(error);
326 }
327
328 /*
329  * nfs setattr service
330  */
331 int
332 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
333               struct thread *td, struct mbuf **mrq)
334 {
335         struct sockaddr *nam = nfsd->nd_nam;
336         struct ucred *cred = &nfsd->nd_cr;
337         struct vattr va, preat;
338         struct vattr *vap = &va;
339         struct nfsv2_sattr *sp;
340         struct nfs_fattr *fp;
341         struct vnode *vp = NULL;
342         struct mount *mp = NULL;
343         nfsfh_t nfh;
344         fhandle_t *fhp;
345         u_int32_t *tl;
346         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
347         int gcheck = 0;
348         struct timespec guard;
349         struct nfsm_info info;
350
351         info.mrep = nfsd->nd_mrep;
352         info.mreq = NULL;
353         info.md = nfsd->nd_md;
354         info.dpos = nfsd->nd_dpos;
355         info.v3 = (nfsd->nd_flag & ND_NFSV3);
356
357         guard.tv_sec = 0;       /* fix compiler warning */
358         guard.tv_nsec = 0;
359
360         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
361         fhp = &nfh.fh_generic;
362         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
363         VATTR_NULL(vap);
364         if (info.v3) {
365                 ERROROUT(nfsm_srvsattr(&info, vap));
366                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
367                 gcheck = fxdr_unsigned(int, *tl);
368                 if (gcheck) {
369                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
370                         fxdr_nfsv3time(tl, &guard);
371                 }
372         } else {
373                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
374                 /*
375                  * Nah nah nah nah na nah
376                  * There is a bug in the Sun client that puts 0xffff in the mode
377                  * field of sattr when it should put in 0xffffffff. The u_short
378                  * doesn't sign extend.
379                  * --> check the low order 2 bytes for 0xffff
380                  */
381                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
382                         vap->va_mode = nfstov_mode(sp->sa_mode);
383                 if (sp->sa_uid != nfs_xdrneg1)
384                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
385                 if (sp->sa_gid != nfs_xdrneg1)
386                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
387                 if (sp->sa_size != nfs_xdrneg1)
388                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
389                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
390 #ifdef notyet
391                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
392 #else
393                         vap->va_atime.tv_sec =
394                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
395                         vap->va_atime.tv_nsec = 0;
396 #endif
397                 }
398                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
399                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
400
401         }
402
403         /*
404          * Now that we have all the fields, lets do it.
405          */
406         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
407                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
408         if (error) {
409                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
410                                       2 * NFSX_UNSIGNED, &error));
411                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
412                                  postat_ret, vap);
413                 error = 0;
414                 goto nfsmout;
415         }
416
417         /*
418          * vp now an active resource, pay careful attention to cleanup
419          */
420
421         if (info.v3) {
422                 error = preat_ret = VOP_GETATTR(vp, &preat);
423                 if (!error && gcheck &&
424                         (preat.va_ctime.tv_sec != guard.tv_sec ||
425                          preat.va_ctime.tv_nsec != guard.tv_nsec))
426                         error = NFSERR_NOT_SYNC;
427                 if (error) {
428                         vput(vp);
429                         vp = NULL;
430                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
431                                               NFSX_WCCDATA(info.v3), &error));
432                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
433                                          postat_ret, vap);
434                         error = 0;
435                         goto nfsmout;
436                 }
437         }
438
439         /*
440          * If the size is being changed write acces is required, otherwise
441          * just check for a read only file system.
442          */
443         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
444                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
445                         error = EROFS;
446                         goto out;
447                 }
448         } else {
449                 if (vp->v_type == VDIR) {
450                         error = EISDIR;
451                         goto out;
452                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
453                             td, 0)) != 0){ 
454                         goto out;
455                 }
456         }
457         error = VOP_SETATTR(vp, vap, cred);
458         postat_ret = VOP_GETATTR(vp, vap);
459         if (!error)
460                 error = postat_ret;
461 out:
462         vput(vp);
463         vp = NULL;
464         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
465                    NFSX_WCCORFATTR(info.v3), &error));
466         if (info.v3) {
467                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
468                                  postat_ret, vap);
469                 error = 0;
470                 goto nfsmout;
471         } else {
472                 fp = nfsm_build(&info, NFSX_V2FATTR);
473                 nfsm_srvfattr(nfsd, vap, fp);
474         }
475         /* fall through */
476
477 nfsmout:
478         *mrq = info.mreq;
479         if (vp)
480                 vput(vp);
481         return(error);
482 }
483
484 /*
485  * nfs lookup rpc
486  */
487 int
488 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
489              struct thread *td, struct mbuf **mrq)
490 {
491         struct sockaddr *nam = nfsd->nd_nam;
492         struct ucred *cred = &nfsd->nd_cr;
493         struct nfs_fattr *fp;
494         struct nlookupdata nd;
495         struct vnode *vp;
496         struct vnode *dirp;
497         struct nchandle nch;
498         nfsfh_t nfh;
499         fhandle_t *fhp;
500         int error = 0, len, dirattr_ret = 1;
501         int pubflag;
502         struct vattr va, dirattr, *vap = &va;
503         struct nfsm_info info;
504
505         info.mrep = nfsd->nd_mrep;
506         info.mreq = NULL;
507         info.md = nfsd->nd_md;
508         info.dpos = nfsd->nd_dpos;
509         info.v3 = (nfsd->nd_flag & ND_NFSV3);
510
511         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
512         nlookup_zero(&nd);
513         dirp = NULL;
514         vp = NULL;
515
516         fhp = &nfh.fh_generic;
517         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
518         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
519
520         pubflag = nfs_ispublicfh(fhp);
521
522         error = nfs_namei(&nd, cred, 0, NULL, &vp,
523                 fhp, len, slp, nam, &info.md, &info.dpos,
524                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
525
526         /*
527          * namei failure, only dirp to cleanup.  Clear out garbarge from
528          * structure in case macros jump to nfsmout.
529          */
530
531         if (error) {
532                 if (dirp) {
533                         if (info.v3)
534                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
535                         vrele(dirp);
536                         dirp = NULL;
537                 }
538                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
539                                       NFSX_POSTOPATTR(info.v3), &error));
540                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
541                 error = 0;
542                 goto nfsmout;
543         }
544
545         /*
546          * Locate index file for public filehandle
547          *
548          * error is 0 on entry and 0 on exit from this block.
549          */
550
551         if (pubflag) {
552                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
553                         /*
554                          * Setup call to lookup() to see if we can find
555                          * the index file. Arguably, this doesn't belong
556                          * in a kernel.. Ugh.  If an error occurs, do not
557                          * try to install an index file and then clear the
558                          * error.
559                          *
560                          * When we replace nd with ind and redirect ndp,
561                          * maintenance of ni_startdir and ni_vp shift to
562                          * ind and we have to clean them up in the old nd.
563                          * However, the cnd resource continues to be maintained
564                          * via the original nd.  Confused?  You aren't alone!
565                          */
566                         vn_unlock(vp);
567                         cache_copy(&nd.nl_nch, &nch);
568                         nlookup_done(&nd);
569                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
570                                                 UIO_SYSSPACE, 0, cred, &nch);
571                         cache_drop(&nch);
572                         if (error == 0)
573                                 error = nlookup(&nd);
574
575                         if (error == 0) {
576                                 /*
577                                  * Found an index file. Get rid of
578                                  * the old references.  transfer vp and
579                                  * load up the new vp.  Fortunately we do
580                                  * not have to deal with dvp, that would be
581                                  * a huge mess.
582                                  */
583                                 if (dirp)       
584                                         vrele(dirp);
585                                 dirp = vp;
586                                 vp = NULL;
587                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
588                                                         LK_EXCLUSIVE, &vp);
589                                 KKASSERT(error == 0);
590                         }
591                         error = 0;
592                 }
593                 /*
594                  * If the public filehandle was used, check that this lookup
595                  * didn't result in a filehandle outside the publicly exported
596                  * filesystem.  We clear the poor vp here to avoid lockups due
597                  * to NFS I/O.
598                  */
599
600                 if (vp->v_mount != nfs_pub.np_mount) {
601                         vput(vp);
602                         vp = NULL;
603                         error = EPERM;
604                 }
605         }
606
607         if (dirp) {
608                 if (info.v3)
609                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
610                 vrele(dirp);
611                 dirp = NULL;
612         }
613
614         /*
615          * Resources at this point:
616          *      ndp->ni_vp      may not be NULL
617          *
618          */
619
620         if (error) {
621                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
622                                       NFSX_POSTOPATTR(info.v3), &error));
623                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
624                 error = 0;
625                 goto nfsmout;
626         }
627
628         /*
629          * Clear out some resources prior to potentially blocking.  This
630          * is not as critical as ni_dvp resources in other routines, but
631          * it helps.
632          */
633         nlookup_done(&nd);
634
635         /*
636          * Get underlying attribute, then release remaining resources ( for
637          * the same potential blocking reason ) and reply.
638          */
639         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
640         error = VFS_VPTOFH(vp, &fhp->fh_fid);
641         if (!error)
642                 error = VOP_GETATTR(vp, vap);
643
644         vput(vp);
645         vp = NULL;
646         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
647                               NFSX_SRVFH(info.v3) +
648                               NFSX_POSTOPORFATTR(info.v3) +
649                               NFSX_POSTOPATTR(info.v3),
650                               &error));
651         if (error) {
652                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
653                 error = 0;
654                 goto nfsmout;
655         }
656         nfsm_srvfhtom(&info, fhp);
657         if (info.v3) {
658                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
659                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
660         } else {
661                 fp = nfsm_build(&info, NFSX_V2FATTR);
662                 nfsm_srvfattr(nfsd, vap, fp);
663         }
664
665 nfsmout:
666         *mrq = info.mreq;
667         if (dirp)
668                 vrele(dirp);
669         nlookup_done(&nd);              /* may be called twice */
670         if (vp)
671                 vput(vp);
672         return (error);
673 }
674
675 /*
676  * nfs readlink service
677  */
678 int
679 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
680                struct thread *td, struct mbuf **mrq)
681 {
682         struct sockaddr *nam = nfsd->nd_nam;
683         struct ucred *cred = &nfsd->nd_cr;
684         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
685         struct iovec *ivp = iv;
686         u_int32_t *tl;
687         int error = 0, rdonly, i, tlen, len, getret;
688         struct mbuf *mp1, *mp2, *mp3;
689         struct vnode *vp = NULL;
690         struct mount *mp = NULL;
691         struct vattr attr;
692         nfsfh_t nfh;
693         fhandle_t *fhp;
694         struct uio io, *uiop = &io;
695         struct nfsm_info info;
696
697         info.mrep = nfsd->nd_mrep;
698         info.mreq = NULL;
699         info.md = nfsd->nd_md;
700         info.dpos = nfsd->nd_dpos;
701         info.v3 = (nfsd->nd_flag & ND_NFSV3);
702
703         bzero(&io, sizeof(struct uio));
704
705         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
706 #ifndef nolint
707         mp2 = NULL;
708 #endif
709         mp3 = NULL;
710         fhp = &nfh.fh_generic;
711         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
712         len = 0;
713         i = 0;
714         while (len < NFS_MAXPATHLEN) {
715                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
716                 mp1->m_len = MCLBYTES;
717                 if (len == 0)
718                         mp3 = mp2 = mp1;
719                 else {
720                         mp2->m_next = mp1;
721                         mp2 = mp1;
722                 }
723                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
724                         mp1->m_len = NFS_MAXPATHLEN-len;
725                         len = NFS_MAXPATHLEN;
726                 } else
727                         len += mp1->m_len;
728                 ivp->iov_base = mtod(mp1, caddr_t);
729                 ivp->iov_len = mp1->m_len;
730                 i++;
731                 ivp++;
732         }
733         uiop->uio_iov = iv;
734         uiop->uio_iovcnt = i;
735         uiop->uio_offset = 0;
736         uiop->uio_resid = len;
737         uiop->uio_rw = UIO_READ;
738         uiop->uio_segflg = UIO_SYSSPACE;
739         uiop->uio_td = NULL;
740         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
741                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
742         if (error) {
743                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
744                                       2 * NFSX_UNSIGNED, &error));
745                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
746                 error = 0;
747                 goto nfsmout;
748         }
749         if (vp->v_type != VLNK) {
750                 if (info.v3)
751                         error = EINVAL;
752                 else
753                         error = ENXIO;
754                 goto out;
755         }
756         error = VOP_READLINK(vp, uiop, cred);
757 out:
758         getret = VOP_GETATTR(vp, &attr);
759         vput(vp);
760         vp = NULL;
761         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
762                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
763                              &error));
764         if (info.v3) {
765                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
766                 if (error) {
767                         error = 0;
768                         goto nfsmout;
769                 }
770         }
771         if (uiop->uio_resid > 0) {
772                 len -= uiop->uio_resid;
773                 tlen = nfsm_rndup(len);
774                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
775         }
776         tl = nfsm_build(&info, NFSX_UNSIGNED);
777         *tl = txdr_unsigned(len);
778         info.mb->m_next = mp3;
779         mp3 = NULL;
780 nfsmout:
781         *mrq = info.mreq;
782         if (mp3)
783                 m_freem(mp3);
784         if (vp)
785                 vput(vp);
786         return(error);
787 }
788
789 /*
790  * nfs read service
791  */
792 int
793 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
794            struct thread *td, struct mbuf **mrq)
795 {
796         struct nfsm_info info;
797         struct sockaddr *nam = nfsd->nd_nam;
798         struct ucred *cred = &nfsd->nd_cr;
799         struct iovec *iv;
800         struct iovec *iv2;
801         struct mbuf *m;
802         struct nfs_fattr *fp;
803         u_int32_t *tl;
804         int i;
805         int reqlen;
806         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
807         struct mbuf *m2;
808         struct vnode *vp = NULL;
809         struct mount *mp = NULL;
810         nfsfh_t nfh;
811         fhandle_t *fhp;
812         struct uio io, *uiop = &io;
813         struct vattr va, *vap = &va;
814         struct nfsheur *nh;
815         off_t off;
816         int ioflag = 0;
817
818         info.mrep = nfsd->nd_mrep;
819         info.mreq = NULL;
820         info.md = nfsd->nd_md;
821         info.dpos = nfsd->nd_dpos;
822         info.v3 = (nfsd->nd_flag & ND_NFSV3);
823
824         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
825         fhp = &nfh.fh_generic;
826         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
827         if (info.v3) {
828                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
829                 off = fxdr_hyper(tl);
830         } else {
831                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
832                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
833         }
834         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
835                                             NFS_SRVMAXDATA(nfsd), &error));
836
837         /*
838          * Reference vp.  If an error occurs, vp will be invalid, but we
839          * have to NULL it just in case.  The macros might goto nfsmout
840          * as well.
841          */
842
843         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
844                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
845         if (error) {
846                 vp = NULL;
847                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
848                                       2 * NFSX_UNSIGNED, &error));
849                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
850                 error = 0;
851                 goto nfsmout;
852         }
853
854         if (vp->v_type != VREG) {
855                 if (info.v3)
856                         error = EINVAL;
857                 else
858                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
859         }
860         if (!error) {
861             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
862                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
863         }
864         getret = VOP_GETATTR(vp, vap);
865         if (!error)
866                 error = getret;
867         if (error) {
868                 vput(vp);
869                 vp = NULL;
870                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
871                                       NFSX_POSTOPATTR(info.v3), &error));
872                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
873                 error = 0;
874                 goto nfsmout;
875         }
876
877         /*
878          * Calculate byte count to read
879          */
880
881         if (off >= vap->va_size)
882                 cnt = 0;
883         else if ((off + reqlen) > vap->va_size)
884                 cnt = vap->va_size - off;
885         else
886                 cnt = reqlen;
887
888         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
889                               NFSX_POSTOPORFATTR(info.v3) +
890                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
891                               &error));
892         if (info.v3) {
893                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
894                 *tl++ = nfs_true;
895                 fp = (struct nfs_fattr *)tl;
896                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
897         } else {
898                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
899                 fp = (struct nfs_fattr *)tl;
900                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
901         }
902         len = left = nfsm_rndup(cnt);
903         if (cnt > 0) {
904                 /*
905                  * Generate the mbuf list with the uio_iov ref. to it.
906                  */
907                 i = 0;
908                 m = m2 = info.mb;
909                 while (left > 0) {
910                         siz = min(M_TRAILINGSPACE(m), left);
911                         if (siz > 0) {
912                                 left -= siz;
913                                 i++;
914                         }
915                         if (left > 0) {
916                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
917                                 m->m_len = 0;
918                                 m2->m_next = m;
919                                 m2 = m;
920                         }
921                 }
922                 iv = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
923                 uiop->uio_iov = iv2 = iv;
924                 m = info.mb;
925                 left = len;
926                 i = 0;
927                 while (left > 0) {
928                         if (m == NULL)
929                                 panic("nfsrv_read iov");
930                         siz = min(M_TRAILINGSPACE(m), left);
931                         if (siz > 0) {
932                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
933                                 iv->iov_len = siz;
934                                 m->m_len += siz;
935                                 left -= siz;
936                                 iv++;
937                                 i++;
938                         }
939                         m = m->m_next;
940                 }
941                 uiop->uio_iovcnt = i;
942                 uiop->uio_offset = off;
943                 uiop->uio_resid = len;
944                 uiop->uio_rw = UIO_READ;
945                 uiop->uio_segflg = UIO_SYSSPACE;
946                 nh = nfsrv_sequential_heuristic(uiop, vp, 0);
947                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
948                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
949                 if (error == 0) {
950                         off = uiop->uio_offset;
951                         nh->nh_nextoff = off;
952                 }
953                 kfree((caddr_t)iv2, M_TEMP);
954                 if (error || (getret = VOP_GETATTR(vp, vap))) {
955                         if (!error)
956                                 error = getret;
957                         m_freem(info.mreq);
958                         info.mreq = NULL;
959                         vput(vp);
960                         vp = NULL;
961                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
962                                               NFSX_POSTOPATTR(info.v3),
963                                               &error));
964                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
965                         error = 0;
966                         goto nfsmout;
967                 }
968         } else {
969                 uiop->uio_resid = 0;
970         }
971         vput(vp);
972         vp = NULL;
973         nfsm_srvfattr(nfsd, vap, fp);
974         tlen = len - uiop->uio_resid;
975         cnt = cnt < tlen ? cnt : tlen;
976         tlen = nfsm_rndup(cnt);
977         if (len != tlen || tlen != cnt)
978                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
979         if (info.v3) {
980                 *tl++ = txdr_unsigned(cnt);
981                 if (cnt < reqlen)
982                         *tl++ = nfs_true;
983                 else
984                         *tl++ = nfs_false;
985         }
986         *tl = txdr_unsigned(cnt);
987 nfsmout:
988         *mrq = info.mreq;
989         if (vp)
990                 vput(vp);
991         return(error);
992 }
993
994 /*
995  * nfs write service
     *
     * Parses the write header (file handle, offset, the stable flag for v3
     * and the data length), trims the request mbuf chain so that only the
     * write payload has a non-zero m_len, hands the payload to VOP_WRITE()
     * and builds the reply: wcc data, count, commit level and write verifier
     * for v3, file attributes otherwise.
     *
     * On return *mrq is set to the reply mbuf chain (info.mreq).  The value
     * returned is whatever is left in error at nfsmout; the paths below that
     * build an error reply themselves reset it to 0 first.
996  */
997 int
998 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
999             struct thread *td, struct mbuf **mrq)
1000 {
1001         struct sockaddr *nam = nfsd->nd_nam;
1002         struct ucred *cred = &nfsd->nd_cr;
1003         struct iovec *ivp;
1004         int i, cnt;
1005         struct mbuf *mp1;
1006         struct nfs_fattr *fp;
1007         struct iovec *iv;
1008         struct vattr va, forat;
1009         struct vattr *vap = &va;
1010         u_int32_t *tl;
1011         int error = 0, rdonly, len, forat_ret = 1;
1012         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1013         int stable = NFSV3WRITE_FILESYNC;
1014         struct vnode *vp = NULL;
1015         struct mount *mp = NULL;
1016         struct nfsheur *nh;
1017         nfsfh_t nfh;
1018         fhandle_t *fhp;
1019         struct uio io, *uiop = &io;
1020         struct nfsm_info info;
1021         off_t off;
1022
1023         info.mrep = nfsd->nd_mrep;
1024         info.mreq = NULL;
1025         info.md = nfsd->nd_md;
1026         info.dpos = nfsd->nd_dpos;
1027         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1028
1029         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /* No request mbuf chain: return success with *mrq left NULL. */
1030         if (info.mrep == NULL) {
1031                 error = 0;
1032                 goto nfsmout;
1033         }
1034         fhp = &nfh.fh_generic;
1035         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1036         if (info.v3) {
                     /*
                      * Five words: 0-1 hold the 64-bit offset, word 2 is
                      * skipped, word 3 is the stable flag, and tl is left on
                      * word 4, the data length read below.
                      */
1037                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1038                 off = fxdr_hyper(tl);
1039                 tl += 3;
1040                 stable = fxdr_unsigned(int, *tl++);
1041         } else {
                     /*
                      * Four words: word 1 is the 32-bit offset and tl is
                      * advanced to word 3, the data length read below.  There
                      * is no stable flag here; the write stays FILESYNC
                      * unless nfs_async is set.
                      */
1042                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1043                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1044                 tl += 2;
1045                 if (nfs_async)
1046                         stable = NFSV3WRITE_UNSTABLE;
1047         }
1048         retlen = len = fxdr_unsigned(int32_t, *tl);
1049         cnt = i = 0;
1050
1051         /*
1052          * For NFS Version 2, it is not obvious what a write of zero length
1053          * should do, but I might as well be consistent with Version 3,
1054          * which is to return ok so long as there are no permission problems.
1055          */
1056         if (len > 0) {
                 /*
                  * Walk the request chain so that only the write payload keeps
                  * a non-zero m_len: mbufs ahead of info.md are zeroed, info.md
                  * itself is advanced past the header already consumed, and
                  * everything beyond len bytes is trimmed or zeroed.  i sums
                  * the payload bytes seen (before trimming) and cnt counts the
                  * mbufs that still hold data, i.e. the iovecs needed later.
                  */
1057             zeroing = 1;
1058             mp1 = info.mrep;
1059             while (mp1) {
1060                 if (mp1 == info.md) {
1061                         zeroing = 0;
1062                         adjust = info.dpos - mtod(mp1, caddr_t);
1063                         mp1->m_len -= adjust;
1064                         if (mp1->m_len > 0 && adjust > 0)
1065                                 mp1->m_data += adjust;
1066                 }
1067                 if (zeroing)
1068                         mp1->m_len = 0;
1069                 else if (mp1->m_len > 0) {
1070                         i += mp1->m_len;
1071                         if (i > len) {
1072                                 mp1->m_len -= (i - len);
1073                                 zeroing = 1;
1074                         }
1075                         if (mp1->m_len > 0)
1076                                 cnt++;
1077                 }
1078                 mp1 = mp1->m_next;
1079             }
1080         }
             /*
              * Reject a length that is too large or negative, or a request
              * that carries fewer data bytes than it claims.
              */
1081         if (len > NFS_MAXDATA || len < 0 || i < len) {
1082                 error = EIO;
1083                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1084                                       2 * NFSX_UNSIGNED, &error));
1085                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1086                                  aftat_ret, vap);
1087                 error = 0;
1088                 goto nfsmout;
1089         }
1090         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1091                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1092         if (error) {
1093                 vp = NULL;
1094                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1095                                       2 * NFSX_UNSIGNED, &error));
1096                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1097                                  aftat_ret, vap);
1098                 error = 0;
1099                 goto nfsmout;
1100         }
             /* Pre-operation attributes are only gathered for v3. */
1101         if (info.v3)
1102                 forat_ret = VOP_GETATTR(vp, &forat);
             /* Only regular files can be written; the error depends on version. */
1103         if (vp->v_type != VREG) {
1104                 if (info.v3)
1105                         error = EINVAL;
1106                 else
1107                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1108         }
1109         if (!error) {
1110                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1111         }
1112         if (error) {
1113                 vput(vp);
1114                 vp = NULL;
1115                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1116                                       NFSX_WCCDATA(info.v3), &error));
1117                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1118                                  aftat_ret, vap);
1119                 error = 0;
1120                 goto nfsmout;
1121         }
1122
1123         if (len > 0) {
                 /*
                  * One iovec per mbuf that still holds data; cnt was counted
                  * by the trimming loop above.
                  */
1124             ivp = kmalloc(cnt * sizeof(struct iovec), M_TEMP, M_WAITOK);
1125             uiop->uio_iov = iv = ivp;
1126             uiop->uio_iovcnt = cnt;
1127             mp1 = info.mrep;
1128             while (mp1) {
1129                 if (mp1->m_len > 0) {
1130                         ivp->iov_base = mtod(mp1, caddr_t);
1131                         ivp->iov_len = mp1->m_len;
1132                         ivp++;
1133                 }
1134                 mp1 = mp1->m_next;
1135             }
1136
1137             /*
1138              * XXX
1139              * The IO_METASYNC flag indicates that all metadata (and not just
1140              * enough to ensure data integrity) must be written to stable storage
1141              * synchronously.
1142              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1143              */
1144             if (stable == NFSV3WRITE_UNSTABLE)
1145                 ioflags = IO_NODELOCKED;
1146             else if (stable == NFSV3WRITE_DATASYNC)
1147                 ioflags = (IO_SYNC | IO_NODELOCKED);
1148             else
1149                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1150             uiop->uio_resid = len;
1151             uiop->uio_rw = UIO_WRITE;
1152             uiop->uio_segflg = UIO_SYSSPACE;
1153             uiop->uio_td = NULL;
1154             uiop->uio_offset = off;
1155             nh = nfsrv_sequential_heuristic(uiop, vp, 1);
1156             ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1157             error = VOP_WRITE(vp, uiop, ioflags, cred);
                 /* The heuristic's next offset is only advanced on success. */
1158             if (error == 0)
1159                 nh->nh_nextoff = uiop->uio_offset;
1160             nfsstats.srvvop_writes++;
1161             kfree((caddr_t)iv, M_TEMP);
1162         }
             /*
              * Post-operation attributes.  A write error takes precedence over
              * a getattr failure.
              */
1163         aftat_ret = VOP_GETATTR(vp, vap);
1164         vput(vp);
1165         vp = NULL;
1166         if (!error)
1167                 error = aftat_ret;
1168         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1169                               NFSX_PREOPATTR(info.v3) +
1170                               NFSX_POSTOPORFATTR(info.v3) +
1171                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1172                               &error));
1173         if (info.v3) {
1174                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1175                                  aftat_ret, vap);
                     /* On failure the reply ends after the wcc data; return 0. */
1176                 if (error) {
1177                         error = 0;
1178                         goto nfsmout;
1179                 }
                     /* Four words: count, commit level, two-word write verifier. */
1180                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1181                 *tl++ = txdr_unsigned(retlen);
1182                 /*
1183                  * If nfs_async is set, then pretend the write was FILESYNC.
1184                  */
1185                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1186                         *tl++ = txdr_unsigned(stable);
1187                 else
1188                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1189                 /*
1190                  * Actually, there is no need to txdr these fields,
1191                  * but it may make the values more human readable,
1192                  * for debugging purposes.
1193                  */
                     /* nfsver is seeded from boottime the first time it is needed. */
1194                 if (nfsver.tv_sec == 0)
1195                         nfsver = boottime;
1196                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1197                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1198         } else {
                     /* Non-v3 replies carry just the file attributes. */
1199                 fp = nfsm_build(&info, NFSX_V2FATTR);
1200                 nfsm_srvfattr(nfsd, vap, fp);
1201         }
1202 nfsmout:
1203         *mrq = info.mreq;
1204         if (vp)
1205                 vput(vp);
1206         return(error);
1207 }
1208
1209 /*
1210  * NFS write service with write gathering support. Called when
1211  * nfsrvw_procrastinate > 0.
1212  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1213  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1214  * Jan. 1994.
      *
      * The function works in three steps:
      *  1. If *ndp is non-NULL on entry it is a new write request.  Its header
      *     is parsed, its mbuf chain is trimmed to the write data, and it is
      *     put on slp's time queue (ordered by nd_time) and on the delay hash
      *     list for its file handle, where it may be coalesced with a
      *     neighbouring request.
      *  2. Every queued request whose nd_time is not later than the current
      *     time is written with VOP_WRITE(), and a reply is built for it and
      *     for each request coalesced into it.
      *  3. The first queued descriptor that has a reply is dequeued and
      *     handed back through *ndp and *mrq; both are set to NULL when none
      *     is ready.
      *
      * Always returns 0.
1215  */
1216 int
1217 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1218                   struct thread *td, struct mbuf **mrq)
1219 {
1220         struct iovec *ivp;
1221         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1222         struct nfs_fattr *fp;
1223         int i;
1224         struct iovec *iov;
1225         struct nfsrvw_delayhash *wpp;
1226         struct ucred *cred;
1227         struct vattr va, forat;
1228         u_int32_t *tl;
1229         int error = 0, rdonly, len, forat_ret = 1;
1230         int ioflags, aftat_ret = 1, adjust, zeroing;
1231         struct mbuf *mp1;
1232         struct vnode *vp = NULL;
1233         struct mount *mp = NULL;
1234         struct uio io, *uiop = &io;
1235         u_quad_t cur_usec;
1236         struct nfsm_info info;
1237
1238         info.mreq = NULL;
1239
1240         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1241 #ifndef nolint
1242         i = 0;
1243         len = 0;
1244 #endif
1245         if (*ndp) {
                 /* Take ownership of the new request; *ndp is set again on exit. */
1246             nfsd = *ndp;
1247             *ndp = NULL;
1248             info.mrep = nfsd->nd_mrep;
1249             info.mreq = NULL;
1250             info.md = nfsd->nd_md;
1251             info.dpos = nfsd->nd_dpos;
1252             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1253             cred = &nfsd->nd_cr;
1254             LIST_INIT(&nfsd->nd_coalesce);
1255             nfsd->nd_mreq = NULL;
1256             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
                 /*
                  * nd_time is the time at which this write becomes due: now
                  * plus the procrastination delay for the protocol version.
                  */
1257             cur_usec = nfs_curusec();
1258             nfsd->nd_time = cur_usec +
1259                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1260     
1261             /*
1262              * Now, get the write header..
1263              */
1264             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1265             if (info.v3) {
                     /*
                      * Words 0-1 are the 64-bit offset, word 3 the stable flag;
                      * tl is left on word 4, the data length.
                      */
1266                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1267                 nfsd->nd_off = fxdr_hyper(tl);
1268                 tl += 3;
1269                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1270             } else {
                     /*
                      * Word 1 is the 32-bit offset; tl is advanced to word 3,
                      * the data length.
                      */
1271                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1272                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1273                 tl += 2;
1274                 if (nfs_async)
1275                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1276             }
1277             len = fxdr_unsigned(int32_t, *tl);
1278             nfsd->nd_len = len;
1279             nfsd->nd_eoff = nfsd->nd_off + len;
1280     
1281             /*
1282              * Trim the header out of the mbuf list and trim off any trailing
1283              * junk so that the mbuf list has only the write data.
1284              */
                 /* i sums the data bytes seen, before any trimming. */
1285             zeroing = 1;
1286             i = 0;
1287             mp1 = info.mrep;
1288             while (mp1) {
1289                 if (mp1 == info.md) {
1290                     zeroing = 0;
1291                     adjust = info.dpos - mtod(mp1, caddr_t);
1292                     mp1->m_len -= adjust;
1293                     if (mp1->m_len > 0 && adjust > 0)
1294                         mp1->m_data += adjust;
1295                 }
1296                 if (zeroing)
1297                     mp1->m_len = 0;
1298                 else {
1299                     i += mp1->m_len;
1300                     if (i > len) {
1301                         mp1->m_len -= (i - len);
1302                         zeroing = 1;
1303                     }
1304                 }
1305                 mp1 = mp1->m_next;
1306             }
                 /*
                  * A bad length or short data is answered with EIO right away.
                  * The nfsmout label sits inside this block, so any jump to it
                  * takes the same path and has its error replaced by EIO.
                  * NOTE(review): the jumps presumably come from the
                  * NEGREPLYOUT/NULLOUT macros used above - confirm.
                  * Setting nd_time to 0 sorts the entry to the front of the
                  * time queue, and clearing nd_mrep keeps it off the hash list
                  * below.
                  */
1307             if (len > NFS_MAXDATA || len < 0  || i < len) {
1308 nfsmout:
1309                 m_freem(info.mrep);
1310                 info.mrep = NULL;
1311                 error = EIO;
1312                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1313                 if (info.v3) {
1314                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1315                                      aftat_ret, &va);
1316                 }
1317                 nfsd->nd_mreq = info.mreq;
1318                 nfsd->nd_mrep = NULL;
1319                 nfsd->nd_time = 0;
1320             }
1321     
1322             /*
1323              * Add this entry to the hash and time queues.
1324              */
                 /* The time queue is kept in ascending nd_time order. */
1325             owp = NULL;
1326             wp = slp->ns_tq.lh_first;
1327             while (wp && wp->nd_time < nfsd->nd_time) {
1328                 owp = wp;
1329                 wp = wp->nd_tq.le_next;
1330             }
1331             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1332             if (owp) {
1333                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1334             } else {
1335                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1336             }
1337             if (nfsd->nd_mrep) {
                     /*
                      * Walk the hash chain: first past entries for other file
                      * handles, then past entries for this file handle with a
                      * lower offset.  owp ends up as the entry to insert after,
                      * or NULL when the new entry belongs at the head.
                      */
1338                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1339                 owp = NULL;
1340                 wp = wpp->lh_first;
1341                 while (wp &&
1342                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1343                     owp = wp;
1344                     wp = wp->nd_hash.le_next;
1345                 }
1346                 while (wp && wp->nd_off < nfsd->nd_off &&
1347                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1348                     owp = wp;
1349                     wp = wp->nd_hash.le_next;
1350                 }
1351                 if (owp) {
1352                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1353
1354                     /*
1355                      * Search the hash list for overlapping entries and
1356                      * coalesce.
1357                      */
                         /*
                          * The successor is saved in wp before the call because
                          * nfsrvw_coalesce() unlinks nfsd from the hash list.
                          * Entries that are contiguous but fail NFSW_SAMECRED()
                          * are stepped over without being merged.
                          */
1358                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1359                         wp = nfsd->nd_hash.le_next;
1360                         if (NFSW_SAMECRED(owp, nfsd))
1361                             nfsrvw_coalesce(owp, nfsd);
1362                     }
1363                 } else {
1364                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1365                 }
1366             }
1367         }
1368     
1369         /*
1370          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1371          * and generate the associated reply mbuf list(s).
1372          */
1373 loop1:
1374         cur_usec = nfs_curusec();
             /*
              * The scan stops at the first entry that is not yet due and skips
              * entries that already have a reply.  owp holds the successor,
              * fetched before nfsd is unlinked.
              */
1375         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1376                 owp = nfsd->nd_tq.le_next;
1377                 if (nfsd->nd_time > cur_usec)
1378                     break;
1379                 if (nfsd->nd_mreq)
1380                     continue;
1381                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1382                 LIST_REMOVE(nfsd, nd_tq);
1383                 LIST_REMOVE(nfsd, nd_hash);
1384                 info.mrep = nfsd->nd_mrep;
1385                 info.mreq = NULL;
1386                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1387                 nfsd->nd_mrep = NULL;
1388                 cred = &nfsd->nd_cr;
1389                 forat_ret = aftat_ret = 1;
1390                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1391                                      nfsd->nd_nam, &rdonly,
1392                                      (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1393                 if (!error) {
1394                     if (info.v3)
1395                         forat_ret = VOP_GETATTR(vp, &forat);
1396                     if (vp->v_type != VREG) {
1397                         if (info.v3)
1398                             error = EINVAL;
1399                         else
1400                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1401                     }
1402                 } else {
1403                     vp = NULL;
1404                 }
1405                 if (!error) {
1406                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1407                 }
1408     
                     /* Map the requested stability level onto VOP_WRITE() flags. */
1409                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1410                     ioflags = IO_NODELOCKED;
1411                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1412                     ioflags = (IO_SYNC | IO_NODELOCKED);
1413                 else
1414                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1415                 uiop->uio_rw = UIO_WRITE;
1416                 uiop->uio_segflg = UIO_SYSSPACE;
1417                 uiop->uio_td = NULL;
1418                 uiop->uio_offset = nfsd->nd_off;
1419                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1420                 if (uiop->uio_resid > 0) {
                         /*
                          * Count the mbufs that hold data, then build one iovec
                          * for each.  The array is built even when error is
                          * already set; only the VOP_WRITE() call is skipped
                          * in that case.
                          */
1421                     mp1 = info.mrep;
1422                     i = 0;
1423                     while (mp1) {
1424                         if (mp1->m_len > 0)
1425                             i++;
1426                         mp1 = mp1->m_next;
1427                     }
1428                     uiop->uio_iovcnt = i;
1429                     iov = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
1430                     uiop->uio_iov = ivp = iov;
1431                     mp1 = info.mrep;
1432                     while (mp1) {
1433                         if (mp1->m_len > 0) {
1434                             ivp->iov_base = mtod(mp1, caddr_t);
1435                             ivp->iov_len = mp1->m_len;
1436                             ivp++;
1437                         }
1438                         mp1 = mp1->m_next;
1439                     }
1440                     if (!error) {
1441                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1442                         nfsstats.srvvop_writes++;
1443                     }
1444                     kfree((caddr_t)iov, M_TEMP);
1445                 }
1446                 m_freem(info.mrep);
1447                 info.mrep = NULL;
                     /* Post-op attributes are fetched whenever a vnode was obtained. */
1448                 if (vp) {
1449                     aftat_ret = VOP_GETATTR(vp, &va);
1450                     vput(vp);
1451                     vp = NULL;
1452                 }
1453
1454                 /*
1455                  * Loop around generating replies for all write rpcs that have
1456                  * now been completed.
1457                  */
                     /*
                      * swp is the request whose write was just issued.  nfsd
                      * starts as swp and then takes each descriptor on
                      * swp->nd_coalesce in turn.  All of them are answered with
                      * the same error and attributes.
                      */
1458                 swp = nfsd;
1459                 do {
1460                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1461                     if (error) {
1462                         nfsm_writereply(&info, nfsd, slp, error,
1463                                         NFSX_WCCDATA(info.v3));
1464                         if (info.v3) {
1465                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1466                                              aftat_ret, &va);
1467                         }
1468                     } else {
1469                         nfsm_writereply(&info, nfsd, slp, error,
1470                                         NFSX_PREOPATTR(info.v3) +
1471                                         NFSX_POSTOPORFATTR(info.v3) +
1472                                         2 * NFSX_UNSIGNED +
1473                                         NFSX_WRITEVERF(info.v3));
1474                         if (info.v3) {
1475                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1476                                              aftat_ret, &va);
                                 /*
                                  * Each reply carries its own request's length
                                  * but the stability level of swp.
                                  */
1477                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1478                             *tl++ = txdr_unsigned(nfsd->nd_len);
1479                             *tl++ = txdr_unsigned(swp->nd_stable);
1480                             /*
1481                              * Actually, there is no need to txdr these fields,
1482                              * but it may make the values more human readable,
1483                              * for debugging purposes.
1484                              */
1485                             if (nfsver.tv_sec == 0)
1486                                     nfsver = boottime;
1487                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1488                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1489                         } else {
1490                             fp = nfsm_build(&info, NFSX_V2FATTR);
1491                             nfsm_srvfattr(nfsd, &va, fp);
1492                         }
1493                     }
1494                     nfsd->nd_mreq = info.mreq;
1495                     if (nfsd->nd_mrep)
1496                         panic("nfsrv_write: nd_mrep not free");
1497
1498                     /*
1499                      * Done. Put it at the head of the timer queue so that
1500                      * the final phase can return the reply.
1501                      */
1502                     if (nfsd != swp) {
1503                         nfsd->nd_time = 0;
1504                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1505                     }
                         /*
                          * Pull the next coalesced descriptor, if any.  It is
                          * linked on nd_coalesce through its nd_tq entry.
                          */
1506                     nfsd = swp->nd_coalesce.lh_first;
1507                     if (nfsd) {
1508                         LIST_REMOVE(nfsd, nd_tq);
1509                     }
1510                 } while (nfsd);
1511                 swp->nd_time = 0;
1512                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
                     /* The queue was modified: rescan from the head with a fresh time. */
1513                 goto loop1;
1514         }
1515
1516         /*
1517          * Search for a reply to return.
1518          */
1519         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1520                 if (nfsd->nd_mreq) {
1521                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1522                     LIST_REMOVE(nfsd, nd_tq);
1523                     break;
1524                 }
1525         }
             /* nfsd is NULL here when no queued descriptor had a reply. */
1526         if (nfsd) {
1527                 *ndp = nfsd;
1528                 *mrq = nfsd->nd_mreq;
1529         } else {
1530                 *ndp = NULL;
1531                 *mrq = NULL;
1532         }
1533         return (0);
1534 }
1535
1536 /*
1537  * Coalesce the write request nfsd into owp. To do this we must:
1538  * - remove nfsd from the queues
1539  * - merge nfsd->nd_mrep into owp->nd_mrep
1540  * - update the nd_eoff and nd_stable for owp
1541  * - put nfsd on owp's nd_coalesce list
1542  * NB: Must be called at splsoftclock().
1543  */
1544 static void
1545 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1546 {
1547         int overlap;
1548         struct mbuf *mp1;
1549         struct nfsrv_descript *p;
1550
1551         NFS_DPF(WG, ("C%03x-%03x",
1552                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1553         LIST_REMOVE(nfsd, nd_hash);
1554         LIST_REMOVE(nfsd, nd_tq);
1555         if (owp->nd_eoff < nfsd->nd_eoff) {
1556             overlap = owp->nd_eoff - nfsd->nd_off;
1557             if (overlap < 0)
1558                 panic("nfsrv_coalesce: bad off");
1559             if (overlap > 0)
1560                 m_adj(nfsd->nd_mrep, overlap);
1561             mp1 = owp->nd_mrep;
1562             while (mp1->m_next)
1563                 mp1 = mp1->m_next;
1564             mp1->m_next = nfsd->nd_mrep;
1565             owp->nd_eoff = nfsd->nd_eoff;
1566         } else
1567             m_freem(nfsd->nd_mrep);
1568         nfsd->nd_mrep = NULL;
1569         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1570             owp->nd_stable = NFSV3WRITE_FILESYNC;
1571         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1572             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1573             owp->nd_stable = NFSV3WRITE_DATASYNC;
1574         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1575
1576         /*
1577          * If nfsd had anything else coalesced into it, transfer them
1578          * to owp, otherwise their replies will never get sent.
1579          */
1580         for (p = nfsd->nd_coalesce.lh_first; p;
1581              p = nfsd->nd_coalesce.lh_first) {
1582             LIST_REMOVE(p, nd_tq);
1583             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1584         }
1585 }
1586
1587 /*
1588  * nfs create service
1589  * now does a truncate to 0 length via. setattr if it already exists
1590  */
1591 int
1592 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1593              struct thread *td, struct mbuf **mrq)
1594 {
1595         struct sockaddr *nam = nfsd->nd_nam;
1596         struct ucred *cred = &nfsd->nd_cr;
1597         struct nfs_fattr *fp;
1598         struct vattr va, dirfor, diraft;
1599         struct vattr *vap = &va;
1600         struct nfsv2_sattr *sp;
1601         u_int32_t *tl;
1602         struct nlookupdata nd;
1603         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1604         udev_t rdev = NOUDEV;
1605         caddr_t cp;
1606         int how, exclusive_flag = 0;
1607         struct vnode *dirp;
1608         struct vnode *dvp;
1609         struct vnode *vp;
1610         struct mount *mp;
1611         nfsfh_t nfh;
1612         fhandle_t *fhp;
1613         u_quad_t tempsize;
1614         u_char cverf[NFSX_V3CREATEVERF];
1615         struct nfsm_info info;
1616
1617         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1618         nlookup_zero(&nd);
1619         dirp = NULL;
1620         dvp = NULL;
1621         vp = NULL;
1622
1623         info.mrep = nfsd->nd_mrep;
1624         info.mreq = NULL;
1625         info.md = nfsd->nd_md;
1626         info.dpos = nfsd->nd_dpos;
1627         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1628
1629         fhp = &nfh.fh_generic;
1630         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1631         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1632
1633         /*
1634          * Call namei and do initial cleanup to get a few things
1635          * out of the way.  If we get an initial error we cleanup
1636          * and return here to avoid special-casing the invalid nd
1637          * structure through the rest of the case.  dirp may be
1638          * set even if an error occurs, but the nd structure will not
1639          * be valid at all if an error occurs so we have to invalidate it
1640          * prior to calling nfsm_reply ( which might goto nfsmout ).
1641          */
1642         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1643                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1644                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1645         mp = vfs_getvfs(&fhp->fh_fsid);
1646
1647         if (dirp) {
1648                 if (info.v3) {
1649                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1650                 } else {
1651                         vrele(dirp);
1652                         dirp = NULL;
1653                 }
1654         }
1655         if (error) {
1656                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1657                                       NFSX_WCCDATA(info.v3), &error));
1658                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1659                                  diraft_ret, &diraft);
1660                 error = 0;
1661                 goto nfsmout;
1662         }
1663
1664         /*
1665          * No error.  Continue.  State:
1666          *
1667          *      dirp            may be valid
1668          *      vp              may be valid or NULL if the target does not
1669          *                      exist.
1670          *      dvp             is valid
1671          *
1672          * The error state is set through the code and we may also do some
1673          * opportunistic releasing of vnodes to avoid holding locks through
1674          * NFS I/O.  The cleanup at the end is a catch-all
1675          */
1676
1677         VATTR_NULL(vap);
1678         if (info.v3) {
1679                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1680                 how = fxdr_unsigned(int, *tl);
1681                 switch (how) {
1682                 case NFSV3CREATE_GUARDED:
1683                         if (vp) {
1684                                 error = EEXIST;
1685                                 break;
1686                         }
1687                         /* fall through */
1688                 case NFSV3CREATE_UNCHECKED:
1689                         ERROROUT(nfsm_srvsattr(&info, vap));
1690                         break;
1691                 case NFSV3CREATE_EXCLUSIVE:
1692                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1693                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
1694                         exclusive_flag = 1;
1695                         break;
1696                 };
1697                 vap->va_type = VREG;
1698         } else {
1699                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1700                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1701                 if (vap->va_type == VNON)
1702                         vap->va_type = VREG;
1703                 vap->va_mode = nfstov_mode(sp->sa_mode);
1704                 switch (vap->va_type) {
1705                 case VREG:
1706                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1707                         if (tsize != -1)
1708                                 vap->va_size = (u_quad_t)tsize;
1709                         break;
1710                 case VCHR:
1711                 case VBLK:
1712                 case VFIFO:
1713                         rdev = fxdr_unsigned(long, sp->sa_size);
1714                         break;
1715                 default:
1716                         break;
1717                 };
1718         }
1719
1720         /*
1721          * Iff doesn't exist, create it
1722          * otherwise just truncate to 0 length
1723          *   should I set the mode too ?
1724          *
1725          * The only possible error we can have at this point is EEXIST. 
1726          * nd.ni_vp will also be non-NULL in that case.
1727          */
1728         if (vp == NULL) {
1729                 if (vap->va_mode == (mode_t)VNOVAL)
1730                         vap->va_mode = 0;
1731                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1732                         vn_unlock(dvp);
1733                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1734                                             nd.nl_cred, vap);
1735                         vrele(dvp);
1736                         dvp = NULL;
1737                         if (error == 0) {
1738                                 if (exclusive_flag) {
1739                                         exclusive_flag = 0;
1740                                         VATTR_NULL(vap);
1741                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1742                                                 NFSX_V3CREATEVERF);
1743                                         error = VOP_SETATTR(vp, vap, cred);
1744                                 }
1745                         }
1746                 } else if (
1747                         vap->va_type == VCHR || 
1748                         vap->va_type == VBLK ||
1749                         vap->va_type == VFIFO
1750                 ) {
1751                         /*
1752                          * Handle SysV FIFO node special cases.  All other
1753                          * devices require super user to access.
1754                          */
1755                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1756                                 vap->va_type = VFIFO;
1757                         if (vap->va_type != VFIFO &&
1758                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1759                                 goto nfsmreply0;
1760                         }
1761                         vap->va_rmajor = umajor(rdev);
1762                         vap->va_rminor = uminor(rdev);
1763
1764                         vn_unlock(dvp);
1765                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1766                         vrele(dvp);
1767                         dvp = NULL;
1768                         if (error)
1769                                 goto nfsmreply0;
1770 #if 0
1771                         /*
1772                          * XXX what is this junk supposed to do ?
1773                          */
1774
1775                         vput(vp);
1776                         vp = NULL;
1777
1778                         /*
1779                          * release dvp prior to lookup
1780                          */
1781                         vput(dvp);
1782                         dvp = NULL;
1783
1784                         /*
1785                          * Setup for lookup. 
1786                          *
1787                          * Even though LOCKPARENT was cleared, ni_dvp may
1788                          * be garbage. 
1789                          */
1790                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1791                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1792                         nd.ni_cnd.cn_td = td;
1793                         nd.ni_cnd.cn_cred = cred;
1794
1795                         error = lookup(&nd);
1796                         nd.ni_dvp = NULL;
1797
1798                         if (error != 0) {
1799                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1800                                                       0, &error));
1801                                 /* fall through on certain errors */
1802                         }
1803                         nfsrv_object_create(nd.ni_vp);
1804                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1805                                 error = EINVAL;
1806                                 goto nfsmreply0;
1807                         }
1808 #endif
1809                 } else {
1810                         error = ENXIO;
1811                 }
1812         } else {
1813                 if (vap->va_size != -1) {
1814                         error = nfsrv_access(mp, vp, VWRITE, cred,
1815                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1816                         if (!error) {
1817                                 tempsize = vap->va_size;
1818                                 VATTR_NULL(vap);
1819                                 vap->va_size = tempsize;
1820                                 error = VOP_SETATTR(vp, vap, cred);
1821                         }
1822                 }
1823         }
1824
1825         if (!error) {
1826                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1827                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1828                 if (!error)
1829                         error = VOP_GETATTR(vp, vap);
1830         }
1831         if (info.v3) {
1832                 if (exclusive_flag && !error &&
1833                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1834                         error = EEXIST;
1835                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1836                 vrele(dirp);
1837                 dirp = NULL;
1838         }
1839         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1840                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1841                               NFSX_WCCDATA(info.v3),
1842                               &error));
1843         if (info.v3) {
1844                 if (!error) {
1845                         nfsm_srvpostop_fh(&info, fhp);
1846                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1847                 }
1848                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1849                                  diraft_ret, &diraft);
1850                 error = 0;
1851         } else {
1852                 nfsm_srvfhtom(&info, fhp);
1853                 fp = nfsm_build(&info, NFSX_V2FATTR);
1854                 nfsm_srvfattr(nfsd, vap, fp);
1855         }
1856         goto nfsmout;
1857
1858 nfsmreply0:
1859         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1860         error = 0;
1861         /* fall through */
1862
1863 nfsmout:
1864         *mrq = info.mreq;
1865         if (dirp)
1866                 vrele(dirp);
1867         nlookup_done(&nd);
1868         if (dvp) {
1869                 if (dvp == vp)
1870                         vrele(dvp);
1871                 else
1872                         vput(dvp);
1873         }
1874         if (vp)
1875                 vput(vp);
1876         return (error);
1877 }
1878
1879 /*
1880  * nfs v3 mknod service
1881  */
1882 int
1883 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1884             struct thread *td, struct mbuf **mrq)
1885 {
1886         struct sockaddr *nam = nfsd->nd_nam;
1887         struct ucred *cred = &nfsd->nd_cr;
1888         struct vattr va, dirfor, diraft;
1889         struct vattr *vap = &va;
1890         u_int32_t *tl;
1891         struct nlookupdata nd;
1892         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1893         enum vtype vtyp;
1894         struct vnode *dirp;
1895         struct vnode *dvp;
1896         struct vnode *vp;
1897         nfsfh_t nfh;
1898         fhandle_t *fhp;
1899         struct nfsm_info info;
1900
1901         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1902         nlookup_zero(&nd);
1903         dirp = NULL;
1904         dvp = NULL;
1905         vp = NULL;
1906
1907         info.mrep = nfsd->nd_mrep;
1908         info.mreq = NULL;
1909         info.md = nfsd->nd_md;
1910         info.dpos = nfsd->nd_dpos;
1911
1912         fhp = &nfh.fh_generic;
1913         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1914         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1915
1916         /*
1917          * Handle nfs_namei() call.  If an error occurs, the nd structure
1918          * is not valid.  However, nfsm_*() routines may still jump to
1919          * nfsmout.
1920          */
1921
1922         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1923                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1924                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1925         if (dirp)
1926                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1927         if (error) {
1928                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1929                            NFSX_WCCDATA(1), &error));
1930                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1931                                  diraft_ret, &diraft);
1932                 error = 0;
1933                 goto nfsmout;
1934         }
1935         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1936         vtyp = nfsv3tov_type(*tl);
1937         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1938                 error = NFSERR_BADTYPE;
1939                 goto out;
1940         }
1941         VATTR_NULL(vap);
1942         ERROROUT(nfsm_srvsattr(&info, vap));
1943         if (vtyp == VCHR || vtyp == VBLK) {
1944                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1945                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1946                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1947         }
1948
1949         /*
1950          * Iff doesn't exist, create it.
1951          */
1952         if (vp) {
1953                 error = EEXIST;
1954                 goto out;
1955         }
1956         vap->va_type = vtyp;
1957         if (vap->va_mode == (mode_t)VNOVAL)
1958                 vap->va_mode = 0;
1959         if (vtyp == VSOCK) {
1960                 vn_unlock(dvp);
1961                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1962                 vrele(dvp);
1963                 dvp = NULL;
1964         } else {
1965                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1966                         goto out;
1967
1968                 vn_unlock(dvp);
1969                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1970                 vrele(dvp);
1971                 dvp = NULL;
1972                 if (error)
1973                         goto out;
1974         }
1975
1976         /*
1977          * send response, cleanup, return.
1978          */
1979 out:
1980         nlookup_done(&nd);
1981         if (dvp) {
1982                 if (dvp == vp)
1983                         vrele(dvp);
1984                 else
1985                         vput(dvp);
1986                 dvp = NULL;
1987         }
1988         if (!error) {
1989                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1990                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1991                 if (!error)
1992                         error = VOP_GETATTR(vp, vap);
1993         }
1994         if (vp) {
1995                 vput(vp);
1996                 vp = NULL;
1997         }
1998         diraft_ret = VOP_GETATTR(dirp, &diraft);
1999         if (dirp) {
2000                 vrele(dirp);
2001                 dirp = NULL;
2002         }
2003         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2004                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
2005                               NFSX_WCCDATA(1), &error));
2006         if (!error) {
2007                 nfsm_srvpostop_fh(&info, fhp);
2008                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2009         }
2010         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2011                          diraft_ret, &diraft);
2012         *mrq = info.mreq;
2013         return (0);
2014 nfsmout:
2015         *mrq = info.mreq;
2016         if (dirp)
2017                 vrele(dirp);
2018         nlookup_done(&nd);
2019         if (dvp) {
2020                 if (dvp == vp)
2021                         vrele(dvp);
2022                 else
2023                         vput(dvp);
2024         }
2025         if (vp)
2026                 vput(vp);
2027         return (error);
2028 }
2029
2030 /*
2031  * nfs remove service
2032  */
2033 int
2034 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2035              struct thread *td, struct mbuf **mrq)
2036 {
2037         struct sockaddr *nam = nfsd->nd_nam;
2038         struct ucred *cred = &nfsd->nd_cr;
2039         struct nlookupdata nd;
2040         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2041         struct vnode *dirp;
2042         struct vnode *dvp;
2043         struct vnode *vp;
2044         struct vattr dirfor, diraft;
2045         nfsfh_t nfh;
2046         fhandle_t *fhp;
2047         struct nfsm_info info;
2048
2049         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2050         nlookup_zero(&nd);
2051         dirp = NULL;
2052         dvp = NULL;
2053         vp = NULL;
2054
2055         info.mrep = nfsd->nd_mrep;
2056         info.mreq = NULL;
2057         info.md = nfsd->nd_md;
2058         info.dpos = nfsd->nd_dpos;
2059         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2060
2061         fhp = &nfh.fh_generic;
2062         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2063         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2064
2065         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2066                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2067                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2068         if (dirp) {
2069                 if (info.v3)
2070                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2071         }
2072         if (error == 0) {
2073                 if (vp->v_type == VDIR) {
2074                         error = EPERM;          /* POSIX */
2075                         goto out;
2076                 }
2077                 /*
2078                  * The root of a mounted filesystem cannot be deleted.
2079                  */
2080                 if (vp->v_flag & VROOT) {
2081                         error = EBUSY;
2082                         goto out;
2083                 }
2084 out:
2085                 if (!error) {
2086                         if (dvp != vp)
2087                                 vn_unlock(dvp);
2088                         if (vp) {
2089                                 vput(vp);
2090                                 vp = NULL;
2091                         }
2092                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2093                         vrele(dvp);
2094                         dvp = NULL;
2095                 }
2096         }
2097         if (dirp && info.v3)
2098                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2099         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2100         if (info.v3) {
2101                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2102                                  diraft_ret, &diraft);
2103                 error = 0;
2104         }
2105 nfsmout:
2106         *mrq = info.mreq;
2107         nlookup_done(&nd);
2108         if (dirp)
2109                 vrele(dirp);
2110         if (dvp) {
2111                 if (dvp == vp)
2112                         vrele(dvp);
2113                 else
2114                         vput(dvp);
2115         }
2116         if (vp)
2117                 vput(vp);
2118         return(error);
2119 }
2120
2121 /*
2122  * nfs rename service
2123  */
2124 int
2125 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2126              struct thread *td, struct mbuf **mrq)
2127 {
2128         struct sockaddr *nam = nfsd->nd_nam;
2129         struct ucred *cred = &nfsd->nd_cr;
2130         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2131         int tdirfor_ret = 1, tdiraft_ret = 1;
2132         struct nlookupdata fromnd, tond;
2133         struct vnode *fvp, *fdirp, *fdvp;
2134         struct vnode *tvp, *tdirp, *tdvp;
2135         struct namecache *ncp;
2136         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2137         nfsfh_t fnfh, tnfh;
2138         fhandle_t *ffhp, *tfhp;
2139         uid_t saved_uid;
2140         struct nfsm_info info;
2141
2142         info.mrep = nfsd->nd_mrep;
2143         info.mreq = NULL;
2144         info.md = nfsd->nd_md;
2145         info.dpos = nfsd->nd_dpos;
2146         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2147
2148         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2149 #ifndef nolint
2150         fvp = NULL;
2151 #endif
2152         ffhp = &fnfh.fh_generic;
2153         tfhp = &tnfh.fh_generic;
2154
2155         /*
2156          * Clear fields incase goto nfsmout occurs from macro.
2157          */
2158
2159         nlookup_zero(&fromnd);
2160         nlookup_zero(&tond);
2161         fdirp = NULL;
2162         tdirp = NULL;
2163
2164         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2165         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2166
2167         /*
2168          * Remember our original uid so that we can reset cr_uid before
2169          * the second nfs_namei() call, in case it is remapped.
2170          */
2171         saved_uid = cred->cr_uid;
2172         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2173                           NULL, NULL,
2174                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2175                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2176         if (fdirp) {
2177                 if (info.v3)
2178                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2179         }
2180         if (error) {
2181                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2182                                       2 * NFSX_WCCDATA(info.v3), &error));
2183                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2184                                  fdiraft_ret, &fdiraft);
2185                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2186                                  tdiraft_ret, &tdiraft);
2187                 error = 0;
2188                 goto nfsmout;
2189         }
2190
2191         /*
2192          * We have to unlock the from ncp before we can safely lookup
2193          * the target ncp.
2194          */
2195         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2196         cache_unlock(&fromnd.nl_nch);
2197         fromnd.nl_flags &= ~NLC_NCPISLOCKED;
2198         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2199         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2200         cred->cr_uid = saved_uid;
2201
2202         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2203                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2204                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2205         if (tdirp) {
2206                 if (info.v3)
2207                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2208         }
2209         if (error)
2210                 goto out1;
2211
2212         /*
2213          * relock the source
2214          */
2215         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2216                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2217         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2218                 cache_lock(&fromnd.nl_nch);
2219                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2220         } else {
2221                 cache_unlock(&tond.nl_nch);
2222                 cache_lock(&fromnd.nl_nch);
2223                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2224                 cache_lock(&tond.nl_nch);
2225                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2226         }
2227         fromnd.nl_flags |= NLC_NCPISLOCKED;
2228
2229         fvp = fromnd.nl_nch.ncp->nc_vp;
2230         tvp = tond.nl_nch.ncp->nc_vp;
2231
2232         /*
2233          * Set fdvp and tdvp.  We haven't done all the topology checks
2234          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2235          * point).  If we get through the checks these will be guarenteed
2236          * to be non-NULL.
2237          *
2238          * Holding the children ncp's should be sufficient to prevent
2239          * fdvp and tdvp ripouts.
2240          */
2241         if (fromnd.nl_nch.ncp->nc_parent)
2242                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2243         else
2244                 fdvp = NULL;
2245         if (tond.nl_nch.ncp->nc_parent)
2246                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2247         else
2248                 tdvp = NULL;
2249
2250         if (tvp != NULL) {
2251                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2252                         if (info.v3)
2253                                 error = EEXIST;
2254                         else
2255                                 error = EISDIR;
2256                         goto out;
2257                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2258                         if (info.v3)
2259                                 error = EEXIST;
2260                         else
2261                                 error = ENOTDIR;
2262                         goto out;
2263                 }
2264                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2265                         if (info.v3)
2266                                 error = EXDEV;
2267                         else
2268                                 error = ENOTEMPTY;
2269                         goto out;
2270                 }
2271         }
2272         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2273                 if (info.v3)
2274                         error = EXDEV;
2275                 else
2276                         error = ENOTEMPTY;
2277                 goto out;
2278         }
2279         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2280                 if (info.v3)
2281                         error = EXDEV;
2282                 else
2283                         error = ENOTEMPTY;
2284                 goto out;
2285         }
2286         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2287                 if (info.v3)
2288                         error = EINVAL;
2289                 else
2290                         error = ENOTEMPTY;
2291         }
2292
2293         /*
2294          * You cannot rename a source into itself or a subdirectory of itself.
2295          * We check this by travsering the target directory upwards looking
2296          * for a match against the source.
2297          */
2298         if (error == 0) {
2299                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2300                         if (fromnd.nl_nch.ncp == ncp) {
2301                                 error = EINVAL;
2302                                 break;
2303                         }
2304                 }
2305         }
2306
2307         /*
2308          * If source is the same as the destination (that is the
2309          * same vnode with the same name in the same directory),
2310          * then there is nothing to do.
2311          */
2312         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2313                 error = -1;
2314 out:
2315         if (!error) {
2316                 /*
2317                  * The VOP_NRENAME function releases all vnode references &
2318                  * locks prior to returning so we need to clear the pointers
2319                  * to bypass cleanup code later on.
2320                  */
2321                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2322                                     fdvp, tdvp, tond.nl_cred);
2323         } else {
2324                 if (error == -1)
2325                         error = 0;
2326         }
2327         /* fall through */
2328
2329 out1:
2330         if (fdirp)
2331                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2332         if (tdirp)
2333                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2334         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2335                               2 * NFSX_WCCDATA(info.v3), &error));
2336         if (info.v3) {
2337                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2338                                  fdiraft_ret, &fdiraft);
2339                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2340                                  tdiraft_ret, &tdiraft);
2341         }
2342         error = 0;
2343         /* fall through */
2344
2345 nfsmout:
2346         *mrq = info.mreq;
2347         if (tdirp)
2348                 vrele(tdirp);
2349         nlookup_done(&tond);
2350         if (fdirp)
2351                 vrele(fdirp);
2352         nlookup_done(&fromnd);
2353         return (error);
2354 }
2355
2356 /*
2357  * nfs link service
2358  */
2359 int
2360 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2361            struct thread *td, struct mbuf **mrq)
2362 {
2363         struct sockaddr *nam = nfsd->nd_nam;
2364         struct ucred *cred = &nfsd->nd_cr;
2365         struct nlookupdata nd;
2366         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2367         int getret = 1;
2368         struct vnode *dirp;
2369         struct vnode *dvp;
2370         struct vnode *vp;
2371         struct vnode *xp;
2372         struct mount *mp;
2373         struct mount *xmp;
2374         struct vattr dirfor, diraft, at;
2375         nfsfh_t nfh, dnfh;
2376         fhandle_t *fhp, *dfhp;
2377         struct nfsm_info info;
2378
2379         info.mrep = nfsd->nd_mrep;
2380         info.mreq = NULL;
2381         info.md = nfsd->nd_md;
2382         info.dpos = nfsd->nd_dpos;
2383         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2384
2385         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2386         nlookup_zero(&nd);
2387         dirp = dvp = vp = xp = NULL;
2388         mp = xmp = NULL;
2389
2390         fhp = &nfh.fh_generic;
2391         dfhp = &dnfh.fh_generic;
2392         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2393         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2394         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2395
2396         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2397                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2398         if (error) {
2399                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2400                                       NFSX_POSTOPATTR(info.v3) +
2401                                       NFSX_WCCDATA(info.v3),
2402                                       &error));
2403                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2404                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2405                                  diraft_ret, &diraft);
2406                 xp = NULL;
2407                 error = 0;
2408                 goto nfsmout;
2409         }
2410         if (xp->v_type == VDIR) {
2411                 error = EPERM;          /* POSIX */
2412                 goto out1;
2413         }
2414
2415         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2416                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2417                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2418         if (dirp) {
2419                 if (info.v3)
2420                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2421         }
2422         if (error)
2423                 goto out1;
2424
2425         if (vp != NULL) {
2426                 error = EEXIST;
2427                 goto out;
2428         }
2429         if (xp->v_mount != dvp->v_mount)
2430                 error = EXDEV;
2431 out:
2432         if (!error) {
2433                 vn_unlock(dvp);
2434                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2435                 vrele(dvp);
2436                 dvp = NULL;
2437         }
2438         /* fall through */
2439
2440 out1:
2441         if (info.v3)
2442                 getret = VOP_GETATTR(xp, &at);
2443         if (dirp)
2444                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2445         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2446                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2447                               &error));
2448         if (info.v3) {
2449                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2450                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2451                                  diraft_ret, &diraft);
2452                 error = 0;
2453         }
2454         /* fall through */
2455
2456 nfsmout:
2457         *mrq = info.mreq;
2458         nlookup_done(&nd);
2459         if (dirp)
2460                 vrele(dirp);
2461         if (xp)
2462                 vrele(xp);
2463         if (dvp) {
2464                 if (dvp == vp)
2465                         vrele(dvp);
2466                 else
2467                         vput(dvp);
2468         }
2469         if (vp)
2470                 vput(vp);
2471         return(error);
2472 }
2473
2474 /*
2475  * nfs symbolic link service
2476  */
2477 int
2478 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2479               struct thread *td, struct mbuf **mrq)
2480 {
2481         struct sockaddr *nam = nfsd->nd_nam;
2482         struct ucred *cred = &nfsd->nd_cr;
2483         struct vattr va, dirfor, diraft;
2484         struct nlookupdata nd;
2485         struct vattr *vap = &va;
2486         struct nfsv2_sattr *sp;
2487         char *pathcp = NULL;
2488         struct uio io;
2489         struct iovec iv;
2490         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2491         struct vnode *dirp;
2492         struct vnode *vp;
2493         struct vnode *dvp;
2494         nfsfh_t nfh;
2495         fhandle_t *fhp;
2496         struct nfsm_info info;
2497
2498         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2499         nlookup_zero(&nd);
2500         dirp = NULL;
2501         dvp = NULL;
2502         vp = NULL;
2503
2504         info.mrep = nfsd->nd_mrep;
2505         info.mreq =  NULL;
2506         info.md = nfsd->nd_md;
2507         info.dpos = nfsd->nd_dpos;
2508         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2509
2510         fhp = &nfh.fh_generic;
2511         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2512         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2513
2514         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2515                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2516                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2517         if (dirp) {
2518                 if (info.v3)
2519                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2520         }
2521         if (error)
2522                 goto out;
2523
2524         VATTR_NULL(vap);
2525         if (info.v3) {
2526                 ERROROUT(nfsm_srvsattr(&info, vap));
2527         }
2528         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2529         pathcp = kmalloc(len2 + 1, M_TEMP, M_WAITOK);
2530         iv.iov_base = pathcp;
2531         iv.iov_len = len2;
2532         io.uio_resid = len2;
2533         io.uio_offset = 0;
2534         io.uio_iov = &iv;
2535         io.uio_iovcnt = 1;
2536         io.uio_segflg = UIO_SYSSPACE;
2537         io.uio_rw = UIO_READ;
2538         io.uio_td = NULL;
2539         ERROROUT(nfsm_mtouio(&info, &io, len2));
2540         if (info.v3 == 0) {
2541                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2542                 vap->va_mode = nfstov_mode(sp->sa_mode);
2543         }
2544         *(pathcp + len2) = '\0';
2545         if (vp) {
2546                 error = EEXIST;
2547                 goto out;
2548         }
2549
2550         if (vap->va_mode == (mode_t)VNOVAL)
2551                 vap->va_mode = 0;
2552         if (dvp != vp)
2553                 vn_unlock(dvp);
2554         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2555         vrele(dvp);
2556         dvp = NULL;
2557         if (error == 0) {
2558                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2559                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2560                 if (!error)
2561                         error = VOP_GETATTR(vp, vap);
2562         }
2563
2564 out:
2565         if (dvp) {
2566                 if (dvp == vp)
2567                         vrele(dvp);
2568                 else
2569                         vput(dvp);
2570         }
2571         if (vp) {
2572                 vput(vp);
2573                 vp = NULL;
2574         }
2575         if (pathcp) {
2576                 kfree(pathcp, M_TEMP);
2577                 pathcp = NULL;
2578         }
2579         if (dirp) {
2580                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2581                 vrele(dirp);
2582                 dirp = NULL;
2583         }
2584         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2585                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2586                               NFSX_WCCDATA(info.v3),
2587                               &error));
2588         if (info.v3) {
2589                 if (!error) {
2590                         nfsm_srvpostop_fh(&info, fhp);
2591                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2592                 }
2593                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2594                                  diraft_ret, &diraft);
2595         }
2596         error = 0;
2597         /* fall through */
2598
2599 nfsmout:
2600         *mrq = info.mreq;
2601         nlookup_done(&nd);
2602         if (vp)
2603                 vput(vp);
2604         if (dirp)
2605                 vrele(dirp);
2606         if (pathcp)
2607                 kfree(pathcp, M_TEMP);
2608         return (error);
2609 }
2610
2611 /*
2612  * nfs mkdir service
2613  */
2614 int
2615 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2616             struct thread *td, struct mbuf **mrq)
2617 {
2618         struct sockaddr *nam = nfsd->nd_nam;
2619         struct ucred *cred = &nfsd->nd_cr;
2620         struct vattr va, dirfor, diraft;
2621         struct vattr *vap = &va;
2622         struct nfs_fattr *fp;
2623         struct nlookupdata nd;
2624         u_int32_t *tl;
2625         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2626         struct vnode *dirp;
2627         struct vnode *dvp;
2628         struct vnode *vp;
2629         nfsfh_t nfh;
2630         fhandle_t *fhp;
2631         struct nfsm_info info;
2632
2633         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2634         nlookup_zero(&nd);
2635         dirp = NULL;
2636         dvp = NULL;
2637         vp = NULL;
2638
2639         info.dpos = nfsd->nd_dpos;
2640         info.mrep = nfsd->nd_mrep;
2641         info.mreq =  NULL;
2642         info.md = nfsd->nd_md;
2643         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2644
2645         fhp = &nfh.fh_generic;
2646         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2647         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2648
2649         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2650                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2651                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2652         if (dirp) {
2653                 if (info.v3)
2654                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2655         }
2656         if (error) {
2657                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2658                                       NFSX_WCCDATA(info.v3), &error));
2659                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2660                                  diraft_ret, &diraft);
2661                 error = 0;
2662                 goto nfsmout;
2663         }
2664         VATTR_NULL(vap);
2665         if (info.v3) {
2666                 ERROROUT(nfsm_srvsattr(&info, vap));
2667         } else {
2668                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2669                 vap->va_mode = nfstov_mode(*tl++);
2670         }
2671
2672         /*
2673          * At this point nd.ni_dvp is referenced and exclusively locked and
2674          * nd.ni_vp, if it exists, is referenced but not locked.
2675          */
2676
2677         vap->va_type = VDIR;
2678         if (vp != NULL) {
2679                 error = EEXIST;
2680                 goto out;
2681         }
2682
2683         /*
2684          * Issue mkdir op.  Since SAVESTART is not set, the pathname 
2685          * component is freed by the VOP call.  This will fill-in
2686          * nd.ni_vp, reference, and exclusively lock it.
2687          */
2688         if (vap->va_mode == (mode_t)VNOVAL)
2689                 vap->va_mode = 0;
2690         vn_unlock(dvp);
2691         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2692         vrele(dvp);
2693         dvp = NULL;
2694
2695         if (error == 0) {
2696                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2697                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2698                 if (error == 0)
2699                         error = VOP_GETATTR(vp, vap);
2700         }
2701 out:
2702         if (dirp)
2703                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2704         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2705                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2706                               NFSX_WCCDATA(info.v3),
2707                               &error));
2708         if (info.v3) {
2709                 if (!error) {
2710                         nfsm_srvpostop_fh(&info, fhp);
2711                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2712                 }
2713                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2714                                  diraft_ret, &diraft);
2715         } else {
2716                 nfsm_srvfhtom(&info, fhp);
2717                 fp = nfsm_build(&info, NFSX_V2FATTR);
2718                 nfsm_srvfattr(nfsd, vap, fp);
2719         }
2720         error = 0;
2721         /* fall through */
2722
2723 nfsmout:
2724         *mrq = info.mreq;
2725         nlookup_done(&nd);
2726         if (dirp)
2727                 vrele(dirp);
2728         if (dvp) {
2729                 if (dvp == vp)
2730                         vrele(dvp);
2731                 else
2732                         vput(dvp);
2733         }
2734         if (vp)
2735                 vput(vp);
2736         return (error);
2737 }
2738
2739 /*
2740  * nfs rmdir service
2741  */
2742 int
2743 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2744             struct thread *td, struct mbuf **mrq)
2745 {
2746         struct sockaddr *nam = nfsd->nd_nam;
2747         struct ucred *cred = &nfsd->nd_cr;
2748         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2749         struct vnode *dirp;
2750         struct vnode *dvp;
2751         struct vnode *vp;
2752         struct vattr dirfor, diraft;
2753         nfsfh_t nfh;
2754         fhandle_t *fhp;
2755         struct nlookupdata nd;
2756         struct nfsm_info info;
2757
2758         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2759         nlookup_zero(&nd);
2760         dirp = NULL;
2761         dvp = NULL;
2762         vp = NULL;
2763
2764         info.mrep = nfsd->nd_mrep;
2765         info.mreq = NULL;
2766         info.md = nfsd->nd_md;
2767         info.dpos = nfsd->nd_dpos;
2768         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2769
2770         fhp = &nfh.fh_generic;
2771         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2772         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2773
2774         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2775                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2776                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2777         if (dirp) {
2778                 if (info.v3)
2779                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2780         }
2781         if (error) {
2782                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2783                                       NFSX_WCCDATA(info.v3), &error));
2784                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2785                                  diraft_ret, &diraft);
2786                 error = 0;
2787                 goto nfsmout;
2788         }
2789         if (vp->v_type != VDIR) {
2790                 error = ENOTDIR;
2791                 goto out;
2792         }
2793
2794         /*
2795          * The root of a mounted filesystem cannot be deleted.
2796          */
2797         if (vp->v_flag & VROOT)
2798                 error = EBUSY;
2799 out:
2800         /*
2801          * Issue or abort op.  Since SAVESTART is not set, path name
2802          * component is freed by the VOP after either.
2803          */
2804         if (!error) {
2805                 if (dvp != vp)
2806                         vn_unlock(dvp);
2807                 vput(vp);
2808                 vp = NULL;
2809                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2810                 vrele(dvp);
2811                 dvp = NULL;
2812         }
2813         nlookup_done(&nd);
2814
2815         if (dirp)
2816                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2817         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2818         if (info.v3) {
2819                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2820                                  diraft_ret, &diraft);
2821                 error = 0;
2822         }
2823         /* fall through */
2824
2825 nfsmout:
2826         *mrq = info.mreq;
2827         if (dvp) {
2828                 if (dvp == vp)
2829                         vrele(dvp);
2830                 else
2831                         vput(dvp);
2832         }
2833         nlookup_done(&nd);
2834         if (dirp)
2835                 vrele(dirp);
2836         if (vp)
2837                 vput(vp);
2838         return(error);
2839 }
2840
2841 /*
2842  * nfs readdir service
2843  * - mallocs what it thinks is enough to read
2844  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2845  * - calls VOP_READDIR()
2846  * - loops around building the reply
2847  *      if the output generated exceeds count break out of loop
2848  *      The nfsm_clget macro is used here so that the reply will be packed
2849  *      tightly in mbuf clusters.
2850  * - it only knows that it has encountered eof when the VOP_READDIR()
2851  *      reads nothing
2852  * - as such one readdir rpc will return eof false although you are there
2853  *      and then the next will return eof
2854  * - it trims out records with d_fileno == 0
2855  *      this doesn't matter for Unix clients, but they might confuse clients
2856  *      for other os'.
2857  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2858  *      than requested, but this may not apply to all filesystems. For
2859  *      example, client NFS does not { although it is never remote mounted
2860  *      anyhow }
2861  *     The alternate call nfsrv_readdirplus() does lookups as well.
2862  * PS: The NFS protocol spec. does not clarify what the "count" byte
2863  *      argument is a count of.. just name strings and file id's or the
2864  *      entire reply rpc or ...
2865  *      I tried just file name and id sizes and it confused the Sun client,
2866  *      so I am using the full rpc size now. The "paranoia.." comment refers
2867  *      to including the status longwords that are not a part of the dir.
2868  *      "entry" structures, but are in the rpc.
2869  */
/*
 * Fixed-size scratch record declared as a local ("fl") in
 * nfsrv_readdirplus(), which aliases fl_nfh as a fhandle_t.
 *
 * fl_fattr and fl_nfh are sized in 32-bit words from NFSX_V3FATTR and
 * NFSX_V3FH respectively.
 *
 * NOTE(review): the field names suggest this mirrors the per-entry tail of
 * a READDIRPLUS reply (cookie, post-op attributes, file handle) - confirm
 * against the body of nfsrv_readdirplus().
 */
2870 struct flrep {
2871         nfsuint64       fl_off;
2872         u_int32_t       fl_postopok;
2873         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2874         u_int32_t       fl_fhok;
2875         u_int32_t       fl_fhsize;
2876         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2877 };
2878
/*
 * nfsrv_readdir - service a READDIR request (NFSv2 and NFSv3).
 *
 * nfsd supplies the request mbufs, flags and credentials; slp and td are
 * passed on to the helpers called below; the reply mbuf chain is stored
 * in *mrq.  Returns the value left in 'error' at nfsmout.
 *
 * Outline:
 *  1. Decode the file handle, the starting offset cookie (64 bit plus a
 *     cookie verifier for v3, 32 bit for v2) and the byte count.
 *  2. Compute siz as cnt rounded up to a DIRBLKSIZ multiple, then clamp
 *     both cnt and siz to NFS_SRVMAXDATA(nfsd).
 *  3. Translate the handle to a vnode, require VDIR and VEXEC access,
 *     then unlock the vnode and keep only the reference.
 *  4. Read with VOP_READDIR() into rbuf, re-reading from the new offset
 *     for as long as every returned record is skipped.
 *  5. Encode each usable record, stopping (and clearing eofflag) once the
 *     running reply length would exceed cnt.
 *
 * NOTE(review): the NEGKEEPOUT() calls in the "nothing read" reply and in
 * the main reply run while rbuf and cookies are still allocated, and
 * nfsmout frees neither.  Confirm nfsm_reply() cannot fail there (error
 * is 0 at both points).
 */
2879 int
2880 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2881               struct thread *td, struct mbuf **mrq)
2882 {
2883         struct sockaddr *nam = nfsd->nd_nam;
2884         struct ucred *cred = &nfsd->nd_cr;
2885         char *bp, *be;
2886         struct dirent *dp;
2887         caddr_t cp;
2888         u_int32_t *tl;
2889         struct mbuf *mp1, *mp2;
2890         char *cpos, *cend, *rbuf;
2891         struct vnode *vp = NULL;
2892         struct mount *mp = NULL;
2893         struct vattr at;
2894         nfsfh_t nfh;
2895         fhandle_t *fhp;
2896         struct uio io;
2897         struct iovec iv;
2898         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2899         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2900         u_quad_t off, toff, verf;
2901         off_t *cookies = NULL, *cookiep;
2902         struct nfsm_info info;
2903
2904         info.mrep = nfsd->nd_mrep;
2905         info.mreq = NULL;
2906         info.md = nfsd->nd_md;
2907         info.dpos = nfsd->nd_dpos;
2908         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2909
2910         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2911         fhp = &nfh.fh_generic;
2912         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
             /* v3: 64-bit cookie + 64-bit verifier + count; v2: 32-bit cookie + count */
2913         if (info.v3) {
2914                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2915                 toff = fxdr_hyper(tl);
2916                 tl += 2;
2917                 verf = fxdr_hyper(tl);
2918                 tl += 2;
2919         } else {
2920                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2921                 toff = fxdr_unsigned(u_quad_t, *tl++);
2922                 verf = 0;       /* shut up gcc */
2923         }
2924         off = toff;
2925         cnt = fxdr_unsigned(int, *tl);
             /* siz is derived from the unclamped cnt; both are clamped just below */
2926         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2927         xfer = NFS_SRVMAXDATA(nfsd);
2928         if ((unsigned)cnt > xfer)
2929                 cnt = xfer;
2930         if ((unsigned)siz > xfer)
2931                 siz = xfer;
2932         fullsiz = siz;
2933         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2934                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2935         if (!error && vp->v_type != VDIR) {
2936                 error = ENOTDIR;
2937                 vput(vp);
2938                 vp = NULL;
2939         }
2940         if (error) {
2941                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2942                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2943                 error = 0;
2944                 goto nfsmout;
2945         }
2946
2947         /*
2948          * Obtain lock on vnode for this section of the code
2949          */
2950
2951         if (info.v3) {
2952                 error = getret = VOP_GETATTR(vp, &at);
2953 #if 0
2954                 /*
2955                  * XXX This check may be too strict for Solaris 2.5 clients.
2956                  */
2957                 if (!error && toff && verf && verf != at.va_filerev)
2958                         error = NFSERR_BAD_COOKIE;
2959 #endif
2960         }
2961         if (!error)
2962                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2963         if (error) {
2964                 vput(vp);
2965                 vp = NULL;
2966                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2967                                       NFSX_POSTOPATTR(info.v3), &error));
2968                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2969                 error = 0;
2970                 goto nfsmout;
2971         }
             /* from here on vp is referenced but unlocked, hence vrele() below */
2972         vn_unlock(vp);
2973
2974         /*
2975          * end section.  Allocate rbuf and continue
2976          */
2977         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
2978 again:
2979         iv.iov_base = rbuf;
2980         iv.iov_len = fullsiz;
2981         io.uio_iov = &iv;
2982         io.uio_iovcnt = 1;
2983         io.uio_offset = (off_t)off;
2984         io.uio_resid = fullsiz;
2985         io.uio_segflg = UIO_SYSSPACE;
2986         io.uio_rw = UIO_READ;
2987         io.uio_td = NULL;
2988         eofflag = 0;
             /* drop the cookie array left over from a previous pass */
2989         if (cookies) {
2990                 kfree((caddr_t)cookies, M_TEMP);
2991                 cookies = NULL;
2992         }
2993         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2994         off = (off_t)io.uio_offset;
             /* a successful read that produced no cookie array is reported as NFSERR_PERM */
2995         if (!cookies && !error)
2996                 error = NFSERR_PERM;
2997         if (info.v3) {
2998                 getret = VOP_GETATTR(vp, &at);
2999                 if (!error)
3000                         error = getret;
3001         }
3002         if (error) {
3003                 vrele(vp);
3004                 vp = NULL;
3005                 kfree((caddr_t)rbuf, M_TEMP);
3006                 if (cookies)
3007                         kfree((caddr_t)cookies, M_TEMP);
3008                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3009                                       NFSX_POSTOPATTR(info.v3), &error));
3010                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3011                 error = 0;
3012                 goto nfsmout;
3013         }
3014         if (io.uio_resid) {
3015                 siz -= io.uio_resid;
3016
3017                 /*
3018                  * If nothing read, return eof
3019                  * rpc reply
3020                  */
3021                 if (siz == 0) {
3022                         vrele(vp);
3023                         vp = NULL;
3024                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3025                                               NFSX_POSTOPATTR(info.v3) +
3026                                               NFSX_COOKIEVERF(info.v3) +
3027                                               2 * NFSX_UNSIGNED,
3028                                               &error));
3029                         if (info.v3) {
3030                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3031                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3032                                 txdr_hyper(at.va_filerev, tl);
3033                                 tl += 2;
3034                         } else
3035                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
                             /* empty list: "no entry follows", then eof = true */
3036                         *tl++ = nfs_false;
3037                         *tl = nfs_true;
3038                         kfree((caddr_t)rbuf, M_TEMP);
3039                         kfree((caddr_t)cookies, M_TEMP);
3040                         error = 0;
3041                         goto nfsmout;
3042                 }
3043         }
3044
3045         /*
3046          * Check for degenerate cases of nothing useful read.
3047          * If so go try again
3048          */
3049         cpos = rbuf;
3050         cend = rbuf + siz;
3051         dp = (struct dirent *)cpos;
3052         cookiep = cookies;
3053         /*
3054          * For some reason FreeBSD's ufs_readdir() chooses to back the
3055          * directory offset up to a block boundary, so it is necessary to
3056          * skip over the records that precede the requested offset. This
3057          * requires the assumption that file offset cookies monotonically
3058          * increase.
3059          */
3060         while (cpos < cend && ncookies > 0 &&
3061                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3062                  ((u_quad_t)(*cookiep)) <= toff)) {
3063                 dp = _DIRENT_NEXT(dp);
3064                 cpos = (char *)dp;
3065                 cookiep++;
3066                 ncookies--;
3067         }
             /* every record was skipped: read the next chunk starting at off */
3068         if (cpos >= cend || ncookies == 0) {
3069                 toff = off;
3070                 siz = fullsiz;
3071                 goto again;
3072         }
3073
3074         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3075         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3076                               NFSX_POSTOPATTR(info.v3) +
3077                               NFSX_COOKIEVERF(info.v3) + siz,
3078                               &error));
3079         if (info.v3) {
3080                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3081                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3082                 txdr_hyper(at.va_filerev, tl);
3083         }
3084         mp1 = mp2 = info.mb;
3085         bp = info.bpos;
3086         be = bp + M_TRAILINGSPACE(mp1);
3087
3088         /* Loop through the records and build reply */
3089         while (cpos < cend && ncookies > 0) {
3090                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3091                         nlen = dp->d_namlen;
3092                         rem = nfsm_rndup(nlen) - nlen;
3093                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3094                         if (info.v3)
3095                                 len += 2 * NFSX_UNSIGNED;
                             /* entry would overflow the client's count: stop, more remains */
3096                         if (len > cnt) {
3097                                 eofflag = 0;
3098                                 break;
3099                         }
3100                         /*
3101                          * Build the directory record xdr from
3102                          * the dirent entry.
3103                          */
3104                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3105                         *tl = nfs_true;
3106                         bp += NFSX_UNSIGNED;
                             /* v3 file ids are 64 bit: emit the high word first */
3107                         if (info.v3) {
3108                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3109                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3110                                 bp += NFSX_UNSIGNED;
3111                         }
3112                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3113                         *tl = txdr_unsigned(dp->d_ino);
3114                         bp += NFSX_UNSIGNED;
3115                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3116                         *tl = txdr_unsigned(nlen);
3117                         bp += NFSX_UNSIGNED;
3118
3119                         /* And loop around copying the name */
3120                         xfer = nlen;
3121                         cp = dp->d_name;
3122                         while (xfer > 0) {
3123                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3124                                 if ((bp+xfer) > be)
3125                                         tsiz = be-bp;
3126                                 else
3127                                         tsiz = xfer;
3128                                 bcopy(cp, bp, tsiz);
3129                                 bp += tsiz;
3130                                 xfer -= tsiz;
3131                                 if (xfer > 0)
3132                                         cp += tsiz;
3133                         }
3134                         /* And null pad to a int32_t boundary */
3135                         for (i = 0; i < rem; i++)
3136                                 *bp++ = '\0';
3137                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3138
3139                         /* Finish off the record */
3140                         if (info.v3) {
3141                                 *tl = txdr_unsigned(*cookiep >> 32);
3142                                 bp += NFSX_UNSIGNED;
3143                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3144                         }
3145                         *tl = txdr_unsigned(*cookiep);
3146                         bp += NFSX_UNSIGNED;
3147                 }
3148                 dp = _DIRENT_NEXT(dp);
3149                 cpos = (char *)dp;
3150                 cookiep++;
3151                 ncookies--;
3152         }
3153         vrele(vp);
3154         vp = NULL;
             /* terminate the list ("no entry follows"), then append the eof flag */
3155         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3156         *tl = nfs_false;
3157         bp += NFSX_UNSIGNED;
3158         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3159         if (eofflag)
3160                 *tl = nfs_true;
3161         else
3162                 *tl = nfs_false;
3163         bp += NFSX_UNSIGNED;
             /* fix up the length of the last mbuf written to */
3164         if (mp1 != info.mb) {
3165                 if (bp < be)
3166                         mp1->m_len = bp - mtod(mp1, caddr_t);
3167         } else
3168                 mp1->m_len += bp - info.bpos;
3169         kfree((caddr_t)rbuf, M_TEMP);
3170         kfree((caddr_t)cookies, M_TEMP);
3171
3172 nfsmout:
3173         *mrq = info.mreq;
3174         if (vp)
3175                 vrele(vp);
3176         return(error);
3177 }
3178
3179 int
3180 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3181                   struct thread *td, struct mbuf **mrq)
3182 {
3183         struct sockaddr *nam = nfsd->nd_nam;
3184         struct ucred *cred = &nfsd->nd_cr;
3185         char *bp, *be;
3186         struct dirent *dp;
3187         caddr_t cp;
3188         u_int32_t *tl;
3189         struct mbuf *mp1, *mp2;
3190         char *cpos, *cend, *rbuf;
3191         struct vnode *vp = NULL, *nvp;
3192         struct mount *mp = NULL;
3193         struct flrep fl;
3194         nfsfh_t nfh;
3195         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3196         struct uio io;
3197         struct iovec iv;
3198         struct vattr va, at, *vap = &va;
3199         struct nfs_fattr *fp;
3200         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3201         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3202         u_quad_t off, toff, verf;
3203         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3204         struct nfsm_info info;
3205
3206         info.mrep = nfsd->nd_mrep;
3207         info.mreq = NULL;
3208         info.md = nfsd->nd_md;
3209         info.dpos = nfsd->nd_dpos;
3210         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3211
3212         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3213         fhp = &nfh.fh_generic;
3214         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3215         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3216         toff = fxdr_hyper(tl);
3217         tl += 2;
3218         verf = fxdr_hyper(tl);
3219         tl += 2;
3220         siz = fxdr_unsigned(int, *tl++);
3221         cnt = fxdr_unsigned(int, *tl);
3222         off = toff;
3223         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3224         xfer = NFS_SRVMAXDATA(nfsd);
3225         if ((unsigned)cnt > xfer)
3226                 cnt = xfer;
3227         if ((unsigned)siz > xfer)
3228                 siz = xfer;
3229         fullsiz = siz;
3230         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3231                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3232         if (!error && vp->v_type != VDIR) {
3233                 error = ENOTDIR;
3234                 vput(vp);
3235                 vp = NULL;
3236         }
3237         if (error) {
3238                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3239                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3240                 error = 0;
3241                 goto nfsmout;
3242         }
3243         error = getret = VOP_GETATTR(vp, &at);
3244 #if 0
3245         /*
3246          * XXX This check may be too strict for Solaris 2.5 clients.
3247          */
3248         if (!error && toff && verf && verf != at.va_filerev)
3249                 error = NFSERR_BAD_COOKIE;
3250 #endif
3251         if (!error) {
3252                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3253         }
3254         if (error) {
3255                 vput(vp);
3256                 vp = NULL;
3257                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3258                                       NFSX_V3POSTOPATTR, &error));
3259                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3260                 error = 0;
3261                 goto nfsmout;
3262         }
3263         vn_unlock(vp);
3264         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
3265 again:
3266         iv.iov_base = rbuf;
3267         iv.iov_len = fullsiz;
3268         io.uio_iov = &iv;
3269         io.uio_iovcnt = 1;
3270         io.uio_offset = (off_t)off;
3271         io.uio_resid = fullsiz;
3272         io.uio_segflg = UIO_SYSSPACE;
3273         io.uio_rw = UIO_READ;
3274         io.uio_td = NULL;
3275         eofflag = 0;
3276         if (cookies) {
3277                 kfree((caddr_t)cookies, M_TEMP);
3278                 cookies = NULL;
3279         }
3280         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3281         off = (u_quad_t)io.uio_offset;
3282         getret = VOP_GETATTR(vp, &at);
3283         if (!cookies && !error)
3284                 error = NFSERR_PERM;
3285         if (!error)
3286                 error = getret;
3287         if (error) {
3288                 vrele(vp);
3289                 vp = NULL;
3290                 if (cookies)
3291                         kfree((caddr_t)cookies, M_TEMP);
3292                 kfree((caddr_t)rbuf, M_TEMP);
3293                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3294                                       NFSX_V3POSTOPATTR, &error));
3295                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3296                 error = 0;
3297                 goto nfsmout;
3298         }
3299         if (io.uio_resid) {
3300                 siz -= io.uio_resid;
3301
3302                 /*
3303                  * If nothing read, return eof
3304                  * rpc reply
3305                  */
3306                 if (siz == 0) {
3307                         vrele(vp);
3308                         vp = NULL;
3309                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3310                                               NFSX_V3POSTOPATTR +
3311                                               NFSX_V3COOKIEVERF +
3312                                               2 * NFSX_UNSIGNED,
3313                                               &error));
3314                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3315                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3316                         txdr_hyper(at.va_filerev, tl);
3317                         tl += 2;
3318                         *tl++ = nfs_false;
3319                         *tl = nfs_true;
3320                         kfree((caddr_t)cookies, M_TEMP);
3321                         kfree((caddr_t)rbuf, M_TEMP);
3322                         error = 0;
3323                         goto nfsmout;
3324                 }
3325         }
3326
3327         /*
3328          * Check for degenerate cases of nothing useful read.
3329          * If so go try again
3330          */
3331         cpos = rbuf;
3332         cend = rbuf + siz;
3333         dp = (struct dirent *)cpos;
3334         cookiep = cookies;
3335         /*
3336          * For some reason FreeBSD's ufs_readdir() chooses to back the
3337          * directory offset up to a block boundary, so it is necessary to
3338          * skip over the records that preceed the requested offset. This
3339          * requires the assumption that file offset cookies monotonically
3340          * increase.
3341          */
3342         while (cpos < cend && ncookies > 0 &&
3343                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3344                  ((u_quad_t)(*cookiep)) <= toff)) {
3345                 dp = _DIRENT_NEXT(dp);
3346                 cpos = (char *)dp;
3347                 cookiep++;
3348                 ncookies--;
3349         }
3350         if (cpos >= cend || ncookies == 0) {
3351                 toff = off;
3352                 siz = fullsiz;
3353                 goto again;
3354         }
3355
3356         /*
3357          * Probe one of the directory entries to see if the filesystem
3358          * supports VGET.
3359          */
3360         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3361                 error = NFSERR_NOTSUPP;
3362                 vrele(vp);
3363                 vp = NULL;
3364                 kfree((caddr_t)cookies, M_TEMP);
3365                 kfree((caddr_t)rbuf, M_TEMP);
3366                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3367                                       NFSX_V3POSTOPATTR, &error));
3368                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3369                 error = 0;
3370                 goto nfsmout;
3371         }
3372         if (nvp) {
3373                 vput(nvp);
3374                 nvp = NULL;
3375         }
3376             
3377         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3378                         2 * NFSX_UNSIGNED;
3379         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3380         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3381         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3382         txdr_hyper(at.va_filerev, tl);
3383         mp1 = mp2 = info.mb;
3384         bp = info.bpos;
3385         be = bp + M_TRAILINGSPACE(mp1);
3386
3387         /* Loop through the records and build reply */
3388         while (cpos < cend && ncookies > 0) {
3389                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3390                         nlen = dp->d_namlen;
3391                         rem = nfsm_rndup(nlen) - nlen;
3392
3393                         /*
3394                          * For readdir_and_lookup get the vnode using
3395                          * the file number.
3396                          */
3397                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3398                                 goto invalid;
3399                         bzero((caddr_t)nfhp, NFSX_V3FH);
3400                         nfhp->fh_fsid = fhp->fh_fsid;
3401                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3402                                 vput(nvp);
3403                                 nvp = NULL;
3404                                 goto invalid;
3405                         }
3406                         if (VOP_GETATTR(nvp, vap)) {
3407                                 vput(nvp);
3408                                 nvp = NULL;
3409                                 goto invalid;
3410                         }
3411                         vput(nvp);
3412                         nvp = NULL;
3413
3414                         /*
3415                          * If either the dircount or maxcount will be
3416                          * exceeded, get out now. Both of these lengths
3417                          * are calculated conservatively, including all
3418                          * XDR overheads.
3419                          */
3420                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3421                                 NFSX_V3POSTOPATTR);
3422                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3423                         if (len > cnt || dirlen > fullsiz) {
3424                                 eofflag = 0;
3425                                 break;
3426                         }
3427
3428                         /*
3429                          * Build the directory record xdr from
3430                          * the dirent entry.
3431                          */
3432                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3433                         nfsm_srvfattr(nfsd, vap, fp);
3434                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3435                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3436                         fl.fl_postopok = nfs_true;
3437                         fl.fl_fhok = nfs_true;
3438                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3439
3440                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3441                         *tl = nfs_true;
3442                         bp += NFSX_UNSIGNED;
3443                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3444                         *tl = txdr_unsigned(dp->d_ino >> 32);
3445                         bp += NFSX_UNSIGNED;
3446                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3447                         *tl = txdr_unsigned(dp->d_ino);
3448                         bp += NFSX_UNSIGNED;
3449                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3450                         *tl = txdr_unsigned(nlen);
3451                         bp += NFSX_UNSIGNED;
3452
3453                         /* And loop around copying the name */
3454                         xfer = nlen;
3455                         cp = dp->d_name;
3456                         while (xfer > 0) {
3457                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3458                                 if ((bp + xfer) > be)
3459                                         tsiz = be - bp;
3460                                 else
3461                                         tsiz = xfer;
3462                                 bcopy(cp, bp, tsiz);
3463                                 bp += tsiz;
3464                                 xfer -= tsiz;
3465                                 cp += tsiz;
3466                         }
3467                         /* And null pad to a int32_t boundary */
3468                         for (i = 0; i < rem; i++)
3469                                 *bp++ = '\0';
3470         
3471                         /*
3472                          * Now copy the flrep structure out.
3473                          */
3474                         xfer = sizeof (struct flrep);
3475                         cp = (caddr_t)&fl;
3476                         while (xfer > 0) {
3477                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3478                                 if ((bp + xfer) > be)
3479                                         tsiz = be - bp;
3480                                 else
3481                                         tsiz = xfer;
3482                                 bcopy(cp, bp, tsiz);
3483                                 bp += tsiz;
3484                                 xfer -= tsiz;
3485                                 cp += tsiz;
3486                         }
3487                 }
3488 invalid:
3489                 dp = _DIRENT_NEXT(dp);
3490                 cpos = (char *)dp;
3491                 cookiep++;
3492                 ncookies--;
3493         }
3494         vrele(vp);
3495         vp = NULL;
3496         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3497         *tl = nfs_false;
3498         bp += NFSX_UNSIGNED;
3499         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3500         if (eofflag)
3501                 *tl = nfs_true;
3502         else
3503                 *tl = nfs_false;
3504         bp += NFSX_UNSIGNED;
3505         if (mp1 != info.mb) {
3506                 if (bp < be)
3507                         mp1->m_len = bp - mtod(mp1, caddr_t);
3508         } else
3509                 mp1->m_len += bp - info.bpos;
3510         kfree((caddr_t)cookies, M_TEMP);
3511         kfree((caddr_t)rbuf, M_TEMP);
3512 nfsmout:
3513         *mrq = info.mreq;
3514         if (vp)
3515                 vrele(vp);
3516         return(error);
3517 }
3518
3519 /*
3520  * nfs commit service
3521  */
3522 int
3523 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3524              struct thread *td, struct mbuf **mrq)
3525 {
3526         struct sockaddr *nam = nfsd->nd_nam;
3527         struct ucred *cred = &nfsd->nd_cr;
3528         struct vattr bfor, aft;
3529         struct vnode *vp = NULL;
3530         struct mount *mp = NULL;
3531         nfsfh_t nfh;
3532         fhandle_t *fhp;
3533         u_int32_t *tl;
3534         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3535         u_quad_t off;
3536         struct nfsm_info info;
3537
3538         info.mrep = nfsd->nd_mrep;
3539         info.mreq = NULL;
3540         info.md = nfsd->nd_md;
3541         info.dpos = nfsd->nd_dpos;
3542
3543         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3544         fhp = &nfh.fh_generic;
3545         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3546         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3547
3548         /*
3549          * XXX At this time VOP_FSYNC() does not accept offset and byte
3550          * count parameters, so these arguments are useless (someday maybe).
3551          */
3552         off = fxdr_hyper(tl);
3553         tl += 2;
3554         cnt = fxdr_unsigned(int, *tl);
3555         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3556                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3557         if (error) {
3558                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3559                                       2 * NFSX_UNSIGNED, &error));
3560                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3561                                  aft_ret, &aft);
3562                 error = 0;
3563                 goto nfsmout;
3564         }
3565         for_ret = VOP_GETATTR(vp, &bfor);
3566
3567         /*
3568          * RFC 1813 3.3.21: If count is 0, a flush from offset to the end of
3569          * file is done. At this time VOP_FSYNC does not accept offset and
3570          * byte count parameters, so call VOP_FSYNC the whole file for now.
3571          */
3572         if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
3573                 /*
3574                  * Give up and do the whole thing
3575                  */
3576                 if (vp->v_object &&
3577                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3578                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3579                 }
3580                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3581         } else {
3582                 /*
3583                  * Locate and synchronously write any buffers that fall
3584                  * into the requested range.  Note:  we are assuming that
3585                  * f_iosize is a power of 2.
3586                  */
3587                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3588                 int iomask = iosize - 1;
3589                 off_t loffset;
3590
3591                 /*
3592                  * Align to iosize boundry, super-align to page boundry.
3593                  */
3594                 if (off & iomask) {
3595                         cnt += off & iomask;
3596                         off &= ~(u_quad_t)iomask;
3597                 }
3598                 if (off & PAGE_MASK) {
3599                         cnt += off & PAGE_MASK;
3600                         off &= ~(u_quad_t)PAGE_MASK;
3601                 }
3602                 loffset = off;
3603
3604                 if (vp->v_object &&
3605                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3606                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3607                             (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3608                 }
3609
3610                 crit_enter();
3611                 while (error == 0 || cnt > 0) {
3612                         struct buf *bp;
3613
3614                         /*
3615                          * If we have a buffer and it is marked B_DELWRI we
3616                          * have to lock and write it.  Otherwise the prior
3617                          * write is assumed to have already been committed.
3618                          *
3619                          * WARNING: FINDBLK_TEST buffers represent stable
3620                          *          storage but not necessarily stable
3621                          *          content.  It is ok in this case.
3622                          */
3623                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3624                                 if (bp->b_flags & B_DELWRI)
3625                                         bp = findblk(vp, loffset, 0);
3626                                 else
3627                                         bp = NULL;
3628                         }
3629                         if (bp) {
3630                                 if (bp->b_flags & B_DELWRI) {
3631                                         bremfree(bp);
3632                                         error = bwrite(bp);
3633                                         ++nfs_commit_miss;
3634                                 } else {
3635                                         BUF_UNLOCK(bp);
3636                                 }
3637                         }
3638                         ++nfs_commit_blks;
3639                         if (cnt < iosize)
3640                                 break;
3641                         cnt -= iosize;
3642                         loffset += iosize;
3643                 }
3644                 crit_exit();
3645         }
3646
3647         aft_ret = VOP_GETATTR(vp, &aft);
3648         vput(vp);
3649         vp = NULL;
3650         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3651                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3652                               &error));
3653         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3654                          aft_ret, &aft);
3655         if (!error) {
3656                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3657                 if (nfsver.tv_sec == 0)
3658                         nfsver = boottime;
3659                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3660                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3661         } else {
3662                 error = 0;
3663         }
3664 nfsmout:
3665         *mrq = info.mreq;
3666         if (vp)
3667                 vput(vp);
3668         return(error);
3669 }
3670
3671 /*
3672  * nfs statfs service
3673  */
3674 int
3675 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3676              struct thread *td, struct mbuf **mrq)
3677 {
3678         struct sockaddr *nam = nfsd->nd_nam;
3679         struct ucred *cred = &nfsd->nd_cr;
3680         struct statfs *sf;
3681         struct nfs_statfs *sfp;
3682         int error = 0, rdonly, getret = 1;
3683         struct vnode *vp = NULL;
3684         struct mount *mp = NULL;
3685         struct vattr at;
3686         nfsfh_t nfh;
3687         fhandle_t *fhp;
3688         struct statfs statfs;
3689         u_quad_t tval;
3690         struct nfsm_info info;
3691
3692         info.mrep = nfsd->nd_mrep;
3693         info.mreq = NULL;
3694         info.md = nfsd->nd_md;
3695         info.dpos = nfsd->nd_dpos;
3696         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3697
3698         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3699         fhp = &nfh.fh_generic;
3700         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3701         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3702                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3703         if (error) {
3704                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3705                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3706                 error = 0;
3707                 goto nfsmout;
3708         }
3709         sf = &statfs;
3710         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3711         getret = VOP_GETATTR(vp, &at);
3712         vput(vp);
3713         vp = NULL;
3714         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3715                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3716                               &error));
3717         if (info.v3)
3718                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3719         if (error) {
3720                 error = 0;
3721                 goto nfsmout;
3722         }
3723         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3724         if (info.v3) {
3725                 tval = (u_quad_t)sf->f_blocks;
3726                 tval *= (u_quad_t)sf->f_bsize;
3727                 txdr_hyper(tval, &sfp->sf_tbytes);
3728                 tval = (u_quad_t)sf->f_bfree;
3729                 tval *= (u_quad_t)sf->f_bsize;
3730                 txdr_hyper(tval, &sfp->sf_fbytes);
3731                 tval = (u_quad_t)sf->f_bavail;
3732                 tval *= (u_quad_t)sf->f_bsize;
3733                 txdr_hyper(tval, &sfp->sf_abytes);
3734                 sfp->sf_tfiles.nfsuquad[0] = 0;
3735                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3736                 sfp->sf_ffiles.nfsuquad[0] = 0;
3737                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3738                 sfp->sf_afiles.nfsuquad[0] = 0;
3739                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3740                 sfp->sf_invarsec = 0;
3741         } else {
3742                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3743                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3744                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3745                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3746                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3747         }
3748 nfsmout:
3749         *mrq = info.mreq;
3750         if (vp)
3751                 vput(vp);
3752         return(error);
3753 }
3754
3755 /*
3756  * nfs fsinfo service
3757  */
3758 int
3759 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3760              struct thread *td, struct mbuf **mrq)
3761 {
3762         struct sockaddr *nam = nfsd->nd_nam;
3763         struct ucred *cred = &nfsd->nd_cr;
3764         struct nfsv3_fsinfo *sip;
3765         int error = 0, rdonly, getret = 1, pref;
3766         struct vnode *vp = NULL;
3767         struct mount *mp = NULL;
3768         struct vattr at;
3769         nfsfh_t nfh;
3770         fhandle_t *fhp;
3771         u_quad_t maxfsize;
3772         struct statfs sb;
3773         struct nfsm_info info;
3774
3775         info.mrep = nfsd->nd_mrep;
3776         info.mreq = NULL;
3777         info.md = nfsd->nd_md;
3778         info.dpos = nfsd->nd_dpos;
3779
3780         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3781         fhp = &nfh.fh_generic;
3782         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3783         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3784                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3785         if (error) {
3786                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3787                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3788                 error = 0;
3789                 goto nfsmout;
3790         }
3791
3792         /* XXX Try to make a guess on the max file size. */
3793         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3794         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3795
3796         getret = VOP_GETATTR(vp, &at);
3797         vput(vp);
3798         vp = NULL;
3799         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3800                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3801         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3802         sip = nfsm_build(&info, NFSX_V3FSINFO);
3803
3804         /*
3805          * XXX
3806          * There should be file system VFS OP(s) to get this information.
3807          * For now, assume ufs.
3808          */
3809         if (slp->ns_so->so_type == SOCK_DGRAM)
3810                 pref = NFS_MAXDGRAMDATA;
3811         else
3812                 pref = NFS_MAXDATA;
3813         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3814         sip->fs_rtpref = txdr_unsigned(pref);
3815         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3816         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3817         sip->fs_wtpref = txdr_unsigned(pref);
3818         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3819         sip->fs_dtpref = txdr_unsigned(pref);
3820         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3821         sip->fs_timedelta.nfsv3_sec = 0;
3822         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3823         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3824                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3825                 NFSV3FSINFO_CANSETTIME);
3826 nfsmout:
3827         *mrq = info.mreq;
3828         if (vp)
3829                 vput(vp);
3830         return(error);
3831 }
3832
3833 /*
3834  * nfs pathconf service
3835  */
3836 int
3837 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3838                struct thread *td, struct mbuf **mrq)
3839 {
3840         struct sockaddr *nam = nfsd->nd_nam;
3841         struct ucred *cred = &nfsd->nd_cr;
3842         struct nfsv3_pathconf *pc;
3843         int error = 0, rdonly, getret = 1;
3844         register_t linkmax, namemax, chownres, notrunc;
3845         struct vnode *vp = NULL;
3846         struct mount *mp = NULL;
3847         struct vattr at;
3848         nfsfh_t nfh;
3849         fhandle_t *fhp;
3850         struct nfsm_info info;
3851
3852         info.mrep = nfsd->nd_mrep;
3853         info.mreq = NULL;
3854         info.md = nfsd->nd_md;
3855         info.dpos = nfsd->nd_dpos;
3856
3857         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3858         fhp = &nfh.fh_generic;
3859         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3860         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3861                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3862         if (error) {
3863                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3864                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3865                 error = 0;
3866                 goto nfsmout;
3867         }
3868         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3869         if (!error)
3870                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3871         if (!error)
3872                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3873         if (!error)
3874                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3875         getret = VOP_GETATTR(vp, &at);
3876         vput(vp);
3877         vp = NULL;
3878         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3879                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3880                               &error));
3881         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3882         if (error) {
3883                 error = 0;
3884                 goto nfsmout;
3885         }
3886         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3887
3888         pc->pc_linkmax = txdr_unsigned(linkmax);
3889         pc->pc_namemax = txdr_unsigned(namemax);
3890         pc->pc_notrunc = txdr_unsigned(notrunc);
3891         pc->pc_chownrestricted = txdr_unsigned(chownres);
3892
3893         /*
3894          * These should probably be supported by VOP_PATHCONF(), but
3895          * until msdosfs is exportable (why would you want to?), the
3896          * Unix defaults should be ok.
3897          */
3898         pc->pc_caseinsensitive = nfs_false;
3899         pc->pc_casepreserving = nfs_true;
3900 nfsmout:
3901         *mrq = info.mreq;
3902         if (vp) 
3903                 vput(vp);
3904         return(error);
3905 }
3906
3907 /*
3908  * Null operation, used by clients to ping server
3909  */
3910 /* ARGSUSED */
3911 int
3912 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3913            struct thread *td, struct mbuf **mrq)
3914 {
3915         struct nfsm_info info;
3916         int error = NFSERR_RETVOID;
3917
3918         info.mrep = nfsd->nd_mrep;
3919         info.mreq = NULL;
3920
3921         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3922         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3923 nfsmout:
3924         *mrq = info.mreq;
3925         return (error);
3926 }
3927
3928 /*
3929  * No operation, used for obsolete procedures
3930  */
3931 /* ARGSUSED */
3932 int
3933 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3934            struct thread *td, struct mbuf **mrq)
3935 {
3936         struct nfsm_info info;
3937         int error;
3938
3939         info.mrep = nfsd->nd_mrep;
3940         info.mreq = NULL;
3941
3942         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3943         if (nfsd->nd_repstat)
3944                 error = nfsd->nd_repstat;
3945         else
3946                 error = EPROCUNAVAIL;
3947         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3948         error = 0;
3949 nfsmout:
3950         *mrq = info.mreq;
3951         return (error);
3952 }
3953
3954 /*
3955  * Perform access checking for vnodes obtained from file handles that would
3956  * refer to files already opened by a Unix client. You cannot just use
3957  * vn_writechk() and VOP_ACCESS() for two reasons.
3958  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3959  * 2 - The owner is to be given access irrespective of mode bits for some
3960  *     operations, so that processes that chmod after opening a file don't
3961  *     break. I don't like this because it opens a security hole, but since
3962  *     the nfs server opens a security hole the size of a barn door anyhow,
3963  *     what the heck.
3964  *
3965  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3966  * will return EPERM instead of EACCESS. EPERM is always an error.
3967  */
3968 static int
3969 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3970              int rdonly, struct thread *td, int override)
3971 {
3972         struct vattr vattr;
3973         int error;
3974
3975         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3976         if (flags & VWRITE) {
3977                 /* Just vn_writechk() changed to check rdonly */
3978                 /*
3979                  * Disallow write attempts on read-only file systems;
3980                  * unless the file is a socket or a block or character
3981                  * device resident on the file system.
3982                  */
3983                 if (rdonly || 
3984                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3985                         switch (vp->v_type) {
3986                         case VREG:
3987                         case VDIR:
3988                         case VLNK:
3989                                 return (EROFS);
3990                         default:
3991                                 break;
3992                         }
3993                 }
3994                 /*
3995                  * If there's shared text associated with
3996                  * the inode, we can't allow writing.
3997                  */
3998                 if (vp->v_flag & VTEXT)
3999                         return (ETXTBSY);
4000         }
4001         error = VOP_GETATTR(vp, &vattr);
4002         if (error)
4003                 return (error);
4004         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
4005         /*
4006          * Allow certain operations for the owner (reads and writes
4007          * on files that are already open).
4008          */
4009         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4010                 error = 0;
4011         return error;
4012 }
4013 #endif /* NFS_NOSERVER */
4014