a0d17b64bcd92e1a75bbcad4d1854790ed9d8a21
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  */
39
40 /*
41  * nfs version 2 and 3 server calls to vnode ops
42  * - these routines generally have 3 phases
43  *   1 - break down and validate rpc request in mbuf list
44  *   2 - do the vnode ops for the request
45  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
46  *   3 - build the rpc reply in an mbuf list
47  *   nb:
48  *      - do not mix the phases, since the nfsm_?? macros can return failures
49  *        on a bad rpc or similar and do not do any vrele() or vput()'s
50  *
51  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
52  *      error number iff error != 0 whereas
53  *      returning an error from the server function implies a fatal error
54  *      such as a badly constructed rpc request that should be dropped without
55  *      a reply.
56  *      For Version 3, nfsm_reply() does not return for the error case, since
57  *      most version 3 rpcs return more than the status for error cases.
58  *
59  * Other notes:
60  *      Warning: always pay careful attention to resource cleanup on return
61  *      and note that nfsm_*() macros can terminate a procedure on certain
62  *      errors.
63  */
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/proc.h>
68 #include <sys/priv.h>
69 #include <sys/nlookup.h>
70 #include <sys/namei.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 #include <sys/mount.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/dirent.h>
79 #include <sys/stat.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82 #include <sys/buf.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87
88 #include <sys/buf2.h>
89
90 #include <sys/thread2.h>
91
92 #include "nfsproto.h"
93 #include "rpcv2.h"
94 #include "nfs.h"
95 #include "xdr_subs.h"
96 #include "nfsm_subs.h"
97
/*
 * Debug tracing.  nfsdbprintf() takes a fully parenthesized kprintf()
 * argument list, e.g. nfsdbprintf(("%s %d\n", __FILE__, __LINE__)), and
 * expands to nothing unless NFSRV_DEBUG is defined.
 */
#if defined(NFSRV_DEBUG)
#define nfsdbprintf(info)	kprintf info
#else
#define nfsdbprintf(info)
#endif

/*
 * MAX_REORDERED_RPC scales the offset window inside which an
 * out-of-sequence request is still treated as a reordered RPC by the
 * sequential heuristic.
 */
#define MAX_REORDERED_RPC	(16)
#define MAX_COMMIT_COUNT	(1024 * 1024)

/*
 * Sequential-access heuristic table: number of slots, and the initial
 * value / per-hit increment / ceiling of a slot's use count.
 */
#define NUM_HEURISTIC		1031
#define NHUSE_INIT		64
#define NHUSE_INC		16
#define NHUSE_MAX		2048
111
112 static struct nfsheur {
113     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
114     off_t nh_nextoff;           /* next offset for sequential detection */
115     int nh_use;                 /* use count for selection */
116     int nh_seqcount;            /* heuristic */
117 } nfsheur[NUM_HEURISTIC];
118
119 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
120                       NFFIFO, NFNON };
121 #ifndef NFS_NOSERVER 
122 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
123                       NFCHR, NFNON };
124
125 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
126 int nfsrvw_procrastinate_v3 = 0;
127
128 static struct timespec  nfsver;
129
130 SYSCTL_DECL(_vfs_nfs);
131
132 int nfs_async;
133 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
134     "Enable unstable and fast writes");
135 static int nfs_commit_blks;
136 static int nfs_commit_miss;
137 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
138     "Number of committed blocks");
139 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
140     "Number of nfs blocks committed from dirty buffers");
141
142 static int nfsrv_access (struct mount *, struct vnode *, int,
143                         struct ucred *, int, struct thread *, int);
144 static void nfsrvw_coalesce (struct nfsrv_descript *,
145                 struct nfsrv_descript *);
146
147 /*
148  * Heuristic to detect sequential operation.
149  */
150 static struct nfsheur *
151 nfsrv_sequential_heuristic(struct uio *uio, struct vnode *vp, int writeop)
152 {
153         struct nfsheur *nh;
154         int hi, try;
155
156         /* Locate best candidate */
157         try = 32;
158         hi = ((int)(vm_offset_t) vp / sizeof(struct vnode)) % NUM_HEURISTIC;
159         nh = &nfsheur[hi];
160
161         while (try--) {
162                 if (nfsheur[hi].nh_vp == vp) {
163                         nh = &nfsheur[hi];
164                         break;
165                 }
166                 if (nfsheur[hi].nh_use > 0)
167                         --nfsheur[hi].nh_use;
168                 hi = (hi + 1) % NUM_HEURISTIC;
169                 if (nfsheur[hi].nh_use < nh->nh_use)
170                         nh = &nfsheur[hi];
171         }
172
173         /* Initialize hint if this is a new file */
174         if (nh->nh_vp != vp) {
175                 nh->nh_vp = vp;
176                 nh->nh_nextoff = uio->uio_offset;
177                 nh->nh_use = NHUSE_INIT;
178                 if (uio->uio_offset == 0)
179                         nh->nh_seqcount = 4;
180                 else
181                         nh->nh_seqcount = 1;
182         }
183
184         /*
185          * Calculate heuristic
186          *
187          * See vfs_vnops.c:sequential_heuristic().
188          */
189         if ((uio->uio_offset == 0 && nh->nh_seqcount > 0) ||
190             uio->uio_offset == nh->nh_nextoff) {
191                 nh->nh_seqcount += howmany(uio->uio_resid, 16384);
192                 if (nh->nh_seqcount > IO_SEQMAX)
193                         nh->nh_seqcount = IO_SEQMAX;
194         } else if (qabs(uio->uio_offset - nh->nh_nextoff) <= MAX_REORDERED_RPC *
195                 imax(vp->v_mount->mnt_stat.f_iosize, uio->uio_resid)) {
196                     /* Probably a reordered RPC, leave seqcount alone. */
197         } else if (nh->nh_seqcount > 1) {
198                 nh->nh_seqcount /= 2;
199         } else {
200                 nh->nh_seqcount = 0;
201         }
202         nh->nh_use += NHUSE_INC;
203         if (nh->nh_use > NHUSE_MAX)
204                 nh->nh_use = NHUSE_MAX;
205         return (nh);
206 }
207
208 /*
209  * nfs v3 access service
210  */
211 int
212 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
213               struct thread *td, struct mbuf **mrq)
214 {
215         struct sockaddr *nam = nfsd->nd_nam;
216         struct ucred *cred = &nfsd->nd_cr;
217         struct vnode *vp = NULL;
218         struct mount *mp = NULL;
219         nfsfh_t nfh;
220         fhandle_t *fhp;
221         int error = 0, rdonly, getret;
222         struct vattr vattr, *vap = &vattr;
223         u_long testmode, nfsmode;
224         struct nfsm_info info;
225         u_int32_t *tl;
226
227         info.dpos = nfsd->nd_dpos;
228         info.md = nfsd->nd_md;
229         info.mrep = nfsd->nd_mrep;
230         info.mreq = NULL;
231
232         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
233         fhp = &nfh.fh_generic;
234         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
235         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
236         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
237             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
238         if (error) {
239                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
240                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
241                 error = 0;
242                 goto nfsmout;
243         }
244         nfsmode = fxdr_unsigned(u_int32_t, *tl);
245         if ((nfsmode & NFSV3ACCESS_READ) &&
246                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
247                 nfsmode &= ~NFSV3ACCESS_READ;
248         if (vp->v_type == VDIR)
249                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
250                         NFSV3ACCESS_DELETE);
251         else
252                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
253         if ((nfsmode & testmode) &&
254                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
255                 nfsmode &= ~testmode;
256         if (vp->v_type == VDIR)
257                 testmode = NFSV3ACCESS_LOOKUP;
258         else
259                 testmode = NFSV3ACCESS_EXECUTE;
260         if ((nfsmode & testmode) &&
261                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
262                 nfsmode &= ~testmode;
263         getret = VOP_GETATTR(vp, vap);
264         vput(vp);
265         vp = NULL;
266         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
267                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
268         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
269         tl = nfsm_build(&info, NFSX_UNSIGNED);
270         *tl = txdr_unsigned(nfsmode);
271 nfsmout:
272         *mrq = info.mreq;
273         if (vp)
274                 vput(vp);
275         return(error);
276 }
277
278 /*
279  * nfs getattr service
280  */
281 int
282 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
283               struct thread *td, struct mbuf **mrq)
284 {
285         struct sockaddr *nam = nfsd->nd_nam;
286         struct ucred *cred = &nfsd->nd_cr;
287         struct nfs_fattr *fp;
288         struct vattr va;
289         struct vattr *vap = &va;
290         struct vnode *vp = NULL;
291         struct mount *mp = NULL;
292         nfsfh_t nfh;
293         fhandle_t *fhp;
294         int error = 0, rdonly;
295         struct nfsm_info info;
296
297         info.mrep = nfsd->nd_mrep;
298         info.md = nfsd->nd_md;
299         info.dpos = nfsd->nd_dpos;
300         info.mreq = NULL;
301
302         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
303         fhp = &nfh.fh_generic;
304         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
305         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
306                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
307         if (error) {
308                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
309                 error = 0;
310                 goto nfsmout;
311         }
312         error = VOP_GETATTR(vp, vap);
313         vput(vp);
314         vp = NULL;
315         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
316                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
317         if (error) {
318                 error = 0;
319                 goto nfsmout;
320         }
321         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
322         nfsm_srvfattr(nfsd, vap, fp);
323         /* fall through */
324
325 nfsmout:
326         *mrq = info.mreq;
327         if (vp)
328                 vput(vp);
329         return(error);
330 }
331
332 /*
333  * nfs setattr service
334  */
335 int
336 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
337               struct thread *td, struct mbuf **mrq)
338 {
339         struct sockaddr *nam = nfsd->nd_nam;
340         struct ucred *cred = &nfsd->nd_cr;
341         struct vattr va, preat;
342         struct vattr *vap = &va;
343         struct nfsv2_sattr *sp;
344         struct nfs_fattr *fp;
345         struct vnode *vp = NULL;
346         struct mount *mp = NULL;
347         nfsfh_t nfh;
348         fhandle_t *fhp;
349         u_int32_t *tl;
350         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
351         int gcheck = 0;
352         struct timespec guard;
353         struct nfsm_info info;
354
355         info.mrep = nfsd->nd_mrep;
356         info.mreq = NULL;
357         info.md = nfsd->nd_md;
358         info.dpos = nfsd->nd_dpos;
359         info.v3 = (nfsd->nd_flag & ND_NFSV3);
360
361         guard.tv_sec = 0;       /* fix compiler warning */
362         guard.tv_nsec = 0;
363
364         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
365         fhp = &nfh.fh_generic;
366         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
367         VATTR_NULL(vap);
368         if (info.v3) {
369                 ERROROUT(nfsm_srvsattr(&info, vap));
370                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
371                 gcheck = fxdr_unsigned(int, *tl);
372                 if (gcheck) {
373                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
374                         fxdr_nfsv3time(tl, &guard);
375                 }
376         } else {
377                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
378                 /*
379                  * Nah nah nah nah na nah
380                  * There is a bug in the Sun client that puts 0xffff in the mode
381                  * field of sattr when it should put in 0xffffffff. The u_short
382                  * doesn't sign extend.
383                  * --> check the low order 2 bytes for 0xffff
384                  */
385                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
386                         vap->va_mode = nfstov_mode(sp->sa_mode);
387                 if (sp->sa_uid != nfs_xdrneg1)
388                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
389                 if (sp->sa_gid != nfs_xdrneg1)
390                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
391                 if (sp->sa_size != nfs_xdrneg1)
392                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
393                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
394 #ifdef notyet
395                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
396 #else
397                         vap->va_atime.tv_sec =
398                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
399                         vap->va_atime.tv_nsec = 0;
400 #endif
401                 }
402                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
403                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
404
405         }
406
407         /*
408          * Now that we have all the fields, lets do it.
409          */
410         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
411                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
412         if (error) {
413                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
414                                       2 * NFSX_UNSIGNED, &error));
415                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
416                                  postat_ret, vap);
417                 error = 0;
418                 goto nfsmout;
419         }
420
421         /*
422          * vp now an active resource, pay careful attention to cleanup
423          */
424
425         if (info.v3) {
426                 error = preat_ret = VOP_GETATTR(vp, &preat);
427                 if (!error && gcheck &&
428                         (preat.va_ctime.tv_sec != guard.tv_sec ||
429                          preat.va_ctime.tv_nsec != guard.tv_nsec))
430                         error = NFSERR_NOT_SYNC;
431                 if (error) {
432                         vput(vp);
433                         vp = NULL;
434                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
435                                               NFSX_WCCDATA(info.v3), &error));
436                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
437                                          postat_ret, vap);
438                         error = 0;
439                         goto nfsmout;
440                 }
441         }
442
443         /*
444          * If the size is being changed write acces is required, otherwise
445          * just check for a read only file system.
446          */
447         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
448                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
449                         error = EROFS;
450                         goto out;
451                 }
452         } else {
453                 if (vp->v_type == VDIR) {
454                         error = EISDIR;
455                         goto out;
456                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
457                             td, 0)) != 0){ 
458                         goto out;
459                 }
460         }
461         error = VOP_SETATTR(vp, vap, cred);
462         postat_ret = VOP_GETATTR(vp, vap);
463         if (!error)
464                 error = postat_ret;
465 out:
466         vput(vp);
467         vp = NULL;
468         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
469                    NFSX_WCCORFATTR(info.v3), &error));
470         if (info.v3) {
471                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
472                                  postat_ret, vap);
473                 error = 0;
474                 goto nfsmout;
475         } else {
476                 fp = nfsm_build(&info, NFSX_V2FATTR);
477                 nfsm_srvfattr(nfsd, vap, fp);
478         }
479         /* fall through */
480
481 nfsmout:
482         *mrq = info.mreq;
483         if (vp)
484                 vput(vp);
485         return(error);
486 }
487
488 /*
489  * nfs lookup rpc
490  */
491 int
492 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
493              struct thread *td, struct mbuf **mrq)
494 {
495         struct sockaddr *nam = nfsd->nd_nam;
496         struct ucred *cred = &nfsd->nd_cr;
497         struct nfs_fattr *fp;
498         struct nlookupdata nd;
499         struct vnode *vp;
500         struct vnode *dirp;
501         struct nchandle nch;
502         nfsfh_t nfh;
503         fhandle_t *fhp;
504         int error = 0, len, dirattr_ret = 1;
505         int pubflag;
506         struct vattr va, dirattr, *vap = &va;
507         struct nfsm_info info;
508
509         info.mrep = nfsd->nd_mrep;
510         info.mreq = NULL;
511         info.md = nfsd->nd_md;
512         info.dpos = nfsd->nd_dpos;
513         info.v3 = (nfsd->nd_flag & ND_NFSV3);
514
515         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
516         nlookup_zero(&nd);
517         dirp = NULL;
518         vp = NULL;
519
520         fhp = &nfh.fh_generic;
521         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
522         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
523
524         pubflag = nfs_ispublicfh(fhp);
525
526         error = nfs_namei(&nd, cred, 0, NULL, &vp,
527                 fhp, len, slp, nam, &info.md, &info.dpos,
528                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
529
530         /*
531          * namei failure, only dirp to cleanup.  Clear out garbarge from
532          * structure in case macros jump to nfsmout.
533          */
534
535         if (error) {
536                 if (dirp) {
537                         if (info.v3)
538                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
539                         vrele(dirp);
540                         dirp = NULL;
541                 }
542                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
543                                       NFSX_POSTOPATTR(info.v3), &error));
544                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
545                 error = 0;
546                 goto nfsmout;
547         }
548
549         /*
550          * Locate index file for public filehandle
551          *
552          * error is 0 on entry and 0 on exit from this block.
553          */
554
555         if (pubflag) {
556                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
557                         /*
558                          * Setup call to lookup() to see if we can find
559                          * the index file. Arguably, this doesn't belong
560                          * in a kernel.. Ugh.  If an error occurs, do not
561                          * try to install an index file and then clear the
562                          * error.
563                          *
564                          * When we replace nd with ind and redirect ndp,
565                          * maintenance of ni_startdir and ni_vp shift to
566                          * ind and we have to clean them up in the old nd.
567                          * However, the cnd resource continues to be maintained
568                          * via the original nd.  Confused?  You aren't alone!
569                          */
570                         vn_unlock(vp);
571                         cache_copy(&nd.nl_nch, &nch);
572                         nlookup_done(&nd);
573                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
574                                                 UIO_SYSSPACE, 0, cred, &nch);
575                         cache_drop(&nch);
576                         if (error == 0)
577                                 error = nlookup(&nd);
578
579                         if (error == 0) {
580                                 /*
581                                  * Found an index file. Get rid of
582                                  * the old references.  transfer vp and
583                                  * load up the new vp.  Fortunately we do
584                                  * not have to deal with dvp, that would be
585                                  * a huge mess.
586                                  */
587                                 if (dirp)       
588                                         vrele(dirp);
589                                 dirp = vp;
590                                 vp = NULL;
591                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
592                                                         LK_EXCLUSIVE, &vp);
593                                 KKASSERT(error == 0);
594                         }
595                         error = 0;
596                 }
597                 /*
598                  * If the public filehandle was used, check that this lookup
599                  * didn't result in a filehandle outside the publicly exported
600                  * filesystem.  We clear the poor vp here to avoid lockups due
601                  * to NFS I/O.
602                  */
603
604                 if (vp->v_mount != nfs_pub.np_mount) {
605                         vput(vp);
606                         vp = NULL;
607                         error = EPERM;
608                 }
609         }
610
611         if (dirp) {
612                 if (info.v3)
613                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
614                 vrele(dirp);
615                 dirp = NULL;
616         }
617
618         /*
619          * Resources at this point:
620          *      ndp->ni_vp      may not be NULL
621          *
622          */
623
624         if (error) {
625                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
626                                       NFSX_POSTOPATTR(info.v3), &error));
627                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
628                 error = 0;
629                 goto nfsmout;
630         }
631
632         /*
633          * Clear out some resources prior to potentially blocking.  This
634          * is not as critical as ni_dvp resources in other routines, but
635          * it helps.
636          */
637         nlookup_done(&nd);
638
639         /*
640          * Get underlying attribute, then release remaining resources ( for
641          * the same potential blocking reason ) and reply.
642          */
643         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
644         error = VFS_VPTOFH(vp, &fhp->fh_fid);
645         if (!error)
646                 error = VOP_GETATTR(vp, vap);
647
648         vput(vp);
649         vp = NULL;
650         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
651                               NFSX_SRVFH(info.v3) +
652                               NFSX_POSTOPORFATTR(info.v3) +
653                               NFSX_POSTOPATTR(info.v3),
654                               &error));
655         if (error) {
656                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
657                 error = 0;
658                 goto nfsmout;
659         }
660         nfsm_srvfhtom(&info, fhp);
661         if (info.v3) {
662                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
663                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
664         } else {
665                 fp = nfsm_build(&info, NFSX_V2FATTR);
666                 nfsm_srvfattr(nfsd, vap, fp);
667         }
668
669 nfsmout:
670         *mrq = info.mreq;
671         if (dirp)
672                 vrele(dirp);
673         nlookup_done(&nd);              /* may be called twice */
674         if (vp)
675                 vput(vp);
676         return (error);
677 }
678
679 /*
680  * nfs readlink service
681  */
682 int
683 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
684                struct thread *td, struct mbuf **mrq)
685 {
686         struct sockaddr *nam = nfsd->nd_nam;
687         struct ucred *cred = &nfsd->nd_cr;
688         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
689         struct iovec *ivp = iv;
690         u_int32_t *tl;
691         int error = 0, rdonly, i, tlen, len, getret;
692         struct mbuf *mp1, *mp2, *mp3;
693         struct vnode *vp = NULL;
694         struct mount *mp = NULL;
695         struct vattr attr;
696         nfsfh_t nfh;
697         fhandle_t *fhp;
698         struct uio io, *uiop = &io;
699         struct nfsm_info info;
700
701         info.mrep = nfsd->nd_mrep;
702         info.mreq = NULL;
703         info.md = nfsd->nd_md;
704         info.dpos = nfsd->nd_dpos;
705         info.v3 = (nfsd->nd_flag & ND_NFSV3);
706
707         bzero(&io, sizeof(struct uio));
708
709         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
710 #ifndef nolint
711         mp2 = NULL;
712 #endif
713         mp3 = NULL;
714         fhp = &nfh.fh_generic;
715         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
716         len = 0;
717         i = 0;
718         while (len < NFS_MAXPATHLEN) {
719                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
720                 mp1->m_len = MCLBYTES;
721                 if (len == 0)
722                         mp3 = mp2 = mp1;
723                 else {
724                         mp2->m_next = mp1;
725                         mp2 = mp1;
726                 }
727                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
728                         mp1->m_len = NFS_MAXPATHLEN-len;
729                         len = NFS_MAXPATHLEN;
730                 } else
731                         len += mp1->m_len;
732                 ivp->iov_base = mtod(mp1, caddr_t);
733                 ivp->iov_len = mp1->m_len;
734                 i++;
735                 ivp++;
736         }
737         uiop->uio_iov = iv;
738         uiop->uio_iovcnt = i;
739         uiop->uio_offset = 0;
740         uiop->uio_resid = len;
741         uiop->uio_rw = UIO_READ;
742         uiop->uio_segflg = UIO_SYSSPACE;
743         uiop->uio_td = NULL;
744         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
745                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
746         if (error) {
747                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
748                                       2 * NFSX_UNSIGNED, &error));
749                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
750                 error = 0;
751                 goto nfsmout;
752         }
753         if (vp->v_type != VLNK) {
754                 if (info.v3)
755                         error = EINVAL;
756                 else
757                         error = ENXIO;
758                 goto out;
759         }
760         error = VOP_READLINK(vp, uiop, cred);
761 out:
762         getret = VOP_GETATTR(vp, &attr);
763         vput(vp);
764         vp = NULL;
765         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
766                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
767                              &error));
768         if (info.v3) {
769                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
770                 if (error) {
771                         error = 0;
772                         goto nfsmout;
773                 }
774         }
775         if (uiop->uio_resid > 0) {
776                 len -= uiop->uio_resid;
777                 tlen = nfsm_rndup(len);
778                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
779         }
780         tl = nfsm_build(&info, NFSX_UNSIGNED);
781         *tl = txdr_unsigned(len);
782         info.mb->m_next = mp3;
783         mp3 = NULL;
784 nfsmout:
785         *mrq = info.mreq;
786         if (mp3)
787                 m_freem(mp3);
788         if (vp)
789                 vput(vp);
790         return(error);
791 }
792
793 /*
794  * nfs read service
     *
     * Pulls a file handle, a starting offset (64-bit for v3, 32-bit for v2)
     * and a requested length out of the request, reads at most that many
     * bytes from a regular file straight into mbufs chained onto the reply,
     * and fills in the attributes and byte count (plus an eof flag for v3).
     *
     * The reply chain is returned through *mrq.  Each failure path below
     * that builds an error reply explicitly resets error to 0 before
     * jumping to nfsmout.
     *
     * NOTE(review): NEGREPLYOUT/NULLOUT/NEGKEEPOUT are defined outside this
     * chunk; they presumably branch to nfsmout on failure -- confirm.
795  */
796 int
797 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
798            struct thread *td, struct mbuf **mrq)
799 {
800         struct nfsm_info info;
801         struct sockaddr *nam = nfsd->nd_nam;
802         struct ucred *cred = &nfsd->nd_cr;
803         struct iovec *iv;
804         struct iovec *iv2;
805         struct mbuf *m;
806         struct nfs_fattr *fp;
807         u_int32_t *tl;
808         int i;
809         int reqlen;
810         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
811         struct mbuf *m2;
812         struct vnode *vp = NULL;
813         struct mount *mp = NULL;
814         nfsfh_t nfh;
815         fhandle_t *fhp;
816         struct uio io, *uiop = &io;
817         struct vattr va, *vap = &va;
818         struct nfsheur *nh;
819         off_t off;
820         int ioflag = 0;
821
            /* Seed the parse/build state from the request descriptor. */
822         info.mrep = nfsd->nd_mrep;
823         info.mreq = NULL;
824         info.md = nfsd->nd_md;
825         info.dpos = nfsd->nd_dpos;
826         info.v3 = (nfsd->nd_flag & ND_NFSV3);
827
828         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
829         fhp = &nfh.fh_generic;
830         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
            /* v3 sends a 64-bit offset (two words), v2 a single 32-bit word. */
831         if (info.v3) {
832                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
833                 off = fxdr_hyper(tl);
834         } else {
835                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
836                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
837         }
            /* Requested byte count, bounded by NFS_SRVMAXDATA(nfsd). */
838         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
839                                             NFS_SRVMAXDATA(nfsd), &error));
840
841         /*
842          * Reference vp.  If an error occurs, vp will be invalid, but we
843          * have to NULL it just in case.  The macros might goto nfsmout
844          * as well.
845          */
846
847         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
848                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
849         if (error) {
850                 vp = NULL;
851                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
852                                       2 * NFSX_UNSIGNED, &error));
853                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
854                 error = 0;
855                 goto nfsmout;
856         }
857
            /* Only regular files can be read; the errno differs by version. */
858         if (vp->v_type != VREG) {
859                 if (info.v3)
860                         error = EINVAL;
861                 else
862                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
863         }
            /*
             * If the VREAD check is refused, retry with VEXEC; the result
             * of that second check is what ends up in error.
             */
864         if (!error) {
865             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
866                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
867         }
            /*
             * Attributes are fetched even when an error is already pending
             * so the failure reply below can pass them along; a getattr
             * failure only becomes the error if nothing failed earlier.
             */
868         getret = VOP_GETATTR(vp, vap);
869         if (!error)
870                 error = getret;
871         if (error) {
872                 vput(vp);
873                 vp = NULL;
874                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
875                                       NFSX_POSTOPATTR(info.v3), &error));
876                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
877                 error = 0;
878                 goto nfsmout;
879         }
880
881         /*
882          * Calculate byte count to read
             *
             * A read starting at or beyond va_size yields 0 bytes; a read
             * that would run past va_size is clipped to what remains.
883          */
884
885         if (off >= vap->va_size)
886                 cnt = 0;
887         else if ((off + reqlen) > vap->va_size)
888                 cnt = vap->va_size - off;
889         else
890                 cnt = reqlen;
891
892         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
893                               NFSX_POSTOPORFATTR(info.v3) +
894                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
895                               &error));
            /*
             * Reserve the fixed part of the reply now.  fp remembers where
             * the attributes go and tl is left pointing at the trailing
             * count word(s); both are filled in after the read completes.
             */
896         if (info.v3) {
897                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
898                 *tl++ = nfs_true;
899                 fp = (struct nfs_fattr *)tl;
900                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
901         } else {
902                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
903                 fp = (struct nfs_fattr *)tl;
904                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
905         }
            /* len is cnt after nfsm_rndup (presumably XDR word rounding). */
906         len = left = nfsm_rndup(cnt);
907         if (cnt > 0) {
908                 /*
909                  * Generate the mbuf list with the uio_iov ref. to it.
                     *
                     * Pass 1: count the iovecs needed (i), appending empty
                     * mbuf clusters to the reply until len bytes fit.
910                  */
911                 i = 0;
912                 m = m2 = info.mb;
913                 while (left > 0) {
914                         siz = min(M_TRAILINGSPACE(m), left);
915                         if (siz > 0) {
916                                 left -= siz;
917                                 i++;
918                         }
919                         if (left > 0) {
920                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
921                                 m->m_len = 0;
922                                 m2->m_next = m;
923                                 m2 = m;
924                         }
925                 }
                    /*
                     * Pass 2: walk the same chain again, pointing one iovec
                     * at the free tail of each mbuf and claiming that space
                     * by bumping m_len.  iv2 keeps the array base for kfree.
                     */
926                 iv = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
927                 uiop->uio_iov = iv2 = iv;
928                 m = info.mb;
929                 left = len;
930                 i = 0;
931                 while (left > 0) {
932                         if (m == NULL)
933                                 panic("nfsrv_read iov");
934                         siz = min(M_TRAILINGSPACE(m), left);
935                         if (siz > 0) {
936                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
937                                 iv->iov_len = siz;
938                                 m->m_len += siz;
939                                 left -= siz;
940                                 iv++;
941                                 i++;
942                         }
943                         m = m->m_next;
944                 }
                    /*
                     * NOTE(review): uio_td is not assigned anywhere in this
                     * function before the uio is handed to VOP_READ, whereas
                     * nfsrv_write sets it to NULL -- confirm this is safe.
                     */
945                 uiop->uio_iovcnt = i;
946                 uiop->uio_offset = off;
947                 uiop->uio_resid = len;
948                 uiop->uio_rw = UIO_READ;
949                 uiop->uio_segflg = UIO_SYSSPACE;
                    /* Fold the heuristic's sequential count into the io flags. */
950                 nh = nfsrv_sequential_heuristic(uiop, vp, 0);
951                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
952                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
                    /* On success record where the read stopped in the heuristic. */
953                 if (error == 0) {
954                         off = uiop->uio_offset;
955                         nh->nh_nextoff = off;
956                 }
957                 kfree((caddr_t)iv2, M_TEMP);
                    /*
                     * Read or post-read getattr failed: discard the reply
                     * built so far and build a fresh error reply instead.
                     */
958                 if (error || (getret = VOP_GETATTR(vp, vap))) {
959                         if (!error)
960                                 error = getret;
961                         m_freem(info.mreq);
962                         info.mreq = NULL;
963                         vput(vp);
964                         vp = NULL;
965                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
966                                               NFSX_POSTOPATTR(info.v3),
967                                               &error));
968                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
969                         error = 0;
970                         goto nfsmout;
971                 }
972         } else {
                    /* Nothing was read; leave no residual for the math below. */
973                 uiop->uio_resid = 0;
974         }
975         vput(vp);
976         vp = NULL;
977         nfsm_srvfattr(nfsd, vap, fp);
            /*
             * tlen is what was actually transferred out of the rounded-up
             * request; cnt is clipped to it and the chain is handed to
             * nfsm_adj when the reserved space and the data length differ.
             * NOTE(review): nfsm_adj is defined outside this chunk.
             */
978         tlen = len - uiop->uio_resid;
979         cnt = cnt < tlen ? cnt : tlen;
980         tlen = nfsm_rndup(cnt);
981         if (len != tlen || tlen != cnt)
982                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
            /*
             * v3 emits the count, then a flag that is true whenever fewer
             * bytes than requested were returned, then the count again.
             * v2 emits the count once.
             */
983         if (info.v3) {
984                 *tl++ = txdr_unsigned(cnt);
985                 if (cnt < reqlen)
986                         *tl++ = nfs_true;
987                 else
988                         *tl++ = nfs_false;
989         }
990         *tl = txdr_unsigned(cnt);
991 nfsmout:
            /* Common exit: publish the reply and drop any vnode still held. */
992         *mrq = info.mreq;
993         if (vp)
994                 vput(vp);
995         return(error);
996 }
997
998 /*
999  * nfs write service
     *
     * Decodes the file handle, offset, stability level (v3 only) and data
     * length from the request, trims the request mbuf chain in place so
     * that only the write payload has non-zero m_len, and writes that
     * payload to a regular file with VOP_WRITE using one iovec per mbuf.
     *
     * The reply chain is returned through *mrq.  Each failure path below
     * that builds an error reply explicitly resets error to 0 before
     * jumping to nfsmout.
     *
     * NOTE(review): NEGREPLYOUT/NULLOUT/NEGKEEPOUT are defined outside this
     * chunk; they presumably branch to nfsmout on failure -- confirm.
1000  */
1001 int
1002 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1003             struct thread *td, struct mbuf **mrq)
1004 {
1005         struct sockaddr *nam = nfsd->nd_nam;
1006         struct ucred *cred = &nfsd->nd_cr;
1007         struct iovec *ivp;
1008         int i, cnt;
1009         struct mbuf *mp1;
1010         struct nfs_fattr *fp;
1011         struct iovec *iv;
1012         struct vattr va, forat;
1013         struct vattr *vap = &va;
1014         u_int32_t *tl;
1015         int error = 0, rdonly, len, forat_ret = 1;
1016         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1017         int stable = NFSV3WRITE_FILESYNC;
1018         struct vnode *vp = NULL;
1019         struct mount *mp = NULL;
1020         struct nfsheur *nh;
1021         nfsfh_t nfh;
1022         fhandle_t *fhp;
1023         struct uio io, *uiop = &io;
1024         struct nfsm_info info;
1025         off_t off;
1026
             /* Seed the parse/build state from the request descriptor. */
1027         info.mrep = nfsd->nd_mrep;
1028         info.mreq = NULL;
1029         info.md = nfsd->nd_md;
1030         info.dpos = nfsd->nd_dpos;
1031         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1032
1033         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /* No request mbufs at all: return success with a NULL reply. */
1034         if (info.mrep == NULL) {
1035                 error = 0;
1036                 goto nfsmout;
1037         }
1038         fhp = &nfh.fh_generic;
1039         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
             /*
              * v3: five words -- 64-bit offset (two words), one word that is
              * skipped, the stability level, then the data length.
              * v2: four words -- first skipped, second is the offset, third
              * skipped, fourth is the data length.  v2 carries no stability
              * word; nfs_async switches the FILESYNC default to UNSTABLE.
              */
1040         if (info.v3) {
1041                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1042                 off = fxdr_hyper(tl);
1043                 tl += 3;
1044                 stable = fxdr_unsigned(int, *tl++);
1045         } else {
1046                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1047                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1048                 tl += 2;
1049                 if (nfs_async)
1050                         stable = NFSV3WRITE_UNSTABLE;
1051         }
             /* retlen keeps the requested length for the v3 reply. */
1052         retlen = len = fxdr_unsigned(int32_t, *tl);
1053         cnt = i = 0;
1054
1055         /*
1056          * For NFS Version 2, it is not obvious what a write of zero length
1057          * should do, but I might as well be consistent with Version 3,
1058          * which is to return ok so long as there are no permission problems.
1059          */
             /*
              * Trim the request chain in place.  Mbufs ahead of the current
              * parse position get m_len = 0, the mbuf holding the parse
              * position has the already-consumed bytes stripped, and the
              * chain is cut off once len payload bytes have been seen.
              * i accumulates payload bytes found; cnt counts the mbufs left
              * with data, which is the number of iovecs needed later.
              */
1060         if (len > 0) {
1061             zeroing = 1;
1062             mp1 = info.mrep;
1063             while (mp1) {
1064                 if (mp1 == info.md) {
1065                         zeroing = 0;
1066                         adjust = info.dpos - mtod(mp1, caddr_t);
1067                         mp1->m_len -= adjust;
1068                         if (mp1->m_len > 0 && adjust > 0)
1069                                 mp1->m_data += adjust;
1070                 }
1071                 if (zeroing)
1072                         mp1->m_len = 0;
1073                 else if (mp1->m_len > 0) {
1074                         i += mp1->m_len;
1075                         if (i > len) {
1076                                 mp1->m_len -= (i - len);
1077                                 zeroing = 1;
1078                         }
1079                         if (mp1->m_len > 0)
1080                                 cnt++;
1081                 }
1082                 mp1 = mp1->m_next;
1083             }
1084         }
             /*
              * Reject oversized or negative lengths, and requests whose
              * chain holds fewer than len payload bytes.  forat/va are not
              * filled in yet; forat_ret and aftat_ret are still 1 here.
              */
1085         if (len > NFS_MAXDATA || len < 0 || i < len) {
1086                 error = EIO;
1087                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1088                                       2 * NFSX_UNSIGNED, &error));
1089                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1090                                  aftat_ret, vap);
1091                 error = 0;
1092                 goto nfsmout;
1093         }
1094         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1095                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1096         if (error) {
1097                 vp = NULL;
1098                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1099                                       2 * NFSX_UNSIGNED, &error));
1100                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1101                                  aftat_ret, vap);
1102                 error = 0;
1103                 goto nfsmout;
1104         }
             /* Pre-operation attributes are only collected for v3. */
1105         if (info.v3)
1106                 forat_ret = VOP_GETATTR(vp, &forat);
             /* Only regular files can be written; errno differs by version. */
1107         if (vp->v_type != VREG) {
1108                 if (info.v3)
1109                         error = EINVAL;
1110                 else
1111                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1112         }
1113         if (!error) {
1114                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1115         }
1116         if (error) {
1117                 vput(vp);
1118                 vp = NULL;
1119                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1120                                       NFSX_WCCDATA(info.v3), &error));
1121                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1122                                  aftat_ret, vap);
1123                 error = 0;
1124                 goto nfsmout;
1125         }
1126
             /*
              * Build one iovec per mbuf that still has data after the
              * trimming pass above (cnt of them) and issue the write.
              * iv keeps the array base for kfree while ivp advances.
              */
1127         if (len > 0) {
1128             ivp = kmalloc(cnt * sizeof(struct iovec), M_TEMP, M_WAITOK);
1129             uiop->uio_iov = iv = ivp;
1130             uiop->uio_iovcnt = cnt;
1131             mp1 = info.mrep;
1132             while (mp1) {
1133                 if (mp1->m_len > 0) {
1134                         ivp->iov_base = mtod(mp1, caddr_t);
1135                         ivp->iov_len = mp1->m_len;
1136                         ivp++;
1137                 }
1138                 mp1 = mp1->m_next;
1139             }
1140
1141             /*
1142              * XXX
1143              * The IO_METASYNC flag indicates that all metadata (and not just
1144              * enough to ensure data integrity) must be written to stable storage
1145              * synchronously.
1146              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1147              */
1148             if (stable == NFSV3WRITE_UNSTABLE)
1149                 ioflags = IO_NODELOCKED;
1150             else if (stable == NFSV3WRITE_DATASYNC)
1151                 ioflags = (IO_SYNC | IO_NODELOCKED);
1152             else
1153                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1154             uiop->uio_resid = len;
1155             uiop->uio_rw = UIO_WRITE;
1156             uiop->uio_segflg = UIO_SYSSPACE;
1157             uiop->uio_td = NULL;
1158             uiop->uio_offset = off;
                 /* Fold the heuristic's sequential count into the io flags. */
1159             nh = nfsrv_sequential_heuristic(uiop, vp, 1);
1160             ioflags |= nh->nh_seqcount << IO_SEQSHIFT;
1161             error = VOP_WRITE(vp, uiop, ioflags, cred);
1162             if (error == 0)
1163                 nh->nh_nextoff = uiop->uio_offset;
1164             nfsstats.srvvop_writes++;
1165             kfree((caddr_t)iv, M_TEMP);
1166         }
             /*
              * Post-operation attributes are fetched whether or not the
              * write succeeded; a getattr failure only becomes the error
              * if the write itself did not fail.
              */
1167         aftat_ret = VOP_GETATTR(vp, vap);
1168         vput(vp);
1169         vp = NULL;
1170         if (!error)
1171                 error = aftat_ret;
1172         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1173                               NFSX_PREOPATTR(info.v3) +
1174                               NFSX_POSTOPORFATTR(info.v3) +
1175                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1176                               &error));
             /*
              * v3 reply: wcc data, then (on success) four words -- the
              * requested length, the committed level, and two words taken
              * from nfsver.  v2 reply: just the file attributes.
              */
1177         if (info.v3) {
1178                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1179                                  aftat_ret, vap);
1180                 if (error) {
1181                         error = 0;
1182                         goto nfsmout;
1183                 }
1184                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1185                 *tl++ = txdr_unsigned(retlen);
1186                 /*
1187                  * If nfs_async is set, then pretend the write was FILESYNC.
                      * Anything other than UNSTABLE is also reported as
                      * FILESYNC.
1188                  */
1189                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1190                         *tl++ = txdr_unsigned(stable);
1191                 else
1192                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1193                 /*
1194                  * Actually, there is no need to txdr these fields,
1195                  * but it may make the values more human readable,
1196                  * for debugging purposes.
                      *
                      * nfsver is initialised from boottime the first time
                      * it is found with tv_sec == 0.
1197                  */
1198                 if (nfsver.tv_sec == 0)
1199                         nfsver = boottime;
1200                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1201                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1202         } else {
1203                 fp = nfsm_build(&info, NFSX_V2FATTR);
1204                 nfsm_srvfattr(nfsd, vap, fp);
1205         }
1206 nfsmout:
             /* Common exit: publish the reply and drop any vnode still held. */
1207         *mrq = info.mreq;
1208         if (vp)
1209                 vput(vp);
1210         return(error);
1211 }
1212
1213 /*
1214  * NFS write service with write gathering support. Called when
1215  * nfsrvw_procrastinate > 0.
1216  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1217  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Francisco,
1218  * Jan. 1994.
      *
      * The function works in three phases:
      *  1. If *ndp is non-NULL, take ownership of that request (*ndp is set
      *     to NULL), decode its write header, trim its mbuf chain down to
      *     the payload, and queue it on the socket's time queue and, if it
      *     was not rejected, on the per-file-handle delay hash, coalescing
      *     it with contiguous queued writes.
      *  2. Perform VOP_WRITE for every queued request whose nd_time has
      *     passed and build replies for it and everything coalesced into it.
      *  3. Hand back at most one finished request through *ndp together
      *     with its reply through *mrq (both NULL if none is ready).
      *
      * Always returns 0; per-request errors are encoded into the replies.
1219  */
1220 int
1221 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1222                   struct thread *td, struct mbuf **mrq)
1223 {
1224         struct iovec *ivp;
1225         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1226         struct nfs_fattr *fp;
1227         int i;
1228         struct iovec *iov;
1229         struct nfsrvw_delayhash *wpp;
1230         struct ucred *cred;
1231         struct vattr va, forat;
1232         u_int32_t *tl;
1233         int error = 0, rdonly, len, forat_ret = 1;
1234         int ioflags, aftat_ret = 1, adjust, zeroing;
1235         struct mbuf *mp1;
1236         struct vnode *vp = NULL;
1237         struct mount *mp = NULL;
1238         struct uio io, *uiop = &io;
1239         u_quad_t cur_usec;
1240         struct nfsm_info info;
1241
1242         info.mreq = NULL;
1243
1244         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1245 #ifndef nolint
1246         i = 0;
1247         len = 0;
1248 #endif
             /* Phase 1: accept and queue a newly arrived write request. */
1249         if (*ndp) {
1250             nfsd = *ndp;
1251             *ndp = NULL;
1252             info.mrep = nfsd->nd_mrep;
1253             info.mreq = NULL;
1254             info.md = nfsd->nd_md;
1255             info.dpos = nfsd->nd_dpos;
1256             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1257             cred = &nfsd->nd_cr;
1258             LIST_INIT(&nfsd->nd_coalesce);
1259             nfsd->nd_mreq = NULL;
1260             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
                 /* The write becomes due after the per-version delay. */
1261             cur_usec = nfs_curusec();
1262             nfsd->nd_time = cur_usec +
1263                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1264     
1265             /*
1266              * Now, get the write header..
                  *
                  * Same word layout as in nfsrv_write: v3 has a 64-bit
                  * offset, one skipped word, the stability level and the
                  * length; v2 has a skipped word, the offset, a skipped
                  * word and the length.
1267              */
1268             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1269             if (info.v3) {
1270                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1271                 nfsd->nd_off = fxdr_hyper(tl);
1272                 tl += 3;
1273                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1274             } else {
1275                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1276                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1277                 tl += 2;
1278                 if (nfs_async)
1279                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1280             }
1281             len = fxdr_unsigned(int32_t, *tl);
1282             nfsd->nd_len = len;
1283             nfsd->nd_eoff = nfsd->nd_off + len;
1284     
1285             /*
1286              * Trim the header out of the mbuf list and trim off any trailing
1287              * junk so that the mbuf list has only the write data.
                  *
                  * i accumulates the payload bytes found in the chain.
1288              */
1289             zeroing = 1;
1290             i = 0;
1291             mp1 = info.mrep;
1292             while (mp1) {
1293                 if (mp1 == info.md) {
1294                     zeroing = 0;
1295                     adjust = info.dpos - mtod(mp1, caddr_t);
1296                     mp1->m_len -= adjust;
1297                     if (mp1->m_len > 0 && adjust > 0)
1298                         mp1->m_data += adjust;
1299                 }
1300                 if (zeroing)
1301                     mp1->m_len = 0;
1302                 else {
1303                     i += mp1->m_len;
1304                     if (i > len) {
1305                         mp1->m_len -= (i - len);
1306                         zeroing = 1;
1307                     }
1308                 }
1309                 mp1 = mp1->m_next;
1310             }
                 /*
                  * Bad length or short payload.  The nfsmout label sits
                  * inside this body, so a goto nfsmout from elsewhere also
                  * lands here: the request is dropped, an EIO reply is
                  * built right away, and nd_time = 0 makes it sort first.
                  * Clearing nd_mrep keeps it off the delay hash below.
                  */
1311             if (len > NFS_MAXDATA || len < 0  || i < len) {
1312 nfsmout:
1313                 m_freem(info.mrep);
1314                 info.mrep = NULL;
1315                 error = EIO;
1316                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1317                 if (info.v3) {
1318                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1319                                      aftat_ret, &va);
1320                 }
1321                 nfsd->nd_mreq = info.mreq;
1322                 nfsd->nd_mrep = NULL;
1323                 nfsd->nd_time = 0;
1324             }
1325     
1326             /*
1327              * Add this entry to the hash and time queues.
                  *
                  * Time queue: insert after the last entry whose nd_time is
                  * strictly smaller than this one's.
1328              */
1329             owp = NULL;
1330             wp = slp->ns_tq.lh_first;
1331             while (wp && wp->nd_time < nfsd->nd_time) {
1332                 owp = wp;
1333                 wp = wp->nd_tq.le_next;
1334             }
1335             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1336             if (owp) {
1337                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1338             } else {
1339                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1340             }
                 /*
                  * Delay hash (skipped for rejected requests): first step
                  * over entries for other file handles, then over entries
                  * for this handle with a smaller offset.
                  */
1341             if (nfsd->nd_mrep) {
1342                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1343                 owp = NULL;
1344                 wp = wpp->lh_first;
1345                 while (wp &&
1346                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1347                     owp = wp;
1348                     wp = wp->nd_hash.le_next;
1349                 }
1350                 while (wp && wp->nd_off < nfsd->nd_off &&
1351                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1352                     owp = wp;
1353                     wp = wp->nd_hash.le_next;
1354                 }
1355                 if (owp) {
1356                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1357
1358                     /*
1359                      * Search the hash list for overlapping entries and
1360                      * coalesce.
                          *
                          * The successor is saved in wp before the call
                          * because nfsrvw_coalesce unlinks nfsd from the
                          * hash list.  Entries that are contiguous but have
                          * different credentials are stepped over.
1361                      */
1362                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1363                         wp = nfsd->nd_hash.le_next;
1364                         if (NFSW_SAMECRED(owp, nfsd))
1365                             nfsrvw_coalesce(owp, nfsd);
1366                     }
1367                 } else {
1368                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1369                 }
1370             }
1371         }
1372     
1373         /*
1374          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1375          * and generate the associated reply mbuf list(s).
              *
              * Phase 2.  The time queue is scanned from the head; the scan
              * stops at the first entry that is not yet due and skips
              * entries that already carry a reply.  After each processed
              * entry the scan restarts at loop1 because the queue changed.
1376          */
1377 loop1:
1378         cur_usec = nfs_curusec();
1379         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1380                 owp = nfsd->nd_tq.le_next;
1381                 if (nfsd->nd_time > cur_usec)
1382                     break;
1383                 if (nfsd->nd_mreq)
1384                     continue;
1385                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1386                 LIST_REMOVE(nfsd, nd_tq);
1387                 LIST_REMOVE(nfsd, nd_hash);
1388                 info.mrep = nfsd->nd_mrep;
1389                 info.mreq = NULL;
1390                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1391                 nfsd->nd_mrep = NULL;
1392                 cred = &nfsd->nd_cr;
1393                 forat_ret = aftat_ret = 1;
1394                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1395                                      nfsd->nd_nam, &rdonly,
1396                                      (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1397                 if (!error) {
1398                     if (info.v3)
1399                         forat_ret = VOP_GETATTR(vp, &forat);
1400                     if (vp->v_type != VREG) {
1401                         if (info.v3)
1402                             error = EINVAL;
1403                         else
1404                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1405                     }
1406                 } else {
1407                     vp = NULL;
1408                 }
1409                 if (!error) {
1410                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1411                 }
1412     
1413                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1414                     ioflags = IO_NODELOCKED;
1415                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1416                     ioflags = (IO_SYNC | IO_NODELOCKED);
1417                 else
1418                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1419                 uiop->uio_rw = UIO_WRITE;
1420                 uiop->uio_segflg = UIO_SYSSPACE;
1421                 uiop->uio_td = NULL;
1422                 uiop->uio_offset = nfsd->nd_off;
1423                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
                     /*
                      * Count the non-empty mbufs, build one iovec for each,
                      * and write.  The iovec array is built even when an
                      * error is pending; only VOP_WRITE itself is skipped.
                      */
1424                 if (uiop->uio_resid > 0) {
1425                     mp1 = info.mrep;
1426                     i = 0;
1427                     while (mp1) {
1428                         if (mp1->m_len > 0)
1429                             i++;
1430                         mp1 = mp1->m_next;
1431                     }
1432                     uiop->uio_iovcnt = i;
1433                     iov = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
1434                     uiop->uio_iov = ivp = iov;
1435                     mp1 = info.mrep;
1436                     while (mp1) {
1437                         if (mp1->m_len > 0) {
1438                             ivp->iov_base = mtod(mp1, caddr_t);
1439                             ivp->iov_len = mp1->m_len;
1440                             ivp++;
1441                         }
1442                         mp1 = mp1->m_next;
1443                     }
1444                     if (!error) {
1445                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1446                         nfsstats.srvvop_writes++;
1447                     }
1448                     kfree((caddr_t)iov, M_TEMP);
1449                 }
                     /* The request data is no longer needed once written. */
1450                 m_freem(info.mrep);
1451                 info.mrep = NULL;
1452                 if (vp) {
1453                     aftat_ret = VOP_GETATTR(vp, &va);
1454                     vput(vp);
1455                     vp = NULL;
1456                 }
1457
1458                 /*
1459                  * Loop around generating replies for all write rpcs that have
1460                  * now been completed.
                      *
                      * swp is the request that was actually written; nfsd
                      * walks swp first and then every request on
                      * swp->nd_coalesce.  All of them share the same error
                      * and attribute results, and the committed level
                      * reported is always swp->nd_stable.
1461                  */
1462                 swp = nfsd;
1463                 do {
1464                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1465                     if (error) {
1466                         nfsm_writereply(&info, nfsd, slp, error,
1467                                         NFSX_WCCDATA(info.v3));
1468                         if (info.v3) {
1469                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1470                                              aftat_ret, &va);
1471                         }
1472                     } else {
1473                         nfsm_writereply(&info, nfsd, slp, error,
1474                                         NFSX_PREOPATTR(info.v3) +
1475                                         NFSX_POSTOPORFATTR(info.v3) +
1476                                         2 * NFSX_UNSIGNED +
1477                                         NFSX_WRITEVERF(info.v3));
1478                         if (info.v3) {
1479                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1480                                              aftat_ret, &va);
1481                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1482                             *tl++ = txdr_unsigned(nfsd->nd_len);
1483                             *tl++ = txdr_unsigned(swp->nd_stable);
1484                             /*
1485                              * Actually, there is no need to txdr these fields,
1486                              * but it may make the values more human readable,
1487                              * for debugging purposes.
1488                              */
1489                             if (nfsver.tv_sec == 0)
1490                                     nfsver = boottime;
1491                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1492                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1493                         } else {
1494                             fp = nfsm_build(&info, NFSX_V2FATTR);
1495                             nfsm_srvfattr(nfsd, &va, fp);
1496                         }
1497                     }
1498                     nfsd->nd_mreq = info.mreq;
1499                     if (nfsd->nd_mrep)
1500                         panic("nfsrv_write: nd_mrep not free");
1501
1502                     /*
1503                      * Done. Put it at the head of the timer queue so that
1504                      * the final phase can return the reply.
                          *
                          * swp itself is re-queued after the loop; each
                          * coalesced request is unlinked from nd_coalesce
                          * (it is linked there through nd_tq) before its
                          * reply is built on the next iteration.
1505                      */
1506                     if (nfsd != swp) {
1507                         nfsd->nd_time = 0;
1508                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1509                     }
1510                     nfsd = swp->nd_coalesce.lh_first;
1511                     if (nfsd) {
1512                         LIST_REMOVE(nfsd, nd_tq);
1513                     }
1514                 } while (nfsd);
1515                 swp->nd_time = 0;
1516                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1517                 goto loop1;
1518         }
1519
1520         /*
1521          * Search for a reply to return.
              *
              * Phase 3.  The first queued request that has a reply is
              * unlinked and returned; the others stay queued.
1522          */
1523         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1524                 if (nfsd->nd_mreq) {
1525                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1526                     LIST_REMOVE(nfsd, nd_tq);
1527                     break;
1528                 }
1529         }
1530         if (nfsd) {
1531                 *ndp = nfsd;
1532                 *mrq = nfsd->nd_mreq;
1533         } else {
1534                 *ndp = NULL;
1535                 *mrq = NULL;
1536         }
1537         return (0);
1538 }
1539
1540 /*
1541  * Coalesce the write request nfsd into owp. To do this we must:
1542  * - remove nfsd from the queues
1543  * - merge nfsd->nd_mrep into owp->nd_mrep
1544  * - update the nd_eoff and nd_stable for owp
1545  * - put nfsd on owp's nd_coalesce list
1546  * NB: Must be called at splsoftclock().
      *
      * owp  - the queued request that absorbs the data and stays queued
      * nfsd - the request being merged; on return its nd_mrep is NULL and
      *        it is linked on owp->nd_coalesce through its nd_tq field
      *
      * Panics if nfsd extends past owp's end but starts beyond it
      * (negative overlap).
1547  */
1548 static void
1549 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1550 {
1551         int overlap;
1552         struct mbuf *mp1;
1553         struct nfsrv_descript *p;
1554
1555         NFS_DPF(WG, ("C%03x-%03x",
1556                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1557         LIST_REMOVE(nfsd, nd_hash);
1558         LIST_REMOVE(nfsd, nd_tq);
             /*
              * nfsd reaches past owp's end: drop the leading bytes that
              * owp already covers, append the rest to the tail of owp's
              * mbuf chain and extend owp's end offset.  Otherwise nfsd adds
              * nothing beyond owp's range and its data is simply freed.
              */
1559         if (owp->nd_eoff < nfsd->nd_eoff) {
1560             overlap = owp->nd_eoff - nfsd->nd_off;
1561             if (overlap < 0)
1562                 panic("nfsrv_coalesce: bad off");
1563             if (overlap > 0)
1564                 m_adj(nfsd->nd_mrep, overlap);
1565             mp1 = owp->nd_mrep;
1566             while (mp1->m_next)
1567                 mp1 = mp1->m_next;
1568             mp1->m_next = nfsd->nd_mrep;
1569             owp->nd_eoff = nfsd->nd_eoff;
1570         } else
1571             m_freem(nfsd->nd_mrep);
1572         nfsd->nd_mrep = NULL;
             /*
              * owp takes on the stricter stability level of the two:
              * FILESYNC always wins, DATASYNC only upgrades UNSTABLE.
              */
1573         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1574             owp->nd_stable = NFSV3WRITE_FILESYNC;
1575         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1576             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1577             owp->nd_stable = NFSV3WRITE_DATASYNC;
             /* nd_tq is reused as the linkage for the coalesce list. */
1578         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1579
1580         /*
1581          * If nfsd had anything else coalesced into it, transfer them
1582          * to owp, otherwise their replies will never get sent.
              *
              * The list head is re-read every iteration because each
              * LIST_REMOVE changes it.
1583          */
1584         for (p = nfsd->nd_coalesce.lh_first; p;
1585              p = nfsd->nd_coalesce.lh_first) {
1586             LIST_REMOVE(p, nd_tq);
1587             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1588         }
1589 }
1590
1591 /*
1592  * nfs create service (NFSv2 and NFSv3; info.v3 is taken from ND_NFSV3 in nfsd->nd_flag)
1593  * If it already exists, a client-supplied size (e.g. a truncate to 0 length) is applied via setattr
1594  */
1595 int
1596 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1597              struct thread *td, struct mbuf **mrq)      /* *mrq is set to info.mreq at nfsmout */
1598 {
1599         struct sockaddr *nam = nfsd->nd_nam;
1600         struct ucred *cred = &nfsd->nd_cr;
1601         struct nfs_fattr *fp;
1602         struct vattr va, dirfor, diraft;        /* dirfor/diraft: directory attributes before/after */
1603         struct vattr *vap = &va;
1604         struct nfsv2_sattr *sp;
1605         u_int32_t *tl;
1606         struct nlookupdata nd;
1607         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;      /* *_ret stay 1 until VOP_GETATTR(dirp) stores its result */
1608         udev_t rdev = NOUDEV;                   /* v2 only: device number taken from sa_size */
1609         caddr_t cp;
1610         int how, exclusive_flag = 0;
1611         struct vnode *dirp;
1612         struct vnode *dvp;
1613         struct vnode *vp;
1614         struct mount *mp;
1615         nfsfh_t nfh;
1616         fhandle_t *fhp;
1617         u_quad_t tempsize;
1618         u_char cverf[NFSX_V3CREATEVERF];        /* v3 exclusive-create verifier from the request */
1619         struct nfsm_info info;
1620
1621         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1622         nlookup_zero(&nd);
1623         dirp = NULL;                            /* nfsmout inspects dirp, dvp and vp on every exit path */
1624         dvp = NULL;
1625         vp = NULL;
1626
1627         info.mrep = nfsd->nd_mrep;
1628         info.mreq = NULL;
1629         info.md = nfsd->nd_md;
1630         info.dpos = nfsd->nd_dpos;
1631         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1632
1633         fhp = &nfh.fh_generic;
1634         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));   /* handle that nfs_namei() starts from */
1635         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));      /* length of the name to create */
1636
1637         /*
1638          * Call namei and do initial cleanup to get a few things
1639          * out of the way.  If we get an initial error we cleanup
1640          * and return here to avoid special-casing the invalid nd
1641          * structure through the rest of the case.  dirp may be
1642          * set even if an error occurs, but the nd structure will not
1643          * be valid at all if an error occurs so we have to invalidate it
1644          * prior to calling nfsm_reply ( which might goto nfsmout ).
1645          */
1646         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1647                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1648                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1649         mp = vfs_getvfs(&fhp->fh_fsid);         /* only used by nfsrv_access() below. NOTE(review): not NULL-checked or released here - confirm vfs_getvfs() semantics */
1650
1651         if (dirp) {
1652                 if (info.v3) {
1653                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);        /* v3: pre-operation directory attributes for the wcc data */
1654                 } else {
1655                         vrele(dirp);            /* v2 makes no further use of dirp */
1656                         dirp = NULL;
1657                 }
1658         }
1659         if (error) {
1660                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1661                                       NFSX_WCCDATA(info.v3), &error));
1662                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1663                                  diraft_ret, &diraft);  /* diraft_ret is still 1: no post-op attributes were fetched */
1664                 error = 0;                      /* error was handed to nfsm_reply(); the function itself returns 0 */
1665                 goto nfsmout;
1666         }
1667
1668         /*
1669          * No error.  Continue.  State:
1670          *
1671          *      dirp            may be valid
1672          *      vp              may be valid or NULL if the target does not
1673          *                      exist.
1674          *      dvp             is valid
1675          *
1676          * The error state is set through the code and we may also do some
1677          * opportunistic releasing of vnodes to avoid holding locks through
1678          * NFS I/O.  The cleanup at the end is a catch-all
1679          */
1680
1681         VATTR_NULL(vap);
1682         if (info.v3) {
1683                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1684                 how = fxdr_unsigned(int, *tl);  /* v3 create mode */
1685                 switch (how) {
1686                 case NFSV3CREATE_GUARDED:
1687                         if (vp) {               /* guarded create fails if the target exists */
1688                                 error = EEXIST;
1689                                 break;
1690                         }
1691                         /* fall through */
1692                 case NFSV3CREATE_UNCHECKED:
1693                         ERROROUT(nfsm_srvsattr(&info, vap));    /* client-supplied attributes */
1694                         break;
1695                 case NFSV3CREATE_EXCLUSIVE:
1696                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1697                         bcopy(cp, cverf, NFSX_V3CREATEVERF);    /* keep the verifier; no attributes are parsed in this mode */
1698                         exclusive_flag = 1;
1699                         break;
1700                 };
1701                 vap->va_type = VREG;            /* the v3 path always creates a regular file */
1702         } else {
1703                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1704                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));   /* v2: file type comes from the mode bits */
1705                 if (vap->va_type == VNON)
1706                         vap->va_type = VREG;
1707                 vap->va_mode = nfstov_mode(sp->sa_mode);
1708                 switch (vap->va_type) {
1709                 case VREG:
1710                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1711                         if (tsize != -1)        /* -1 means no size was supplied */
1712                                 vap->va_size = (u_quad_t)tsize;
1713                         break;
1714                 case VCHR:
1715                 case VBLK:
1716                 case VFIFO:
1717                         rdev = fxdr_unsigned(long, sp->sa_size);        /* sa_size carries the device number for these types */
1718                         break;
1719                 default:
1720                         break;
1721                 };
1722         }
1723
1724         /*
1725          * Iff doesn't exist, create it
1726          * otherwise just truncate to 0 length
1727          *   should I set the mode too ?
1728          *
1729          * The only possible error we can have at this point is EEXIST. 
1730          * vp will also be non-NULL in that case.
1731          */
1732         if (vp == NULL) {                       /* target does not exist: create it */
1733                 if (vap->va_mode == (mode_t)VNOVAL)
1734                         vap->va_mode = 0;       /* no mode supplied: create with mode 0 */
1735                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
1736                         vn_unlock(dvp);         /* dvp is passed unlocked but still referenced */
1737                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1738                                             nd.nl_cred, vap);
1739                         vrele(dvp);
1740                         dvp = NULL;             /* so the cleanup at nfsmout skips it */
1741                         if (error == 0) {
1742                                 if (exclusive_flag) {
1743                                         exclusive_flag = 0;     /* we created the file: skip the verifier comparison below */
1744                                         VATTR_NULL(vap);
1745                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1746                                                 NFSX_V3CREATEVERF);     /* the verifier is stored in the new file's atime */
1747                                         error = VOP_SETATTR(vp, vap, cred);
1748                                 }
1749                         }
1750                 } else if (
1751                         vap->va_type == VCHR || 
1752                         vap->va_type == VBLK ||
1753                         vap->va_type == VFIFO
1754                 ) {
1755                         /*
1756                          * Handle SysV FIFO node special cases.  All other
1757                          * devices require super user to access.
1758                          */
1759                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1760                                 vap->va_type = VFIFO;   /* VCHR with rdev 0xffffffff is treated as a FIFO */
1761                         if (vap->va_type != VFIFO &&
1762                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1763                                 goto nfsmreply0;        /* dvp is still locked here; nfsmout releases it */
1764                         }
1765                         vap->va_rmajor = umajor(rdev);
1766                         vap->va_rminor = uminor(rdev);
1767
1768                         vn_unlock(dvp);         /* same unlock / call / vrele sequence as the VOP_NCREATE path */
1769                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1770                         vrele(dvp);
1771                         dvp = NULL;
1772                         if (error)
1773                                 goto nfsmreply0;
1774 #if 0
1775                         /*
1776                          * XXX what is this junk supposed to do ?
1777                          */
1778
1779                         vput(vp);
1780                         vp = NULL;
1781
1782                         /*
1783                          * release dvp prior to lookup
1784                          */
1785                         vput(dvp);
1786                         dvp = NULL;
1787
1788                         /*
1789                          * Setup for lookup. 
1790                          *
1791                          * Even though LOCKPARENT was cleared, ni_dvp may
1792                          * be garbage. 
1793                          */
1794                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1795                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1796                         nd.ni_cnd.cn_td = td;
1797                         nd.ni_cnd.cn_cred = cred;
1798
1799                         error = lookup(&nd);
1800                         nd.ni_dvp = NULL;
1801
1802                         if (error != 0) {
1803                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1804                                                       0, &error));
1805                                 /* fall through on certain errors */
1806                         }
1807                         nfsrv_object_create(nd.ni_vp);
1808                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1809                                 error = EINVAL;
1810                                 goto nfsmreply0;
1811                         }
1812 #endif
1813                 } else {
1814                         error = ENXIO;          /* any other file type is not created by this handler */
1815                 }
1816         } else {                                /* target already exists */
1817                 if (vap->va_size != -1) {       /* act only if the client supplied a size */
1818                         error = nfsrv_access(mp, vp, VWRITE, cred,
1819                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1820                         if (!error) {
1821                                 tempsize = vap->va_size;
1822                                 VATTR_NULL(vap);        /* reset so that only the size is changed */
1823                                 vap->va_size = tempsize;
1824                                 error = VOP_SETATTR(vp, vap, cred);
1825                         }
1826                 }
1827         }
1828
1829         if (!error) {
1830                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1831                 error = VFS_VPTOFH(vp, &fhp->fh_fid);   /* reuse fhp for the file handle of vp */
1832                 if (!error)
1833                         error = VOP_GETATTR(vp, vap);   /* attributes returned in the reply */
1834         }
1835         if (info.v3) {
1836                 if (exclusive_flag && !error &&
1837                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))        /* flag still set and no error: the target already existed */
1838                         error = EEXIST;         /* its atime does not hold our verifier */
1839                 diraft_ret = VOP_GETATTR(dirp, &diraft);        /* NOTE(review): dirp is used without a NULL check although the state comment says it "may be valid" */
1840                 vrele(dirp);
1841                 dirp = NULL;
1842         }
1843         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1844                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1845                               NFSX_WCCDATA(info.v3),
1846                               &error));
1847         if (info.v3) {
1848                 if (!error) {
1849                         nfsm_srvpostop_fh(&info, fhp);
1850                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1851                 }
1852                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1853                                  diraft_ret, &diraft);  /* directory wcc data is added whether or not the create succeeded */
1854                 error = 0;
1855         } else {
1856                 nfsm_srvfhtom(&info, fhp);
1857                 fp = nfsm_build(&info, NFSX_V2FATTR);
1858                 nfsm_srvfattr(nfsd, vap, fp);
1859         }
1860         goto nfsmout;
1861
1862 nfsmreply0:                                     /* failure exit: reply with a zero-length body */
1863         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1864         error = 0;
1865         /* fall through */
1866
1867 nfsmout:                                        /* common exit: publish the reply and release what is still held */
1868         *mrq = info.mreq;
1869         if (dirp)
1870                 vrele(dirp);
1871         nlookup_done(&nd);
1872         if (dvp) {
1873                 if (dvp == vp)
1874                         vrele(dvp);             /* same vnode as vp: the vput(vp) below does the unlock */
1875                 else
1876                         vput(dvp);
1877         }
1878         if (vp)
1879                 vput(vp);
1880         return (error);
1881 }
1882
1883 /*
1884  * nfs v3 mknod service: creates a VCHR, VBLK, VSOCK or VFIFO node; other types get NFSERR_BADTYPE
1885  */
1886 int
1887 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1888             struct thread *td, struct mbuf **mrq)       /* *mrq is set to info.mreq on every exit */
1889 {
1890         struct sockaddr *nam = nfsd->nd_nam;
1891         struct ucred *cred = &nfsd->nd_cr;
1892         struct vattr va, dirfor, diraft;        /* dirfor/diraft: directory attributes before/after */
1893         struct vattr *vap = &va;
1894         u_int32_t *tl;
1895         struct nlookupdata nd;
1896         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1897         enum vtype vtyp;                        /* node type requested by the client */
1898         struct vnode *dirp;
1899         struct vnode *dvp;
1900         struct vnode *vp;
1901         nfsfh_t nfh;
1902         fhandle_t *fhp;
1903         struct nfsm_info info;
1904
1905         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1906         nlookup_zero(&nd);
1907         dirp = NULL;                            /* the exit paths inspect dirp, dvp and vp */
1908         dvp = NULL;
1909         vp = NULL;
1910
1911         info.mrep = nfsd->nd_mrep;
1912         info.mreq = NULL;
1913         info.md = nfsd->nd_md;
1914         info.dpos = nfsd->nd_dpos;              /* NOTE(review): info.v3 is never set here, unlike the sibling handlers - confirm no nfsm_* helper reads it */
1915
1916         fhp = &nfh.fh_generic;
1917         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));   /* handle that nfs_namei() starts from */
1918         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));      /* length of the name to create */
1919
1920         /*
1921          * Handle nfs_namei() call.  If an error occurs, the nd structure
1922          * is not valid.  However, nfsm_*() routines may still jump to
1923          * nfsmout.
1924          */
1925
1926         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1927                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1928                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
1929         if (dirp)
1930                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);        /* pre-operation directory attributes for the wcc data */
1931         if (error) {
1932                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1933                            NFSX_WCCDATA(1), &error));
1934                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1935                                  diraft_ret, &diraft);  /* diraft_ret is still 1: no post-op attributes were fetched */
1936                 error = 0;
1937                 goto nfsmout;
1938         }
1939         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1940         vtyp = nfsv3tov_type(*tl);
1941         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1942                 error = NFSERR_BADTYPE;
1943                 goto out;
1944         }
1945         VATTR_NULL(vap);
1946         ERROROUT(nfsm_srvsattr(&info, vap));    /* client-supplied attributes */
1947         if (vtyp == VCHR || vtyp == VBLK) {     /* only device nodes carry major/minor numbers */
1948                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1949                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1950                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1951         }
1952
1953         /*
1954          * Iff doesn't exist, create it.
1955          */
1956         if (vp) {
1957                 error = EEXIST;
1958                 goto out;
1959         }
1960         vap->va_type = vtyp;
1961         if (vap->va_mode == (mode_t)VNOVAL)
1962                 vap->va_mode = 0;               /* no mode supplied: create with mode 0 */
1963         if (vtyp == VSOCK) {                    /* sockets are created with VOP_NCREATE, everything else with VOP_NMKNOD */
1964                 vn_unlock(dvp);                 /* dvp is passed unlocked but still referenced */
1965                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1966                 vrele(dvp);
1967                 dvp = NULL;
1968         } else {
1969                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))     /* device nodes need PRIV_ROOT, FIFOs do not */
1970                         goto out;
1971
1972                 vn_unlock(dvp);
1973                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1974                 vrele(dvp);
1975                 dvp = NULL;
1976                 if (error)
1977                         goto out;
1978         }
1979
1980         /*
1981          * send response, cleanup, return.
1982          */
1983 out:
1984         nlookup_done(&nd);
1985         if (dvp) {                              /* still held when we bailed out before the VOP call */
1986                 if (dvp == vp)
1987                         vrele(dvp);
1988                 else
1989                         vput(dvp);
1990                 dvp = NULL;
1991         }
1992         if (!error) {
1993                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1994                 error = VFS_VPTOFH(vp, &fhp->fh_fid);   /* reuse fhp for the new node's file handle */
1995                 if (!error)
1996                         error = VOP_GETATTR(vp, vap);
1997         }
1998         if (vp) {
1999                 vput(vp);
2000                 vp = NULL;
2001         }
2002         diraft_ret = VOP_GETATTR(dirp, &diraft);        /* NOTE(review): dirp is passed here before the NULL check on the next line */
2003         if (dirp) {
2004                 vrele(dirp);
2005                 dirp = NULL;
2006         }
2007         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2008                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
2009                               NFSX_WCCDATA(1), &error));
2010         if (!error) {
2011                 nfsm_srvpostop_fh(&info, fhp);
2012                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2013         }
2014         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2015                          diraft_ret, &diraft);
2016         *mrq = info.mreq;
2017         return (0);                             /* this path always returns 0; error was handed to nfsm_reply() */
2018 nfsmout:                                        /* exit used by the early failure above and by the out-macros */
2019         *mrq = info.mreq;
2020         if (dirp)
2021                 vrele(dirp);
2022         nlookup_done(&nd);
2023         if (dvp) {
2024                 if (dvp == vp)
2025                         vrele(dvp);             /* same vnode as vp: the vput(vp) below does the unlock */
2026                 else
2027                         vput(dvp);
2028         }
2029         if (vp)
2030                 vput(vp);
2031         return (error);
2032 }
2033
2034 /*
2035  * nfs remove service: removes a non-directory name; directories get EPERM, filesystem roots EBUSY
2036  */
2037 int
2038 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2039              struct thread *td, struct mbuf **mrq)      /* *mrq is set to info.mreq at nfsmout */
2040 {
2041         struct sockaddr *nam = nfsd->nd_nam;
2042         struct ucred *cred = &nfsd->nd_cr;
2043         struct nlookupdata nd;
2044         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2045         struct vnode *dirp;
2046         struct vnode *dvp;
2047         struct vnode *vp;
2048         struct vattr dirfor, diraft;            /* directory attributes before/after the removal (v3 only) */
2049         nfsfh_t nfh;
2050         fhandle_t *fhp;
2051         struct nfsm_info info;
2052
2053         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2054         nlookup_zero(&nd);
2055         dirp = NULL;                            /* nfsmout inspects dirp, dvp and vp on every exit path */
2056         dvp = NULL;
2057         vp = NULL;
2058
2059         info.mrep = nfsd->nd_mrep;
2060         info.mreq = NULL;
2061         info.md = nfsd->nd_md;
2062         info.dpos = nfsd->nd_dpos;
2063         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2064
2065         fhp = &nfh.fh_generic;
2066         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));   /* handle that nfs_namei() starts from */
2067         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));      /* length of the name to remove */
2068
2069         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2070                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2071                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2072         if (dirp) {
2073                 if (info.v3)
2074                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);        /* pre-operation directory attributes for the wcc data */
2075         }
2076         if (error == 0) {
2077                 if (vp->v_type == VDIR) {       /* directories are refused by this handler */
2078                         error = EPERM;          /* POSIX */
2079                         goto out;
2080                 }
2081                 /*
2082                  * The root of a mounted filesystem cannot be deleted.
2083                  */
2084                 if (vp->v_flag & VROOT) {
2085                         error = EBUSY;
2086                         goto out;
2087                 }
2088 out:                                            /* label sits inside the error == 0 block; the gotos above arrive with error set */
2089                 if (!error) {
2090                         if (dvp != vp)
2091                                 vn_unlock(dvp);         /* when dvp == vp the vput(vp) below does the unlock */
2092                         if (vp) {
2093                                 vput(vp);               /* vp is released before the name is removed */
2094                                 vp = NULL;
2095                         }
2096                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2097                         vrele(dvp);
2098                         dvp = NULL;             /* so the cleanup at nfsmout skips it */
2099                 }
2100         }
2101         if (dirp && info.v3)
2102                 diraft_ret = VOP_GETATTR(dirp, &diraft);        /* post-operation directory attributes */
2103         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2104         if (info.v3) {
2105                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2106                                  diraft_ret, &diraft);
2107                 error = 0;                      /* cleared for v3 only; the v2 path leaves error as it is */
2108         }
2109 nfsmout:                                        /* common exit: publish the reply and release what is still held */
2110         *mrq = info.mreq;
2111         nlookup_done(&nd);
2112         if (dirp)
2113                 vrele(dirp);
2114         if (dvp) {
2115                 if (dvp == vp)
2116                         vrele(dvp);
2117                 else
2118                         vput(dvp);
2119         }
2120         if (vp)
2121                 vput(vp);
2122         return(error);
2123 }
2124
2125 /*
2126  * nfs rename service: looks up source (fromnd) and target (tond), validates the pair, then calls VOP_NRENAME
2127  */
2128 int
2129 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2130              struct thread *td, struct mbuf **mrq)      /* *mrq is set to info.mreq at nfsmout */
2131 {
2132         struct sockaddr *nam = nfsd->nd_nam;
2133         struct ucred *cred = &nfsd->nd_cr;
2134         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;     /* f* = source ("from") side */
2135         int tdirfor_ret = 1, tdiraft_ret = 1;                           /* t* = target ("to") side */
2136         struct nlookupdata fromnd, tond;
2137         struct vnode *fvp, *fdirp, *fdvp;
2138         struct vnode *tvp, *tdirp, *tdvp;
2139         struct namecache *ncp;
2140         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2141         nfsfh_t fnfh, tnfh;
2142         fhandle_t *ffhp, *tfhp;
2143         uid_t saved_uid;
2144         struct nfsm_info info;
2145
2146         info.mrep = nfsd->nd_mrep;
2147         info.mreq = NULL;
2148         info.md = nfsd->nd_md;
2149         info.dpos = nfsd->nd_dpos;
2150         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2151
2152         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2153 #ifndef nolint
2154         fvp = NULL;
2155 #endif
2156         ffhp = &fnfh.fh_generic;
2157         tfhp = &tnfh.fh_generic;
2158
2159         /*
2160          * Clear fields in case goto nfsmout occurs from macro.
2161          */
2162
2163         nlookup_zero(&fromnd);
2164         nlookup_zero(&tond);
2165         fdirp = NULL;
2166         tdirp = NULL;
2167
2168         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));  /* handle the source lookup starts from */
2169         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));      /* length of the source name */
2170
2171         /*
2172          * Remember our original uid so that we can reset cr_uid before
2173          * the second nfs_namei() call, in case it is remapped.
2174          */
2175         saved_uid = cred->cr_uid;
2176         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2177                           NULL, NULL,
2178                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2179                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2180         if (fdirp) {
2181                 if (info.v3)
2182                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);     /* pre-operation attributes of the source directory */
2183         }
2184         if (error) {
2185                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2186                                       2 * NFSX_WCCDATA(info.v3), &error));
2187                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2188                                  fdiraft_ret, &fdiraft);
2189                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2190                                  tdiraft_ret, &tdiraft);        /* the t*_ret values are still 1: the target was never looked up */
2191                 error = 0;
2192                 goto nfsmout;
2193         }
2194
2195         /*
2196          * We have to unlock the from ncp before we can safely lookup
2197          * the target ncp.
2198          */
2199         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2200         cache_unlock(&fromnd.nl_nch);
2201         fromnd.nl_flags &= ~NLC_NCPISLOCKED;    /* keep the flag in step with the actual lock state */
2202         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));  /* handle the target lookup starts from */
2203         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));  /* length of the target name */
2204         cred->cr_uid = saved_uid;               /* undo any remapping done by the first nfs_namei() */
2205
2206         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2207                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2208                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2209         if (tdirp) {
2210                 if (info.v3)
2211                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);     /* pre-operation attributes of the target directory */
2212         }
2213         if (error)
2214                 goto out1;                      /* fvp/tvp/fdvp/tdvp are never assigned on this path; out1 does not use them */
2215
2216         /*
2217          * relock the source
2218          */
2219         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {         /* got the source lock without blocking */
2220                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2221         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {       /* block on the source while keeping the target locked */
2222                 cache_lock(&fromnd.nl_nch);
2223                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2224         } else {                                /* otherwise drop the target, lock source then target; apparently address-ordered to avoid deadlock - confirm */
2225                 cache_unlock(&tond.nl_nch);
2226                 cache_lock(&fromnd.nl_nch);
2227                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2228                 cache_lock(&tond.nl_nch);
2229                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2230         }
2231         fromnd.nl_flags |= NLC_NCPISLOCKED;
2232
2233         fvp = fromnd.nl_nch.ncp->nc_vp;         /* NOTE(review): fvp is dereferenced below without a NULL check; presumably the source is known to exist - confirm */
2234         tvp = tond.nl_nch.ncp->nc_vp;           /* NULL when the target name does not exist */
2235
2236         /*
2237          * Set fdvp and tdvp.  We haven't done all the topology checks
2238          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2239          * point).  If we get through the checks these will be guaranteed
2240          * to be non-NULL.
2241          *
2242          * Holding the children ncp's should be sufficient to prevent
2243          * fdvp and tdvp ripouts.
2244          */
2245         if (fromnd.nl_nch.ncp->nc_parent)
2246                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2247         else
2248                 fdvp = NULL;
2249         if (tond.nl_nch.ncp->nc_parent)
2250                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2251         else
2252                 tdvp = NULL;
2253
2254         if (tvp != NULL) {                      /* target exists: source and target must both be directories or both not */
2255                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2256                         if (info.v3)
2257                                 error = EEXIST;
2258                         else
2259                                 error = EISDIR;
2260                         goto out;
2261                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2262                         if (info.v3)
2263                                 error = EEXIST;
2264                         else
2265                                 error = ENOTDIR;
2266                         goto out;
2267                 }
2268                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {        /* target directory is a mount point */
2269                         if (info.v3)
2270                                 error = EXDEV;
2271                         else
2272                                 error = ENOTEMPTY;
2273                         goto out;
2274                 }
2275         }
2276         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {      /* source directory is a mount point */
2277                 if (info.v3)
2278                         error = EXDEV;
2279                 else
2280                         error = ENOTEMPTY;
2281                 goto out;
2282         }
2283         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {         /* source and target are on different mounts */
2284                 if (info.v3)
2285                         error = EXDEV;
2286                 else
2287                         error = ENOTEMPTY;
2288                 goto out;
2289         }
2290         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {  /* source is the target's parent; no goto, the checks below still run */
2291                 if (info.v3)
2292                         error = EINVAL;
2293                 else
2294                         error = ENOTEMPTY;
2295         }
2296
2297         /*
2298          * You cannot rename a source into itself or a subdirectory of itself.
2299          * We check this by traversing the target directory upwards looking
2300          * for a match against the source.
2301          */
2302         if (error == 0) {
2303                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {        /* starts at the target itself, so an identical ncp also matches */
2304                         if (fromnd.nl_nch.ncp == ncp) {
2305                                 error = EINVAL;
2306                                 break;
2307                         }
2308                 }
2309         }
2310
2311         /*
2312          * If source is the same as the destination (that is the
2313          * same vnode with the same name in the same directory),
2314          * then there is nothing to do.
2315          */
2316         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2317                 error = -1;                     /* sentinel: replaces any error set above and becomes 0 at out */
2318 out:
2319         if (!error) {
2320                 /*
2321                  * The VOP_NRENAME function releases all vnode references &
2322                  * locks prior to returning so we need to clear the pointers
2323                  * to bypass cleanup code later on.
2324                  */
2325                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2326                                     fdvp, tdvp, tond.nl_cred);
2327         } else {
2328                 if (error == -1)                /* "nothing to do" is reported as success */
2329                         error = 0;
2330         }
2331         /* fall through */
2332
2333 out1:                                           /* build the reply; also the target of a failed target lookup */
2334         if (fdirp)
2335                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2336         if (tdirp)
2337                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2338         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2339                               2 * NFSX_WCCDATA(info.v3), &error));
2340         if (info.v3) {
2341                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2342                                  fdiraft_ret, &fdiraft);
2343                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2344                                  tdiraft_ret, &tdiraft);
2345         }
2346         error = 0;                              /* cleared for both protocol versions once the reply is built */
2347         /* fall through */
2348
2349 nfsmout:                                        /* common exit: publish the reply and drop both lookups */
2350         *mrq = info.mreq;
2351         if (tdirp)
2352                 vrele(tdirp);
2353         nlookup_done(&tond);
2354         if (fdirp)
2355                 vrele(fdirp);
2356         nlookup_done(&fromnd);
2357         return (error);
2358 }
2359
2360 /*
2361  * nfs link service
2362  */
2363 int
2364 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2365            struct thread *td, struct mbuf **mrq)
2366 {
2367         struct sockaddr *nam = nfsd->nd_nam;
2368         struct ucred *cred = &nfsd->nd_cr;
2369         struct nlookupdata nd;
2370         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2371         int getret = 1;
2372         struct vnode *dirp;
2373         struct vnode *dvp;
2374         struct vnode *vp;
2375         struct vnode *xp;
2376         struct mount *xmp;
2377         struct vattr dirfor, diraft, at;
2378         nfsfh_t nfh, dnfh;
2379         fhandle_t *fhp, *dfhp;
2380         struct nfsm_info info;
2381
2382         info.mrep = nfsd->nd_mrep;
2383         info.mreq = NULL;
2384         info.md = nfsd->nd_md;
2385         info.dpos = nfsd->nd_dpos;
2386         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2387
2388         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2389         nlookup_zero(&nd);
2390         dirp = dvp = vp = xp = NULL;
2391         xmp = NULL;
2392
2393         fhp = &nfh.fh_generic;
2394         dfhp = &dnfh.fh_generic;
2395         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2396         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2397         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2398
2399         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2400                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2401         if (error) {
2402                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2403                                       NFSX_POSTOPATTR(info.v3) +
2404                                       NFSX_WCCDATA(info.v3),
2405                                       &error));
2406                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2407                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2408                                  diraft_ret, &diraft);
2409                 xp = NULL;
2410                 error = 0;
2411                 goto nfsmout;
2412         }
2413         if (xp->v_type == VDIR) {
2414                 error = EPERM;          /* POSIX */
2415                 goto out1;
2416         }
2417
2418         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2419                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2420                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2421         if (dirp) {
2422                 if (info.v3)
2423                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2424         }
2425         if (error)
2426                 goto out1;
2427
2428         if (vp != NULL) {
2429                 error = EEXIST;
2430                 goto out;
2431         }
2432         if (xp->v_mount != dvp->v_mount)
2433                 error = EXDEV;
2434 out:
2435         if (!error) {
2436                 vn_unlock(dvp);
2437                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2438                 vrele(dvp);
2439                 dvp = NULL;
2440         }
2441         /* fall through */
2442
2443 out1:
2444         if (info.v3)
2445                 getret = VOP_GETATTR(xp, &at);
2446         if (dirp)
2447                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2448         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2449                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2450                               &error));
2451         if (info.v3) {
2452                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2453                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2454                                  diraft_ret, &diraft);
2455                 error = 0;
2456         }
2457         /* fall through */
2458
2459 nfsmout:
2460         *mrq = info.mreq;
2461         nlookup_done(&nd);
2462         if (dirp)
2463                 vrele(dirp);
2464         if (xp)
2465                 vrele(xp);
2466         if (dvp) {
2467                 if (dvp == vp)
2468                         vrele(dvp);
2469                 else
2470                         vput(dvp);
2471         }
2472         if (vp)
2473                 vput(vp);
2474         return(error);
2475 }
2476
2477 /*
2478  * nfs symbolic link service
2479  */
2480 int
2481 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2482               struct thread *td, struct mbuf **mrq)
2483 {
2484         struct sockaddr *nam = nfsd->nd_nam;
2485         struct ucred *cred = &nfsd->nd_cr;
2486         struct vattr va, dirfor, diraft;
2487         struct nlookupdata nd;
2488         struct vattr *vap = &va;
2489         struct nfsv2_sattr *sp;
2490         char *pathcp = NULL;
2491         struct uio io;
2492         struct iovec iv;
2493         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2494         struct vnode *dirp;
2495         struct vnode *vp;
2496         struct vnode *dvp;
2497         nfsfh_t nfh;
2498         fhandle_t *fhp;
2499         struct nfsm_info info;
2500
2501         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2502         nlookup_zero(&nd);
2503         dirp = NULL;
2504         dvp = NULL;
2505         vp = NULL;
2506
2507         info.mrep = nfsd->nd_mrep;
2508         info.mreq =  NULL;
2509         info.md = nfsd->nd_md;
2510         info.dpos = nfsd->nd_dpos;
2511         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2512
2513         fhp = &nfh.fh_generic;
2514         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2515         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2516
2517         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2518                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2519                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2520         if (dirp) {
2521                 if (info.v3)
2522                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2523         }
2524         if (error)
2525                 goto out;
2526
2527         VATTR_NULL(vap);
2528         if (info.v3) {
2529                 ERROROUT(nfsm_srvsattr(&info, vap));
2530         }
2531         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2532         pathcp = kmalloc(len2 + 1, M_TEMP, M_WAITOK);
2533         iv.iov_base = pathcp;
2534         iv.iov_len = len2;
2535         io.uio_resid = len2;
2536         io.uio_offset = 0;
2537         io.uio_iov = &iv;
2538         io.uio_iovcnt = 1;
2539         io.uio_segflg = UIO_SYSSPACE;
2540         io.uio_rw = UIO_READ;
2541         io.uio_td = NULL;
2542         ERROROUT(nfsm_mtouio(&info, &io, len2));
2543         if (info.v3 == 0) {
2544                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2545                 vap->va_mode = nfstov_mode(sp->sa_mode);
2546         }
2547         *(pathcp + len2) = '\0';
2548         if (vp) {
2549                 error = EEXIST;
2550                 goto out;
2551         }
2552
2553         if (vap->va_mode == (mode_t)VNOVAL)
2554                 vap->va_mode = 0;
2555         if (dvp != vp)
2556                 vn_unlock(dvp);
2557         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2558         vrele(dvp);
2559         dvp = NULL;
2560         if (error == 0) {
2561                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2562                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2563                 if (!error)
2564                         error = VOP_GETATTR(vp, vap);
2565         }
2566
2567 out:
2568         if (dvp) {
2569                 if (dvp == vp)
2570                         vrele(dvp);
2571                 else
2572                         vput(dvp);
2573         }
2574         if (vp) {
2575                 vput(vp);
2576                 vp = NULL;
2577         }
2578         if (pathcp) {
2579                 kfree(pathcp, M_TEMP);
2580                 pathcp = NULL;
2581         }
2582         if (dirp) {
2583                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2584                 vrele(dirp);
2585                 dirp = NULL;
2586         }
2587         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2588                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2589                               NFSX_WCCDATA(info.v3),
2590                               &error));
2591         if (info.v3) {
2592                 if (!error) {
2593                         nfsm_srvpostop_fh(&info, fhp);
2594                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2595                 }
2596                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2597                                  diraft_ret, &diraft);
2598         }
2599         error = 0;
2600         /* fall through */
2601
2602 nfsmout:
2603         *mrq = info.mreq;
2604         nlookup_done(&nd);
2605         if (vp)
2606                 vput(vp);
2607         if (dirp)
2608                 vrele(dirp);
2609         if (pathcp)
2610                 kfree(pathcp, M_TEMP);
2611         return (error);
2612 }
2613
2614 /*
2615  * nfs mkdir service
2616  */
2617 int
2618 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2619             struct thread *td, struct mbuf **mrq)
2620 {
2621         struct sockaddr *nam = nfsd->nd_nam;
2622         struct ucred *cred = &nfsd->nd_cr;
2623         struct vattr va, dirfor, diraft;
2624         struct vattr *vap = &va;
2625         struct nfs_fattr *fp;
2626         struct nlookupdata nd;
2627         u_int32_t *tl;
2628         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2629         struct vnode *dirp;
2630         struct vnode *dvp;
2631         struct vnode *vp;
2632         nfsfh_t nfh;
2633         fhandle_t *fhp;
2634         struct nfsm_info info;
2635
2636         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2637         nlookup_zero(&nd);
2638         dirp = NULL;
2639         dvp = NULL;
2640         vp = NULL;
2641
2642         info.dpos = nfsd->nd_dpos;
2643         info.mrep = nfsd->nd_mrep;
2644         info.mreq =  NULL;
2645         info.md = nfsd->nd_md;
2646         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2647
2648         fhp = &nfh.fh_generic;
2649         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2650         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2651
2652         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2653                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2654                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2655         if (dirp) {
2656                 if (info.v3)
2657                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2658         }
2659         if (error) {
2660                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2661                                       NFSX_WCCDATA(info.v3), &error));
2662                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2663                                  diraft_ret, &diraft);
2664                 error = 0;
2665                 goto nfsmout;
2666         }
2667         VATTR_NULL(vap);
2668         if (info.v3) {
2669                 ERROROUT(nfsm_srvsattr(&info, vap));
2670         } else {
2671                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2672                 vap->va_mode = nfstov_mode(*tl++);
2673         }
2674
2675         /*
2676          * At this point nd.ni_dvp is referenced and exclusively locked and
2677          * nd.ni_vp, if it exists, is referenced but not locked.
2678          */
2679
2680         vap->va_type = VDIR;
2681         if (vp != NULL) {
2682                 error = EEXIST;
2683                 goto out;
2684         }
2685
2686         /*
2687          * Issue mkdir op.  Since SAVESTART is not set, the pathname 
2688          * component is freed by the VOP call.  This will fill-in
2689          * nd.ni_vp, reference, and exclusively lock it.
2690          */
2691         if (vap->va_mode == (mode_t)VNOVAL)
2692                 vap->va_mode = 0;
2693         vn_unlock(dvp);
2694         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2695         vrele(dvp);
2696         dvp = NULL;
2697
2698         if (error == 0) {
2699                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2700                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2701                 if (error == 0)
2702                         error = VOP_GETATTR(vp, vap);
2703         }
2704 out:
2705         if (dirp)
2706                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2707         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2708                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2709                               NFSX_WCCDATA(info.v3),
2710                               &error));
2711         if (info.v3) {
2712                 if (!error) {
2713                         nfsm_srvpostop_fh(&info, fhp);
2714                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2715                 }
2716                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2717                                  diraft_ret, &diraft);
2718         } else {
2719                 nfsm_srvfhtom(&info, fhp);
2720                 fp = nfsm_build(&info, NFSX_V2FATTR);
2721                 nfsm_srvfattr(nfsd, vap, fp);
2722         }
2723         error = 0;
2724         /* fall through */
2725
2726 nfsmout:
2727         *mrq = info.mreq;
2728         nlookup_done(&nd);
2729         if (dirp)
2730                 vrele(dirp);
2731         if (dvp) {
2732                 if (dvp == vp)
2733                         vrele(dvp);
2734                 else
2735                         vput(dvp);
2736         }
2737         if (vp)
2738                 vput(vp);
2739         return (error);
2740 }
2741
2742 /*
2743  * nfs rmdir service
2744  */
2745 int
2746 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2747             struct thread *td, struct mbuf **mrq)
2748 {
2749         struct sockaddr *nam = nfsd->nd_nam;
2750         struct ucred *cred = &nfsd->nd_cr;
2751         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2752         struct vnode *dirp;
2753         struct vnode *dvp;
2754         struct vnode *vp;
2755         struct vattr dirfor, diraft;
2756         nfsfh_t nfh;
2757         fhandle_t *fhp;
2758         struct nlookupdata nd;
2759         struct nfsm_info info;
2760
2761         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2762         nlookup_zero(&nd);
2763         dirp = NULL;
2764         dvp = NULL;
2765         vp = NULL;
2766
2767         info.mrep = nfsd->nd_mrep;
2768         info.mreq = NULL;
2769         info.md = nfsd->nd_md;
2770         info.dpos = nfsd->nd_dpos;
2771         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2772
2773         fhp = &nfh.fh_generic;
2774         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2775         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2776
2777         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2778                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2779                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2780         if (dirp) {
2781                 if (info.v3)
2782                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2783         }
2784         if (error) {
2785                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2786                                       NFSX_WCCDATA(info.v3), &error));
2787                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2788                                  diraft_ret, &diraft);
2789                 error = 0;
2790                 goto nfsmout;
2791         }
2792         if (vp->v_type != VDIR) {
2793                 error = ENOTDIR;
2794                 goto out;
2795         }
2796
2797         /*
2798          * The root of a mounted filesystem cannot be deleted.
2799          */
2800         if (vp->v_flag & VROOT)
2801                 error = EBUSY;
2802 out:
2803         /*
2804          * Issue or abort op.  Since SAVESTART is not set, path name
2805          * component is freed by the VOP after either.
2806          */
2807         if (!error) {
2808                 if (dvp != vp)
2809                         vn_unlock(dvp);
2810                 vput(vp);
2811                 vp = NULL;
2812                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2813                 vrele(dvp);
2814                 dvp = NULL;
2815         }
2816         nlookup_done(&nd);
2817
2818         if (dirp)
2819                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2820         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2821         if (info.v3) {
2822                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2823                                  diraft_ret, &diraft);
2824                 error = 0;
2825         }
2826         /* fall through */
2827
2828 nfsmout:
2829         *mrq = info.mreq;
2830         if (dvp) {
2831                 if (dvp == vp)
2832                         vrele(dvp);
2833                 else
2834                         vput(dvp);
2835         }
2836         nlookup_done(&nd);
2837         if (dirp)
2838                 vrele(dirp);
2839         if (vp)
2840                 vput(vp);
2841         return(error);
2842 }
2843
2844 /*
2845  * nfs readdir service
2846  * - mallocs what it thinks is enough to read
2847  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2848  * - calls VOP_READDIR()
2849  * - loops around building the reply
2850  *      if the output generated exceeds count break out of loop
2851  *      The nfsm_clget macro is used here so that the reply will be packed
2852  *      tightly in mbuf clusters.
2853  * - it only knows that it has encountered eof when the VOP_READDIR()
2854  *      reads nothing
2855  * - as such one readdir rpc will return eof false although you are there
2856  *      and then the next will return eof
2857  * - it trims out records with d_fileno == 0
2858  *      this doesn't matter for Unix clients, but they might confuse clients
2859  *      for other os'.
2860  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2861  *      than requested, but this may not apply to all filesystems. For
2862  *      example, client NFS does not { although it is never remote mounted
2863  *      anyhow }
2864  *     The alternate call nfsrv_readdirplus() does lookups as well.
2865  * PS: The NFS protocol spec. does not clarify what the "count" byte
2866  *      argument is a count of.. just name strings and file id's or the
2867  *      entire reply rpc or ...
2868  *      I tried just file name and id sizes and it confused the Sun client,
2869  *      so I am using the full rpc size now. The "paranoia.." comment refers
2870  *      to including the status longwords that are not a part of the dir.
2871  *      "entry" structures, but are in the rpc.
2872  */
/*
 * Fixed-layout record made up of one 64-bit field followed by 32-bit
 * words.  nfsrv_readdirplus() declares a local of this type and views
 * fl_nfh as a fhandle_t.
 * NOTE(review): the field roles noted below are inferred from the
 * field names and array sizes only -- confirm against the code that
 * fills the structure in.
 */
2873 struct flrep {
2874         nfsuint64       fl_off;         /* presumably the entry's offset cookie */
2875         u_int32_t       fl_postopok;    /* presumably "attributes follow" flag */
2876         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];   /* NFSX_V3FATTR bytes, as words */
2877         u_int32_t       fl_fhok;        /* presumably "file handle follows" flag */
2878         u_int32_t       fl_fhsize;      /* presumably the file handle length */
2879         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];        /* NFSX_V3FH bytes, as words */
2880 };
2881
/*
 * nfsrv_readdir() - service an NFS READDIR request.
 *
 * Decodes the directory file handle, the starting offset cookie and
 * the byte count from the request, reads raw directory entries with
 * VOP_READDIR() into a temporary buffer, and packs them into the reply
 * one record at a time until the running reply length would exceed the
 * requested count or the entries run out.
 *
 * The reply mbuf chain is handed back through *mrq.  Every failure
 * path visible in this function builds an error reply and then clears
 * 'error', so the value returned is normally 0.  The NEGREPLYOUT,
 * NULLOUT and NEGKEEPOUT macros are defined elsewhere and presumably
 * branch to nfsmout with their own value of 'error' -- confirm against
 * their definitions.
 */
2882 int
2883 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2884               struct thread *td, struct mbuf **mrq)
2885 {
2886         struct sockaddr *nam = nfsd->nd_nam;
2887         struct ucred *cred = &nfsd->nd_cr;
2888         char *bp, *be;
2889         struct dirent *dp;
2890         caddr_t cp;
2891         u_int32_t *tl;
2892         struct mbuf *mp1, *mp2;
2893         char *cpos, *cend, *rbuf;
2894         struct vnode *vp = NULL;
2895         struct mount *mp = NULL;
2896         struct vattr at;
2897         nfsfh_t nfh;
2898         fhandle_t *fhp;
2899         struct uio io;
2900         struct iovec iv;
2901         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2902         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2903         u_quad_t off, toff;
2904 #if 0
2905         u_quad_t verf;
2906 #endif
2907         off_t *cookies = NULL, *cookiep;
2908         struct nfsm_info info;
2909
2910         info.mrep = nfsd->nd_mrep;
2911         info.mreq = NULL;
2912         info.md = nfsd->nd_md;
2913         info.dpos = nfsd->nd_dpos;
2914         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2915
2916         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2917         fhp = &nfh.fh_generic;
2918         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
             /*
              * Request arguments after the file handle.
              * v3: five words - a 64-bit offset cookie, two verifier
              *     words (stepped over; the check is compiled out) and
              *     the count.
              * v2: two words - a 32-bit offset and the count.
              * In both cases tl is left pointing at the count word.
              */
2919         if (info.v3) {
2920                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2921                 toff = fxdr_hyper(tl);
2922                 tl += 2;
2923 #if 0
2924                 verf = fxdr_hyper(tl);
2925 #endif
2926                 tl += 2;
2927         } else {
2928                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2929                 toff = fxdr_unsigned(u_quad_t, *tl++);
2930 #if 0
2931                 verf = 0;       /* shut up gcc */
2932 #endif
2933         }
2934         off = toff;
2935         cnt = fxdr_unsigned(int, *tl);
             /*
              * siz is the read size: cnt rounded up to a multiple of
              * DIRBLKSIZ.  Both values are capped at NFS_SRVMAXDATA(nfsd);
              * the unsigned comparisons make negative values hit the cap
              * as well.
              */
2936         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2937         xfer = NFS_SRVMAXDATA(nfsd);
2938         if ((unsigned)cnt > xfer)
2939                 cnt = xfer;
2940         if ((unsigned)siz > xfer)
2941                 siz = xfer;
2942         fullsiz = siz;
             /*
              * Translate the handle into a vnode.  The vnode is vput() on
              * the error paths below and vn_unlock()ed once the access
              * check has passed, so it is evidently handed back locked.
              */
2943         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2944                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2945         if (!error && vp->v_type != VDIR) {
2946                 error = ENOTDIR;
2947                 vput(vp);
2948                 vp = NULL;
2949         }
2950         if (error) {
2951                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2952                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2953                 error = 0;
2954                 goto nfsmout;
2955         }
2956
2957         /*
2958          * Obtain lock on vnode for this section of the code
2959          */
2960
2961         if (info.v3) {
2962                 error = getret = VOP_GETATTR(vp, &at);
2963 #if 0
2964                 /*
2965                  * XXX This check may be too strict for Solaris 2.5 clients.
2966                  */
2967                 if (!error && toff && verf && verf != at.va_filerev)
2968                         error = NFSERR_BAD_COOKIE;
2969 #endif
2970         }
2971         if (!error)
2972                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2973         if (error) {
2974                 vput(vp);
2975                 vp = NULL;
2976                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2977                                       NFSX_POSTOPATTR(info.v3), &error));
2978                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2979                 error = 0;
2980                 goto nfsmout;
2981         }
             /* From here on vp is only referenced; it is dropped with vrele(). */
2982         vn_unlock(vp);
2983
2984         /*
2985          * end section.  Allocate rbuf and continue
2986          */
2987         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
2988 again:
             /*
              * (Re)read up to fullsiz bytes of directory entries starting
              * at 'off'.  Cookies left over from a previous pass are freed
              * first.
              */
2989         iv.iov_base = rbuf;
2990         iv.iov_len = fullsiz;
2991         io.uio_iov = &iv;
2992         io.uio_iovcnt = 1;
2993         io.uio_offset = (off_t)off;
2994         io.uio_resid = fullsiz;
2995         io.uio_segflg = UIO_SYSSPACE;
2996         io.uio_rw = UIO_READ;
2997         io.uio_td = NULL;
2998         eofflag = 0;
2999         if (cookies) {
3000                 kfree((caddr_t)cookies, M_TEMP);
3001                 cookies = NULL;
3002         }
3003         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3004         off = (off_t)io.uio_offset;
             /* A successful read that produced no cookie array is an error. */
3005         if (!cookies && !error)
3006                 error = NFSERR_PERM;
3007         if (info.v3) {
3008                 getret = VOP_GETATTR(vp, &at);
3009                 if (!error)
3010                         error = getret;
3011         }
3012         if (error) {
3013                 vrele(vp);
3014                 vp = NULL;
3015                 kfree((caddr_t)rbuf, M_TEMP);
3016                 if (cookies)
3017                         kfree((caddr_t)cookies, M_TEMP);
3018                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3019                                       NFSX_POSTOPATTR(info.v3), &error));
3020                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3021                 error = 0;
3022                 goto nfsmout;
3023         }
             /* A short read: shrink siz to the number of bytes obtained. */
3024         if (io.uio_resid) {
3025                 siz -= io.uio_resid;
3026
3027                 /*
3028                  * If nothing read, return eof
3029                  * rpc reply
3030                  */
3031                 if (siz == 0) {
3032                         vrele(vp);
3033                         vp = NULL;
3034                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3035                                               NFSX_POSTOPATTR(info.v3) +
3036                                               NFSX_COOKIEVERF(info.v3) +
3037                                               2 * NFSX_UNSIGNED,
3038                                               &error));
                             /*
                              * v3 prepends post-op attributes and a 64-bit
                              * value taken from at.va_filerev; both versions
                              * end with nfs_false followed by nfs_true.
                              */
3039                         if (info.v3) {
3040                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3041                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3042                                 txdr_hyper(at.va_filerev, tl);
3043                                 tl += 2;
3044                         } else
3045                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3046                         *tl++ = nfs_false;
3047                         *tl = nfs_true;
3048                         kfree((caddr_t)rbuf, M_TEMP);
3049                         kfree((caddr_t)cookies, M_TEMP);
3050                         error = 0;
3051                         goto nfsmout;
3052                 }
3053         }
3054
3055         /*
3056          * Check for degenerate cases of nothing useful read.
3057          * If so go try again
3058          */
3059         cpos = rbuf;
3060         cend = rbuf + siz;
3061         dp = (struct dirent *)cpos;
3062         cookiep = cookies;
3063         /*
3064          * For some reason FreeBSD's ufs_readdir() chooses to back the
3065          * directory offset up to a block boundary, so it is necessary to
3066          * skip over the records that precede the requested offset. This
3067          * requires the assumption that file offset cookies monotonically
3068          * increase.
3069          */
             /*
              * Entries with d_ino == 0, whiteouts (DT_WHT) and entries whose
              * cookie is <= toff are skipped.
              */
3070         while (cpos < cend && ncookies > 0 &&
3071                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3072                  ((u_quad_t)(*cookiep)) <= toff)) {
3073                 dp = _DIRENT_NEXT(dp);
3074                 cpos = (char *)dp;
3075                 cookiep++;
3076                 ncookies--;
3077         }
             /*
              * Everything in this buffer was skipped: read again from the
              * offset the previous read stopped at.
              */
3078         if (cpos >= cend || ncookies == 0) {
3079                 toff = off;
3080                 siz = fullsiz;
3081                 goto again;
3082         }
3083
3084         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3085         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3086                               NFSX_POSTOPATTR(info.v3) +
3087                               NFSX_COOKIEVERF(info.v3) + siz,
3088                               &error));
3089         if (info.v3) {
3090                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3091                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3092                 txdr_hyper(at.va_filerev, tl);
3093         }
             /*
              * bp/be delimit the free space in the current reply mbuf;
              * they, mp1 and mp2 are the arguments handed to every
              * nfsm_clget() call below.
              */
3094         mp1 = mp2 = info.mb;
3095         bp = info.bpos;
3096         be = bp + M_TRAILINGSPACE(mp1);
3097
3098         /* Loop through the records and build reply */
3099         while (cpos < cend && ncookies > 0) {
3100                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
                             /*
                              * Account for this record: four words plus
                              * the name padded to a word boundary, and two
                              * more words for v3.  Stop, clearing eofflag,
                              * once the total would exceed cnt.
                              */
3101                         nlen = dp->d_namlen;
3102                         rem = nfsm_rndup(nlen) - nlen;
3103                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3104                         if (info.v3)
3105                                 len += 2 * NFSX_UNSIGNED;
3106                         if (len > cnt) {
3107                                 eofflag = 0;
3108                                 break;
3109                         }
3110                         /*
3111                          * Build the directory record xdr from
3112                          * the dirent entry.
3113                          */
                             /*
                              * Record layout as written below: nfs_true,
                              * d_ino (v3: high word first), name length,
                              * name bytes, zero padding, cookie (v3: high
                              * word first).
                              */
3114                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3115                         *tl = nfs_true;
3116                         bp += NFSX_UNSIGNED;
3117                         if (info.v3) {
3118                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3119                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3120                                 bp += NFSX_UNSIGNED;
3121                         }
3122                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3123                         *tl = txdr_unsigned(dp->d_ino);
3124                         bp += NFSX_UNSIGNED;
3125                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3126                         *tl = txdr_unsigned(nlen);
3127                         bp += NFSX_UNSIGNED;
3128
3129                         /* And loop around copying the name */
3130                         xfer = nlen;
3131                         cp = dp->d_name;
3132                         while (xfer > 0) {
3133                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
                                     /* copy only what fits before 'be' */
3134                                 if ((bp+xfer) > be)
3135                                         tsiz = be-bp;
3136                                 else
3137                                         tsiz = xfer;
3138                                 bcopy(cp, bp, tsiz);
3139                                 bp += tsiz;
3140                                 xfer -= tsiz;
3141                                 if (xfer > 0)
3142                                         cp += tsiz;
3143                         }
3144                         /* And null pad to a int32_t boundary */
3145                         for (i = 0; i < rem; i++)
3146                                 *bp++ = '\0';
3147                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3148
3149                         /* Finish off the record */
3150                         if (info.v3) {
3151                                 *tl = txdr_unsigned(*cookiep >> 32);
3152                                 bp += NFSX_UNSIGNED;
3153                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3154                         }
3155                         *tl = txdr_unsigned(*cookiep);
3156                         bp += NFSX_UNSIGNED;
3157                 }
3158                 dp = _DIRENT_NEXT(dp);
3159                 cpos = (char *)dp;
3160                 cookiep++;
3161                 ncookies--;
3162         }
3163         vrele(vp);
3164         vp = NULL;
             /*
              * Trailer: nfs_false in the slot where each record starts
              * with nfs_true, then the eof flag.
              */
3165         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3166         *tl = nfs_false;
3167         bp += NFSX_UNSIGNED;
3168         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3169         if (eofflag)
3170                 *tl = nfs_true;
3171         else
3172                 *tl = nfs_false;
3173         bp += NFSX_UNSIGNED;
             /*
              * Fix up the length of the last mbuf written: if the packing
              * moved past the mbuf the reply header ended in, its length
              * is set from bp (when bp < be); otherwise that original mbuf
              * grows by the bytes appended after info.bpos.
              */
3174         if (mp1 != info.mb) {
3175                 if (bp < be)
3176                         mp1->m_len = bp - mtod(mp1, caddr_t);
3177         } else
3178                 mp1->m_len += bp - info.bpos;
3179         kfree((caddr_t)rbuf, M_TEMP);
3180         kfree((caddr_t)cookies, M_TEMP);
3181
3182 nfsmout:
             /* Common exit: hand back the reply and drop any vnode reference. */
3183         *mrq = info.mreq;
3184         if (vp)
3185                 vrele(vp);
3186         return(error);
3187 }
3188
3189 int
3190 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3191                   struct thread *td, struct mbuf **mrq)
3192 {
3193         struct sockaddr *nam = nfsd->nd_nam;
3194         struct ucred *cred = &nfsd->nd_cr;
3195         char *bp, *be;
3196         struct dirent *dp;
3197         caddr_t cp;
3198         u_int32_t *tl;
3199         struct mbuf *mp1, *mp2;
3200         char *cpos, *cend, *rbuf;
3201         struct vnode *vp = NULL, *nvp;
3202         struct mount *mp = NULL;
3203         struct flrep fl;
3204         nfsfh_t nfh;
3205         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3206         struct uio io;
3207         struct iovec iv;
3208         struct vattr va, at, *vap = &va;
3209         struct nfs_fattr *fp;
3210         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3211         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3212         u_quad_t off, toff;
3213 #if 0
3214         u_quad_t verf;
3215 #endif
3216         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3217         struct nfsm_info info;
3218
3219         info.mrep = nfsd->nd_mrep;
3220         info.mreq = NULL;
3221         info.md = nfsd->nd_md;
3222         info.dpos = nfsd->nd_dpos;
3223         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3224
3225         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3226         fhp = &nfh.fh_generic;
3227         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3228         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3229         toff = fxdr_hyper(tl);
3230         tl += 2;
3231 #if 0
3232         verf = fxdr_hyper(tl);
3233 #endif
3234         tl += 2;
3235         siz = fxdr_unsigned(int, *tl++);
3236         cnt = fxdr_unsigned(int, *tl);
3237         off = toff;
3238         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3239         xfer = NFS_SRVMAXDATA(nfsd);
3240         if ((unsigned)cnt > xfer)
3241                 cnt = xfer;
3242         if ((unsigned)siz > xfer)
3243                 siz = xfer;
3244         fullsiz = siz;
3245         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3246                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3247         if (!error && vp->v_type != VDIR) {
3248                 error = ENOTDIR;
3249                 vput(vp);
3250                 vp = NULL;
3251         }
3252         if (error) {
3253                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3254                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3255                 error = 0;
3256                 goto nfsmout;
3257         }
3258         error = getret = VOP_GETATTR(vp, &at);
3259 #if 0
3260         /*
3261          * XXX This check may be too strict for Solaris 2.5 clients.
3262          */
3263         if (!error && toff && verf && verf != at.va_filerev)
3264                 error = NFSERR_BAD_COOKIE;
3265 #endif
3266         if (!error) {
3267                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3268         }
3269         if (error) {
3270                 vput(vp);
3271                 vp = NULL;
3272                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3273                                       NFSX_V3POSTOPATTR, &error));
3274                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3275                 error = 0;
3276                 goto nfsmout;
3277         }
3278         vn_unlock(vp);
3279         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
3280 again:
3281         iv.iov_base = rbuf;
3282         iv.iov_len = fullsiz;
3283         io.uio_iov = &iv;
3284         io.uio_iovcnt = 1;
3285         io.uio_offset = (off_t)off;
3286         io.uio_resid = fullsiz;
3287         io.uio_segflg = UIO_SYSSPACE;
3288         io.uio_rw = UIO_READ;
3289         io.uio_td = NULL;
3290         eofflag = 0;
3291         if (cookies) {
3292                 kfree((caddr_t)cookies, M_TEMP);
3293                 cookies = NULL;
3294         }
3295         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3296         off = (u_quad_t)io.uio_offset;
3297         getret = VOP_GETATTR(vp, &at);
3298         if (!cookies && !error)
3299                 error = NFSERR_PERM;
3300         if (!error)
3301                 error = getret;
3302         if (error) {
3303                 vrele(vp);
3304                 vp = NULL;
3305                 if (cookies)
3306                         kfree((caddr_t)cookies, M_TEMP);
3307                 kfree((caddr_t)rbuf, M_TEMP);
3308                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3309                                       NFSX_V3POSTOPATTR, &error));
3310                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3311                 error = 0;
3312                 goto nfsmout;
3313         }
3314         if (io.uio_resid) {
3315                 siz -= io.uio_resid;
3316
3317                 /*
3318                  * If nothing read, return eof
3319                  * rpc reply
3320                  */
3321                 if (siz == 0) {
3322                         vrele(vp);
3323                         vp = NULL;
3324                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3325                                               NFSX_V3POSTOPATTR +
3326                                               NFSX_V3COOKIEVERF +
3327                                               2 * NFSX_UNSIGNED,
3328                                               &error));
3329                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3330                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3331                         txdr_hyper(at.va_filerev, tl);
3332                         tl += 2;
3333                         *tl++ = nfs_false;
3334                         *tl = nfs_true;
3335                         kfree((caddr_t)cookies, M_TEMP);
3336                         kfree((caddr_t)rbuf, M_TEMP);
3337                         error = 0;
3338                         goto nfsmout;
3339                 }
3340         }
3341
3342         /*
3343          * Check for degenerate cases of nothing useful read.
3344          * If so go try again
3345          */
3346         cpos = rbuf;
3347         cend = rbuf + siz;
3348         dp = (struct dirent *)cpos;
3349         cookiep = cookies;
3350         /*
3351          * For some reason FreeBSD's ufs_readdir() chooses to back the
3352          * directory offset up to a block boundary, so it is necessary to
3353          * skip over the records that preceed the requested offset. This
3354          * requires the assumption that file offset cookies monotonically
3355          * increase.
3356          */
3357         while (cpos < cend && ncookies > 0 &&
3358                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3359                  ((u_quad_t)(*cookiep)) <= toff)) {
3360                 dp = _DIRENT_NEXT(dp);
3361                 cpos = (char *)dp;
3362                 cookiep++;
3363                 ncookies--;
3364         }
3365         if (cpos >= cend || ncookies == 0) {
3366                 toff = off;
3367                 siz = fullsiz;
3368                 goto again;
3369         }
3370
3371         /*
3372          * Probe one of the directory entries to see if the filesystem
3373          * supports VGET.
3374          */
3375         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3376                 error = NFSERR_NOTSUPP;
3377                 vrele(vp);
3378                 vp = NULL;
3379                 kfree((caddr_t)cookies, M_TEMP);
3380                 kfree((caddr_t)rbuf, M_TEMP);
3381                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3382                                       NFSX_V3POSTOPATTR, &error));
3383                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3384                 error = 0;
3385                 goto nfsmout;
3386         }
3387         if (nvp) {
3388                 vput(nvp);
3389                 nvp = NULL;
3390         }
3391             
3392         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3393                         2 * NFSX_UNSIGNED;
3394         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3395         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3396         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3397         txdr_hyper(at.va_filerev, tl);
3398         mp1 = mp2 = info.mb;
3399         bp = info.bpos;
3400         be = bp + M_TRAILINGSPACE(mp1);
3401
3402         /* Loop through the records and build reply */
3403         while (cpos < cend && ncookies > 0) {
3404                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3405                         nlen = dp->d_namlen;
3406                         rem = nfsm_rndup(nlen) - nlen;
3407
3408                         /*
3409                          * For readdir_and_lookup get the vnode using
3410                          * the file number.
3411                          */
3412                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3413                                 goto invalid;
3414                         bzero((caddr_t)nfhp, NFSX_V3FH);
3415                         nfhp->fh_fsid = fhp->fh_fsid;
3416                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3417                                 vput(nvp);
3418                                 nvp = NULL;
3419                                 goto invalid;
3420                         }
3421                         if (VOP_GETATTR(nvp, vap)) {
3422                                 vput(nvp);
3423                                 nvp = NULL;
3424                                 goto invalid;
3425                         }
3426                         vput(nvp);
3427                         nvp = NULL;
3428
3429                         /*
3430                          * If either the dircount or maxcount will be
3431                          * exceeded, get out now. Both of these lengths
3432                          * are calculated conservatively, including all
3433                          * XDR overheads.
3434                          */
3435                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3436                                 NFSX_V3POSTOPATTR);
3437                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3438                         if (len > cnt || dirlen > fullsiz) {
3439                                 eofflag = 0;
3440                                 break;
3441                         }
3442
3443                         /*
3444                          * Build the directory record xdr from
3445                          * the dirent entry.
3446                          */
3447                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3448                         nfsm_srvfattr(nfsd, vap, fp);
3449                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3450                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3451                         fl.fl_postopok = nfs_true;
3452                         fl.fl_fhok = nfs_true;
3453                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3454
3455                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3456                         *tl = nfs_true;
3457                         bp += NFSX_UNSIGNED;
3458                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3459                         *tl = txdr_unsigned(dp->d_ino >> 32);
3460                         bp += NFSX_UNSIGNED;
3461                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3462                         *tl = txdr_unsigned(dp->d_ino);
3463                         bp += NFSX_UNSIGNED;
3464                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3465                         *tl = txdr_unsigned(nlen);
3466                         bp += NFSX_UNSIGNED;
3467
3468                         /* And loop around copying the name */
3469                         xfer = nlen;
3470                         cp = dp->d_name;
3471                         while (xfer > 0) {
3472                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3473                                 if ((bp + xfer) > be)
3474                                         tsiz = be - bp;
3475                                 else
3476                                         tsiz = xfer;
3477                                 bcopy(cp, bp, tsiz);
3478                                 bp += tsiz;
3479                                 xfer -= tsiz;
3480                                 cp += tsiz;
3481                         }
3482                         /* And null pad to a int32_t boundary */
3483                         for (i = 0; i < rem; i++)
3484                                 *bp++ = '\0';
3485         
3486                         /*
3487                          * Now copy the flrep structure out.
3488                          */
3489                         xfer = sizeof (struct flrep);
3490                         cp = (caddr_t)&fl;
3491                         while (xfer > 0) {
3492                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3493                                 if ((bp + xfer) > be)
3494                                         tsiz = be - bp;
3495                                 else
3496                                         tsiz = xfer;
3497                                 bcopy(cp, bp, tsiz);
3498                                 bp += tsiz;
3499                                 xfer -= tsiz;
3500                                 cp += tsiz;
3501                         }
3502                 }
3503 invalid:
3504                 dp = _DIRENT_NEXT(dp);
3505                 cpos = (char *)dp;
3506                 cookiep++;
3507                 ncookies--;
3508         }
3509         vrele(vp);
3510         vp = NULL;
3511         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3512         *tl = nfs_false;
3513         bp += NFSX_UNSIGNED;
3514         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3515         if (eofflag)
3516                 *tl = nfs_true;
3517         else
3518                 *tl = nfs_false;
3519         bp += NFSX_UNSIGNED;
3520         if (mp1 != info.mb) {
3521                 if (bp < be)
3522                         mp1->m_len = bp - mtod(mp1, caddr_t);
3523         } else
3524                 mp1->m_len += bp - info.bpos;
3525         kfree((caddr_t)cookies, M_TEMP);
3526         kfree((caddr_t)rbuf, M_TEMP);
3527 nfsmout:
3528         *mrq = info.mreq;
3529         if (vp)
3530                 vrele(vp);
3531         return(error);
3532 }
3533
3534 /*
3535  * nfs commit service
3536  */
3537 int
3538 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3539              struct thread *td, struct mbuf **mrq)
3540 {
3541         struct sockaddr *nam = nfsd->nd_nam;
3542         struct ucred *cred = &nfsd->nd_cr;
3543         struct vattr bfor, aft;
3544         struct vnode *vp = NULL;
3545         struct mount *mp = NULL;
3546         nfsfh_t nfh;
3547         fhandle_t *fhp;
3548         u_int32_t *tl;
3549         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3550         u_quad_t off;
3551         struct nfsm_info info;
3552
3553         info.mrep = nfsd->nd_mrep;
3554         info.mreq = NULL;
3555         info.md = nfsd->nd_md;
3556         info.dpos = nfsd->nd_dpos;
3557
3558         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3559         fhp = &nfh.fh_generic;
3560         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3561         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3562
3563         /*
3564          * XXX At this time VOP_FSYNC() does not accept offset and byte
3565          * count parameters, so these arguments are useless (someday maybe).
3566          */
3567         off = fxdr_hyper(tl);
3568         tl += 2;
3569         cnt = fxdr_unsigned(int, *tl);
3570         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3571                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3572         if (error) {
3573                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3574                                       2 * NFSX_UNSIGNED, &error));
3575                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3576                                  aft_ret, &aft);
3577                 error = 0;
3578                 goto nfsmout;
3579         }
3580         for_ret = VOP_GETATTR(vp, &bfor);
3581
3582         /*
3583          * RFC 1813 3.3.21: If count is 0, a flush from offset to the end of
3584          * file is done. At this time VOP_FSYNC does not accept offset and
3585          * byte count parameters, so call VOP_FSYNC the whole file for now.
3586          */
3587         if (cnt == 0 || cnt > MAX_COMMIT_COUNT) {
3588                 /*
3589                  * Give up and do the whole thing
3590                  */
3591                 if (vp->v_object &&
3592                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3593                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3594                 }
3595                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3596         } else {
3597                 /*
3598                  * Locate and synchronously write any buffers that fall
3599                  * into the requested range.  Note:  we are assuming that
3600                  * f_iosize is a power of 2.
3601                  */
3602                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3603                 int iomask = iosize - 1;
3604                 off_t loffset;
3605
3606                 /*
3607                  * Align to iosize boundry, super-align to page boundry.
3608                  */
3609                 if (off & iomask) {
3610                         cnt += off & iomask;
3611                         off &= ~(u_quad_t)iomask;
3612                 }
3613                 if (off & PAGE_MASK) {
3614                         cnt += off & PAGE_MASK;
3615                         off &= ~(u_quad_t)PAGE_MASK;
3616                 }
3617                 loffset = off;
3618
3619                 if (vp->v_object &&
3620                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3621                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3622                             (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3623                 }
3624
3625                 crit_enter();
3626                 while (error == 0 || cnt > 0) {
3627                         struct buf *bp;
3628
3629                         /*
3630                          * If we have a buffer and it is marked B_DELWRI we
3631                          * have to lock and write it.  Otherwise the prior
3632                          * write is assumed to have already been committed.
3633                          *
3634                          * WARNING: FINDBLK_TEST buffers represent stable
3635                          *          storage but not necessarily stable
3636                          *          content.  It is ok in this case.
3637                          */
3638                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3639                                 if (bp->b_flags & B_DELWRI)
3640                                         bp = findblk(vp, loffset, 0);
3641                                 else
3642                                         bp = NULL;
3643                         }
3644                         if (bp) {
3645                                 if (bp->b_flags & B_DELWRI) {
3646                                         bremfree(bp);
3647                                         error = bwrite(bp);
3648                                         ++nfs_commit_miss;
3649                                 } else {
3650                                         BUF_UNLOCK(bp);
3651                                 }
3652                         }
3653                         ++nfs_commit_blks;
3654                         if (cnt < iosize)
3655                                 break;
3656                         cnt -= iosize;
3657                         loffset += iosize;
3658                 }
3659                 crit_exit();
3660         }
3661
3662         aft_ret = VOP_GETATTR(vp, &aft);
3663         vput(vp);
3664         vp = NULL;
3665         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3666                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3667                               &error));
3668         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3669                          aft_ret, &aft);
3670         if (!error) {
3671                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3672                 if (nfsver.tv_sec == 0)
3673                         nfsver = boottime;
3674                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3675                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3676         } else {
3677                 error = 0;
3678         }
3679 nfsmout:
3680         *mrq = info.mreq;
3681         if (vp)
3682                 vput(vp);
3683         return(error);
3684 }
3685
3686 /*
3687  * nfs statfs service
3688  */
3689 int
3690 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3691              struct thread *td, struct mbuf **mrq)
3692 {
3693         struct sockaddr *nam = nfsd->nd_nam;
3694         struct ucred *cred = &nfsd->nd_cr;
3695         struct statfs *sf;
3696         struct nfs_statfs *sfp;
3697         int error = 0, rdonly, getret = 1;
3698         struct vnode *vp = NULL;
3699         struct mount *mp = NULL;
3700         struct vattr at;
3701         nfsfh_t nfh;
3702         fhandle_t *fhp;
3703         struct statfs statfs;
3704         u_quad_t tval;
3705         struct nfsm_info info;
3706
3707         info.mrep = nfsd->nd_mrep;
3708         info.mreq = NULL;
3709         info.md = nfsd->nd_md;
3710         info.dpos = nfsd->nd_dpos;
3711         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3712
3713         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3714         fhp = &nfh.fh_generic;
3715         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3716         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3717                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3718         if (error) {
3719                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3720                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3721                 error = 0;
3722                 goto nfsmout;
3723         }
3724         sf = &statfs;
3725         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3726         getret = VOP_GETATTR(vp, &at);
3727         vput(vp);
3728         vp = NULL;
3729         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3730                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3731                               &error));
3732         if (info.v3)
3733                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3734         if (error) {
3735                 error = 0;
3736                 goto nfsmout;
3737         }
3738         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3739         if (info.v3) {
3740                 tval = (u_quad_t)sf->f_blocks;
3741                 tval *= (u_quad_t)sf->f_bsize;
3742                 txdr_hyper(tval, &sfp->sf_tbytes);
3743                 tval = (u_quad_t)sf->f_bfree;
3744                 tval *= (u_quad_t)sf->f_bsize;
3745                 txdr_hyper(tval, &sfp->sf_fbytes);
3746                 tval = (u_quad_t)sf->f_bavail;
3747                 tval *= (u_quad_t)sf->f_bsize;
3748                 txdr_hyper(tval, &sfp->sf_abytes);
3749                 sfp->sf_tfiles.nfsuquad[0] = 0;
3750                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3751                 sfp->sf_ffiles.nfsuquad[0] = 0;
3752                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3753                 sfp->sf_afiles.nfsuquad[0] = 0;
3754                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3755                 sfp->sf_invarsec = 0;
3756         } else {
3757                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3758                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3759                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3760                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3761                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3762         }
3763 nfsmout:
3764         *mrq = info.mreq;
3765         if (vp)
3766                 vput(vp);
3767         return(error);
3768 }
3769
3770 /*
3771  * nfs fsinfo service
3772  */
3773 int
3774 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3775              struct thread *td, struct mbuf **mrq)
3776 {
3777         struct sockaddr *nam = nfsd->nd_nam;
3778         struct ucred *cred = &nfsd->nd_cr;
3779         struct nfsv3_fsinfo *sip;
3780         int error = 0, rdonly, getret = 1, pref;
3781         struct vnode *vp = NULL;
3782         struct mount *mp = NULL;
3783         struct vattr at;
3784         nfsfh_t nfh;
3785         fhandle_t *fhp;
3786         u_quad_t maxfsize;
3787         struct statfs sb;
3788         struct nfsm_info info;
3789
3790         info.mrep = nfsd->nd_mrep;
3791         info.mreq = NULL;
3792         info.md = nfsd->nd_md;
3793         info.dpos = nfsd->nd_dpos;
3794
3795         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3796         fhp = &nfh.fh_generic;
3797         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3798         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3799                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3800         if (error) {
3801                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3802                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3803                 error = 0;
3804                 goto nfsmout;
3805         }
3806
3807         /* XXX Try to make a guess on the max file size. */
3808         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3809         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3810
3811         getret = VOP_GETATTR(vp, &at);
3812         vput(vp);
3813         vp = NULL;
3814         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3815                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3816         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3817         sip = nfsm_build(&info, NFSX_V3FSINFO);
3818
3819         /*
3820          * XXX
3821          * There should be file system VFS OP(s) to get this information.
3822          * For now, assume ufs.
3823          */
3824         if (slp->ns_so->so_type == SOCK_DGRAM)
3825                 pref = NFS_MAXDGRAMDATA;
3826         else
3827                 pref = NFS_MAXDATA;
3828         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3829         sip->fs_rtpref = txdr_unsigned(pref);
3830         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3831         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3832         sip->fs_wtpref = txdr_unsigned(pref);
3833         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3834         sip->fs_dtpref = txdr_unsigned(pref);
3835         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3836         sip->fs_timedelta.nfsv3_sec = 0;
3837         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3838         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3839                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3840                 NFSV3FSINFO_CANSETTIME);
3841 nfsmout:
3842         *mrq = info.mreq;
3843         if (vp)
3844                 vput(vp);
3845         return(error);
3846 }
3847
3848 /*
3849  * nfs pathconf service
3850  */
3851 int
3852 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3853                struct thread *td, struct mbuf **mrq)
3854 {
3855         struct sockaddr *nam = nfsd->nd_nam;
3856         struct ucred *cred = &nfsd->nd_cr;
3857         struct nfsv3_pathconf *pc;
3858         int error = 0, rdonly, getret = 1;
3859         register_t linkmax, namemax, chownres, notrunc;
3860         struct vnode *vp = NULL;
3861         struct mount *mp = NULL;
3862         struct vattr at;
3863         nfsfh_t nfh;
3864         fhandle_t *fhp;
3865         struct nfsm_info info;
3866
3867         info.mrep = nfsd->nd_mrep;
3868         info.mreq = NULL;
3869         info.md = nfsd->nd_md;
3870         info.dpos = nfsd->nd_dpos;
3871
3872         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3873         fhp = &nfh.fh_generic;
3874         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3875         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3876                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3877         if (error) {
3878                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3879                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3880                 error = 0;
3881                 goto nfsmout;
3882         }
3883         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3884         if (!error)
3885                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3886         if (!error)
3887                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3888         if (!error)
3889                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3890         getret = VOP_GETATTR(vp, &at);
3891         vput(vp);
3892         vp = NULL;
3893         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3894                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3895                               &error));
3896         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3897         if (error) {
3898                 error = 0;
3899                 goto nfsmout;
3900         }
3901         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3902
3903         pc->pc_linkmax = txdr_unsigned(linkmax);
3904         pc->pc_namemax = txdr_unsigned(namemax);
3905         pc->pc_notrunc = txdr_unsigned(notrunc);
3906         pc->pc_chownrestricted = txdr_unsigned(chownres);
3907
3908         /*
3909          * These should probably be supported by VOP_PATHCONF(), but
3910          * until msdosfs is exportable (why would you want to?), the
3911          * Unix defaults should be ok.
3912          */
3913         pc->pc_caseinsensitive = nfs_false;
3914         pc->pc_casepreserving = nfs_true;
3915 nfsmout:
3916         *mrq = info.mreq;
3917         if (vp) 
3918                 vput(vp);
3919         return(error);
3920 }
3921
3922 /*
3923  * Null operation, used by clients to ping server
3924  */
3925 /* ARGSUSED */
3926 int
3927 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3928            struct thread *td, struct mbuf **mrq)
3929 {
3930         struct nfsm_info info;
3931         int error = NFSERR_RETVOID;
3932
3933         info.mrep = nfsd->nd_mrep;
3934         info.mreq = NULL;
3935
3936         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3937         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3938 nfsmout:
3939         *mrq = info.mreq;
3940         return (error);
3941 }
3942
3943 /*
3944  * No operation, used for obsolete procedures
3945  */
3946 /* ARGSUSED */
3947 int
3948 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3949            struct thread *td, struct mbuf **mrq)
3950 {
3951         struct nfsm_info info;
3952         int error;
3953
3954         info.mrep = nfsd->nd_mrep;
3955         info.mreq = NULL;
3956
3957         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3958         if (nfsd->nd_repstat)
3959                 error = nfsd->nd_repstat;
3960         else
3961                 error = EPROCUNAVAIL;
3962         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3963         error = 0;
3964 nfsmout:
3965         *mrq = info.mreq;
3966         return (error);
3967 }
3968
3969 /*
3970  * Perform access checking for vnodes obtained from file handles that would
3971  * refer to files already opened by a Unix client. You cannot just use
3972  * vn_writechk() and VOP_ACCESS() for two reasons.
3973  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3974  * 2 - The owner is to be given access irrespective of mode bits for some
3975  *     operations, so that processes that chmod after opening a file don't
3976  *     break. I don't like this because it opens a security hole, but since
3977  *     the nfs server opens a security hole the size of a barn door anyhow,
3978  *     what the heck.
3979  *
3980  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3981  * will return EPERM instead of EACCESS. EPERM is always an error.
3982  */
3983 static int
3984 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3985              int rdonly, struct thread *td, int override)
3986 {
3987         struct vattr vattr;
3988         int error;
3989
3990         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3991         if (flags & VWRITE) {
3992                 /* Just vn_writechk() changed to check rdonly */
3993                 /*
3994                  * Disallow write attempts on read-only file systems;
3995                  * unless the file is a socket or a block or character
3996                  * device resident on the file system.
3997                  */
3998                 if (rdonly || 
3999                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
4000                         switch (vp->v_type) {
4001                         case VREG:
4002                         case VDIR:
4003                         case VLNK:
4004                                 return (EROFS);
4005                         default:
4006                                 break;
4007                         }
4008                 }
4009                 /*
4010                  * If there's shared text associated with
4011                  * the inode, we can't allow writing.
4012                  */
4013                 if (vp->v_flag & VTEXT)
4014                         return (ETXTBSY);
4015         }
4016         error = VOP_GETATTR(vp, &vattr);
4017         if (error)
4018                 return (error);
4019         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
4020         /*
4021          * Allow certain operations for the owner (reads and writes
4022          * on files that are already open).
4023          */
4024         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
4025                 error = 0;
4026         return error;
4027 }
4028 #endif /* NFS_NOSERVER */
4029