kernel: Replace all usage of MALLOC()/FREE() with kmalloc()/kfree().
[dragonfly.git] / sys / vfs / nfs / nfs_serv.c
1 /*
2  * Copyright (c) 1989, 1993
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by the University of
19  *      California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *      @(#)nfs_serv.c  8.8 (Berkeley) 7/31/95
37  * $FreeBSD: src/sys/nfs/nfs_serv.c,v 1.93.2.6 2002/12/29 18:19:53 dillon Exp $
38  */
39
40 /*
41  * nfs version 2 and 3 server calls to vnode ops
42  * - these routines generally have 3 phases
43  *   1 - break down and validate rpc request in mbuf list
44  *   2 - do the vnode ops for the request
45  *       (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
46  *   3 - build the rpc reply in an mbuf list
47  *   nb:
48  *      - do not mix the phases, since the nfsm_?? macros can return failures
49  *        on a bad rpc or similar and do not do any vrele() or vput()'s
50  *
51  *      - the nfsm_reply() macro generates an nfs rpc reply with the nfs
52  *      error number iff error != 0 whereas
53  *      returning an error from the server function implies a fatal error
54  *      such as a badly constructed rpc request that should be dropped without
55  *      a reply.
56  *      For Version 3, nfsm_reply() does not return for the error case, since
57  *      most version 3 rpcs return more than the status for error cases.
58  *
59  * Other notes:
60  *      Warning: always pay careful attention to resource cleanup on return
61  *      and note that nfsm_*() macros can terminate a procedure on certain
62  *      errors.
63  */
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/proc.h>
68 #include <sys/priv.h>
69 #include <sys/nlookup.h>
70 #include <sys/namei.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
73 #include <sys/mount.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/malloc.h>
77 #include <sys/mbuf.h>
78 #include <sys/dirent.h>
79 #include <sys/stat.h>
80 #include <sys/kernel.h>
81 #include <sys/sysctl.h>
82 #include <sys/buf.h>
83
84 #include <vm/vm.h>
85 #include <vm/vm_extern.h>
86 #include <vm/vm_object.h>
87
88 #include <sys/buf2.h>
89
90 #include <sys/thread2.h>
91
92 #include "nfsproto.h"
93 #include "rpcv2.h"
94 #include "nfs.h"
95 #include "xdr_subs.h"
96 #include "nfsm_subs.h"
97
/*
 * Debug trace hook.  When NFSRV_DEBUG is defined, nfsdbprintf((fmt, ...))
 * expands to "kprintf (fmt, ...)"; otherwise it expands to nothing.  The
 * single macro argument is the whole parenthesized kprintf argument list,
 * which is why call sites use double parentheses.
 */
98 #ifdef NFSRV_DEBUG
99 #define nfsdbprintf(info)       kprintf info
100 #else
101 #define nfsdbprintf(info)
102 #endif
103
/*
 * NOTE(review): MAX_COMMIT_COUNT is not referenced in the part of the file
 * reviewed here; presumably an upper bound used by the commit service --
 * confirm against its user.
 */
104 #define MAX_COMMIT_COUNT        (1024 * 1024)
105
/*
 * Sizing and aging parameters for the nfsheur[] read-heuristic table.
 *
 * NUM_HEURISTIC is the number of table slots and the modulus applied to the
 * hashed vnode pointer in nfsrv_read().  NHUSE_INIT is the nh_use value
 * given to a slot when it is (re)assigned to a vnode.
 * NOTE(review): NHUSE_INC and NHUSE_MAX are consumed outside the reviewed
 * range; presumably the per-hit increment and the cap for nh_use -- confirm.
 */
106 #define NUM_HEURISTIC           1017
107 #define NHUSE_INIT              64
108 #define NHUSE_INC               16
109 #define NHUSE_MAX               2048
110
/*
 * Read heuristic table.  nfsrv_read() hashes the vnode pointer to pick a
 * starting slot, probes forward for a slot whose nh_vp matches, and
 * otherwise takes over the probed slot with the lowest nh_use.  The slot
 * tracks whether successive reads of that vnode are sequential.
 */
111 static struct nfsheur {
112     struct vnode *nh_vp;        /* vp to match (unreferenced pointer) */
113     off_t nh_nextr;             /* next offset for sequential detection */
114     int nh_use;                 /* use count for selection */
115     int nh_seqcount;            /* heuristic */
116 } nfsheur[NUM_HEURISTIC];
117
/*
 * Nine-entry tables of NFS wire file types for protocol version 3 and
 * version 2.  The two differ only in slots 6 and 7: v3 has NFSOCK and
 * NFFIFO there, v2 has NFNON and NFCHR.
 * NOTE(review): presumably indexed by the vnode type (enum vtype) --
 * confirm against the macros that consume these tables.
 *
 * The #ifndef NFS_NOSERVER opened between the two tables is not closed
 * within the reviewed range; everything after it is compiled only when the
 * NFS server is not configured out.
 */
118 nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK,
119                       NFFIFO, NFNON };
120 #ifndef NFS_NOSERVER 
121 nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
122                       NFCHR, NFNON };
123
/*
 * NOTE(review): the consumers of these two tunables are outside the
 * reviewed range; the names suggest write-gathering delays for v2 and v3
 * respectively -- confirm units against the write service.
 */
124 int nfsrvw_procrastinate = NFS_GATHERDELAY * 1000;
125 int nfsrvw_procrastinate_v3 = 0;
126
/* NOTE(review): not referenced in the reviewed range; purpose unconfirmed. */
127 static struct timespec  nfsver;
128
129 SYSCTL_DECL(_vfs_nfs);
130
/*
 * Read/write integer sysctls registered under the _vfs_nfs node (async,
 * commit_blks, commit_miss); the description strings below are what the
 * sysctl interface reports for each.
 */
131 int nfs_async;
132 SYSCTL_INT(_vfs_nfs, OID_AUTO, async, CTLFLAG_RW, &nfs_async, 0,
133     "Enable unstable and fast writes");
134 static int nfs_commit_blks;
135 static int nfs_commit_miss;
136 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_blks, CTLFLAG_RW, &nfs_commit_blks, 0,
137     "Number of committed blocks");
138 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_miss, CTLFLAG_RW, &nfs_commit_miss, 0,
139     "Number of nfs blocks committed from dirty buffers");
140
/*
 * File-local helpers defined later in this file.  nfsrv_access() is the
 * permission check the service routines call with VREAD/VWRITE/VEXEC;
 * nfsrvw_coalesce() is not used within the reviewed range.
 */
141 static int nfsrv_access (struct mount *, struct vnode *, int,
142                         struct ucred *, int, struct thread *, int);
143 static void nfsrvw_coalesce (struct nfsrv_descript *,
144                 struct nfsrv_descript *);
145
146 /*
147  * nfs v3 access service
148  */
149 int
150 nfsrv3_access(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
151               struct thread *td, struct mbuf **mrq)
152 {
153         struct sockaddr *nam = nfsd->nd_nam;
154         struct ucred *cred = &nfsd->nd_cr;
155         struct vnode *vp = NULL;
156         struct mount *mp = NULL;
157         nfsfh_t nfh;
158         fhandle_t *fhp;
159         int error = 0, rdonly, getret;
160         struct vattr vattr, *vap = &vattr;
161         u_long testmode, nfsmode;
162         struct nfsm_info info;
163         u_int32_t *tl;
164
165         info.dpos = nfsd->nd_dpos;
166         info.md = nfsd->nd_md;
167         info.mrep = nfsd->nd_mrep;
168         info.mreq = NULL;
169
170         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
171         fhp = &nfh.fh_generic;
172         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
173         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
174         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
175             (nfsd->nd_flag & ND_KERBAUTH), TRUE);
176         if (error) {
177                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
178                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
179                 error = 0;
180                 goto nfsmout;
181         }
182         nfsmode = fxdr_unsigned(u_int32_t, *tl);
183         if ((nfsmode & NFSV3ACCESS_READ) &&
184                 nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 0))
185                 nfsmode &= ~NFSV3ACCESS_READ;
186         if (vp->v_type == VDIR)
187                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
188                         NFSV3ACCESS_DELETE);
189         else
190                 testmode = (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
191         if ((nfsmode & testmode) &&
192                 nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 0))
193                 nfsmode &= ~testmode;
194         if (vp->v_type == VDIR)
195                 testmode = NFSV3ACCESS_LOOKUP;
196         else
197                 testmode = NFSV3ACCESS_EXECUTE;
198         if ((nfsmode & testmode) &&
199                 nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0))
200                 nfsmode &= ~testmode;
201         getret = VOP_GETATTR(vp, vap);
202         vput(vp);
203         vp = NULL;
204         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
205                               NFSX_POSTOPATTR(1) + NFSX_UNSIGNED, &error));
206         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
207         tl = nfsm_build(&info, NFSX_UNSIGNED);
208         *tl = txdr_unsigned(nfsmode);
209 nfsmout:
210         *mrq = info.mreq;
211         if (vp)
212                 vput(vp);
213         return(error);
214 }
215
216 /*
217  * nfs getattr service
218  */
219 int
220 nfsrv_getattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
221               struct thread *td, struct mbuf **mrq)
222 {
223         struct sockaddr *nam = nfsd->nd_nam;
224         struct ucred *cred = &nfsd->nd_cr;
225         struct nfs_fattr *fp;
226         struct vattr va;
227         struct vattr *vap = &va;
228         struct vnode *vp = NULL;
229         struct mount *mp = NULL;
230         nfsfh_t nfh;
231         fhandle_t *fhp;
232         int error = 0, rdonly;
233         struct nfsm_info info;
234
235         info.mrep = nfsd->nd_mrep;
236         info.md = nfsd->nd_md;
237         info.dpos = nfsd->nd_dpos;
238         info.mreq = NULL;
239
240         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
241         fhp = &nfh.fh_generic;
242         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
243         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
244                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
245         if (error) {
246                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
247                 error = 0;
248                 goto nfsmout;
249         }
250         error = VOP_GETATTR(vp, vap);
251         vput(vp);
252         vp = NULL;
253         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
254                               NFSX_FATTR(nfsd->nd_flag & ND_NFSV3), &error));
255         if (error) {
256                 error = 0;
257                 goto nfsmout;
258         }
259         fp = nfsm_build(&info, NFSX_FATTR(nfsd->nd_flag & ND_NFSV3));
260         nfsm_srvfattr(nfsd, vap, fp);
261         /* fall through */
262
263 nfsmout:
264         *mrq = info.mreq;
265         if (vp)
266                 vput(vp);
267         return(error);
268 }
269
270 /*
271  * nfs setattr service
272  */
273 int
274 nfsrv_setattr(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
275               struct thread *td, struct mbuf **mrq)
276 {
277         struct sockaddr *nam = nfsd->nd_nam;
278         struct ucred *cred = &nfsd->nd_cr;
279         struct vattr va, preat;
280         struct vattr *vap = &va;
281         struct nfsv2_sattr *sp;
282         struct nfs_fattr *fp;
283         struct vnode *vp = NULL;
284         struct mount *mp = NULL;
285         nfsfh_t nfh;
286         fhandle_t *fhp;
287         u_int32_t *tl;
288         int error = 0, rdonly, preat_ret = 1, postat_ret = 1;
289         int gcheck = 0;
290         struct timespec guard;
291         struct nfsm_info info;
292
293         info.mrep = nfsd->nd_mrep;
294         info.mreq = NULL;
295         info.md = nfsd->nd_md;
296         info.dpos = nfsd->nd_dpos;
297         info.v3 = (nfsd->nd_flag & ND_NFSV3);
298
299         guard.tv_sec = 0;       /* fix compiler warning */
300         guard.tv_nsec = 0;
301
302         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
303         fhp = &nfh.fh_generic;
304         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
305         VATTR_NULL(vap);
306         if (info.v3) {
307                 ERROROUT(nfsm_srvsattr(&info, vap));
308                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
309                 gcheck = fxdr_unsigned(int, *tl);
310                 if (gcheck) {
311                         NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
312                         fxdr_nfsv3time(tl, &guard);
313                 }
314         } else {
315                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
316                 /*
317                  * Nah nah nah nah na nah
318                  * There is a bug in the Sun client that puts 0xffff in the mode
319                  * field of sattr when it should put in 0xffffffff. The u_short
320                  * doesn't sign extend.
321                  * --> check the low order 2 bytes for 0xffff
322                  */
323                 if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
324                         vap->va_mode = nfstov_mode(sp->sa_mode);
325                 if (sp->sa_uid != nfs_xdrneg1)
326                         vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
327                 if (sp->sa_gid != nfs_xdrneg1)
328                         vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
329                 if (sp->sa_size != nfs_xdrneg1)
330                         vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_size);
331                 if (sp->sa_atime.nfsv2_sec != nfs_xdrneg1) {
332 #ifdef notyet
333                         fxdr_nfsv2time(&sp->sa_atime, &vap->va_atime);
334 #else
335                         vap->va_atime.tv_sec =
336                                 fxdr_unsigned(int32_t, sp->sa_atime.nfsv2_sec);
337                         vap->va_atime.tv_nsec = 0;
338 #endif
339                 }
340                 if (sp->sa_mtime.nfsv2_sec != nfs_xdrneg1)
341                         fxdr_nfsv2time(&sp->sa_mtime, &vap->va_mtime);
342
343         }
344
345         /*
346          * Now that we have all the fields, lets do it.
347          */
348         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam, &rdonly,
349                 (nfsd->nd_flag & ND_KERBAUTH), TRUE);
350         if (error) {
351                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
352                                       2 * NFSX_UNSIGNED, &error));
353                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
354                                  postat_ret, vap);
355                 error = 0;
356                 goto nfsmout;
357         }
358
359         /*
360          * vp now an active resource, pay careful attention to cleanup
361          */
362
363         if (info.v3) {
364                 error = preat_ret = VOP_GETATTR(vp, &preat);
365                 if (!error && gcheck &&
366                         (preat.va_ctime.tv_sec != guard.tv_sec ||
367                          preat.va_ctime.tv_nsec != guard.tv_nsec))
368                         error = NFSERR_NOT_SYNC;
369                 if (error) {
370                         vput(vp);
371                         vp = NULL;
372                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
373                                               NFSX_WCCDATA(info.v3), &error));
374                         nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
375                                          postat_ret, vap);
376                         error = 0;
377                         goto nfsmout;
378                 }
379         }
380
381         /*
382          * If the size is being changed write acces is required, otherwise
383          * just check for a read only file system.
384          */
385         if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
386                 if (rdonly || (mp->mnt_flag & MNT_RDONLY)) {
387                         error = EROFS;
388                         goto out;
389                 }
390         } else {
391                 if (vp->v_type == VDIR) {
392                         error = EISDIR;
393                         goto out;
394                 } else if ((error = nfsrv_access(mp, vp, VWRITE, cred, rdonly,
395                             td, 0)) != 0){ 
396                         goto out;
397                 }
398         }
399         error = VOP_SETATTR(vp, vap, cred);
400         postat_ret = VOP_GETATTR(vp, vap);
401         if (!error)
402                 error = postat_ret;
403 out:
404         vput(vp);
405         vp = NULL;
406         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
407                    NFSX_WCCORFATTR(info.v3), &error));
408         if (info.v3) {
409                 nfsm_srvwcc_data(&info, nfsd, preat_ret, &preat,
410                                  postat_ret, vap);
411                 error = 0;
412                 goto nfsmout;
413         } else {
414                 fp = nfsm_build(&info, NFSX_V2FATTR);
415                 nfsm_srvfattr(nfsd, vap, fp);
416         }
417         /* fall through */
418
419 nfsmout:
420         *mrq = info.mreq;
421         if (vp)
422                 vput(vp);
423         return(error);
424 }
425
426 /*
427  * nfs lookup rpc
428  */
429 int
430 nfsrv_lookup(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
431              struct thread *td, struct mbuf **mrq)
432 {
433         struct sockaddr *nam = nfsd->nd_nam;
434         struct ucred *cred = &nfsd->nd_cr;
435         struct nfs_fattr *fp;
436         struct nlookupdata nd;
437         struct vnode *vp;
438         struct vnode *dirp;
439         struct nchandle nch;
440         nfsfh_t nfh;
441         fhandle_t *fhp;
442         int error = 0, len, dirattr_ret = 1;
443         int pubflag;
444         struct vattr va, dirattr, *vap = &va;
445         struct nfsm_info info;
446
447         info.mrep = nfsd->nd_mrep;
448         info.mreq = NULL;
449         info.md = nfsd->nd_md;
450         info.dpos = nfsd->nd_dpos;
451         info.v3 = (nfsd->nd_flag & ND_NFSV3);
452
453         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
454         nlookup_zero(&nd);
455         dirp = NULL;
456         vp = NULL;
457
458         fhp = &nfh.fh_generic;
459         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
460         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
461
462         pubflag = nfs_ispublicfh(fhp);
463
464         error = nfs_namei(&nd, cred, 0, NULL, &vp,
465                 fhp, len, slp, nam, &info.md, &info.dpos,
466                 &dirp, td, (nfsd->nd_flag & ND_KERBAUTH), pubflag);
467
468         /*
469          * namei failure, only dirp to cleanup.  Clear out garbarge from
470          * structure in case macros jump to nfsmout.
471          */
472
473         if (error) {
474                 if (dirp) {
475                         if (info.v3)
476                                 dirattr_ret = VOP_GETATTR(dirp, &dirattr);
477                         vrele(dirp);
478                         dirp = NULL;
479                 }
480                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
481                                       NFSX_POSTOPATTR(info.v3), &error));
482                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
483                 error = 0;
484                 goto nfsmout;
485         }
486
487         /*
488          * Locate index file for public filehandle
489          *
490          * error is 0 on entry and 0 on exit from this block.
491          */
492
493         if (pubflag) {
494                 if (vp->v_type == VDIR && nfs_pub.np_index != NULL) {
495                         /*
496                          * Setup call to lookup() to see if we can find
497                          * the index file. Arguably, this doesn't belong
498                          * in a kernel.. Ugh.  If an error occurs, do not
499                          * try to install an index file and then clear the
500                          * error.
501                          *
502                          * When we replace nd with ind and redirect ndp,
503                          * maintenance of ni_startdir and ni_vp shift to
504                          * ind and we have to clean them up in the old nd.
505                          * However, the cnd resource continues to be maintained
506                          * via the original nd.  Confused?  You aren't alone!
507                          */
508                         vn_unlock(vp);
509                         cache_copy(&nd.nl_nch, &nch);
510                         nlookup_done(&nd);
511                         error = nlookup_init_raw(&nd, nfs_pub.np_index,
512                                                 UIO_SYSSPACE, 0, cred, &nch);
513                         cache_drop(&nch);
514                         if (error == 0)
515                                 error = nlookup(&nd);
516
517                         if (error == 0) {
518                                 /*
519                                  * Found an index file. Get rid of
520                                  * the old references.  transfer vp and
521                                  * load up the new vp.  Fortunately we do
522                                  * not have to deal with dvp, that would be
523                                  * a huge mess.
524                                  */
525                                 if (dirp)       
526                                         vrele(dirp);
527                                 dirp = vp;
528                                 vp = NULL;
529                                 error = cache_vget(&nd.nl_nch, nd.nl_cred,
530                                                         LK_EXCLUSIVE, &vp);
531                                 KKASSERT(error == 0);
532                         }
533                         error = 0;
534                 }
535                 /*
536                  * If the public filehandle was used, check that this lookup
537                  * didn't result in a filehandle outside the publicly exported
538                  * filesystem.  We clear the poor vp here to avoid lockups due
539                  * to NFS I/O.
540                  */
541
542                 if (vp->v_mount != nfs_pub.np_mount) {
543                         vput(vp);
544                         vp = NULL;
545                         error = EPERM;
546                 }
547         }
548
549         if (dirp) {
550                 if (info.v3)
551                         dirattr_ret = VOP_GETATTR(dirp, &dirattr);
552                 vrele(dirp);
553                 dirp = NULL;
554         }
555
556         /*
557          * Resources at this point:
558          *      ndp->ni_vp      may not be NULL
559          *
560          */
561
562         if (error) {
563                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
564                                       NFSX_POSTOPATTR(info.v3), &error));
565                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
566                 error = 0;
567                 goto nfsmout;
568         }
569
570         /*
571          * Clear out some resources prior to potentially blocking.  This
572          * is not as critical as ni_dvp resources in other routines, but
573          * it helps.
574          */
575         nlookup_done(&nd);
576
577         /*
578          * Get underlying attribute, then release remaining resources ( for
579          * the same potential blocking reason ) and reply.
580          */
581         bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
582         error = VFS_VPTOFH(vp, &fhp->fh_fid);
583         if (!error)
584                 error = VOP_GETATTR(vp, vap);
585
586         vput(vp);
587         vp = NULL;
588         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
589                               NFSX_SRVFH(info.v3) +
590                               NFSX_POSTOPORFATTR(info.v3) +
591                               NFSX_POSTOPATTR(info.v3),
592                               &error));
593         if (error) {
594                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
595                 error = 0;
596                 goto nfsmout;
597         }
598         nfsm_srvfhtom(&info, fhp);
599         if (info.v3) {
600                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
601                 nfsm_srvpostop_attr(&info, nfsd, dirattr_ret, &dirattr);
602         } else {
603                 fp = nfsm_build(&info, NFSX_V2FATTR);
604                 nfsm_srvfattr(nfsd, vap, fp);
605         }
606
607 nfsmout:
608         *mrq = info.mreq;
609         if (dirp)
610                 vrele(dirp);
611         nlookup_done(&nd);              /* may be called twice */
612         if (vp)
613                 vput(vp);
614         return (error);
615 }
616
617 /*
618  * nfs readlink service
619  */
620 int
621 nfsrv_readlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
622                struct thread *td, struct mbuf **mrq)
623 {
624         struct sockaddr *nam = nfsd->nd_nam;
625         struct ucred *cred = &nfsd->nd_cr;
626         struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
627         struct iovec *ivp = iv;
628         u_int32_t *tl;
629         int error = 0, rdonly, i, tlen, len, getret;
630         struct mbuf *mp1, *mp2, *mp3;
631         struct vnode *vp = NULL;
632         struct mount *mp = NULL;
633         struct vattr attr;
634         nfsfh_t nfh;
635         fhandle_t *fhp;
636         struct uio io, *uiop = &io;
637         struct nfsm_info info;
638
639         info.mrep = nfsd->nd_mrep;
640         info.mreq = NULL;
641         info.md = nfsd->nd_md;
642         info.dpos = nfsd->nd_dpos;
643         info.v3 = (nfsd->nd_flag & ND_NFSV3);
644
645         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
646 #ifndef nolint
647         mp2 = NULL;
648 #endif
649         mp3 = NULL;
650         fhp = &nfh.fh_generic;
651         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
652         len = 0;
653         i = 0;
654         while (len < NFS_MAXPATHLEN) {
655                 mp1 = m_getcl(MB_WAIT, MT_DATA, 0);
656                 mp1->m_len = MCLBYTES;
657                 if (len == 0)
658                         mp3 = mp2 = mp1;
659                 else {
660                         mp2->m_next = mp1;
661                         mp2 = mp1;
662                 }
663                 if ((len + mp1->m_len) > NFS_MAXPATHLEN) {
664                         mp1->m_len = NFS_MAXPATHLEN-len;
665                         len = NFS_MAXPATHLEN;
666                 } else
667                         len += mp1->m_len;
668                 ivp->iov_base = mtod(mp1, caddr_t);
669                 ivp->iov_len = mp1->m_len;
670                 i++;
671                 ivp++;
672         }
673         uiop->uio_iov = iv;
674         uiop->uio_iovcnt = i;
675         uiop->uio_offset = 0;
676         uiop->uio_resid = len;
677         uiop->uio_rw = UIO_READ;
678         uiop->uio_segflg = UIO_SYSSPACE;
679         uiop->uio_td = NULL;
680         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
681                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
682         if (error) {
683                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
684                                       2 * NFSX_UNSIGNED, &error));
685                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
686                 error = 0;
687                 goto nfsmout;
688         }
689         if (vp->v_type != VLNK) {
690                 if (info.v3)
691                         error = EINVAL;
692                 else
693                         error = ENXIO;
694                 goto out;
695         }
696         error = VOP_READLINK(vp, uiop, cred);
697 out:
698         getret = VOP_GETATTR(vp, &attr);
699         vput(vp);
700         vp = NULL;
701         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
702                              NFSX_POSTOPATTR(info.v3) + NFSX_UNSIGNED,
703                              &error));
704         if (info.v3) {
705                 nfsm_srvpostop_attr(&info, nfsd, getret, &attr);
706                 if (error) {
707                         error = 0;
708                         goto nfsmout;
709                 }
710         }
711         if (uiop->uio_resid > 0) {
712                 len -= uiop->uio_resid;
713                 tlen = nfsm_rndup(len);
714                 nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
715         }
716         tl = nfsm_build(&info, NFSX_UNSIGNED);
717         *tl = txdr_unsigned(len);
718         info.mb->m_next = mp3;
719         mp3 = NULL;
720 nfsmout:
721         *mrq = info.mreq;
722         if (mp3)
723                 m_freem(mp3);
724         if (vp)
725                 vput(vp);
726         return(error);
727 }
728
729 /*
730  * nfs read service
731  */
732 int
733 nfsrv_read(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
734            struct thread *td, struct mbuf **mrq)
735 {
736         struct nfsm_info info;
737         struct sockaddr *nam = nfsd->nd_nam;
738         struct ucred *cred = &nfsd->nd_cr;
739         struct iovec *iv;
740         struct iovec *iv2;
741         struct mbuf *m;
742         struct nfs_fattr *fp;
743         u_int32_t *tl;
744         int i;
745         int reqlen;
746         int error = 0, rdonly, cnt, len, left, siz, tlen, getret;
747         struct mbuf *m2;
748         struct vnode *vp = NULL;
749         struct mount *mp = NULL;
750         nfsfh_t nfh;
751         fhandle_t *fhp;
752         struct uio io, *uiop = &io;
753         struct vattr va, *vap = &va;
754         struct nfsheur *nh;
755         off_t off;
756         int ioflag = 0;
757
758         info.mrep = nfsd->nd_mrep;
759         info.mreq = NULL;
760         info.md = nfsd->nd_md;
761         info.dpos = nfsd->nd_dpos;
762         info.v3 = (nfsd->nd_flag & ND_NFSV3);
763
764         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
765         fhp = &nfh.fh_generic;
766         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
767         if (info.v3) {
768                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
769                 off = fxdr_hyper(tl);
770         } else {
771                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
772                 off = (off_t)fxdr_unsigned(u_int32_t, *tl);
773         }
774         NEGREPLYOUT(reqlen = nfsm_srvstrsiz(&info,
775                                             NFS_SRVMAXDATA(nfsd), &error));
776
777         /*
778          * Reference vp.  If an error occurs, vp will be invalid, but we
779          * have to NULL it just in case.  The macros might goto nfsmout
780          * as well.
781          */
782
783         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
784                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
785         if (error) {
786                 vp = NULL;
787                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
788                                       2 * NFSX_UNSIGNED, &error));
789                 nfsm_srvpostop_attr(&info, nfsd, 1, NULL);
790                 error = 0;
791                 goto nfsmout;
792         }
793
794         if (vp->v_type != VREG) {
795                 if (info.v3)
796                         error = EINVAL;
797                 else
798                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
799         }
800         if (!error) {
801             if ((error = nfsrv_access(mp, vp, VREAD, cred, rdonly, td, 1)) != 0)
802                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 1);
803         }
804         getret = VOP_GETATTR(vp, vap);
805         if (!error)
806                 error = getret;
807         if (error) {
808                 vput(vp);
809                 vp = NULL;
810                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
811                                       NFSX_POSTOPATTR(info.v3), &error));
812                 nfsm_srvpostop_attr(&info, nfsd, getret, vap);
813                 error = 0;
814                 goto nfsmout;
815         }
816
817         /*
818          * Calculate byte count to read
819          */
820
821         if (off >= vap->va_size)
822                 cnt = 0;
823         else if ((off + reqlen) > vap->va_size)
824                 cnt = vap->va_size - off;
825         else
826                 cnt = reqlen;
827
828         /*
829          * Calculate seqcount for heuristic
830          */
831
832         {
833                 int hi;
834                 int try = 32;
835
836                 /*
837                  * Locate best candidate
838                  */
839
840                 hi = ((int)(vm_offset_t)vp / sizeof(struct vnode)) % NUM_HEURISTIC;
841                 nh = &nfsheur[hi];
842
843                 while (try--) {
844                         if (nfsheur[hi].nh_vp == vp) {
845                                 nh = &nfsheur[hi];
846                                 break;
847                         }
848                         if (nfsheur[hi].nh_use > 0)
849                                 --nfsheur[hi].nh_use;
850                         hi = (hi + 1) % NUM_HEURISTIC;
851                         if (nfsheur[hi].nh_use < nh->nh_use)
852                                 nh = &nfsheur[hi];
853                 }
854
855                 if (nh->nh_vp != vp) {
856                         nh->nh_vp = vp;
857                         nh->nh_nextr = off;
858                         nh->nh_use = NHUSE_INIT;
859                         if (off == 0)
860                                 nh->nh_seqcount = 4;
861                         else
862                                 nh->nh_seqcount = 1;
863                 }
864
865                 /*
866                  * Calculate heuristic
867                  */
868
869                 if ((off == 0 && nh->nh_seqcount > 0) || off == nh->nh_nextr) {
870                         if (++nh->nh_seqcount > IO_SEQMAX)
871                                 nh->nh_seqcount = IO_SEQMAX;
872                 } else if (nh->nh_seqcount > 1) {
873                         nh->nh_seqcount = 1;
874                 } else {
875                         nh->nh_seqcount = 0;
876                 }
877                 nh->nh_use += NHUSE_INC;
878                 if (nh->nh_use > NHUSE_MAX)
879                         nh->nh_use = NHUSE_MAX;
880                 ioflag |= nh->nh_seqcount << IO_SEQSHIFT;
881         }
882
883         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
884                               NFSX_POSTOPORFATTR(info.v3) +
885                               3 * NFSX_UNSIGNED + nfsm_rndup(cnt),
886                               &error));
887         if (info.v3) {
888                 tl = nfsm_build(&info, NFSX_V3FATTR + 4 * NFSX_UNSIGNED);
889                 *tl++ = nfs_true;
890                 fp = (struct nfs_fattr *)tl;
891                 tl += (NFSX_V3FATTR / sizeof (u_int32_t));
892         } else {
893                 tl = nfsm_build(&info, NFSX_V2FATTR + NFSX_UNSIGNED);
894                 fp = (struct nfs_fattr *)tl;
895                 tl += (NFSX_V2FATTR / sizeof (u_int32_t));
896         }
897         len = left = nfsm_rndup(cnt);
898         if (cnt > 0) {
899                 /*
900                  * Generate the mbuf list with the uio_iov ref. to it.
901                  */
902                 i = 0;
903                 m = m2 = info.mb;
904                 while (left > 0) {
905                         siz = min(M_TRAILINGSPACE(m), left);
906                         if (siz > 0) {
907                                 left -= siz;
908                                 i++;
909                         }
910                         if (left > 0) {
911                                 m = m_getcl(MB_WAIT, MT_DATA, 0);
912                                 m->m_len = 0;
913                                 m2->m_next = m;
914                                 m2 = m;
915                         }
916                 }
917                 iv = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
918                 uiop->uio_iov = iv2 = iv;
919                 m = info.mb;
920                 left = len;
921                 i = 0;
922                 while (left > 0) {
923                         if (m == NULL)
924                                 panic("nfsrv_read iov");
925                         siz = min(M_TRAILINGSPACE(m), left);
926                         if (siz > 0) {
927                                 iv->iov_base = mtod(m, caddr_t) + m->m_len;
928                                 iv->iov_len = siz;
929                                 m->m_len += siz;
930                                 left -= siz;
931                                 iv++;
932                                 i++;
933                         }
934                         m = m->m_next;
935                 }
936                 uiop->uio_iovcnt = i;
937                 uiop->uio_offset = off;
938                 uiop->uio_resid = len;
939                 uiop->uio_rw = UIO_READ;
940                 uiop->uio_segflg = UIO_SYSSPACE;
941                 error = VOP_READ(vp, uiop, IO_NODELOCKED | ioflag, cred);
942                 off = uiop->uio_offset;
943                 nh->nh_nextr = off;
944                 kfree((caddr_t)iv2, M_TEMP);
945                 if (error || (getret = VOP_GETATTR(vp, vap))) {
946                         if (!error)
947                                 error = getret;
948                         m_freem(info.mreq);
949                         info.mreq = NULL;
950                         vput(vp);
951                         vp = NULL;
952                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
953                                               NFSX_POSTOPATTR(info.v3),
954                                               &error));
955                         nfsm_srvpostop_attr(&info, nfsd, getret, vap);
956                         error = 0;
957                         goto nfsmout;
958                 }
959         } else {
960                 uiop->uio_resid = 0;
961         }
962         vput(vp);
963         vp = NULL;
964         nfsm_srvfattr(nfsd, vap, fp);
965         tlen = len - uiop->uio_resid;
966         cnt = cnt < tlen ? cnt : tlen;
967         tlen = nfsm_rndup(cnt);
968         if (len != tlen || tlen != cnt)
969                 nfsm_adj(info.mb, len - tlen, tlen - cnt);
970         if (info.v3) {
971                 *tl++ = txdr_unsigned(cnt);
972                 if (len < reqlen)
973                         *tl++ = nfs_true;
974                 else
975                         *tl++ = nfs_false;
976         }
977         *tl = txdr_unsigned(cnt);
978 nfsmout:
979         *mrq = info.mreq;
980         if (vp)
981                 vput(vp);
982         return(error);
983 }
984
985 /*
986  * nfs write service
987  */
988 int
989 nfsrv_write(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
990             struct thread *td, struct mbuf **mrq)
991 {
992         struct sockaddr *nam = nfsd->nd_nam;
993         struct ucred *cred = &nfsd->nd_cr;
994         struct iovec *ivp;
995         int i, cnt;
996         struct mbuf *mp1;
997         struct nfs_fattr *fp;
998         struct iovec *iv;
999         struct vattr va, forat;
1000         struct vattr *vap = &va;
1001         u_int32_t *tl;
1002         int error = 0, rdonly, len, forat_ret = 1;
1003         int ioflags, aftat_ret = 1, retlen, zeroing, adjust;
1004         int stable = NFSV3WRITE_FILESYNC;
1005         struct vnode *vp = NULL;
1006         struct mount *mp = NULL;
1007         nfsfh_t nfh;
1008         fhandle_t *fhp;
1009         struct uio io, *uiop = &io;
1010         struct nfsm_info info;
1011         off_t off;
1012
1013         info.mrep = nfsd->nd_mrep;
1014         info.mreq = NULL;
1015         info.md = nfsd->nd_md;
1016         info.dpos = nfsd->nd_dpos;
1017         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1018
1019         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1020         if (info.mrep == NULL) {
1021                 error = 0;
1022                 goto nfsmout;
1023         }
1024         fhp = &nfh.fh_generic;
1025         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1026         if (info.v3) {
1027                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1028                 off = fxdr_hyper(tl);
1029                 tl += 3;
1030                 stable = fxdr_unsigned(int, *tl++);
1031         } else {
1032                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1033                 off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1034                 tl += 2;
1035                 if (nfs_async)
1036                         stable = NFSV3WRITE_UNSTABLE;
1037         }
1038         retlen = len = fxdr_unsigned(int32_t, *tl);
1039         cnt = i = 0;
1040
1041         /*
1042          * For NFS Version 2, it is not obvious what a write of zero length
1043          * should do, but I might as well be consistent with Version 3,
1044          * which is to return ok so long as there are no permission problems.
1045          */
1046         if (len > 0) {
1047             zeroing = 1;
1048             mp1 = info.mrep;
1049             while (mp1) {
1050                 if (mp1 == info.md) {
1051                         zeroing = 0;
1052                         adjust = info.dpos - mtod(mp1, caddr_t);
1053                         mp1->m_len -= adjust;
1054                         if (mp1->m_len > 0 && adjust > 0)
1055                                 mp1->m_data += adjust;
1056                 }
1057                 if (zeroing)
1058                         mp1->m_len = 0;
1059                 else if (mp1->m_len > 0) {
1060                         i += mp1->m_len;
1061                         if (i > len) {
1062                                 mp1->m_len -= (i - len);
1063                                 zeroing = 1;
1064                         }
1065                         if (mp1->m_len > 0)
1066                                 cnt++;
1067                 }
1068                 mp1 = mp1->m_next;
1069             }
1070         }
1071         if (len > NFS_MAXDATA || len < 0 || i < len) {
1072                 error = EIO;
1073                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1074                                       2 * NFSX_UNSIGNED, &error));
1075                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1076                                  aftat_ret, vap);
1077                 error = 0;
1078                 goto nfsmout;
1079         }
1080         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
1081                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1082         if (error) {
1083                 vp = NULL;
1084                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1085                                       2 * NFSX_UNSIGNED, &error));
1086                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1087                                  aftat_ret, vap);
1088                 error = 0;
1089                 goto nfsmout;
1090         }
1091         if (info.v3)
1092                 forat_ret = VOP_GETATTR(vp, &forat);
1093         if (vp->v_type != VREG) {
1094                 if (info.v3)
1095                         error = EINVAL;
1096                 else
1097                         error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1098         }
1099         if (!error) {
1100                 error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1101         }
1102         if (error) {
1103                 vput(vp);
1104                 vp = NULL;
1105                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1106                                       NFSX_WCCDATA(info.v3), &error));
1107                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1108                                  aftat_ret, vap);
1109                 error = 0;
1110                 goto nfsmout;
1111         }
1112
1113         if (len > 0) {
1114             ivp = kmalloc(cnt * sizeof(struct iovec), M_TEMP, M_WAITOK);
1115             uiop->uio_iov = iv = ivp;
1116             uiop->uio_iovcnt = cnt;
1117             mp1 = info.mrep;
1118             while (mp1) {
1119                 if (mp1->m_len > 0) {
1120                         ivp->iov_base = mtod(mp1, caddr_t);
1121                         ivp->iov_len = mp1->m_len;
1122                         ivp++;
1123                 }
1124                 mp1 = mp1->m_next;
1125             }
1126
1127             /*
1128              * XXX
1129              * The IO_METASYNC flag indicates that all metadata (and not just
1130              * enough to ensure data integrity) mus be written to stable storage
1131              * synchronously.
1132              * (IO_METASYNC is not yet implemented in 4.4BSD-Lite.)
1133              */
1134             if (stable == NFSV3WRITE_UNSTABLE)
1135                 ioflags = IO_NODELOCKED;
1136             else if (stable == NFSV3WRITE_DATASYNC)
1137                 ioflags = (IO_SYNC | IO_NODELOCKED);
1138             else
1139                 ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1140             uiop->uio_resid = len;
1141             uiop->uio_rw = UIO_WRITE;
1142             uiop->uio_segflg = UIO_SYSSPACE;
1143             uiop->uio_td = NULL;
1144             uiop->uio_offset = off;
1145             error = VOP_WRITE(vp, uiop, ioflags, cred);
1146             nfsstats.srvvop_writes++;
1147             kfree((caddr_t)iv, M_TEMP);
1148         }
1149         aftat_ret = VOP_GETATTR(vp, vap);
1150         vput(vp);
1151         vp = NULL;
1152         if (!error)
1153                 error = aftat_ret;
1154         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1155                               NFSX_PREOPATTR(info.v3) +
1156                               NFSX_POSTOPORFATTR(info.v3) +
1157                               2 * NFSX_UNSIGNED + NFSX_WRITEVERF(info.v3),
1158                               &error));
1159         if (info.v3) {
1160                 nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1161                                  aftat_ret, vap);
1162                 if (error) {
1163                         error = 0;
1164                         goto nfsmout;
1165                 }
1166                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1167                 *tl++ = txdr_unsigned(retlen);
1168                 /*
1169                  * If nfs_async is set, then pretend the write was FILESYNC.
1170                  */
1171                 if (stable == NFSV3WRITE_UNSTABLE && !nfs_async)
1172                         *tl++ = txdr_unsigned(stable);
1173                 else
1174                         *tl++ = txdr_unsigned(NFSV3WRITE_FILESYNC);
1175                 /*
1176                  * Actually, there is no need to txdr these fields,
1177                  * but it may make the values more human readable,
1178                  * for debugging purposes.
1179                  */
1180                 if (nfsver.tv_sec == 0)
1181                         nfsver = boottime;
1182                 *tl++ = txdr_unsigned(nfsver.tv_sec);
1183                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1184         } else {
1185                 fp = nfsm_build(&info, NFSX_V2FATTR);
1186                 nfsm_srvfattr(nfsd, vap, fp);
1187         }
1188 nfsmout:
1189         *mrq = info.mreq;
1190         if (vp)
1191                 vput(vp);
1192         return(error);
1193 }
1194
1195 /*
1196  * NFS write service with write gathering support. Called when
1197  * nfsrvw_procrastinate > 0.
1198  * See: Chet Juszczak, "Improving the Write Performance of an NFS Server",
1199  * in Proc. of the Winter 1994 Usenix Conference, pg. 247-259, San Franscisco,
1200  * Jan. 1994.
1201  */
1202 int
1203 nfsrv_writegather(struct nfsrv_descript **ndp, struct nfssvc_sock *slp,
1204                   struct thread *td, struct mbuf **mrq)
1205 {
1206         struct iovec *ivp;
1207         struct nfsrv_descript *wp, *nfsd, *owp, *swp;
1208         struct nfs_fattr *fp;
1209         int i;
1210         struct iovec *iov;
1211         struct nfsrvw_delayhash *wpp;
1212         struct ucred *cred;
1213         struct vattr va, forat;
1214         u_int32_t *tl;
1215         int error = 0, rdonly, len, forat_ret = 1;
1216         int ioflags, aftat_ret = 1, adjust, zeroing;
1217         struct mbuf *mp1;
1218         struct vnode *vp = NULL;
1219         struct mount *mp = NULL;
1220         struct uio io, *uiop = &io;
1221         u_quad_t cur_usec;
1222         struct nfsm_info info;
1223
1224         info.mreq = NULL;
1225
1226         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
1227 #ifndef nolint
1228         i = 0;
1229         len = 0;
1230 #endif
1231         if (*ndp) {
1232             nfsd = *ndp;
1233             *ndp = NULL;
1234             info.mrep = nfsd->nd_mrep;
1235             info.mreq = NULL;
1236             info.md = nfsd->nd_md;
1237             info.dpos = nfsd->nd_dpos;
1238             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1239             cred = &nfsd->nd_cr;
1240             LIST_INIT(&nfsd->nd_coalesce);
1241             nfsd->nd_mreq = NULL;
1242             nfsd->nd_stable = NFSV3WRITE_FILESYNC;
1243             cur_usec = nfs_curusec();
1244             nfsd->nd_time = cur_usec +
1245                 (info.v3 ? nfsrvw_procrastinate_v3 : nfsrvw_procrastinate);
1246     
1247             /*
1248              * Now, get the write header..
1249              */
1250             NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, &nfsd->nd_fh, &error));
1251             if (info.v3) {
1252                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
1253                 nfsd->nd_off = fxdr_hyper(tl);
1254                 tl += 3;
1255                 nfsd->nd_stable = fxdr_unsigned(int, *tl++);
1256             } else {
1257                 NULLOUT(tl = nfsm_dissect(&info, 4 * NFSX_UNSIGNED));
1258                 nfsd->nd_off = (off_t)fxdr_unsigned(u_int32_t, *++tl);
1259                 tl += 2;
1260                 if (nfs_async)
1261                         nfsd->nd_stable = NFSV3WRITE_UNSTABLE;
1262             }
1263             len = fxdr_unsigned(int32_t, *tl);
1264             nfsd->nd_len = len;
1265             nfsd->nd_eoff = nfsd->nd_off + len;
1266     
1267             /*
1268              * Trim the header out of the mbuf list and trim off any trailing
1269              * junk so that the mbuf list has only the write data.
1270              */
1271             zeroing = 1;
1272             i = 0;
1273             mp1 = info.mrep;
1274             while (mp1) {
1275                 if (mp1 == info.md) {
1276                     zeroing = 0;
1277                     adjust = info.dpos - mtod(mp1, caddr_t);
1278                     mp1->m_len -= adjust;
1279                     if (mp1->m_len > 0 && adjust > 0)
1280                         mp1->m_data += adjust;
1281                 }
1282                 if (zeroing)
1283                     mp1->m_len = 0;
1284                 else {
1285                     i += mp1->m_len;
1286                     if (i > len) {
1287                         mp1->m_len -= (i - len);
1288                         zeroing = 1;
1289                     }
1290                 }
1291                 mp1 = mp1->m_next;
1292             }
1293             if (len > NFS_MAXDATA || len < 0  || i < len) {
1294 nfsmout:
1295                 m_freem(info.mrep);
1296                 info.mrep = NULL;
1297                 error = EIO;
1298                 nfsm_writereply(&info, nfsd, slp, error, 2 * NFSX_UNSIGNED);
1299                 if (info.v3) {
1300                     nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1301                                      aftat_ret, &va);
1302                 }
1303                 nfsd->nd_mreq = info.mreq;
1304                 nfsd->nd_mrep = NULL;
1305                 nfsd->nd_time = 0;
1306             }
1307     
1308             /*
1309              * Add this entry to the hash and time queues.
1310              */
1311             owp = NULL;
1312             wp = slp->ns_tq.lh_first;
1313             while (wp && wp->nd_time < nfsd->nd_time) {
1314                 owp = wp;
1315                 wp = wp->nd_tq.le_next;
1316             }
1317             NFS_DPF(WG, ("Q%03x", nfsd->nd_retxid & 0xfff));
1318             if (owp) {
1319                 LIST_INSERT_AFTER(owp, nfsd, nd_tq);
1320             } else {
1321                 LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1322             }
1323             if (nfsd->nd_mrep) {
1324                 wpp = NWDELAYHASH(slp, nfsd->nd_fh.fh_fid.fid_data);
1325                 owp = NULL;
1326                 wp = wpp->lh_first;
1327                 while (wp &&
1328                     bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1329                     owp = wp;
1330                     wp = wp->nd_hash.le_next;
1331                 }
1332                 while (wp && wp->nd_off < nfsd->nd_off &&
1333                     !bcmp((caddr_t)&nfsd->nd_fh,(caddr_t)&wp->nd_fh,NFSX_V3FH)) {
1334                     owp = wp;
1335                     wp = wp->nd_hash.le_next;
1336                 }
1337                 if (owp) {
1338                     LIST_INSERT_AFTER(owp, nfsd, nd_hash);
1339
1340                     /*
1341                      * Search the hash list for overlapping entries and
1342                      * coalesce.
1343                      */
1344                     for(; nfsd && NFSW_CONTIG(owp, nfsd); nfsd = wp) {
1345                         wp = nfsd->nd_hash.le_next;
1346                         if (NFSW_SAMECRED(owp, nfsd))
1347                             nfsrvw_coalesce(owp, nfsd);
1348                     }
1349                 } else {
1350                     LIST_INSERT_HEAD(wpp, nfsd, nd_hash);
1351                 }
1352             }
1353         }
1354     
1355         /*
1356          * Now, do VOP_WRITE()s for any one(s) that need to be done now
1357          * and generate the associated reply mbuf list(s).
1358          */
1359 loop1:
1360         cur_usec = nfs_curusec();
1361         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = owp) {
1362                 owp = nfsd->nd_tq.le_next;
1363                 if (nfsd->nd_time > cur_usec)
1364                     break;
1365                 if (nfsd->nd_mreq)
1366                     continue;
1367                 NFS_DPF(WG, ("P%03x", nfsd->nd_retxid & 0xfff));
1368                 LIST_REMOVE(nfsd, nd_tq);
1369                 LIST_REMOVE(nfsd, nd_hash);
1370                 info.mrep = nfsd->nd_mrep;
1371                 info.mreq = NULL;
1372                 info.v3 = (nfsd->nd_flag & ND_NFSV3);
1373                 nfsd->nd_mrep = NULL;
1374                 cred = &nfsd->nd_cr;
1375                 forat_ret = aftat_ret = 1;
1376                 error = nfsrv_fhtovp(&nfsd->nd_fh, 1, &mp, &vp, cred, slp, 
1377                                      nfsd->nd_nam, &rdonly,
1378                                      (nfsd->nd_flag & ND_KERBAUTH), TRUE);
1379                 if (!error) {
1380                     if (info.v3)
1381                         forat_ret = VOP_GETATTR(vp, &forat);
1382                     if (vp->v_type != VREG) {
1383                         if (info.v3)
1384                             error = EINVAL;
1385                         else
1386                             error = (vp->v_type == VDIR) ? EISDIR : EACCES;
1387                     }
1388                 } else {
1389                     vp = NULL;
1390                 }
1391                 if (!error) {
1392                     error = nfsrv_access(mp, vp, VWRITE, cred, rdonly, td, 1);
1393                 }
1394     
1395                 if (nfsd->nd_stable == NFSV3WRITE_UNSTABLE)
1396                     ioflags = IO_NODELOCKED;
1397                 else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC)
1398                     ioflags = (IO_SYNC | IO_NODELOCKED);
1399                 else
1400                     ioflags = (IO_METASYNC | IO_SYNC | IO_NODELOCKED);
1401                 uiop->uio_rw = UIO_WRITE;
1402                 uiop->uio_segflg = UIO_SYSSPACE;
1403                 uiop->uio_td = NULL;
1404                 uiop->uio_offset = nfsd->nd_off;
1405                 uiop->uio_resid = nfsd->nd_eoff - nfsd->nd_off;
1406                 if (uiop->uio_resid > 0) {
1407                     mp1 = info.mrep;
1408                     i = 0;
1409                     while (mp1) {
1410                         if (mp1->m_len > 0)
1411                             i++;
1412                         mp1 = mp1->m_next;
1413                     }
1414                     uiop->uio_iovcnt = i;
1415                     iov = kmalloc(i * sizeof(struct iovec), M_TEMP, M_WAITOK);
1416                     uiop->uio_iov = ivp = iov;
1417                     mp1 = info.mrep;
1418                     while (mp1) {
1419                         if (mp1->m_len > 0) {
1420                             ivp->iov_base = mtod(mp1, caddr_t);
1421                             ivp->iov_len = mp1->m_len;
1422                             ivp++;
1423                         }
1424                         mp1 = mp1->m_next;
1425                     }
1426                     if (!error) {
1427                         error = VOP_WRITE(vp, uiop, ioflags, cred);
1428                         nfsstats.srvvop_writes++;
1429                     }
1430                     kfree((caddr_t)iov, M_TEMP);
1431                 }
1432                 m_freem(info.mrep);
1433                 info.mrep = NULL;
1434                 if (vp) {
1435                     aftat_ret = VOP_GETATTR(vp, &va);
1436                     vput(vp);
1437                     vp = NULL;
1438                 }
1439
1440                 /*
1441                  * Loop around generating replies for all write rpcs that have
1442                  * now been completed.
1443                  */
1444                 swp = nfsd;
1445                 do {
1446                     NFS_DPF(WG, ("R%03x", nfsd->nd_retxid & 0xfff));
1447                     if (error) {
1448                         nfsm_writereply(&info, nfsd, slp, error,
1449                                         NFSX_WCCDATA(info.v3));
1450                         if (info.v3) {
1451                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1452                                              aftat_ret, &va);
1453                         }
1454                     } else {
1455                         nfsm_writereply(&info, nfsd, slp, error,
1456                                         NFSX_PREOPATTR(info.v3) +
1457                                         NFSX_POSTOPORFATTR(info.v3) +
1458                                         2 * NFSX_UNSIGNED +
1459                                         NFSX_WRITEVERF(info.v3));
1460                         if (info.v3) {
1461                             nfsm_srvwcc_data(&info, nfsd, forat_ret, &forat,
1462                                              aftat_ret, &va);
1463                             tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
1464                             *tl++ = txdr_unsigned(nfsd->nd_len);
1465                             *tl++ = txdr_unsigned(swp->nd_stable);
1466                             /*
1467                              * Actually, there is no need to txdr these fields,
1468                              * but it may make the values more human readable,
1469                              * for debugging purposes.
1470                              */
1471                             if (nfsver.tv_sec == 0)
1472                                     nfsver = boottime;
1473                             *tl++ = txdr_unsigned(nfsver.tv_sec);
1474                             *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
1475                         } else {
1476                             fp = nfsm_build(&info, NFSX_V2FATTR);
1477                             nfsm_srvfattr(nfsd, &va, fp);
1478                         }
1479                     }
1480                     nfsd->nd_mreq = info.mreq;
1481                     if (nfsd->nd_mrep)
1482                         panic("nfsrv_write: nd_mrep not free");
1483
1484                     /*
1485                      * Done. Put it at the head of the timer queue so that
1486                      * the final phase can return the reply.
1487                      */
1488                     if (nfsd != swp) {
1489                         nfsd->nd_time = 0;
1490                         LIST_INSERT_HEAD(&slp->ns_tq, nfsd, nd_tq);
1491                     }
1492                     nfsd = swp->nd_coalesce.lh_first;
1493                     if (nfsd) {
1494                         LIST_REMOVE(nfsd, nd_tq);
1495                     }
1496                 } while (nfsd);
1497                 swp->nd_time = 0;
1498                 LIST_INSERT_HEAD(&slp->ns_tq, swp, nd_tq);
1499                 goto loop1;
1500         }
1501
1502         /*
1503          * Search for a reply to return.
1504          */
1505         for (nfsd = slp->ns_tq.lh_first; nfsd; nfsd = nfsd->nd_tq.le_next) {
1506                 if (nfsd->nd_mreq) {
1507                     NFS_DPF(WG, ("X%03x", nfsd->nd_retxid & 0xfff));
1508                     LIST_REMOVE(nfsd, nd_tq);
1509                     break;
1510                 }
1511         }
1512         if (nfsd) {
1513                 *ndp = nfsd;
1514                 *mrq = nfsd->nd_mreq;
1515         } else {
1516                 *ndp = NULL;
1517                 *mrq = NULL;
1518         }
1519         return (0);
1520 }
1521
1522 /*
1523  * Coalesce the write request nfsd into owp. To do this we must:
1524  * - remove nfsd from the queues
1525  * - merge nfsd->nd_mrep into owp->nd_mrep
1526  * - update the nd_eoff and nd_stable for owp
1527  * - put nfsd on owp's nd_coalesce list
1528  * NB: Must be called at splsoftclock().
1529  */
1530 static void
1531 nfsrvw_coalesce(struct nfsrv_descript *owp, struct nfsrv_descript *nfsd)
1532 {
1533         int overlap;
1534         struct mbuf *mp1;
1535         struct nfsrv_descript *p;
1536
1537         NFS_DPF(WG, ("C%03x-%03x",
1538                      nfsd->nd_retxid & 0xfff, owp->nd_retxid & 0xfff));
1539         LIST_REMOVE(nfsd, nd_hash);
1540         LIST_REMOVE(nfsd, nd_tq);
1541         if (owp->nd_eoff < nfsd->nd_eoff) {
1542             overlap = owp->nd_eoff - nfsd->nd_off;
1543             if (overlap < 0)
1544                 panic("nfsrv_coalesce: bad off");
1545             if (overlap > 0)
1546                 m_adj(nfsd->nd_mrep, overlap);
1547             mp1 = owp->nd_mrep;
1548             while (mp1->m_next)
1549                 mp1 = mp1->m_next;
1550             mp1->m_next = nfsd->nd_mrep;
1551             owp->nd_eoff = nfsd->nd_eoff;
1552         } else
1553             m_freem(nfsd->nd_mrep);
1554         nfsd->nd_mrep = NULL;
1555         if (nfsd->nd_stable == NFSV3WRITE_FILESYNC)
1556             owp->nd_stable = NFSV3WRITE_FILESYNC;
1557         else if (nfsd->nd_stable == NFSV3WRITE_DATASYNC &&
1558             owp->nd_stable == NFSV3WRITE_UNSTABLE)
1559             owp->nd_stable = NFSV3WRITE_DATASYNC;
1560         LIST_INSERT_HEAD(&owp->nd_coalesce, nfsd, nd_tq);
1561
1562         /*
1563          * If nfsd had anything else coalesced into it, transfer them
1564          * to owp, otherwise their replies will never get sent.
1565          */
1566         for (p = nfsd->nd_coalesce.lh_first; p;
1567              p = nfsd->nd_coalesce.lh_first) {
1568             LIST_REMOVE(p, nd_tq);
1569             LIST_INSERT_HEAD(&owp->nd_coalesce, p, nd_tq);
1570         }
1571 }
1572
1573 /*
1574  * nfs create service
1575  * now does a truncate to 0 length via setattr if it already exists
      *
      * nfsd supplies the request (mbuf chain, parse position, credentials,
      * client address and the ND_NFSV3 / ND_KERBAUTH flags); slp and td are
      * passed through to the helpers.  *mrq receives info.mreq on every
      * exit path.
      *
      * Flow: parse the directory handle and name, look the name up with
      * NLC_CREATE, decode the v3 "how"/sattr or the v2 sattr, then either
      * create the node (VOP_NCREATE / VOP_NMKNOD) or, when it already
      * exists and a size was supplied, set that size.  The reply carries the
      * new file handle and attributes, plus wcc data for v3.
      *
      * The return value is whatever is left in error; the v3 reply path and
      * the nfsmreply0 path reset it to 0 once the reply has been built.
1576  */
1577 int
1578 nfsrv_create(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1579              struct thread *td, struct mbuf **mrq)
1580 {
1581         struct sockaddr *nam = nfsd->nd_nam;
1582         struct ucred *cred = &nfsd->nd_cr;
1583         struct nfs_fattr *fp;
1584         struct vattr va, dirfor, diraft;
1585         struct vattr *vap = &va;
1586         struct nfsv2_sattr *sp;
1587         u_int32_t *tl;
1588         struct nlookupdata nd;
1589         int error = 0, len, tsize, dirfor_ret = 1, diraft_ret = 1;
1590         udev_t rdev = NOUDEV;
1591         caddr_t cp;
1592         int how, exclusive_flag = 0;
1593         struct vnode *dirp;
1594         struct vnode *dvp;
1595         struct vnode *vp;
1596         struct mount *mp;
1597         nfsfh_t nfh;
1598         fhandle_t *fhp;
1599         u_quad_t tempsize;
1600         u_char cverf[NFSX_V3CREATEVERF];
1601         struct nfsm_info info;
1602
1603         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /*
              * Put nd and the vnode pointers into a known state first: the
              * cleanup at nfsmout inspects all of them.
              */
1604         nlookup_zero(&nd);
1605         dirp = NULL;
1606         dvp = NULL;
1607         vp = NULL;
1608
1609         info.mrep = nfsd->nd_mrep;
1610         info.mreq = NULL;
1611         info.md = nfsd->nd_md;
1612         info.dpos = nfsd->nd_dpos;
1613         info.v3 = (nfsd->nd_flag & ND_NFSV3);
1614
             /*
              * Directory file handle and name length.  The *OUT wrappers are
              * defined outside this chunk; presumably they bail out to
              * nfsmout on failure.
              */
1615         fhp = &nfh.fh_generic;
1616         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1617         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1618
1619         /*
1620          * Call namei and do initial cleanup to get a few things
1621          * out of the way.  If we get an initial error we cleanup
1622          * and return here to avoid special-casing the invalid nd
1623          * structure through the rest of the case.  dirp may be
1624          * set even if an error occurs, but the nd structure will not
1625          * be valid at all if an error occurs so we have to invalidate it
1626          * prior to calling nfsm_reply ( which might goto nfsmout ).
1627          */
1628         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1629                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1630                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* mp is only consumed by the nfsrv_access() call further down. */
1631         mp = vfs_getvfs(&fhp->fh_fsid);
1632
             /*
              * Only v3 uses dirp again (for the pre/post directory
              * attributes), so v2 drops its reference right away.
              */
1633         if (dirp) {
1634                 if (info.v3) {
1635                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1636                 } else {
1637                         vrele(dirp);
1638                         dirp = NULL;
1639                 }
1640         }
1641         if (error) {
1642                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1643                                       NFSX_WCCDATA(info.v3), &error));
1644                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1645                                  diraft_ret, &diraft);
1646                 error = 0;
1647                 goto nfsmout;
1648         }
1649
1650         /*
1651          * No error.  Continue.  State:
1652          *
1653          *      dirp            may be valid
1654          *      vp              may be valid or NULL if the target does not
1655          *                      exist.
1656          *      dvp             is valid
1657          *
1658          * The error state is set through the code and we may also do some
1659          * opportunistic releasing of vnodes to avoid holding locks through
1660          * NFS I/O.  The cleanup at the end is a catch-all
1661          */
1662
1663         VATTR_NULL(vap);
1664         if (info.v3) {
                     /*
                      * v3: a create mode word, followed by either a sattr
                      * (UNCHECKED / GUARDED) or a verifier (EXCLUSIVE).  A
                      * mode matching none of the cases leaves vap as
                      * VATTR_NULL() set it.
                      */
1665                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1666                 how = fxdr_unsigned(int, *tl);
1667                 switch (how) {
1668                 case NFSV3CREATE_GUARDED:
1669                         if (vp) {
1670                                 error = EEXIST;
1671                                 break;
1672                         }
1673                         /* fall through */
1674                 case NFSV3CREATE_UNCHECKED:
1675                         ERROROUT(nfsm_srvsattr(&info, vap));
1676                         break;
1677                 case NFSV3CREATE_EXCLUSIVE:
1678                         NULLOUT(cp = nfsm_dissect(&info, NFSX_V3CREATEVERF));
1679                         bcopy(cp, cverf, NFSX_V3CREATEVERF);
1680                         exclusive_flag = 1;
1681                         break;
1682                 };
1683                 vap->va_type = VREG;
1684         } else {
                     /*
                      * v2: the type is taken from the mode bits (VNON becomes
                      * VREG) and sa_size doubles as the device number for
                      * VCHR / VBLK / VFIFO.
                      */
1685                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
1686                 vap->va_type = IFTOVT(fxdr_unsigned(u_int32_t, sp->sa_mode));
1687                 if (vap->va_type == VNON)
1688                         vap->va_type = VREG;
1689                 vap->va_mode = nfstov_mode(sp->sa_mode);
1690                 switch (vap->va_type) {
1691                 case VREG:
1692                         tsize = fxdr_unsigned(int32_t, sp->sa_size);
1693                         if (tsize != -1)
1694                                 vap->va_size = (u_quad_t)tsize;
1695                         break;
1696                 case VCHR:
1697                 case VBLK:
1698                 case VFIFO:
1699                         rdev = fxdr_unsigned(long, sp->sa_size);
1700                         break;
1701                 default:
1702                         break;
1703                 };
1704         }
1705
1706         /*
1707          * Iff doesn't exist, create it
1708          * otherwise just truncate to 0 length
1709          *   should I set the mode too ?
1710          *
1711          * The only possible error we can have at this point is EEXIST. 
1712          * nd.ni_vp will also be non-NULL in that case.
1713          */
1714         if (vp == NULL) {
1715                 if (vap->va_mode == (mode_t)VNOVAL)
1716                         vap->va_mode = 0;
1717                 if (vap->va_type == VREG || vap->va_type == VSOCK) {
                             /*
                              * dvp is unlocked for the namecache-based VOP
                              * and its reference dropped afterwards; the
                              * pointer is cleared so the cleanup at nfsmout
                              * skips it.
                              */
1718                         vn_unlock(dvp);
1719                         error = VOP_NCREATE(&nd.nl_nch, dvp, &vp,
1720                                             nd.nl_cred, vap);
1721                         vrele(dvp);
1722                         dvp = NULL;
1723                         if (error == 0) {
                                     /*
                                      * Exclusive create: store the client's
                                      * verifier in the new file's atime
                                      * field.  Clearing exclusive_flag makes
                                      * the verifier comparison near the end
                                      * of the function a no-op for this path.
                                      */
1724                                 if (exclusive_flag) {
1725                                         exclusive_flag = 0;
1726                                         VATTR_NULL(vap);
1727                                         bcopy(cverf, (caddr_t)&vap->va_atime,
1728                                                 NFSX_V3CREATEVERF);
1729                                         error = VOP_SETATTR(vp, vap, cred);
1730                                 }
1731                         }
1732                 } else if (
1733                         vap->va_type == VCHR || 
1734                         vap->va_type == VBLK ||
1735                         vap->va_type == VFIFO
1736                 ) {
1737                         /*
1738                          * Handle SysV FIFO node special cases.  All other
1739                          * devices require super user to access.
1740                          */
1741                         if (vap->va_type == VCHR && rdev == 0xffffffff)
1742                                 vap->va_type = VFIFO;
1743                         if (vap->va_type != VFIFO &&
1744                             (error = priv_check_cred(cred, PRIV_ROOT, 0))) {
1745                                 goto nfsmreply0;
1746                         }
1747                         vap->va_rmajor = umajor(rdev);
1748                         vap->va_rminor = uminor(rdev);
1749
1750                         vn_unlock(dvp);
1751                         error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1752                         vrele(dvp);
1753                         dvp = NULL;
1754                         if (error)
1755                                 goto nfsmreply0;
1756 #if 0
1757                         /*
1758                          * XXX what is this junk supposed to do ?
1759                          */
1760
1761                         vput(vp);
1762                         vp = NULL;
1763
1764                         /*
1765                          * release dvp prior to lookup
1766                          */
1767                         vput(dvp);
1768                         dvp = NULL;
1769
1770                         /*
1771                          * Setup for lookup. 
1772                          *
1773                          * Even though LOCKPARENT was cleared, ni_dvp may
1774                          * be garbage. 
1775                          */
1776                         nd.ni_cnd.cn_nameiop = NAMEI_LOOKUP;
1777                         nd.ni_cnd.cn_flags &= ~(CNP_LOCKPARENT);
1778                         nd.ni_cnd.cn_td = td;
1779                         nd.ni_cnd.cn_cred = cred;
1780
1781                         error = lookup(&nd);
1782                         nd.ni_dvp = NULL;
1783
1784                         if (error != 0) {
1785                                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1786                                                       0, &error));
1787                                 /* fall through on certain errors */
1788                         }
1789                         nfsrv_object_create(nd.ni_vp);
1790                         if (nd.ni_cnd.cn_flags & CNP_ISSYMLINK) {
1791                                 error = EINVAL;
1792                                 goto nfsmreply0;
1793                         }
1794 #endif
1795                 } else {
1796                         error = ENXIO;
1797                 }
1798         } else {
                     /*
                      * Target already exists.  If the request carried a size,
                      * require write access and apply only that size;
                      * everything else in vap is reset first.
                      */
1799                 if (vap->va_size != -1) {
1800                         error = nfsrv_access(mp, vp, VWRITE, cred,
1801                             (nd.nl_flags & NLC_NFS_RDONLY), td, 0);
1802                         if (!error) {
1803                                 tempsize = vap->va_size;
1804                                 VATTR_NULL(vap);
1805                                 vap->va_size = tempsize;
1806                                 error = VOP_SETATTR(vp, vap, cred);
1807                         }
1808                 }
1809         }
1810
             /* On success, build the reply file handle and fetch attributes. */
1811         if (!error) {
1812                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1813                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1814                 if (!error)
1815                         error = VOP_GETATTR(vp, vap);
1816         }
1817         if (info.v3) {
                     /*
                      * exclusive_flag is still set here only if the create
                      * branch above did not consume it.  In that case the
                      * attributes just fetched must carry the same verifier
                      * in va_atime, otherwise the request fails with EEXIST.
                      */
1818                 if (exclusive_flag && !error &&
1819                         bcmp(cverf, (caddr_t)&vap->va_atime, NFSX_V3CREATEVERF))
1820                         error = EEXIST;
                     /* NOTE(review): dirp is used without a NULL check here. */
1821                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1822                 vrele(dirp);
1823                 dirp = NULL;
1824         }
1825         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1826                               NFSX_SRVFH(info.v3) + NFSX_FATTR(info.v3) +
1827                               NFSX_WCCDATA(info.v3),
1828                               &error));
1829         if (info.v3) {
1830                 if (!error) {
1831                         nfsm_srvpostop_fh(&info, fhp);
1832                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1833                 }
1834                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1835                                  diraft_ret, &diraft);
1836                 error = 0;
1837         } else {
1838                 nfsm_srvfhtom(&info, fhp);
1839                 fp = nfsm_build(&info, NFSX_V2FATTR);
1840                 nfsm_srvfattr(nfsd, vap, fp);
1841         }
1842         goto nfsmout;
1843
             /* Error exit that sends a reply with no additional payload. */
1844 nfsmreply0:
1845         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
1846         error = 0;
1847         /* fall through */
1848
             /*
              * Common exit: hand the reply chain to the caller and release
              * whatever is still held.  dvp gets vrele() when it is the same
              * vnode as vp and vput() otherwise; vp itself is vput() last.
              */
1849 nfsmout:
1850         *mrq = info.mreq;
1851         if (dirp)
1852                 vrele(dirp);
1853         nlookup_done(&nd);
1854         if (dvp) {
1855                 if (dvp == vp)
1856                         vrele(dvp);
1857                 else
1858                         vput(dvp);
1859         }
1860         if (vp)
1861                 vput(vp);
1862         return (error);
1863 }
1864
1865 /*
1866  * nfs v3 mknod service
      *
      * Creates a character device, block device, socket or FIFO named by the
      * request.  Any other type is refused with NFSERR_BADTYPE, an existing
      * target with EEXIST, and device nodes (everything except VSOCK and
      * VFIFO) additionally require priv_check_cred(PRIV_ROOT) to succeed.
      *
      * nfsd supplies the request (mbuf chain, parse position, credentials,
      * client address, flags); slp and td are passed through to the helpers.
      * *mrq receives info.mreq on every exit path.
      *
      * Once the reply has been assembled on the normal path the function
      * returns 0; the nfsmout path returns whatever is left in error.
1867  */
1868 int
1869 nfsrv_mknod(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
1870             struct thread *td, struct mbuf **mrq)
1871 {
1872         struct sockaddr *nam = nfsd->nd_nam;
1873         struct ucred *cred = &nfsd->nd_cr;
1874         struct vattr va, dirfor, diraft;
1875         struct vattr *vap = &va;
1876         u_int32_t *tl;
1877         struct nlookupdata nd;
1878         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
1879         enum vtype vtyp;
1880         struct vnode *dirp;
1881         struct vnode *dvp;
1882         struct vnode *vp;
1883         nfsfh_t nfh;
1884         fhandle_t *fhp;
1885         struct nfsm_info info;
1886
1887         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /*
              * Put nd and the vnode pointers into a known state first: the
              * cleanup at nfsmout inspects all of them.
              */
1888         nlookup_zero(&nd);
1889         dirp = NULL;
1890         dvp = NULL;
1891         vp = NULL;
1892
1893         info.mrep = nfsd->nd_mrep;
1894         info.mreq = NULL;
1895         info.md = nfsd->nd_md;
1896         info.dpos = nfsd->nd_dpos;
             /*
              * Every sibling handler initialises info.v3 before handing &info
              * to the nfsm_*() helpers; do the same here so the field is never
              * read uninitialised.
              */
             info.v3 = (nfsd->nd_flag & ND_NFSV3);
1897
1898         fhp = &nfh.fh_generic;
1899         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
1900         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
1901
1902         /*
1903          * Handle nfs_namei() call.  If an error occurs, the nd structure
1904          * is not valid.  However, nfsm_*() routines may still jump to
1905          * nfsmout.
1906          */
1907
1908         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
1909                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
1910                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* Pre-operation directory attributes for the wcc data. */
1911         if (dirp)
1912                 dirfor_ret = VOP_GETATTR(dirp, &dirfor);
1913         if (error) {
1914                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1915                            NFSX_WCCDATA(1), &error));
1916                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1917                                  diraft_ret, &diraft);
1918                 error = 0;
1919                 goto nfsmout;
1920         }
             /* Requested node type; only the four special types are accepted. */
1921         NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
1922         vtyp = nfsv3tov_type(*tl);
1923         if (vtyp != VCHR && vtyp != VBLK && vtyp != VSOCK && vtyp != VFIFO) {
1924                 error = NFSERR_BADTYPE;
1925                 goto out;
1926         }
1927         VATTR_NULL(vap);
1928         ERROROUT(nfsm_srvsattr(&info, vap));
             /* Device nodes carry two more words: major, then minor. */
1929         if (vtyp == VCHR || vtyp == VBLK) {
1930                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
1931                 vap->va_rmajor = fxdr_unsigned(u_int32_t, *tl++);
1932                 vap->va_rminor = fxdr_unsigned(u_int32_t, *tl);
1933         }
1934
1935         /*
1936          * Iff doesn't exist, create it.
1937          */
1938         if (vp) {
1939                 error = EEXIST;
1940                 goto out;
1941         }
1942         vap->va_type = vtyp;
1943         if (vap->va_mode == (mode_t)VNOVAL)
1944                 vap->va_mode = 0;
             /*
              * In both branches dvp is unlocked for the namecache-based VOP
              * and its reference dropped afterwards; the pointer is cleared so
              * the cleanup below skips it.
              */
1945         if (vtyp == VSOCK) {
1946                 vn_unlock(dvp);
1947                 error = VOP_NCREATE(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1948                 vrele(dvp);
1949                 dvp = NULL;
1950         } else {
1951                 if (vtyp != VFIFO && (error = priv_check_cred(cred, PRIV_ROOT, 0)))
1952                         goto out;
1953
1954                 vn_unlock(dvp);
1955                 error = VOP_NMKNOD(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
1956                 vrele(dvp);
1957                 dvp = NULL;
1958                 if (error)
1959                         goto out;
1960         }
1961
1962         /*
1963          * send response, cleanup, return.
1964          */
1965 out:
1966         nlookup_done(&nd);
1967         if (dvp) {
1968                 if (dvp == vp)
1969                         vrele(dvp);
1970                 else
1971                         vput(dvp);
1972                 dvp = NULL;
1973         }
             /* On success, build the reply file handle and fetch attributes. */
1974         if (!error) {
1975                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
1976                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
1977                 if (!error)
1978                         error = VOP_GETATTR(vp, vap);
1979         }
1980         if (vp) {
1981                 vput(vp);
1982                 vp = NULL;
1983         }
             /*
              * Post-operation directory attributes.  The lookup stays inside
              * the NULL guard; if dirp is NULL, diraft_ret keeps its initial
              * value of 1.
              */
1984         if (dirp) {
1985                 diraft_ret = VOP_GETATTR(dirp, &diraft);
1986                 vrele(dirp);
1987                 dirp = NULL;
1988         }
1989         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
1990                               NFSX_SRVFH(1) + NFSX_POSTOPATTR(1) +
1991                               NFSX_WCCDATA(1), &error));
1992         if (!error) {
1993                 nfsm_srvpostop_fh(&info, fhp);
1994                 nfsm_srvpostop_attr(&info, nfsd, 0, vap);
1995         }
1996         nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
1997                          diraft_ret, &diraft);
1998         *mrq = info.mreq;
1999         return (0);
             /*
              * Bail-out target for the early error reply and for the parsing
              * wrappers: release whatever is still held and return error.
              */
2000 nfsmout:
2001         *mrq = info.mreq;
2002         if (dirp)
2003                 vrele(dirp);
2004         nlookup_done(&nd);
2005         if (dvp) {
2006                 if (dvp == vp)
2007                         vrele(dvp);
2008                 else
2009                         vput(dvp);
2010         }
2011         if (vp)
2012                 vput(vp);
2013         return (error);
2014 }
2015
2016 /*
2017  * nfs remove service
      *
      * Looks the requested name up with NLC_DELETE and removes it through
      * VOP_NREMOVE().  A directory is refused with EPERM and a vnode flagged
      * VROOT with EBUSY.
      *
      * nfsd supplies the request (mbuf chain, parse position, credentials,
      * client address, flags); slp and td are passed through to the helpers.
      * *mrq receives info.mreq on every exit path.
      *
      * For v3 the directory attributes sampled before and after the
      * operation are appended as wcc data and error is reset to 0; otherwise
      * the function returns whatever nfsm_reply() left in error.
2018  */
2019 int
2020 nfsrv_remove(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2021              struct thread *td, struct mbuf **mrq)
2022 {
2023         struct sockaddr *nam = nfsd->nd_nam;
2024         struct ucred *cred = &nfsd->nd_cr;
2025         struct nlookupdata nd;
2026         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2027         struct vnode *dirp;
2028         struct vnode *dvp;
2029         struct vnode *vp;
2030         struct vattr dirfor, diraft;
2031         nfsfh_t nfh;
2032         fhandle_t *fhp;
2033         struct nfsm_info info;
2034
2035         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
             /*
              * Put nd and the vnode pointers into a known state first: the
              * cleanup at nfsmout inspects all of them.
              */
2036         nlookup_zero(&nd);
2037         dirp = NULL;
2038         dvp = NULL;
2039         vp = NULL;
2040
2041         info.mrep = nfsd->nd_mrep;
2042         info.mreq = NULL;
2043         info.md = nfsd->nd_md;
2044         info.dpos = nfsd->nd_dpos;
2045         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2046
             /*
              * Directory file handle and name length.  The *OUT wrappers are
              * defined outside this chunk; presumably they bail out to
              * nfsmout on failure.
              */
2047         fhp = &nfh.fh_generic;
2048         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2049         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2050
2051         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2052                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2053                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* Pre-operation directory attributes, needed for v3 only. */
2054         if (dirp) {
2055                 if (info.v3)
2056                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2057         }
2058         if (error == 0) {
2059                 if (vp->v_type == VDIR) {
2060                         error = EPERM;          /* POSIX */
2061                         goto out;
2062                 }
2063                 /*
2064                  * The root of a mounted filesystem cannot be deleted.
2065                  */
2066                 if (vp->v_flag & VROOT) {
2067                         error = EBUSY;
2068                         goto out;
2069                 }
                     /*
                      * Note that this label sits inside the if-block.  When
                      * the checks passed, dvp is unlocked (unless it is the
                      * same vnode as vp) and vp is released before the
                      * namecache-based remove; dvp's reference is dropped
                      * afterwards.  On a check failure dvp and vp are left
                      * for the cleanup at nfsmout.
                      */
2070 out:
2071                 if (!error) {
2072                         if (dvp != vp)
2073                                 vn_unlock(dvp);
2074                         if (vp) {
2075                                 vput(vp);
2076                                 vp = NULL;
2077                         }
2078                         error = VOP_NREMOVE(&nd.nl_nch, dvp, nd.nl_cred);
2079                         vrele(dvp);
2080                         dvp = NULL;
2081                 }
2082         }
             /* Post-operation directory attributes, again v3 only. */
2083         if (dirp && info.v3)
2084                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2085         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
2086         if (info.v3) {
2087                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2088                                  diraft_ret, &diraft);
2089                 error = 0;
2090         }
             /*
              * Common exit: hand the reply chain to the caller and release
              * whatever is still held.  dvp gets vrele() when it is the same
              * vnode as vp and vput() otherwise.
              */
2091 nfsmout:
2092         *mrq = info.mreq;
2093         nlookup_done(&nd);
2094         if (dirp)
2095                 vrele(dirp);
2096         if (dvp) {
2097                 if (dvp == vp)
2098                         vrele(dvp);
2099                 else
2100                         vput(dvp);
2101         }
2102         if (vp)
2103                 vput(vp);
2104         return(error);
2105 }
2106
2107 /*
2108  * nfs rename service
      *
      * Looks up the source (NLC_RENAME_SRC) and the target (NLC_RENAME_DST),
      * runs a series of topology checks on the two namecache entries and, if
      * they all pass, calls VOP_NRENAME().  When source and target are the
      * same namecache entry nothing is done and the request succeeds.
      *
      * nfsd supplies the request (mbuf chain, parse position, credentials,
      * client address, flags); slp and td are passed through to the helpers.
      * *mrq receives info.mreq on every exit path.
      *
      * error is reset to 0 after every reply built here; a non-zero return
      * can only come from one of the parsing/reply wrappers jumping to
      * nfsmout (their definitions are outside this chunk).
2109  */
2110 int
2111 nfsrv_rename(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2112              struct thread *td, struct mbuf **mrq)
2113 {
2114         struct sockaddr *nam = nfsd->nd_nam;
2115         struct ucred *cred = &nfsd->nd_cr;
2116         int error = 0, len, len2, fdirfor_ret = 1, fdiraft_ret = 1;
2117         int tdirfor_ret = 1, tdiraft_ret = 1;
2118         struct nlookupdata fromnd, tond;
2119         struct vnode *fvp, *fdirp, *fdvp;
2120         struct vnode *tvp, *tdirp, *tdvp;
2121         struct namecache *ncp;
2122         struct vattr fdirfor, fdiraft, tdirfor, tdiraft;
2123         nfsfh_t fnfh, tnfh;
2124         fhandle_t *ffhp, *tfhp;
2125         uid_t saved_uid;
2126         struct nfsm_info info;
2127
2128         info.mrep = nfsd->nd_mrep;
2129         info.mreq = NULL;
2130         info.md = nfsd->nd_md;
2131         info.dpos = nfsd->nd_dpos;
2132         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2133
2134         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2135 #ifndef nolint
2136         fvp = NULL;
2137 #endif
2138         ffhp = &fnfh.fh_generic;
2139         tfhp = &tnfh.fh_generic;
2140
2141         /*
2142          * Clear fields in case goto nfsmout occurs from macro.
2143          */
2144
2145         nlookup_zero(&fromnd);
2146         nlookup_zero(&tond);
2147         fdirp = NULL;
2148         tdirp = NULL;
2149
             /* Source directory file handle and source name length. */
2150         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, ffhp, &error));
2151         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2152
2153         /*
2154          * Remember our original uid so that we can reset cr_uid before
2155          * the second nfs_namei() call, in case it is remapped.
2156          */
2157         saved_uid = cred->cr_uid;
2158         error = nfs_namei(&fromnd, cred, NLC_RENAME_SRC,
2159                           NULL, NULL,
2160                           ffhp, len, slp, nam, &info.md, &info.dpos, &fdirp,
2161                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* Pre-operation attributes of the source directory (v3 only). */
2162         if (fdirp) {
2163                 if (info.v3)
2164                         fdirfor_ret = VOP_GETATTR(fdirp, &fdirfor);
2165         }
2166         if (error) {
2167                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2168                                       2 * NFSX_WCCDATA(info.v3), &error));
2169                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2170                                  fdiraft_ret, &fdiraft);
2171                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2172                                  tdiraft_ret, &tdiraft);
2173                 error = 0;
2174                 goto nfsmout;
2175         }
2176
2177         /*
2178          * We have to unlock the from ncp before we can safely lookup
2179          * the target ncp.
2180          */
2181         KKASSERT(fromnd.nl_flags & NLC_NCPISLOCKED);
2182         cache_unlock(&fromnd.nl_nch);
2183         fromnd.nl_flags &= ~NLC_NCPISLOCKED;
             /* Target directory file handle and target name length. */
2184         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, tfhp, &error));
2185         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXNAMLEN));
2186         cred->cr_uid = saved_uid;
2187
2188         error = nfs_namei(&tond, cred, NLC_RENAME_DST, NULL, NULL,
2189                           tfhp, len2, slp, nam, &info.md, &info.dpos, &tdirp,
2190                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
             /* Pre-operation attributes of the target directory (v3 only). */
2191         if (tdirp) {
2192                 if (info.v3)
2193                         tdirfor_ret = VOP_GETATTR(tdirp, &tdirfor);
2194         }
2195         if (error)
2196                 goto out1;
2197
2198         /*
2199          * relock the source
2200          */
             /*
              * Three cases: the non-blocking attempt succeeds; it fails and
              * the source ncp has the higher address, so a blocking lock is
              * taken while the target stays locked; or the target is
              * unlocked first, the source locked, and the target re-locked
              * and re-resolved.  Presumably this keeps lock acquisition in
              * address order to avoid a deadlock -- confirm against the
              * namecache locking rules.
              */
2201         if (cache_lock_nonblock(&fromnd.nl_nch) == 0) {
2202                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2203         } else if (fromnd.nl_nch.ncp > tond.nl_nch.ncp) {
2204                 cache_lock(&fromnd.nl_nch);
2205                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2206         } else {
2207                 cache_unlock(&tond.nl_nch);
2208                 cache_lock(&fromnd.nl_nch);
2209                 cache_resolve(&fromnd.nl_nch, fromnd.nl_cred);
2210                 cache_lock(&tond.nl_nch);
2211                 cache_resolve(&tond.nl_nch, tond.nl_cred);
2212         }
2213         fromnd.nl_flags |= NLC_NCPISLOCKED;
2214
             /* tvp is tested against NULL below; fvp is dereferenced directly. */
2215         fvp = fromnd.nl_nch.ncp->nc_vp;
2216         tvp = tond.nl_nch.ncp->nc_vp;
2217
2218         /*
2219          * Set fdvp and tdvp.  We haven't done all the topology checks
2220          * so these can wind up NULL (e.g. if either fvp or tvp is a mount
2221          * point).  If we get through the checks these will be guaranteed
2222          * to be non-NULL.
2223          *
2224          * Holding the children ncp's should be sufficient to prevent
2225          * fdvp and tdvp ripouts.
2226          */
2227         if (fromnd.nl_nch.ncp->nc_parent)
2228                 fdvp = fromnd.nl_nch.ncp->nc_parent->nc_vp;
2229         else
2230                 fdvp = NULL;
2231         if (tond.nl_nch.ncp->nc_parent)
2232                 tdvp = tond.nl_nch.ncp->nc_parent->nc_vp;
2233         else
2234                 tdvp = NULL;
2235
             /*
              * Topology checks.  Each failure picks a protocol-specific
              * error code (v3 vs. v2) and, with one exception noted below,
              * jumps to out.
              */
2236         if (tvp != NULL) {
                     /* Directory <-> non-directory replacement is refused. */
2237                 if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
2238                         if (info.v3)
2239                                 error = EEXIST;
2240                         else
2241                                 error = EISDIR;
2242                         goto out;
2243                 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
2244                         if (info.v3)
2245                                 error = EEXIST;
2246                         else
2247                                 error = ENOTDIR;
2248                         goto out;
2249                 }
                     /* A directory that is a mount point cannot be replaced. */
2250                 if (tvp->v_type == VDIR && (tond.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2251                         if (info.v3)
2252                                 error = EXDEV;
2253                         else
2254                                 error = ENOTEMPTY;
2255                         goto out;
2256                 }
2257         }
             /* Nor can a directory that is a mount point be moved. */
2258         if (fvp->v_type == VDIR && (fromnd.nl_nch.ncp->nc_flag & NCF_ISMOUNTPT)) {
2259                 if (info.v3)
2260                         error = EXDEV;
2261                 else
2262                         error = ENOTEMPTY;
2263                 goto out;
2264         }
             /* Source and target must live on the same mount. */
2265         if (fromnd.nl_nch.mount != tond.nl_nch.mount) {
2266                 if (info.v3)
2267                         error = EXDEV;
2268                 else
2269                         error = ENOTEMPTY;
2270                 goto out;
2271         }
             /*
              * Source is the direct parent of the target.  Unlike the checks
              * above this one does not jump to out; the ancestor walk below
              * is skipped because error is now non-zero.
              */
2272         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp->nc_parent) {
2273                 if (info.v3)
2274                         error = EINVAL;
2275                 else
2276                         error = ENOTEMPTY;
2277         }
2278
2279         /*
2280          * You cannot rename a source into itself or a subdirectory of itself.
2281          * We check this by traversing the target directory upwards looking
2282          * for a match against the source.
2283          */
2284         if (error == 0) {
2285                 for (ncp = tond.nl_nch.ncp; ncp; ncp = ncp->nc_parent) {
2286                         if (fromnd.nl_nch.ncp == ncp) {
2287                                 error = EINVAL;
2288                                 break;
2289                         }
2290                 }
2291         }
2292
2293         /*
2294          * If source is the same as the destination (that is the
2295          * same vnode with the same name in the same directory),
2296          * then there is nothing to do.
2297          */
             /*
              * -1 is a local sentinel meaning "succeed without renaming".
              * It deliberately overrides the EINVAL the walk above sets when
              * both entries are the same, and is turned into 0 right below.
              */
2298         if (fromnd.nl_nch.ncp == tond.nl_nch.ncp)
2299                 error = -1;
2300 out:
2301         if (!error) {
2302                 /*
2303                  * The VOP_NRENAME function releases all vnode references &
2304                  * locks prior to returning so we need to clear the pointers
2305                  * to bypass cleanup code later on.
2306                  */
2307                 error = VOP_NRENAME(&fromnd.nl_nch, &tond.nl_nch,
2308                                     fdvp, tdvp, tond.nl_cred);
2309         } else {
2310                 if (error == -1)
2311                         error = 0;
2312         }
2313         /* fall through */
2314
             /*
              * Reply path, also entered directly when the target lookup
              * failed: sample the post-operation directory attributes and
              * build the reply (wcc data for v3 only).
              */
2315 out1:
2316         if (fdirp)
2317                 fdiraft_ret = VOP_GETATTR(fdirp, &fdiraft);
2318         if (tdirp)
2319                 tdiraft_ret = VOP_GETATTR(tdirp, &tdiraft);
2320         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2321                               2 * NFSX_WCCDATA(info.v3), &error));
2322         if (info.v3) {
2323                 nfsm_srvwcc_data(&info, nfsd, fdirfor_ret, &fdirfor,
2324                                  fdiraft_ret, &fdiraft);
2325                 nfsm_srvwcc_data(&info, nfsd, tdirfor_ret, &tdirfor,
2326                                  tdiraft_ret, &tdiraft);
2327         }
2328         error = 0;
2329         /* fall through */
2330
             /*
              * Common exit: hand the reply chain to the caller, drop the
              * directory references and finish both lookups.
              */
2331 nfsmout:
2332         *mrq = info.mreq;
2333         if (tdirp)
2334                 vrele(tdirp);
2335         nlookup_done(&tond);
2336         if (fdirp)
2337                 vrele(fdirp);
2338         nlookup_done(&fromnd);
2339         return (error);
2340 }
2341
2342 /*
2343  * nfs link service
      *
      * Decodes two file handles and a name length from the request: the
      * handle of the existing file (resolved to xp by nfsrv_fhtovp()) and
      * the handle of the directory under which the new name is looked up
      * by nfs_namei(NLC_CREATE).  When no local check fails, VOP_NLINK()
      * is issued with dvp and xp.
      *
      * Local failures: EPERM if xp is a directory, EEXIST if the new name
      * already resolved to a vnode, EXDEV if xp and dvp are on different
      * mounts.
      *
      * The reply mbuf chain is handed back through *mrq.  Every vnode
      * reference still held is dropped at the nfsmout label.
2344  */
2345 int
2346 nfsrv_link(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2347            struct thread *td, struct mbuf **mrq)
2348 {
2349         struct sockaddr *nam = nfsd->nd_nam;
2350         struct ucred *cred = &nfsd->nd_cr;
2351         struct nlookupdata nd;
2352         int error = 0, rdonly, len, dirfor_ret = 1, diraft_ret = 1;
2353         int getret = 1;
2354         struct vnode *dirp;
2355         struct vnode *dvp;
2356         struct vnode *vp;
2357         struct vnode *xp;
2358         struct mount *mp;
2359         struct mount *xmp;
2360         struct vattr dirfor, diraft, at;
2361         nfsfh_t nfh, dnfh;
2362         fhandle_t *fhp, *dfhp;
2363         struct nfsm_info info;
2364
             /* Set up the mbuf parsing state from the request descriptor. */
2365         info.mrep = nfsd->nd_mrep;
2366         info.mreq = NULL;
2367         info.md = nfsd->nd_md;
2368         info.dpos = nfsd->nd_dpos;
2369         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2370
2371         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2372         nlookup_zero(&nd);
2373         dirp = dvp = vp = xp = NULL;
             /* mp is only initialized here; it is not used again below. */
2374         mp = xmp = NULL;
2375
             /*
              * Decode, in order: the handle of the existing file, the
              * handle of the target directory, and the new name's length.
              */
2376         fhp = &nfh.fh_generic;
2377         dfhp = &dnfh.fh_generic;
2378         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2379         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, dfhp, &error));
2380         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2381
             /* Resolve the existing file's handle into xp. */
2382         error = nfsrv_fhtovp(fhp, FALSE, &xmp, &xp, cred, slp, nam,
2383                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2384         if (error) {
                     /*
                      * No vnode to work with.  The reply is built with
                      * getret, dirfor_ret and diraft_ret still at their
                      * initial value of 1.
                      */
2385                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2386                                       NFSX_POSTOPATTR(info.v3) +
2387                                       NFSX_WCCDATA(info.v3),
2388                                       &error));
2389                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2390                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2391                                  diraft_ret, &diraft);
2392                 xp = NULL;
2393                 error = 0;
2394                 goto nfsmout;
2395         }
             /* Hard links to directories are refused. */
2396         if (xp->v_type == VDIR) {
2397                 error = EPERM;          /* POSIX */
2398                 goto out1;
2399         }
2400
             /*
              * Look up the new name relative to the directory handle.
              * dvp, vp and dirp are filled in by nfs_namei().
              */
2401         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2402                           dfhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2403                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2404         if (dirp) {
                     /* v3 only: directory attributes before the operation. */
2405                 if (info.v3)
2406                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2407         }
2408         if (error)
2409                 goto out1;
2410
             /* The new name must not already exist. */
2411         if (vp != NULL) {
2412                 error = EEXIST;
2413                 goto out;
2414         }
             /* Source and target directory must be on the same mount. */
2415         if (xp->v_mount != dvp->v_mount)
2416                 error = EXDEV;
2417 out:
2418         if (!error) {
                     /*
                      * dvp is unlocked before the VOP and its reference is
                      * dropped afterwards; clearing dvp keeps nfsmout from
                      * releasing it again.
                      */
2419                 vn_unlock(dvp);
2420                 error = VOP_NLINK(&nd.nl_nch, dvp, xp, nd.nl_cred);
2421                 vrele(dvp);
2422                 dvp = NULL;
2423         }
2424         /* fall through */
2425
2426 out1:
             /* Gather post-operation attributes for the reply. */
2427         if (info.v3)
2428                 getret = VOP_GETATTR(xp, &at);
2429         if (dirp)
2430                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2431         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2432                               NFSX_POSTOPATTR(info.v3) + NFSX_WCCDATA(info.v3),
2433                               &error));
             /*
              * Only v3 replies carry attribute data.  error is cleared only
              * in the v3 case; for other versions it is returned as left by
              * nfsm_reply().
              */
2434         if (info.v3) {
2435                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2436                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2437                                  diraft_ret, &diraft);
2438                 error = 0;
2439         }
2440         /* fall through */
2441
2442 nfsmout:
             /* Hand back the reply and release whatever is still held. */
2443         *mrq = info.mreq;
2444         nlookup_done(&nd);
2445         if (dirp)
2446                 vrele(dirp);
2447         if (xp)
2448                 vrele(xp);
2449         if (dvp) {
                     /* When dvp aliases vp, the vput(vp) below unlocks it. */
2450                 if (dvp == vp)
2451                         vrele(dvp);
2452                 else
2453                         vput(dvp);
2454         }
2455         if (vp)
2456                 vput(vp);
2457         return(error);
2458 }
2459
2460 /*
2461  * nfs symbolic link service
2462  */
2463 int
2464 nfsrv_symlink(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2465               struct thread *td, struct mbuf **mrq)
2466 {
2467         struct sockaddr *nam = nfsd->nd_nam;
2468         struct ucred *cred = &nfsd->nd_cr;
2469         struct vattr va, dirfor, diraft;
2470         struct nlookupdata nd;
2471         struct vattr *vap = &va;
2472         struct nfsv2_sattr *sp;
2473         char *pathcp = NULL;
2474         struct uio io;
2475         struct iovec iv;
2476         int error = 0, len, len2, dirfor_ret = 1, diraft_ret = 1;
2477         struct vnode *dirp;
2478         struct vnode *vp;
2479         struct vnode *dvp;
2480         nfsfh_t nfh;
2481         fhandle_t *fhp;
2482         struct nfsm_info info;
2483
2484         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2485         nlookup_zero(&nd);
2486         dirp = NULL;
2487         dvp = NULL;
2488         vp = NULL;
2489
2490         info.mrep = nfsd->nd_mrep;
2491         info.mreq =  NULL;
2492         info.md = nfsd->nd_md;
2493         info.dpos = nfsd->nd_dpos;
2494         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2495
2496         fhp = &nfh.fh_generic;
2497         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2498         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2499
2500         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2501                         fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2502                         td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2503         if (dirp) {
2504                 if (info.v3)
2505                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2506         }
2507         if (error)
2508                 goto out;
2509
2510         VATTR_NULL(vap);
2511         if (info.v3) {
2512                 ERROROUT(nfsm_srvsattr(&info, vap));
2513         }
2514         NEGATIVEOUT(len2 = nfsm_strsiz(&info, NFS_MAXPATHLEN));
2515         pathcp = kmalloc(len2 + 1, M_TEMP, M_WAITOK);
2516         iv.iov_base = pathcp;
2517         iv.iov_len = len2;
2518         io.uio_resid = len2;
2519         io.uio_offset = 0;
2520         io.uio_iov = &iv;
2521         io.uio_iovcnt = 1;
2522         io.uio_segflg = UIO_SYSSPACE;
2523         io.uio_rw = UIO_READ;
2524         io.uio_td = NULL;
2525         ERROROUT(nfsm_mtouio(&info, &io, len2));
2526         if (info.v3 == 0) {
2527                 NULLOUT(sp = nfsm_dissect(&info, NFSX_V2SATTR));
2528                 vap->va_mode = nfstov_mode(sp->sa_mode);
2529         }
2530         *(pathcp + len2) = '\0';
2531         if (vp) {
2532                 error = EEXIST;
2533                 goto out;
2534         }
2535
2536         if (vap->va_mode == (mode_t)VNOVAL)
2537                 vap->va_mode = 0;
2538         if (dvp != vp)
2539                 vn_unlock(dvp);
2540         error = VOP_NSYMLINK(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap, pathcp);
2541         vrele(dvp);
2542         dvp = NULL;
2543         if (error == 0) {
2544                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2545                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2546                 if (!error)
2547                         error = VOP_GETATTR(vp, vap);
2548         }
2549
2550 out:
2551         if (dvp) {
2552                 if (dvp == vp)
2553                         vrele(dvp);
2554                 else
2555                         vput(dvp);
2556         }
2557         if (vp) {
2558                 vput(vp);
2559                 vp = NULL;
2560         }
2561         if (pathcp) {
2562                 kfree(pathcp, M_TEMP);
2563                 pathcp = NULL;
2564         }
2565         if (dirp) {
2566                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2567                 vrele(dirp);
2568                 dirp = NULL;
2569         }
2570         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2571                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2572                               NFSX_WCCDATA(info.v3),
2573                               &error));
2574         if (info.v3) {
2575                 if (!error) {
2576                         nfsm_srvpostop_fh(&info, fhp);
2577                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2578                 }
2579                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2580                                  diraft_ret, &diraft);
2581         }
2582         error = 0;
2583         /* fall through */
2584
2585 nfsmout:
2586         *mrq = info.mreq;
2587         nlookup_done(&nd);
2588         if (vp)
2589                 vput(vp);
2590         if (dirp)
2591                 vrele(dirp);
2592         if (pathcp)
2593                 kfree(pathcp, M_TEMP);
2594         return (error);
2595 }
2596
2597 /*
2598  * nfs mkdir service
      *
      * Decodes the parent directory handle and the new name, looks the
      * name up with nfs_namei(NLC_CREATE), decodes the requested
      * attributes and issues VOP_NMKDIR().  Fails with EEXIST if the name
      * already resolved to a vnode.
      *
      * On success the reply carries the new directory's file handle and
      * attributes (plus directory wcc data for v3).  The reply mbuf chain
      * is handed back through *mrq.
2599  */
2600 int
2601 nfsrv_mkdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2602             struct thread *td, struct mbuf **mrq)
2603 {
2604         struct sockaddr *nam = nfsd->nd_nam;
2605         struct ucred *cred = &nfsd->nd_cr;
2606         struct vattr va, dirfor, diraft;
2607         struct vattr *vap = &va;
2608         struct nfs_fattr *fp;
2609         struct nlookupdata nd;
2610         u_int32_t *tl;
2611         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2612         struct vnode *dirp;
2613         struct vnode *dvp;
2614         struct vnode *vp;
2615         nfsfh_t nfh;
2616         fhandle_t *fhp;
2617         struct nfsm_info info;
2618
2619         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2620         nlookup_zero(&nd);
2621         dirp = NULL;
2622         dvp = NULL;
2623         vp = NULL;
2624
             /* Set up the mbuf parsing state from the request descriptor. */
2625         info.dpos = nfsd->nd_dpos;
2626         info.mrep = nfsd->nd_mrep;
2627         info.mreq =  NULL;
2628         info.md = nfsd->nd_md;
2629         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2630
             /* Decode the parent directory handle and the name length. */
2631         fhp = &nfh.fh_generic;
2632         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2633         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2634
2635         error = nfs_namei(&nd, cred, NLC_CREATE, &dvp, &vp,
2636                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2637                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2638         if (dirp) {
                     /* v3 only: directory attributes before the operation. */
2639                 if (info.v3)
2640                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2641         }
2642         if (error) {
                     /*
                      * Lookup failed: reply with wcc data only.  diraft_ret
                      * is still at its initial value of 1 here.
                      */
2643                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2644                                       NFSX_WCCDATA(info.v3), &error));
2645                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2646                                  diraft_ret, &diraft);
2647                 error = 0;
2648                 goto nfsmout;
2649         }
             /*
              * Decode the requested attributes: a full sattr for v3, a
              * single mode word otherwise.
              */
2650         VATTR_NULL(vap);
2651         if (info.v3) {
2652                 ERROROUT(nfsm_srvsattr(&info, vap));
2653         } else {
2654                 NULLOUT(tl = nfsm_dissect(&info, NFSX_UNSIGNED));
2655                 vap->va_mode = nfstov_mode(*tl++);
2656         }
2657
2658         /*
              * At this point dvp is referenced and locked: it is unlocked
              * and released around the VOP_NMKDIR() call below, or
              * released with vput() at nfsmout.
              *
              * NOTE(review): this comment used to refer to nd.ni_dvp and
              * nd.ni_vp, which this function does not use.  vp, if
              * non-NULL, is released with vput() at nfsmout; confirm its
              * lock state on return from nfs_namei().
2661          */
2662
2663         vap->va_type = VDIR;
2664         if (vp != NULL) {
2665                 error = EEXIST;
2666                 goto out;
2667         }
2668
2669         /*
              * Issue the mkdir op.  A mode left at VNOVAL is replaced by 0.
              * dvp is unlocked before the call and its reference dropped
              * afterwards.  VOP_NMKDIR() hands the new directory back in
              * vp, which is released with vput() at nfsmout.
              *
              * NOTE(review): earlier wording about SAVESTART and nd.ni_vp
              * looks inherited from an older namei interface; confirm.
2673          */
2674         if (vap->va_mode == (mode_t)VNOVAL)
2675                 vap->va_mode = 0;
2676         vn_unlock(dvp);
2677         error = VOP_NMKDIR(&nd.nl_nch, dvp, &vp, nd.nl_cred, vap);
2678         vrele(dvp);
2679         dvp = NULL;
2680
2681         if (error == 0) {
                     /* Build the new directory's file handle and attributes. */
2682                 bzero(&fhp->fh_fid, sizeof(fhp->fh_fid));
2683                 error = VFS_VPTOFH(vp, &fhp->fh_fid);
2684                 if (error == 0)
2685                         error = VOP_GETATTR(vp, vap);
2686         }
2687 out:
             /* Directory attributes after the operation, then the reply. */
2688         if (dirp)
2689                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2690         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2691                               NFSX_SRVFH(info.v3) + NFSX_POSTOPATTR(info.v3) +
2692                               NFSX_WCCDATA(info.v3),
2693                               &error));
2694         if (info.v3) {
                     /* Handle and attributes only on success; wcc always. */
2695                 if (!error) {
2696                         nfsm_srvpostop_fh(&info, fhp);
2697                         nfsm_srvpostop_attr(&info, nfsd, 0, vap);
2698                 }
2699                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2700                                  diraft_ret, &diraft);
2701         } else {
                     /* Non-v3 reply: file handle followed by a v2 fattr. */
2702                 nfsm_srvfhtom(&info, fhp);
2703                 fp = nfsm_build(&info, NFSX_V2FATTR);
2704                 nfsm_srvfattr(nfsd, vap, fp);
2705         }
2706         error = 0;
2707         /* fall through */
2708
2709 nfsmout:
             /* Hand back the reply and release whatever is still held. */
2710         *mrq = info.mreq;
2711         nlookup_done(&nd);
2712         if (dirp)
2713                 vrele(dirp);
2714         if (dvp) {
                     /* When dvp aliases vp, the vput(vp) below unlocks it. */
2715                 if (dvp == vp)
2716                         vrele(dvp);
2717                 else
2718                         vput(dvp);
2719         }
2720         if (vp)
2721                 vput(vp);
2722         return (error);
2723 }
2724
2725 /*
2726  * nfs rmdir service
      *
      * Decodes the parent directory handle and the name, looks the name up
      * with nfs_namei(NLC_DELETE) and issues VOP_NRMDIR().  Fails with
      * ENOTDIR if the target is not a directory and EBUSY if it is the
      * root of a mounted filesystem (VROOT).
      *
      * The reply mbuf chain is handed back through *mrq; v3 replies carry
      * directory wcc data.
2727  */
2728 int
2729 nfsrv_rmdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2730             struct thread *td, struct mbuf **mrq)
2731 {
2732         struct sockaddr *nam = nfsd->nd_nam;
2733         struct ucred *cred = &nfsd->nd_cr;
2734         int error = 0, len, dirfor_ret = 1, diraft_ret = 1;
2735         struct vnode *dirp;
2736         struct vnode *dvp;
2737         struct vnode *vp;
2738         struct vattr dirfor, diraft;
2739         nfsfh_t nfh;
2740         fhandle_t *fhp;
2741         struct nlookupdata nd;
2742         struct nfsm_info info;
2743
2744         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2745         nlookup_zero(&nd);
2746         dirp = NULL;
2747         dvp = NULL;
2748         vp = NULL;
2749
             /* Set up the mbuf parsing state from the request descriptor. */
2750         info.mrep = nfsd->nd_mrep;
2751         info.mreq = NULL;
2752         info.md = nfsd->nd_md;
2753         info.dpos = nfsd->nd_dpos;
2754         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2755
             /* Decode the parent directory handle and the name length. */
2756         fhp = &nfh.fh_generic;
2757         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
2758         NEGREPLYOUT(len = nfsm_srvnamesiz(&info, &error));
2759
2760         error = nfs_namei(&nd, cred, NLC_DELETE, &dvp, &vp,
2761                           fhp, len, slp, nam, &info.md, &info.dpos, &dirp,
2762                           td, (nfsd->nd_flag & ND_KERBAUTH), FALSE);
2763         if (dirp) {
                     /* v3 only: directory attributes before the operation. */
2764                 if (info.v3)
2765                         dirfor_ret = VOP_GETATTR(dirp, &dirfor);
2766         }
2767         if (error) {
                     /*
                      * Lookup failed: reply with wcc data only.  diraft_ret
                      * is still at its initial value of 1 here.
                      */
2768                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2769                                       NFSX_WCCDATA(info.v3), &error));
2770                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2771                                  diraft_ret, &diraft);
2772                 error = 0;
2773                 goto nfsmout;
2774         }
2775         if (vp->v_type != VDIR) {
2776                 error = ENOTDIR;
2777                 goto out;
2778         }
2779
2780         /*
2781          * The root of a mounted filesystem cannot be deleted.
2782          */
2783         if (vp->v_flag & VROOT)
2784                 error = EBUSY;
2785 out:
2786         /*
              * Issue the op only if no check above failed.  dvp is
              * unlocked (unless it aliases vp), vp is released with vput(),
              * and dvp's reference is dropped after VOP_NRMDIR().  Both
              * pointers are cleared so nfsmout does not release them again.
              * When a check failed, dvp and vp stay held until nfsmout.
              *
              * NOTE(review): earlier wording about SAVESTART looks
              * inherited from an older namei interface; confirm.
2789          */
2790         if (!error) {
2791                 if (dvp != vp)
2792                         vn_unlock(dvp);
2793                 vput(vp);
2794                 vp = NULL;
2795                 error = VOP_NRMDIR(&nd.nl_nch, dvp, nd.nl_cred);
2796                 vrele(dvp);
2797                 dvp = NULL;
2798         }
             /*
              * nlookup_done() is called again at nfsmout.
              * NOTE(review): presumably harmless on an already finished
              * nd; confirm against nlookup_done().
              */
2799         nlookup_done(&nd);
2800
             /* Directory attributes after the operation, then the reply. */
2801         if (dirp)
2802                 diraft_ret = VOP_GETATTR(dirp, &diraft);
2803         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_WCCDATA(info.v3), &error));
             /*
              * error is cleared only in the v3 case; for other versions it
              * is returned as left by nfsm_reply().
              */
2804         if (info.v3) {
2805                 nfsm_srvwcc_data(&info, nfsd, dirfor_ret, &dirfor,
2806                                  diraft_ret, &diraft);
2807                 error = 0;
2808         }
2809         /* fall through */
2810
2811 nfsmout:
             /* Hand back the reply and release whatever is still held. */
2812         *mrq = info.mreq;
2813         if (dvp) {
                     /* When dvp aliases vp, the vput(vp) below unlocks it. */
2814                 if (dvp == vp)
2815                         vrele(dvp);
2816                 else
2817                         vput(dvp);
2818         }
2819         nlookup_done(&nd);
2820         if (dirp)
2821                 vrele(dirp);
2822         if (vp)
2823                 vput(vp);
2824         return(error);
2825 }
2826
2827 /*
2828  * nfs readdir service
2829  * - mallocs what it thinks is enough to read
2830  *      count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
2831  * - calls VOP_READDIR()
2832  * - loops around building the reply
2833  *      if the output generated exceeds count break out of loop
2834  *      The nfsm_clget macro is used here so that the reply will be packed
2835  *      tightly in mbuf clusters.
2836  * - it only knows that it has encountered eof when the VOP_READDIR()
2837  *      reads nothing
2838  * - as such one readdir rpc will return eof false although you are there
2839  *      and then the next will return eof
2840  * - it trims out records with d_fileno == 0
2841  *      such records don't matter to Unix clients, but they might confuse
2842  *      clients of other operating systems.
2843  * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
2844  *      than requested, but this may not apply to all filesystems. For
2845  *      example, client NFS does not { although it is never remote mounted
2846  *      anyhow }
2847  *     The alternate call nfsrv_readdirplus() does lookups as well.
2848  * PS: The NFS protocol spec. does not clarify what the "count" byte
2849  *      argument is a count of.. just name strings and file id's or the
2850  *      entire reply rpc or ...
2851  *      I tried just file name and id sizes and it confused the Sun client,
2852  *      so I am using the full rpc size now. The "paranoia.." comment refers
2853  *      to including the status longwords that are not a part of the dir.
2854  *      "entry" structures, but are in the rpc.
2855  */
2856 struct flrep {
             /*
              * Scratch record declared as a local in nfsrv_readdirplus(),
              * which also views fl_nfh through a fhandle_t pointer.
              * NOTE(review): the field order presumably mirrors the
              * per-entry tail of a v3 READDIRPLUS reply (cookie, optional
              * attributes, optional file handle); confirm against
              * nfsrv_readdirplus().
              */
2857         nfsuint64       fl_off;
2858         u_int32_t       fl_postopok;
             /* NFSX_V3FATTR bytes, held as 32-bit words. */
2859         u_int32_t       fl_fattr[NFSX_V3FATTR / sizeof (u_int32_t)];
2860         u_int32_t       fl_fhok;
2861         u_int32_t       fl_fhsize;
             /* NFSX_V3FH bytes, held as 32-bit words. */
2862         u_int32_t       fl_nfh[NFSX_V3FH / sizeof (u_int32_t)];
2863 };
2864
2865 int
2866 nfsrv_readdir(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
2867               struct thread *td, struct mbuf **mrq)
2868 {
             /*
              * Outline:
              *  1. decode the file handle, starting offset and byte count
              *  2. resolve the handle to a directory vnode and check VEXEC
              *  3. read directory data into rbuf with VOP_READDIR(), going
              *     back to "again" while a pass yields nothing usable
              *  4. pack the usable entries into the reply mbufs until the
              *     reply would exceed the requested count
              * The reply mbuf chain is handed back through *mrq.
              */
2869         struct sockaddr *nam = nfsd->nd_nam;
2870         struct ucred *cred = &nfsd->nd_cr;
2871         char *bp, *be;
2872         struct dirent *dp;
2873         caddr_t cp;
2874         u_int32_t *tl;
2875         struct mbuf *mp1, *mp2;
2876         char *cpos, *cend, *rbuf;
2877         struct vnode *vp = NULL;
2878         struct mount *mp = NULL;
2879         struct vattr at;
2880         nfsfh_t nfh;
2881         fhandle_t *fhp;
2882         struct uio io;
2883         struct iovec iv;
2884         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
2885         int siz, cnt, fullsiz, eofflag, rdonly, ncookies;
2886         u_quad_t off, toff, verf;
2887         off_t *cookies = NULL, *cookiep;
2888         struct nfsm_info info;
2889
             /* Set up the mbuf parsing state from the request descriptor. */
2890         info.mrep = nfsd->nd_mrep;
2891         info.mreq = NULL;
2892         info.md = nfsd->nd_md;
2893         info.dpos = nfsd->nd_dpos;
2894         info.v3 = (nfsd->nd_flag & ND_NFSV3);
2895
2896         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
2897         fhp = &nfh.fh_generic;
2898         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
             /*
              * v3 requests carry a 64-bit offset, a 64-bit verifier and a
              * count (five words); other requests carry a 32-bit offset
              * and a count (two words).  tl is left pointing at the count.
              */
2899         if (info.v3) {
2900                 NULLOUT(tl = nfsm_dissect(&info, 5 * NFSX_UNSIGNED));
2901                 toff = fxdr_hyper(tl);
2902                 tl += 2;
2903                 verf = fxdr_hyper(tl);
2904                 tl += 2;
2905         } else {
2906                 NULLOUT(tl = nfsm_dissect(&info, 2 * NFSX_UNSIGNED));
2907                 toff = fxdr_unsigned(u_quad_t, *tl++);
2908                 verf = 0;       /* shut up gcc */
2909         }
2910         off = toff;
2911         cnt = fxdr_unsigned(int, *tl);
             /*
              * siz is cnt rounded up to a multiple of DIRBLKSIZ (the mask
              * assumes a power of two).  Both values are then capped at
              * NFS_SRVMAXDATA(nfsd); the unsigned casts make a negative
              * decoded value hit the cap as well.
              *
              * NOTE(review): siz is derived from the uncapped cnt.  A cnt
              * of 0 gives siz == 0, and the "nothing useful read" retry
              * further down would then jump back to "again" with the same
              * empty buffer unless VOP_READDIR() fails; confirm whether a
              * minimum size should be enforced.
              */
2912         siz = ((cnt + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
2913         xfer = NFS_SRVMAXDATA(nfsd);
2914         if ((unsigned)cnt > xfer)
2915                 cnt = xfer;
2916         if ((unsigned)siz > xfer)
2917                 siz = xfer;
2918         fullsiz = siz;
             /* Resolve the handle; anything but a directory is ENOTDIR. */
2919         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
2920                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
2921         if (!error && vp->v_type != VDIR) {
2922                 error = ENOTDIR;
2923                 vput(vp);
2924                 vp = NULL;
2925         }
2926         if (error) {
2927                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
2928                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2929                 error = 0;
2930                 goto nfsmout;
2931         }
2932
2933         /*
2934          * Obtain lock on vnode for this section of the code
2935          */
2936
2937         if (info.v3) {
2938                 error = getret = VOP_GETATTR(vp, &at);
2939 #if 0
2940                 /*
2941                  * XXX This check may be too strict for Solaris 2.5 clients.
2942                  */
2943                 if (!error && toff && verf && verf != at.va_filerev)
2944                         error = NFSERR_BAD_COOKIE;
2945 #endif
2946         }
2947         if (!error)
2948                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
2949         if (error) {
2950                 vput(vp);
2951                 vp = NULL;
2952                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2953                                       NFSX_POSTOPATTR(info.v3), &error));
2954                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2955                 error = 0;
2956                 goto nfsmout;
2957         }
             /* From here on vp is only referenced: released with vrele(). */
2958         vn_unlock(vp);
2959
2960         /*
2961          * end section.  Allocate rbuf and continue
2962          */
2963         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
2964 again:
             /*
              * Each pass reads up to fullsiz bytes starting at off.  The
              * cookie array from a previous pass is freed before it is
              * replaced by VOP_READDIR().
              */
2965         iv.iov_base = rbuf;
2966         iv.iov_len = fullsiz;
2967         io.uio_iov = &iv;
2968         io.uio_iovcnt = 1;
2969         io.uio_offset = (off_t)off;
2970         io.uio_resid = fullsiz;
2971         io.uio_segflg = UIO_SYSSPACE;
2972         io.uio_rw = UIO_READ;
2973         io.uio_td = NULL;
2974         eofflag = 0;
2975         if (cookies) {
2976                 kfree((caddr_t)cookies, M_TEMP);
2977                 cookies = NULL;
2978         }
2979         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
2980         off = (off_t)io.uio_offset;
             /* A successful read that yields no cookie array is refused. */
2981         if (!cookies && !error)
2982                 error = NFSERR_PERM;
2983         if (info.v3) {
2984                 getret = VOP_GETATTR(vp, &at);
2985                 if (!error)
2986                         error = getret;
2987         }
2988         if (error) {
                     /* Free the buffers before the reply macro can bail out. */
2989                 vrele(vp);
2990                 vp = NULL;
2991                 kfree((caddr_t)rbuf, M_TEMP);
2992                 if (cookies)
2993                         kfree((caddr_t)cookies, M_TEMP);
2994                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
2995                                       NFSX_POSTOPATTR(info.v3), &error));
2996                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
2997                 error = 0;
2998                 goto nfsmout;
2999         }
3000         if (io.uio_resid) {
                     /* siz becomes the number of bytes actually read. */
3001                 siz -= io.uio_resid;
3002
3003                 /*
3004                  * If nothing read, return eof
3005                  * rpc reply
3006                  */
3007                 if (siz == 0) {
3008                         vrele(vp);
3009                         vp = NULL;
3010                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3011                                               NFSX_POSTOPATTR(info.v3) +
3012                                               NFSX_COOKIEVERF(info.v3) +
3013                                               2 * NFSX_UNSIGNED,
3014                                               &error));
                             /*
                              * v3 puts at.va_filerev (two words) in front
                              * of the two trailer words written below:
                              * nfs_false, then nfs_true.
                              */
3015                         if (info.v3) {
3016                                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3017                                 tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3018                                 txdr_hyper(at.va_filerev, tl);
3019                                 tl += 2;
3020                         } else
3021                                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3022                         *tl++ = nfs_false;
3023                         *tl = nfs_true;
3024                         kfree((caddr_t)rbuf, M_TEMP);
3025                         kfree((caddr_t)cookies, M_TEMP);
3026                         error = 0;
3027                         goto nfsmout;
3028                 }
3029         }
3030
3031         /*
3032          * Check for degenerate cases of nothing useful read.
3033          * If so go try again
3034          */
3035         cpos = rbuf;
3036         cend = rbuf + siz;
3037         dp = (struct dirent *)cpos;
3038         cookiep = cookies;
3039         /*
3040          * For some reason FreeBSD's ufs_readdir() chooses to back the
3041          * directory offset up to a block boundary, so it is necessary to
3042          * skip over the records that precede the requested offset. This
3043          * requires the assumption that file offset cookies monotonically
3044          * increase.
3045          */
             /*
              * Also skips leading entries with d_ino == 0 and whiteouts
              * (DT_WHT).  dp and cookiep advance in lock step.
              */
3046         while (cpos < cend && ncookies > 0 &&
3047                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3048                  ((u_quad_t)(*cookiep)) <= toff)) {
3049                 dp = _DIRENT_NEXT(dp);
3050                 cpos = (char *)dp;
3051                 cookiep++;
3052                 ncookies--;
3053         }
             /* Everything was skipped: read the next chunk from off. */
3054         if (cpos >= cend || ncookies == 0) {
3055                 toff = off;
3056                 siz = fullsiz;
3057                 goto again;
3058         }
3059
             /* len tracks the reply size so far; it is compared against cnt. */
3060         len = 3 * NFSX_UNSIGNED;        /* paranoia, probably can be 0 */
3061         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3062                               NFSX_POSTOPATTR(info.v3) +
3063                               NFSX_COOKIEVERF(info.v3) + siz,
3064                               &error));
3065         if (info.v3) {
3066                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3067                 tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3068                 txdr_hyper(at.va_filerev, tl);
3069         }
             /*
              * bp is the write position in the current reply mbuf, be the
              * end of its trailing space.  Both are handed to nfsm_clget()
              * before each word is stored through tl.
              */
3070         mp1 = mp2 = info.mb;
3071         bp = info.bpos;
3072         be = bp + M_TRAILINGSPACE(mp1);
3073
3074         /* Loop through the records and build reply */
3075         while (cpos < cend && ncookies > 0) {
3076                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3077                         nlen = dp->d_namlen;
3078                         rem = nfsm_rndup(nlen) - nlen;
3079                         len += (4 * NFSX_UNSIGNED + nlen + rem);
3080                         if (info.v3)
3081                                 len += 2 * NFSX_UNSIGNED;
                             /*
                              * Entry does not fit in the requested count:
                              * stop here and report "not eof".
                              */
3082                         if (len > cnt) {
3083                                 eofflag = 0;
3084                                 break;
3085                         }
3086                         /*
3087                          * Build the directory record xdr from
3088                          * the dirent entry.
3089                          */
                             /*
                              * Written in order: nfs_true marker, the high
                              * word of d_ino (v3 only), the low word of
                              * d_ino, then the name length.
                              */
3090                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3091                         *tl = nfs_true;
3092                         bp += NFSX_UNSIGNED;
3093                         if (info.v3) {
3094                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3095                                 *tl = txdr_unsigned(dp->d_ino >> 32);
3096                                 bp += NFSX_UNSIGNED;
3097                         }
3098                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3099                         *tl = txdr_unsigned(dp->d_ino);
3100                         bp += NFSX_UNSIGNED;
3101                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3102                         *tl = txdr_unsigned(nlen);
3103                         bp += NFSX_UNSIGNED;
3104
3105                         /* And loop around copying the name */
3106                         xfer = nlen;
3107                         cp = dp->d_name;
3108                         while (xfer > 0) {
3109                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
                                     /* Copy only what fits before be. */
3110                                 if ((bp+xfer) > be)
3111                                         tsiz = be-bp;
3112                                 else
3113                                         tsiz = xfer;
3114                                 bcopy(cp, bp, tsiz);
3115                                 bp += tsiz;
3116                                 xfer -= tsiz;
3117                                 if (xfer > 0)
3118                                         cp += tsiz;
3119                         }
3120                         /* And null pad to a int32_t boundary */
3121                         for (i = 0; i < rem; i++)
3122                                 *bp++ = '\0';
3123                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3124
3125                         /* Finish off the record */
                             /* The cookie: high word first for v3, then low. */
3126                         if (info.v3) {
3127                                 *tl = txdr_unsigned(*cookiep >> 32);
3128                                 bp += NFSX_UNSIGNED;
3129                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3130                         }
3131                         *tl = txdr_unsigned(*cookiep);
3132                         bp += NFSX_UNSIGNED;
3133                 }
3134                 dp = _DIRENT_NEXT(dp);
3135                 cpos = (char *)dp;
3136                 cookiep++;
3137                 ncookies--;
3138         }
3139         vrele(vp);
3140         vp = NULL;
             /* Trailer: nfs_false in the entry-marker slot, then the eof flag. */
3141         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3142         *tl = nfs_false;
3143         bp += NFSX_UNSIGNED;
3144         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3145         if (eofflag)
3146                 *tl = nfs_true;
3147         else
3148                 *tl = nfs_false;
3149         bp += NFSX_UNSIGNED;
             /*
              * Fix up the length of the last mbuf written.  If the build
              * moved on from info.mb, the length is set from the write
              * position; otherwise the original mbuf grows by the bytes
              * appended since info.bpos.
              */
3150         if (mp1 != info.mb) {
3151                 if (bp < be)
3152                         mp1->m_len = bp - mtod(mp1, caddr_t);
3153         } else
3154                 mp1->m_len += bp - info.bpos;
3155         kfree((caddr_t)rbuf, M_TEMP);
3156         kfree((caddr_t)cookies, M_TEMP);
3157
3158 nfsmout:
             /* vp is non-NULL only if a macro bailed out while it was held. */
3159         *mrq = info.mreq;
3160         if (vp)
3161                 vrele(vp);
3162         return(error);
3163 }
3164
3165 int
3166 nfsrv_readdirplus(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3167                   struct thread *td, struct mbuf **mrq)
3168 {
3169         struct sockaddr *nam = nfsd->nd_nam;
3170         struct ucred *cred = &nfsd->nd_cr;
3171         char *bp, *be;
3172         struct dirent *dp;
3173         caddr_t cp;
3174         u_int32_t *tl;
3175         struct mbuf *mp1, *mp2;
3176         char *cpos, *cend, *rbuf;
3177         struct vnode *vp = NULL, *nvp;
3178         struct mount *mp = NULL;
3179         struct flrep fl;
3180         nfsfh_t nfh;
3181         fhandle_t *fhp, *nfhp = (fhandle_t *)fl.fl_nfh;
3182         struct uio io;
3183         struct iovec iv;
3184         struct vattr va, at, *vap = &va;
3185         struct nfs_fattr *fp;
3186         int len, nlen, rem, xfer, tsiz, i, error = 0, getret = 1;
3187         int siz, cnt, fullsiz, eofflag, rdonly, dirlen, ncookies;
3188         u_quad_t off, toff, verf;
3189         off_t *cookies = NULL, *cookiep; /* needs to be int64_t or off_t */
3190         struct nfsm_info info;
3191
3192         info.mrep = nfsd->nd_mrep;
3193         info.mreq = NULL;
3194         info.md = nfsd->nd_md;
3195         info.dpos = nfsd->nd_dpos;
3196         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3197
3198         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3199         fhp = &nfh.fh_generic;
3200         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3201         NULLOUT(tl = nfsm_dissect(&info, 6 * NFSX_UNSIGNED));
3202         toff = fxdr_hyper(tl);
3203         tl += 2;
3204         verf = fxdr_hyper(tl);
3205         tl += 2;
3206         siz = fxdr_unsigned(int, *tl++);
3207         cnt = fxdr_unsigned(int, *tl);
3208         off = toff;
3209         siz = ((siz + DIRBLKSIZ - 1) & ~(DIRBLKSIZ - 1));
3210         xfer = NFS_SRVMAXDATA(nfsd);
3211         if ((unsigned)cnt > xfer)
3212                 cnt = xfer;
3213         if ((unsigned)siz > xfer)
3214                 siz = xfer;
3215         fullsiz = siz;
3216         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3217                              &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3218         if (!error && vp->v_type != VDIR) {
3219                 error = ENOTDIR;
3220                 vput(vp);
3221                 vp = NULL;
3222         }
3223         if (error) {
3224                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3225                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3226                 error = 0;
3227                 goto nfsmout;
3228         }
3229         error = getret = VOP_GETATTR(vp, &at);
3230 #if 0
3231         /*
3232          * XXX This check may be too strict for Solaris 2.5 clients.
3233          */
3234         if (!error && toff && verf && verf != at.va_filerev)
3235                 error = NFSERR_BAD_COOKIE;
3236 #endif
3237         if (!error) {
3238                 error = nfsrv_access(mp, vp, VEXEC, cred, rdonly, td, 0);
3239         }
3240         if (error) {
3241                 vput(vp);
3242                 vp = NULL;
3243                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3244                                       NFSX_V3POSTOPATTR, &error));
3245                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3246                 error = 0;
3247                 goto nfsmout;
3248         }
3249         vn_unlock(vp);
3250         rbuf = kmalloc(siz, M_TEMP, M_WAITOK);
3251 again:
3252         iv.iov_base = rbuf;
3253         iv.iov_len = fullsiz;
3254         io.uio_iov = &iv;
3255         io.uio_iovcnt = 1;
3256         io.uio_offset = (off_t)off;
3257         io.uio_resid = fullsiz;
3258         io.uio_segflg = UIO_SYSSPACE;
3259         io.uio_rw = UIO_READ;
3260         io.uio_td = NULL;
3261         eofflag = 0;
3262         if (cookies) {
3263                 kfree((caddr_t)cookies, M_TEMP);
3264                 cookies = NULL;
3265         }
3266         error = VOP_READDIR(vp, &io, cred, &eofflag, &ncookies, &cookies);
3267         off = (u_quad_t)io.uio_offset;
3268         getret = VOP_GETATTR(vp, &at);
3269         if (!cookies && !error)
3270                 error = NFSERR_PERM;
3271         if (!error)
3272                 error = getret;
3273         if (error) {
3274                 vrele(vp);
3275                 vp = NULL;
3276                 if (cookies)
3277                         kfree((caddr_t)cookies, M_TEMP);
3278                 kfree((caddr_t)rbuf, M_TEMP);
3279                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3280                                       NFSX_V3POSTOPATTR, &error));
3281                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3282                 error = 0;
3283                 goto nfsmout;
3284         }
3285         if (io.uio_resid) {
3286                 siz -= io.uio_resid;
3287
3288                 /*
3289                  * If nothing read, return eof
3290                  * rpc reply
3291                  */
3292                 if (siz == 0) {
3293                         vrele(vp);
3294                         vp = NULL;
3295                         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3296                                               NFSX_V3POSTOPATTR +
3297                                               NFSX_V3COOKIEVERF +
3298                                               2 * NFSX_UNSIGNED,
3299                                               &error));
3300                         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3301                         tl = nfsm_build(&info, 4 * NFSX_UNSIGNED);
3302                         txdr_hyper(at.va_filerev, tl);
3303                         tl += 2;
3304                         *tl++ = nfs_false;
3305                         *tl = nfs_true;
3306                         kfree((caddr_t)cookies, M_TEMP);
3307                         kfree((caddr_t)rbuf, M_TEMP);
3308                         error = 0;
3309                         goto nfsmout;
3310                 }
3311         }
3312
3313         /*
3314          * Check for degenerate cases of nothing useful read.
3315          * If so go try again
3316          */
3317         cpos = rbuf;
3318         cend = rbuf + siz;
3319         dp = (struct dirent *)cpos;
3320         cookiep = cookies;
3321         /*
3322          * For some reason FreeBSD's ufs_readdir() chooses to back the
3323          * directory offset up to a block boundary, so it is necessary to
3324          * skip over the records that preceed the requested offset. This
3325          * requires the assumption that file offset cookies monotonically
3326          * increase.
3327          */
3328         while (cpos < cend && ncookies > 0 &&
3329                 (dp->d_ino == 0 || dp->d_type == DT_WHT ||
3330                  ((u_quad_t)(*cookiep)) <= toff)) {
3331                 dp = _DIRENT_NEXT(dp);
3332                 cpos = (char *)dp;
3333                 cookiep++;
3334                 ncookies--;
3335         }
3336         if (cpos >= cend || ncookies == 0) {
3337                 toff = off;
3338                 siz = fullsiz;
3339                 goto again;
3340         }
3341
3342         /*
3343          * Probe one of the directory entries to see if the filesystem
3344          * supports VGET.
3345          */
3346         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp) == EOPNOTSUPP) {
3347                 error = NFSERR_NOTSUPP;
3348                 vrele(vp);
3349                 vp = NULL;
3350                 kfree((caddr_t)cookies, M_TEMP);
3351                 kfree((caddr_t)rbuf, M_TEMP);
3352                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3353                                       NFSX_V3POSTOPATTR, &error));
3354                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3355                 error = 0;
3356                 goto nfsmout;
3357         }
3358         if (nvp) {
3359                 vput(nvp);
3360                 nvp = NULL;
3361         }
3362             
3363         dirlen = len = NFSX_V3POSTOPATTR + NFSX_V3COOKIEVERF +
3364                         2 * NFSX_UNSIGNED;
3365         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, cnt, &error));
3366         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3367         tl = nfsm_build(&info, 2 * NFSX_UNSIGNED);
3368         txdr_hyper(at.va_filerev, tl);
3369         mp1 = mp2 = info.mb;
3370         bp = info.bpos;
3371         be = bp + M_TRAILINGSPACE(mp1);
3372
3373         /* Loop through the records and build reply */
3374         while (cpos < cend && ncookies > 0) {
3375                 if (dp->d_ino != 0 && dp->d_type != DT_WHT) {
3376                         nlen = dp->d_namlen;
3377                         rem = nfsm_rndup(nlen) - nlen;
3378
3379                         /*
3380                          * For readdir_and_lookup get the vnode using
3381                          * the file number.
3382                          */
3383                         if (VFS_VGET(vp->v_mount, vp, dp->d_ino, &nvp))
3384                                 goto invalid;
3385                         bzero((caddr_t)nfhp, NFSX_V3FH);
3386                         nfhp->fh_fsid = fhp->fh_fsid;
3387                         if (VFS_VPTOFH(nvp, &nfhp->fh_fid)) {
3388                                 vput(nvp);
3389                                 nvp = NULL;
3390                                 goto invalid;
3391                         }
3392                         if (VOP_GETATTR(nvp, vap)) {
3393                                 vput(nvp);
3394                                 nvp = NULL;
3395                                 goto invalid;
3396                         }
3397                         vput(nvp);
3398                         nvp = NULL;
3399
3400                         /*
3401                          * If either the dircount or maxcount will be
3402                          * exceeded, get out now. Both of these lengths
3403                          * are calculated conservatively, including all
3404                          * XDR overheads.
3405                          */
3406                         len += (8 * NFSX_UNSIGNED + nlen + rem + NFSX_V3FH +
3407                                 NFSX_V3POSTOPATTR);
3408                         dirlen += (6 * NFSX_UNSIGNED + nlen + rem);
3409                         if (len > cnt || dirlen > fullsiz) {
3410                                 eofflag = 0;
3411                                 break;
3412                         }
3413
3414                         /*
3415                          * Build the directory record xdr from
3416                          * the dirent entry.
3417                          */
3418                         fp = (struct nfs_fattr *)&fl.fl_fattr;
3419                         nfsm_srvfattr(nfsd, vap, fp);
3420                         fl.fl_off.nfsuquad[0] = txdr_unsigned(*cookiep >> 32);
3421                         fl.fl_off.nfsuquad[1] = txdr_unsigned(*cookiep);
3422                         fl.fl_postopok = nfs_true;
3423                         fl.fl_fhok = nfs_true;
3424                         fl.fl_fhsize = txdr_unsigned(NFSX_V3FH);
3425
3426                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3427                         *tl = nfs_true;
3428                         bp += NFSX_UNSIGNED;
3429                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3430                         *tl = txdr_unsigned(dp->d_ino >> 32);
3431                         bp += NFSX_UNSIGNED;
3432                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3433                         *tl = txdr_unsigned(dp->d_ino);
3434                         bp += NFSX_UNSIGNED;
3435                         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3436                         *tl = txdr_unsigned(nlen);
3437                         bp += NFSX_UNSIGNED;
3438
3439                         /* And loop around copying the name */
3440                         xfer = nlen;
3441                         cp = dp->d_name;
3442                         while (xfer > 0) {
3443                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3444                                 if ((bp + xfer) > be)
3445                                         tsiz = be - bp;
3446                                 else
3447                                         tsiz = xfer;
3448                                 bcopy(cp, bp, tsiz);
3449                                 bp += tsiz;
3450                                 xfer -= tsiz;
3451                                 cp += tsiz;
3452                         }
3453                         /* And null pad to a int32_t boundary */
3454                         for (i = 0; i < rem; i++)
3455                                 *bp++ = '\0';
3456         
3457                         /*
3458                          * Now copy the flrep structure out.
3459                          */
3460                         xfer = sizeof (struct flrep);
3461                         cp = (caddr_t)&fl;
3462                         while (xfer > 0) {
3463                                 tl = nfsm_clget(&info, mp1, mp2, bp, be);
3464                                 if ((bp + xfer) > be)
3465                                         tsiz = be - bp;
3466                                 else
3467                                         tsiz = xfer;
3468                                 bcopy(cp, bp, tsiz);
3469                                 bp += tsiz;
3470                                 xfer -= tsiz;
3471                                 cp += tsiz;
3472                         }
3473                 }
3474 invalid:
3475                 dp = _DIRENT_NEXT(dp);
3476                 cpos = (char *)dp;
3477                 cookiep++;
3478                 ncookies--;
3479         }
3480         vrele(vp);
3481         vp = NULL;
3482         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3483         *tl = nfs_false;
3484         bp += NFSX_UNSIGNED;
3485         tl = nfsm_clget(&info, mp1, mp2, bp, be);
3486         if (eofflag)
3487                 *tl = nfs_true;
3488         else
3489                 *tl = nfs_false;
3490         bp += NFSX_UNSIGNED;
3491         if (mp1 != info.mb) {
3492                 if (bp < be)
3493                         mp1->m_len = bp - mtod(mp1, caddr_t);
3494         } else
3495                 mp1->m_len += bp - info.bpos;
3496         kfree((caddr_t)cookies, M_TEMP);
3497         kfree((caddr_t)rbuf, M_TEMP);
3498 nfsmout:
3499         *mrq = info.mreq;
3500         if (vp)
3501                 vrele(vp);
3502         return(error);
3503 }
3504
3505 /*
3506  * nfs commit service
3507  */
3508 int
3509 nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3510              struct thread *td, struct mbuf **mrq)
3511 {
3512         struct sockaddr *nam = nfsd->nd_nam;
3513         struct ucred *cred = &nfsd->nd_cr;
3514         struct vattr bfor, aft;
3515         struct vnode *vp = NULL;
3516         struct mount *mp = NULL;
3517         nfsfh_t nfh;
3518         fhandle_t *fhp;
3519         u_int32_t *tl;
3520         int error = 0, rdonly, for_ret = 1, aft_ret = 1, cnt;
3521         u_quad_t off;
3522         struct nfsm_info info;
3523
3524         info.mrep = nfsd->nd_mrep;
3525         info.mreq = NULL;
3526         info.md = nfsd->nd_md;
3527         info.dpos = nfsd->nd_dpos;
3528
3529         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3530         fhp = &nfh.fh_generic;
3531         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3532         NULLOUT(tl = nfsm_dissect(&info, 3 * NFSX_UNSIGNED));
3533
3534         /*
3535          * XXX At this time VOP_FSYNC() does not accept offset and byte
3536          * count parameters, so these arguments are useless (someday maybe).
3537          */
3538         off = fxdr_hyper(tl);
3539         tl += 2;
3540         cnt = fxdr_unsigned(int, *tl);
3541         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3542                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3543         if (error) {
3544                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3545                                       2 * NFSX_UNSIGNED, &error));
3546                 nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3547                                  aft_ret, &aft);
3548                 error = 0;
3549                 goto nfsmout;
3550         }
3551         for_ret = VOP_GETATTR(vp, &bfor);
3552
3553         if (cnt > MAX_COMMIT_COUNT) {
3554                 /*
3555                  * Give up and do the whole thing
3556                  */
3557                 if (vp->v_object &&
3558                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3559                         vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
3560                 }
3561                 error = VOP_FSYNC(vp, MNT_WAIT, 0);
3562         } else {
3563                 /*
3564                  * Locate and synchronously write any buffers that fall
3565                  * into the requested range.  Note:  we are assuming that
3566                  * f_iosize is a power of 2.
3567                  */
3568                 int iosize = vp->v_mount->mnt_stat.f_iosize;
3569                 int iomask = iosize - 1;
3570                 off_t loffset;
3571
3572                 /*
3573                  * Align to iosize boundry, super-align to page boundry.
3574                  */
3575                 if (off & iomask) {
3576                         cnt += off & iomask;
3577                         off &= ~(u_quad_t)iomask;
3578                 }
3579                 if (off & PAGE_MASK) {
3580                         cnt += off & PAGE_MASK;
3581                         off &= ~(u_quad_t)PAGE_MASK;
3582                 }
3583                 loffset = off;
3584
3585                 if (vp->v_object &&
3586                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
3587                         vm_object_page_clean(vp->v_object, off / PAGE_SIZE,
3588                             (cnt + PAGE_MASK) / PAGE_SIZE, OBJPC_SYNC);
3589                 }
3590
3591                 crit_enter();
3592                 while (cnt > 0) {
3593                         struct buf *bp;
3594
3595                         /*
3596                          * If we have a buffer and it is marked B_DELWRI we
3597                          * have to lock and write it.  Otherwise the prior
3598                          * write is assumed to have already been committed.
3599                          *
3600                          * WARNING: FINDBLK_TEST buffers represent stable
3601                          *          storage but not necessarily stable
3602                          *          content.  It is ok in this case.
3603                          */
3604                         if ((bp = findblk(vp, loffset, FINDBLK_TEST)) != NULL) {
3605                                 if (bp->b_flags & B_DELWRI)
3606                                         bp = findblk(vp, loffset, 0);
3607                                 else
3608                                         bp = NULL;
3609                         }
3610                         if (bp) {
3611                                 if (bp->b_flags & B_DELWRI) {
3612                                         bremfree(bp);
3613                                         bwrite(bp);
3614                                         ++nfs_commit_miss;
3615                                 } else {
3616                                         BUF_UNLOCK(bp);
3617                                 }
3618                         }
3619                         ++nfs_commit_blks;
3620                         if (cnt < iosize)
3621                                 break;
3622                         cnt -= iosize;
3623                         loffset += iosize;
3624                 }
3625                 crit_exit();
3626         }
3627
3628         aft_ret = VOP_GETATTR(vp, &aft);
3629         vput(vp);
3630         vp = NULL;
3631         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3632                               NFSX_V3WCCDATA + NFSX_V3WRITEVERF,
3633                               &error));
3634         nfsm_srvwcc_data(&info, nfsd, for_ret, &bfor,
3635                          aft_ret, &aft);
3636         if (!error) {
3637                 tl = nfsm_build(&info, NFSX_V3WRITEVERF);
3638                 if (nfsver.tv_sec == 0)
3639                         nfsver = boottime;
3640                 *tl++ = txdr_unsigned(nfsver.tv_sec);
3641                 *tl = txdr_unsigned(nfsver.tv_nsec / 1000);
3642         } else {
3643                 error = 0;
3644         }
3645 nfsmout:
3646         *mrq = info.mreq;
3647         if (vp)
3648                 vput(vp);
3649         return(error);
3650 }
3651
3652 /*
3653  * nfs statfs service
3654  */
3655 int
3656 nfsrv_statfs(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3657              struct thread *td, struct mbuf **mrq)
3658 {
3659         struct sockaddr *nam = nfsd->nd_nam;
3660         struct ucred *cred = &nfsd->nd_cr;
3661         struct statfs *sf;
3662         struct nfs_statfs *sfp;
3663         int error = 0, rdonly, getret = 1;
3664         struct vnode *vp = NULL;
3665         struct mount *mp = NULL;
3666         struct vattr at;
3667         nfsfh_t nfh;
3668         fhandle_t *fhp;
3669         struct statfs statfs;
3670         u_quad_t tval;
3671         struct nfsm_info info;
3672
3673         info.mrep = nfsd->nd_mrep;
3674         info.mreq = NULL;
3675         info.md = nfsd->nd_md;
3676         info.dpos = nfsd->nd_dpos;
3677         info.v3 = (nfsd->nd_flag & ND_NFSV3);
3678
3679         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3680         fhp = &nfh.fh_generic;
3681         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3682         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3683                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3684         if (error) {
3685                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3686                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3687                 error = 0;
3688                 goto nfsmout;
3689         }
3690         sf = &statfs;
3691         error = VFS_STATFS(vp->v_mount, sf, proc0.p_ucred);
3692         getret = VOP_GETATTR(vp, &at);
3693         vput(vp);
3694         vp = NULL;
3695         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3696                               NFSX_POSTOPATTR(info.v3) + NFSX_STATFS(info.v3),
3697                               &error));
3698         if (info.v3)
3699                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3700         if (error) {
3701                 error = 0;
3702                 goto nfsmout;
3703         }
3704         sfp = nfsm_build(&info, NFSX_STATFS(info.v3));
3705         if (info.v3) {
3706                 tval = (u_quad_t)sf->f_blocks;
3707                 tval *= (u_quad_t)sf->f_bsize;
3708                 txdr_hyper(tval, &sfp->sf_tbytes);
3709                 tval = (u_quad_t)sf->f_bfree;
3710                 tval *= (u_quad_t)sf->f_bsize;
3711                 txdr_hyper(tval, &sfp->sf_fbytes);
3712                 tval = (u_quad_t)sf->f_bavail;
3713                 tval *= (u_quad_t)sf->f_bsize;
3714                 txdr_hyper(tval, &sfp->sf_abytes);
3715                 sfp->sf_tfiles.nfsuquad[0] = 0;
3716                 sfp->sf_tfiles.nfsuquad[1] = txdr_unsigned(sf->f_files);
3717                 sfp->sf_ffiles.nfsuquad[0] = 0;
3718                 sfp->sf_ffiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3719                 sfp->sf_afiles.nfsuquad[0] = 0;
3720                 sfp->sf_afiles.nfsuquad[1] = txdr_unsigned(sf->f_ffree);
3721                 sfp->sf_invarsec = 0;
3722         } else {
3723                 sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
3724                 sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
3725                 sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
3726                 sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
3727                 sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
3728         }
3729 nfsmout:
3730         *mrq = info.mreq;
3731         if (vp)
3732                 vput(vp);
3733         return(error);
3734 }
3735
3736 /*
3737  * nfs fsinfo service
3738  */
3739 int
3740 nfsrv_fsinfo(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3741              struct thread *td, struct mbuf **mrq)
3742 {
3743         struct sockaddr *nam = nfsd->nd_nam;
3744         struct ucred *cred = &nfsd->nd_cr;
3745         struct nfsv3_fsinfo *sip;
3746         int error = 0, rdonly, getret = 1, pref;
3747         struct vnode *vp = NULL;
3748         struct mount *mp = NULL;
3749         struct vattr at;
3750         nfsfh_t nfh;
3751         fhandle_t *fhp;
3752         u_quad_t maxfsize;
3753         struct statfs sb;
3754         struct nfsm_info info;
3755
3756         info.mrep = nfsd->nd_mrep;
3757         info.mreq = NULL;
3758         info.md = nfsd->nd_md;
3759         info.dpos = nfsd->nd_dpos;
3760
3761         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3762         fhp = &nfh.fh_generic;
3763         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3764         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3765                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3766         if (error) {
3767                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3768                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3769                 error = 0;
3770                 goto nfsmout;
3771         }
3772
3773         /* XXX Try to make a guess on the max file size. */
3774         VFS_STATFS(vp->v_mount, &sb, proc0.p_ucred);
3775         maxfsize = (u_quad_t)0x80000000 * sb.f_bsize - 1;
3776
3777         getret = VOP_GETATTR(vp, &at);
3778         vput(vp);
3779         vp = NULL;
3780         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3781                               NFSX_V3POSTOPATTR + NFSX_V3FSINFO, &error));
3782         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3783         sip = nfsm_build(&info, NFSX_V3FSINFO);
3784
3785         /*
3786          * XXX
3787          * There should be file system VFS OP(s) to get this information.
3788          * For now, assume ufs.
3789          */
3790         if (slp->ns_so->so_type == SOCK_DGRAM)
3791                 pref = NFS_MAXDGRAMDATA;
3792         else
3793                 pref = NFS_MAXDATA;
3794         sip->fs_rtmax = txdr_unsigned(NFS_MAXDATA);
3795         sip->fs_rtpref = txdr_unsigned(pref);
3796         sip->fs_rtmult = txdr_unsigned(NFS_FABLKSIZE);
3797         sip->fs_wtmax = txdr_unsigned(NFS_MAXDATA);
3798         sip->fs_wtpref = txdr_unsigned(pref);
3799         sip->fs_wtmult = txdr_unsigned(NFS_FABLKSIZE);
3800         sip->fs_dtpref = txdr_unsigned(pref);
3801         txdr_hyper(maxfsize, &sip->fs_maxfilesize);
3802         sip->fs_timedelta.nfsv3_sec = 0;
3803         sip->fs_timedelta.nfsv3_nsec = txdr_unsigned(1);
3804         sip->fs_properties = txdr_unsigned(NFSV3FSINFO_LINK |
3805                 NFSV3FSINFO_SYMLINK | NFSV3FSINFO_HOMOGENEOUS |
3806                 NFSV3FSINFO_CANSETTIME);
3807 nfsmout:
3808         *mrq = info.mreq;
3809         if (vp)
3810                 vput(vp);
3811         return(error);
3812 }
3813
3814 /*
3815  * nfs pathconf service
3816  */
3817 int
3818 nfsrv_pathconf(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3819                struct thread *td, struct mbuf **mrq)
3820 {
3821         struct sockaddr *nam = nfsd->nd_nam;
3822         struct ucred *cred = &nfsd->nd_cr;
3823         struct nfsv3_pathconf *pc;
3824         int error = 0, rdonly, getret = 1;
3825         register_t linkmax, namemax, chownres, notrunc;
3826         struct vnode *vp = NULL;
3827         struct mount *mp = NULL;
3828         struct vattr at;
3829         nfsfh_t nfh;
3830         fhandle_t *fhp;
3831         struct nfsm_info info;
3832
3833         info.mrep = nfsd->nd_mrep;
3834         info.mreq = NULL;
3835         info.md = nfsd->nd_md;
3836         info.dpos = nfsd->nd_dpos;
3837
3838         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3839         fhp = &nfh.fh_generic;
3840         NEGREPLYOUT(nfsm_srvmtofh(&info, nfsd, fhp, &error));
3841         error = nfsrv_fhtovp(fhp, 1, &mp, &vp, cred, slp, nam,
3842                  &rdonly, (nfsd->nd_flag & ND_KERBAUTH), TRUE);
3843         if (error) {
3844                 NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, NFSX_UNSIGNED, &error));
3845                 nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3846                 error = 0;
3847                 goto nfsmout;
3848         }
3849         error = VOP_PATHCONF(vp, _PC_LINK_MAX, &linkmax);
3850         if (!error)
3851                 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &namemax);
3852         if (!error)
3853                 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &chownres);
3854         if (!error)
3855                 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &notrunc);
3856         getret = VOP_GETATTR(vp, &at);
3857         vput(vp);
3858         vp = NULL;
3859         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp,
3860                               NFSX_V3POSTOPATTR + NFSX_V3PATHCONF,
3861                               &error));
3862         nfsm_srvpostop_attr(&info, nfsd, getret, &at);
3863         if (error) {
3864                 error = 0;
3865                 goto nfsmout;
3866         }
3867         pc = nfsm_build(&info, NFSX_V3PATHCONF);
3868
3869         pc->pc_linkmax = txdr_unsigned(linkmax);
3870         pc->pc_namemax = txdr_unsigned(namemax);
3871         pc->pc_notrunc = txdr_unsigned(notrunc);
3872         pc->pc_chownrestricted = txdr_unsigned(chownres);
3873
3874         /*
3875          * These should probably be supported by VOP_PATHCONF(), but
3876          * until msdosfs is exportable (why would you want to?), the
3877          * Unix defaults should be ok.
3878          */
3879         pc->pc_caseinsensitive = nfs_false;
3880         pc->pc_casepreserving = nfs_true;
3881 nfsmout:
3882         *mrq = info.mreq;
3883         if (vp) 
3884                 vput(vp);
3885         return(error);
3886 }
3887
3888 /*
3889  * Null operation, used by clients to ping server
3890  */
3891 /* ARGSUSED */
3892 int
3893 nfsrv_null(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3894            struct thread *td, struct mbuf **mrq)
3895 {
3896         struct nfsm_info info;
3897         int error = NFSERR_RETVOID;
3898
3899         info.mrep = nfsd->nd_mrep;
3900         info.mreq = NULL;
3901
3902         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3903         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3904 nfsmout:
3905         *mrq = info.mreq;
3906         return (error);
3907 }
3908
3909 /*
3910  * No operation, used for obsolete procedures
3911  */
3912 /* ARGSUSED */
3913 int
3914 nfsrv_noop(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
3915            struct thread *td, struct mbuf **mrq)
3916 {
3917         struct nfsm_info info;
3918         int error;
3919
3920         info.mrep = nfsd->nd_mrep;
3921         info.mreq = NULL;
3922
3923         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3924         if (nfsd->nd_repstat)
3925                 error = nfsd->nd_repstat;
3926         else
3927                 error = EPROCUNAVAIL;
3928         NEGKEEPOUT(nfsm_reply(&info, nfsd, slp, 0, &error));
3929         error = 0;
3930 nfsmout:
3931         *mrq = info.mreq;
3932         return (error);
3933 }
3934
3935 /*
3936  * Perform access checking for vnodes obtained from file handles that would
3937  * refer to files already opened by a Unix client. You cannot just use
3938  * vn_writechk() and VOP_ACCESS() for two reasons.
3939  * 1 - You must check for exported rdonly as well as MNT_RDONLY for the write case
3940  * 2 - The owner is to be given access irrespective of mode bits for some
3941  *     operations, so that processes that chmod after opening a file don't
3942  *     break. I don't like this because it opens a security hole, but since
3943  *     the nfs server opens a security hole the size of a barn door anyhow,
3944  *     what the heck.
3945  *
3946  * The exception to rule 2 is EPERM. If a file is IMMUTABLE, VOP_ACCESS()
3947  * will return EPERM instead of EACCESS. EPERM is always an error.
3948  */
3949 static int
3950 nfsrv_access(struct mount *mp, struct vnode *vp, int flags, struct ucred *cred,
3951              int rdonly, struct thread *td, int override)
3952 {
3953         struct vattr vattr;
3954         int error;
3955
3956         nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
3957         if (flags & VWRITE) {
3958                 /* Just vn_writechk() changed to check rdonly */
3959                 /*
3960                  * Disallow write attempts on read-only file systems;
3961                  * unless the file is a socket or a block or character
3962                  * device resident on the file system.
3963                  */
3964                 if (rdonly || 
3965                     ((mp->mnt_flag | vp->v_mount->mnt_flag) & MNT_RDONLY)) {
3966                         switch (vp->v_type) {
3967                         case VREG:
3968                         case VDIR:
3969                         case VLNK:
3970                                 return (EROFS);
3971                         default:
3972                                 break;
3973                         }
3974                 }
3975                 /*
3976                  * If there's shared text associated with
3977                  * the inode, we can't allow writing.
3978                  */
3979                 if (vp->v_flag & VTEXT)
3980                         return (ETXTBSY);
3981         }
3982         error = VOP_GETATTR(vp, &vattr);
3983         if (error)
3984                 return (error);
3985         error = VOP_ACCESS(vp, flags, cred);    /* XXX ruid/rgid vs uid/gid */
3986         /*
3987          * Allow certain operations for the owner (reads and writes
3988          * on files that are already open).
3989          */
3990         if (override && error == EACCES && cred->cr_uid == vattr.va_uid)
3991                 error = 0;
3992         return error;
3993 }
3994 #endif /* NFS_NOSERVER */
3995